cleanup and refactoring

This commit is contained in:
CDaut 2024-05-25 11:53:25 +02:00
parent 2302158928
commit 76f6bf62a4
Signed by: clara
GPG key ID: 223391B52FAD4463
1285 changed files with 757994 additions and 8 deletions

File diff suppressed because it is too large.

@@ -0,0 +1,155 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "appwindowprofiler_vk.hpp"
#include "context_vk.hpp"
#include "error_vk.hpp"
#include <nvh/misc.hpp>
#include <nvh/nvprint.hpp>
#define GLFW_INCLUDE_NONE
#include <GLFW/glfw3.h>
#ifdef _WIN32
#define GLFW_EXPOSE_NATIVE_WIN32
#include <GLFW/glfw3native.h>
#include <vulkan/vulkan_win32.h>
#else
#define GLFW_EXPOSE_NATIVE_X11
#include <GLFW/glfw3native.h>
#include <xcb/xcb.h>
// Prevent clang format from "organizing" the includes.
#include <vulkan/vulkan_xcb.h>
#endif
namespace nvvk {
void AppWindowProfilerVK::contextInit()
{
//m_contextWindow.init(&m_deviceInfo, this);
ContextCreateInfo contextInfo = m_contextInfo;
m_swapVsync = false;
contextInfo.addInstanceExtension(VK_KHR_SURFACE_EXTENSION_NAME, false);
#ifdef _WIN32
contextInfo.addInstanceExtension(VK_KHR_WIN32_SURFACE_EXTENSION_NAME, false);
#else
contextInfo.addInstanceExtension(VK_KHR_XCB_SURFACE_EXTENSION_NAME, false);
#endif
contextInfo.addDeviceExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, false);
if(!m_context.init(contextInfo))
{
LOGE("FATAL ERROR: failed to create Vulkan context\n");
exit(-1);
return;
}
// Construct the surface description:
VkResult result;
#ifdef _WIN32
HWND hWnd = glfwGetWin32Window(m_internal);
HINSTANCE hInstance = GetModuleHandle(NULL);
VkWin32SurfaceCreateInfoKHR createInfo = {};
createInfo.sType = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR;
createInfo.pNext = NULL;
createInfo.hinstance = hInstance;
createInfo.hwnd = hWnd;
result = vkCreateWin32SurfaceKHR(m_context.m_instance, &createInfo, nullptr, &m_surface);
#else // _WIN32
result = glfwCreateWindowSurface(m_context.m_instance, m_internal, NULL, &m_surface);
#endif // _WIN32
assert(result == VK_SUCCESS);
m_context.setGCTQueueWithPresent(m_surface);
m_swapChain.init(m_context.m_device, m_context.m_physicalDevice, m_context.m_queueGCT, m_context.m_queueGCT.familyIndex, m_surface);
m_swapChain.update(getWidth(), getHeight(), m_swapVsync);
m_windowState.m_swapSize[0] = m_swapChain.getWidth();
m_windowState.m_swapSize[1] = m_swapChain.getHeight();
m_profilerVK.init(m_context.m_device, m_context.m_physicalDevice);
m_profilerVK.setLabelUsage(m_context.hasInstanceExtension(VK_EXT_DEBUG_UTILS_EXTENSION_NAME));
}
void AppWindowProfilerVK::contextDeinit()
{
VkResult result = vkDeviceWaitIdle(m_context.m_device);
if(nvvk::checkResult(result, __FILE__, __LINE__))
{
exit(-1);
}
m_profilerVK.deinit();
m_swapChain.deinit();
vkDestroySurfaceKHR(m_context.m_instance, m_surface, nullptr);
m_context.deinit();
}
void AppWindowProfilerVK::contextSync()
{
VkResult result = vkDeviceWaitIdle(m_context.m_device);
if(nvvk::checkResult(result, __FILE__, __LINE__))
{
exit(-1);
}
}
void AppWindowProfilerVK::swapResize(int width, int height)
{
if((m_swapChain.getUpdateWidth() != width) || (m_swapChain.getUpdateHeight() != height))
{
m_swapChain.update(width, height, m_swapVsync);
m_windowState.m_swapSize[0] = m_swapChain.getWidth();
m_windowState.m_swapSize[1] = m_swapChain.getHeight();
}
}
void AppWindowProfilerVK::swapPrepare()
{
if(!m_swapChain.acquire())
{
LOGE("error: vulkan swapchain acqiure failed, try -vsync 1\n");
exit(-1);
}
}
void AppWindowProfilerVK::swapBuffers()
{
m_swapChain.present(m_context.m_queueGCT);
}
void AppWindowProfilerVK::swapVsync(bool swapVsync)
{
if(m_swapVsync != swapVsync)
{
m_swapChain.update(getWidth(), getHeight(), swapVsync);
m_swapVsync = swapVsync;
}
}
const char* AppWindowProfilerVK::contextGetDeviceName()
{
return m_context.m_physicalInfo.properties10.deviceName;
}
} // namespace nvvk

@@ -0,0 +1,87 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef NV_WINDOWPROFILER_GL_INCLUDED
#define NV_WINDOWPROFILER_GL_INCLUDED
#include <nvh/appwindowprofiler.hpp>
#include <nvvk/context_vk.hpp>
#include <nvvk/profiler_vk.hpp>
#include <nvvk/swapchain_vk.hpp>
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::AppWindowProfilerVK
nvvk::AppWindowProfilerVK derives from nvh::AppWindowProfiler
and overrides the context and swapbuffer functions.
The nvh class itself provides several utilities and
command line options to run automated benchmarks etc.
To influence the Vulkan instance/device creation, modify
`m_contextInfo` prior to running AppWindowProfiler::run,
which triggers instance, device, window, and swapchain creation.
The class comes with a nvvk::ProfilerVK instance that references the
AppWindowProfiler::m_profiler's data.
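A minimal usage sketch (the derived class name `Sample`, the overridden hooks and the extra extension are illustrative, not part of this header):
```cpp
class Sample : public nvvk::AppWindowProfilerVK
{
  // override the nvh::AppWindowProfiler virtual hooks (rendering, resize handling, ...) here
};

int main(int argc, const char** argv)
{
  Sample sample;
  // request additional, optional device extensions before run() creates instance/device/swapchain
  sample.m_contextInfo.addDeviceExtension(VK_KHR_RAY_QUERY_EXTENSION_NAME, true);
  return sample.run("Sample", argc, argv, 1280, 720);
}
```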
@DOC_END */
#define NV_PROFILE_VK_SECTION(name, cmd) const nvvk::ProfilerVK::Section _tempTimer(m_profilerVK, name, cmd)
#define NV_PROFILE_VK_SPLIT() m_profilerVK.accumulationSplit()
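// Example usage of the section macro inside a derived class's rendering code (a sketch;
// "cmd" stands for the VkCommandBuffer currently being recorded, the label is arbitrary):
//
//   {
//     NV_PROFILE_VK_SECTION("scene", cmd);
//     // ... record the Vulkan commands to be timed ...
//   }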
class AppWindowProfilerVK : public nvh::AppWindowProfiler
{
public:
AppWindowProfilerVK(bool singleThreaded = true)
: nvh::AppWindowProfiler(singleThreaded)
, m_profilerVK(&m_profiler)
{
}
bool m_swapVsync = false;
ContextCreateInfo m_contextInfo{};
Context m_context{};
SwapChain m_swapChain{};
VkSurfaceKHR m_surface{};
ProfilerVK m_profilerVK{};
int run(const std::string& name, int argc, const char** argv, int width, int height)
{
return AppWindowProfiler::run(name, argc, argv, width, height, false);
}
virtual void contextInit() override;
virtual void contextDeinit() override;
virtual void contextSync() override;
virtual const char* contextGetDeviceName() override;
virtual void swapResize(int width, int height) override;
virtual void swapPrepare() override;
virtual void swapBuffers() override;
virtual void swapVsync(bool state) override;
};
} // namespace nvvk
#endif

@@ -0,0 +1,132 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <platform.h>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
The utilities in this file provide a more direct approach; we encourage using the
higher-level mechanisms also provided in the allocator / memory management classes.
# functions in nvvk
- makeBufferCreateInfo : wraps setup of VkBufferCreateInfo (implicitly sets VK_BUFFER_USAGE_TRANSFER_DST_BIT)
- makeBufferViewCreateInfo : wraps setup of VkBufferViewCreateInfo
- createBuffer : wraps vkCreateBuffer
- createBufferView : wraps vkCreateBufferView
- getBufferDeviceAddressKHR : wraps vkGetBufferDeviceAddressKHR
- getBufferDeviceAddress : wraps vkGetBufferDeviceAddress
```cpp
VkBufferCreateInfo bufferCreate = makeBufferCreateInfo (size, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT);
VkBuffer buffer = createBuffer(device, bufferCreate);
VkBufferView bufferView = createBufferView(device, makeBufferViewCreateInfo(buffer, VK_FORMAT_R8G8B8A8_UNORM, size));
```
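A buffer created with the device-address usage bit can additionally be queried for its VkDeviceAddress (a sketch; it assumes the `bufferDeviceAddress` feature is enabled and that the memory bound to the buffer was allocated with VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT):
```cpp
VkBufferCreateInfo daCreate = makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
VkBuffer           daBuffer = createBuffer(device, daCreate);
// ... allocate and bind suitable device memory here ...
VkDeviceAddress    address  = getBufferDeviceAddress(device, daBuffer);
```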
@DOC_END */
// implicitly sets VK_BUFFER_USAGE_TRANSFER_DST_BIT
inline VkBufferCreateInfo makeBufferCreateInfo(VkDeviceSize size, VkBufferUsageFlags usage, VkBufferCreateFlags flags = 0)
{
VkBufferCreateInfo createInfo = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
createInfo.size = size;
createInfo.usage = usage | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
createInfo.flags = flags;
return createInfo;
}
inline VkBufferViewCreateInfo makeBufferViewCreateInfo(VkBuffer buffer,
VkFormat format,
VkDeviceSize range,
VkDeviceSize offset = 0,
VkBufferViewCreateFlags flags = 0)
{
VkBufferViewCreateInfo createInfo = {VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO};
createInfo.buffer = buffer;
createInfo.offset = offset;
createInfo.range = range;
createInfo.flags = flags;
createInfo.format = format;
return createInfo;
}
inline VkBufferViewCreateInfo makeBufferViewCreateInfo(const VkDescriptorBufferInfo& descrInfo,
VkFormat fmt,
VkBufferViewCreateFlags flags = 0)
{
VkBufferViewCreateInfo createInfo = {VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO};
createInfo.buffer = descrInfo.buffer;
createInfo.offset = descrInfo.offset;
createInfo.range = descrInfo.range;
createInfo.flags = flags;
createInfo.format = fmt;
return createInfo;
}
inline VkDeviceAddress getBufferDeviceAddressKHR(VkDevice device, VkBuffer buffer)
{
if(buffer == VK_NULL_HANDLE)
return 0ULL;
VkBufferDeviceAddressInfo info = {VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR};
info.buffer = buffer;
return vkGetBufferDeviceAddressKHR(device, &info);
}
inline VkDeviceAddress getBufferDeviceAddress(VkDevice device, VkBuffer buffer)
{
if(buffer == VK_NULL_HANDLE)
return 0ULL;
VkBufferDeviceAddressInfo info = {VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO};
info.buffer = buffer;
return vkGetBufferDeviceAddress(device, &info);
}
//////////////////////////////////////////////////////////////////////////
// these use pass by value so one can easily chain createBuffer(device, makeBufferCreateInfo(...));
inline VkBuffer createBuffer(VkDevice device, VkBufferCreateInfo info)
{
VkBuffer buffer;
VkResult result = vkCreateBuffer(device, &info, nullptr, &buffer);
assert(result == VK_SUCCESS);
return buffer;
}
inline VkBufferView createBufferView(VkDevice device, VkBufferViewCreateInfo info)
{
VkBufferView bufferView;
VkResult result = vkCreateBufferView(device, &info, nullptr, &bufferView);
assert(result == VK_SUCCESS);
return bufferView;
}
} // namespace nvvk

@@ -0,0 +1,357 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include <assert.h>
#include "buffersuballocator_vk.hpp"
#include "debug_util_vk.hpp"
#include "error_vk.hpp"
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
void BufferSubAllocator::init(MemAllocator* memAllocator,
VkDeviceSize blockSize,
VkBufferUsageFlags bufferUsageFlags,
VkMemoryPropertyFlags memPropFlags,
bool mapped,
const std::vector<uint32_t>& sharingQueueFamilyIndices)
{
assert(!m_device);
m_memAllocator = memAllocator;
m_device = memAllocator->getDevice();
m_blockSize = std::min(blockSize, ((uint64_t(1) << Handle::BLOCKBITS) - 1) * uint64_t(BASE_ALIGNMENT));
m_bufferUsageFlags = bufferUsageFlags;
m_memoryPropFlags = memPropFlags;
m_memoryTypeIndex = ~0;
m_keepLastBlock = true;
m_mapped = mapped;
m_sharingQueueFamilyIndices = sharingQueueFamilyIndices;
m_freeBlockIndex = INVALID_ID_INDEX;
m_usedSize = 0;
m_allocatedSize = 0;
}
void BufferSubAllocator::deinit()
{
if(!m_memAllocator)
return;
free(false);
m_blocks.clear();
m_memAllocator = nullptr;
}
BufferSubAllocator::Handle BufferSubAllocator::subAllocate(VkDeviceSize size, uint32_t align)
{
uint32_t usedOffset;
uint32_t usedSize;
uint32_t usedAligned;
uint32_t blockIndex = INVALID_ID_INDEX;
// if size either doesn't fit in the bits within the handle
// or we are bigger than the default block size, we use a full dedicated block
// for this allocation
bool isDedicated = Handle::needsDedicated(size, align) || size > m_blockSize;
if(!isDedicated)
{
// Find the first non-dedicated block that can fit the allocation
for(uint32_t i = 0; i < (uint32_t)m_blocks.size(); i++)
{
Block& block = m_blocks[i];
if(!block.isDedicated && block.buffer && block.range.subAllocate((uint32_t)size, align, usedOffset, usedAligned, usedSize))
{
blockIndex = block.index;
break;
}
}
}
if(blockIndex == INVALID_ID_INDEX)
{
if(m_freeBlockIndex != INVALID_ID_INDEX)
{
Block& block = m_blocks[m_freeBlockIndex];
m_freeBlockIndex = setIndexValue(block.index, m_freeBlockIndex);
blockIndex = block.index;
}
else
{
uint32_t newIndex = (uint32_t)m_blocks.size();
m_blocks.resize(m_blocks.size() + 1);
Block& block = m_blocks[newIndex];
block.index = newIndex;
blockIndex = newIndex;
}
Block& block = m_blocks[blockIndex];
block.size = std::max(m_blockSize, size);
if(!isDedicated)
{
// only adjust size if not dedicated.
// warning: this narrows the size from 64 bit to 32 bit, which should be fine given
// that such big allocations will trigger the dedicated path
block.size = block.range.alignedSize((uint32_t)block.size);
}
VkResult result = allocBlock(block, blockIndex, block.size);
NVVK_CHECK(result);
if(result != VK_SUCCESS)
{
freeBlock(block);
return Handle();
}
block.isDedicated = isDedicated;
if(!isDedicated)
{
// Dedicated blocks don't allow for subranges, so don't initialize the range allocator
block.range.init((uint32_t)block.size);
block.range.subAllocate((uint32_t)size, align, usedOffset, usedAligned, usedSize);
m_regularBlocks++;
}
}
Handle sub;
if(!sub.setup(blockIndex, isDedicated ? 0 : usedOffset, isDedicated ? size : uint64_t(usedSize), isDedicated))
{
return Handle();
}
// append used space for stats
m_usedSize += sub.getSize();
return sub;
}
void BufferSubAllocator::subFree(Handle sub)
{
if(!sub)
return;
Block& block = getBlock(sub.blockIndex);
bool isDedicated = sub.isDedicated();
if(!isDedicated)
{
block.range.subFree(uint32_t(sub.getOffset()), uint32_t(sub.getSize()));
}
m_usedSize -= sub.getSize();
if(isDedicated || (block.range.isEmpty() && (!m_keepLastBlock || m_regularBlocks > 1)))
{
if(!isDedicated)
{
m_regularBlocks--;
}
freeBlock(block);
}
}
float BufferSubAllocator::getUtilization(VkDeviceSize& allocatedSize, VkDeviceSize& usedSize) const
{
allocatedSize = m_allocatedSize;
usedSize = m_usedSize;
return float(double(usedSize) / double(allocatedSize));
}
bool BufferSubAllocator::fitsInAllocated(VkDeviceSize size, uint32_t alignment) const
{
if(Handle::needsDedicated(size, alignment))
{
return false;
}
for(const auto& block : m_blocks)
{
if(block.buffer && !block.isDedicated)
{
if(block.range.isAvailable((uint32_t)size, (uint32_t)alignment))
{
return true;
}
}
}
return false;
}
void BufferSubAllocator::free(bool onlyEmpty)
{
for(uint32_t i = 0; i < (uint32_t)m_blocks.size(); i++)
{
Block& block = m_blocks[i];
if(block.buffer && (!onlyEmpty || (!block.isDedicated && block.range.isEmpty())))
{
freeBlock(block);
}
}
if(!onlyEmpty)
{
m_blocks.clear();
m_freeBlockIndex = INVALID_ID_INDEX;
}
}
void BufferSubAllocator::freeBlock(Block& block)
{
m_allocatedSize -= block.size;
vkDestroyBuffer(m_device, block.buffer, nullptr);
if(block.mapping)
{
m_memAllocator->unmap(block.memory);
}
m_memAllocator->freeMemory(block.memory);
if(!block.isDedicated)
{
block.range.deinit();
}
block.memory = NullMemHandle;
block.buffer = VK_NULL_HANDLE;
block.mapping = nullptr;
block.isDedicated = false;
// update the block.index with the current head of the free list
// pop its old value
m_freeBlockIndex = setIndexValue(block.index, m_freeBlockIndex);
}
VkResult BufferSubAllocator::allocBlock(Block& block, uint32_t index, VkDeviceSize size)
{
std::string debugName = m_debugName + ":block:" + std::to_string(index);
VkResult result;
VkBufferCreateInfo createInfo = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
createInfo.size = size;
createInfo.usage = m_bufferUsageFlags;
createInfo.sharingMode = m_sharingQueueFamilyIndices.size() > 1 ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE;
createInfo.pQueueFamilyIndices = m_sharingQueueFamilyIndices.data();
createInfo.queueFamilyIndexCount = static_cast<uint32_t>(m_sharingQueueFamilyIndices.size());
VkBuffer buffer = VK_NULL_HANDLE;
result = vkCreateBuffer(m_device, &createInfo, nullptr, &buffer);
if(result != VK_SUCCESS)
{
NVVK_CHECK(result);
return result;
}
nvvk::DebugUtil(m_device).setObjectName(buffer, debugName);
VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
VkBufferMemoryRequirementsInfo2 bufferReqs = {VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2};
bufferReqs.buffer = buffer;
vkGetBufferMemoryRequirements2(m_device, &bufferReqs, &memReqs);
if(m_memoryTypeIndex == ~0)
{
VkPhysicalDeviceMemoryProperties memoryProperties;
vkGetPhysicalDeviceMemoryProperties(m_memAllocator->getPhysicalDevice(), &memoryProperties);
VkMemoryPropertyFlags memProps = m_memoryPropFlags;
// Find an available memory type that satisfies the requested properties.
for(uint32_t memoryTypeIndex = 0; memoryTypeIndex < memoryProperties.memoryTypeCount; ++memoryTypeIndex)
{
if((memReqs.memoryRequirements.memoryTypeBits & (1 << memoryTypeIndex))
&& (memoryProperties.memoryTypes[memoryTypeIndex].propertyFlags & memProps) == memProps)
{
m_memoryTypeIndex = memoryTypeIndex;
break;
}
}
}
if(m_memoryTypeIndex == ~0)
{
assert(0 && "could not find memoryTypeIndex\n");
vkDestroyBuffer(m_device, buffer, nullptr);
return VK_ERROR_INCOMPATIBLE_DRIVER;
}
MemAllocateInfo memAllocateInfo(memReqs.memoryRequirements, m_memoryPropFlags, false);
memAllocateInfo.setDebugName(debugName);
MemHandle memory = m_memAllocator->allocMemory(memAllocateInfo, &result);
if(result != VK_SUCCESS)
{
assert(0 && "could not allocate buffer\n");
vkDestroyBuffer(m_device, buffer, nullptr);
return result;
}
MemAllocator::MemInfo memInfo = m_memAllocator->getMemoryInfo(memory);
VkBindBufferMemoryInfo bindInfos = {VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO};
bindInfos.buffer = buffer;
bindInfos.memory = memInfo.memory;
bindInfos.memoryOffset = memInfo.offset;
result = vkBindBufferMemory2(m_device, 1, &bindInfos);
if(result == VK_SUCCESS)
{
if(m_mapped)
{
block.mapping = m_memAllocator->mapT<uint8_t>(memory);
}
else
{
block.mapping = nullptr;
}
if(!m_mapped || block.mapping)
{
if(m_bufferUsageFlags & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT)
{
VkBufferDeviceAddressInfo info = {VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO};
info.buffer = buffer;
block.address = vkGetBufferDeviceAddress(m_device, &info);
}
block.memory = memory;
block.buffer = buffer;
m_allocatedSize += block.size;
return result;
}
}
// error case
NVVK_CHECK(result);
vkDestroyBuffer(m_device, buffer, nullptr);
m_memAllocator->freeMemory(memory);
return result;
}
} // namespace nvvk

@@ -0,0 +1,281 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <platform.h>
#include <vector>
#include <string>
#include <vulkan/vulkan_core.h>
#include <nvh/trangeallocator.hpp>
#include "memallocator_vk.hpp"
namespace nvvk {
//////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::BufferSubAllocator
nvvk::BufferSubAllocator provides buffer sub allocation using larger buffer blocks.
The blocks are one VkBuffer each and are allocated via the
provided [nvvk::MemAllocator](#class-nvvkmemallocator).
The requested buffer space is sub-allocated and recycled in blocks internally.
This way we avoid creating lots of small VkBuffers and can avoid calling the Vulkan
API at all when there are blocks with sufficient empty space.
While Vulkan is more efficient than previous APIs, creating lots
of objects is still not good for overall performance: it results
in more cache misses and uses more system memory overall.
Be aware that each sub-allocation is always BASE_ALIGNMENT aligned.
A custom alignment can be requested at allocation time; it ensures
that the returned sub-allocation range of offset & size can fit the
originally requested size while respecting the requested alignment.
This, however, means the returned offset may not match the requested
alignment, and the returned size can be bigger to account for the shift
caused by manual alignment.
It is therefore necessary to pass the alignment that was used at allocation time
to the query functions as well.
```cpp
// alignment <= BASE_ALIGNMENT
handle = subAllocator.subAllocate(size);
binding = subAllocator.getSubBinding(handle);
// alignment > BASE_ALIGNMENT
handle = subAllocator.subAllocate(size, alignment);
binding = subAllocator.getSubBinding(handle, alignment);
```
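A rough end-to-end sketch (the block size, buffer usage flags, `memAllocator` instance and `sizeInBytes` are assumptions for illustration):
```cpp
nvvk::BufferSubAllocator subAllocator;
subAllocator.init(&memAllocator, 64 * 1024 * 1024,
                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);

nvvk::BufferSubAllocator::Handle  handle  = subAllocator.subAllocate(sizeInBytes);
nvvk::BufferSubAllocator::Binding binding = subAllocator.getSubBinding(handle);
// binding.buffer / binding.offset / binding.size describe the sub-range within the block's VkBuffer

subAllocator.subFree(handle);
subAllocator.deinit();
```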
@DOC_END */
class BufferSubAllocator
{
private:
static const uint32_t INVALID_ID_INDEX = ~0;
static const uint32_t BASE_ALIGNMENT = 16; // could compromise between max block size and typical requests
public:
class Handle
{
friend class BufferSubAllocator;
private:
static const uint32_t BLOCKBITS = 26;
// if we cannot pack size and offset each into 26 bits (after adjusting for base alignment)
// we need a dedicated block just for this
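// (with BLOCKBITS = 26 and BASE_ALIGNMENT = 16 this threshold amounts to 2^26 * 16 bytes = 1 GiB)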
static bool needsDedicated(uint64_t size, uint64_t alignment)
{
return ((size + (alignment > 16 ? alignment : 0)) >= (uint64_t((1 << BLOCKBITS)) * uint64_t(BASE_ALIGNMENT)));
}
union
{
struct
{
uint64_t blockIndex : 11; // 2047 blocks, typical blockSize 64 MB or more, should be enough
uint64_t offset : BLOCKBITS;
uint64_t size : BLOCKBITS;
uint64_t dedicated : 1; // 0 dedicated or not
};
uint64_t raw;
};
uint64_t getOffset() const { return dedicated == 1 ? 0 : offset * uint64_t(BASE_ALIGNMENT); }
uint64_t getSize() const { return dedicated == 1 ? offset + (size << BLOCKBITS) : size * uint64_t(BASE_ALIGNMENT); }
uint32_t getBlockIndex() const { return uint32_t(blockIndex); }
bool isDedicated() const { return dedicated == 1; }
bool setup(uint32_t blockIndex_, uint64_t offset_, uint64_t size_, bool dedicated_)
{
const uint64_t blockBitsMask = ((1ULL << BLOCKBITS) - 1);
assert((blockIndex_ & ~((1ULL << 11) - 1)) == 0);
blockIndex = blockIndex_ & ((1ULL << 11) - 1);
if(dedicated_)
{
dedicated = 1;
offset = size_ & blockBitsMask;
size = (size_ >> BLOCKBITS) & blockBitsMask;
}
else
{
dedicated = 0;
offset = (offset_ / uint64_t(BASE_ALIGNMENT)) & blockBitsMask;
size = (size_ / uint64_t(BASE_ALIGNMENT)) & blockBitsMask;
}
return (getBlockIndex() == blockIndex_ && getOffset() == offset_ && getSize() == size_);
}
public:
Handle() { raw = ~uint64_t(0); }
bool isValid() const { return raw != ~uint64_t(0); }
bool isEqual(const Handle& other) const
{
return blockIndex == other.blockIndex && offset == other.offset && dedicated == other.dedicated && size == other.size;
}
explicit operator bool() const { return isValid(); }
friend bool operator==(const Handle& lhs, const Handle& rhs) { return rhs.isEqual(lhs); }
};
//////////////////////////////////////////////////////////////////////////
BufferSubAllocator(BufferSubAllocator const&) = delete;
BufferSubAllocator& operator=(BufferSubAllocator const&) = delete;
BufferSubAllocator() { m_debugName = "nvvk::BufferSubAllocator:" + std::to_string((uint64_t)this); }
BufferSubAllocator(MemAllocator* memAllocator,
VkDeviceSize blockSize,
VkBufferUsageFlags bufferUsageFlags,
VkMemoryPropertyFlags memPropFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
bool mapped = false,
const std::vector<uint32_t>& sharingQueueFamilyIndices = std::vector<uint32_t>())
{
init(memAllocator, blockSize, bufferUsageFlags, memPropFlags, mapped, sharingQueueFamilyIndices);
}
~BufferSubAllocator() { deinit(); }
void init(MemAllocator* memallocator,
VkDeviceSize blockSize,
VkBufferUsageFlags bufferUsageFlags,
VkMemoryPropertyFlags memPropFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
bool mapped = false,
const std::vector<uint32_t>& sharingQueues = std::vector<uint32_t>());
void deinit();
void setDebugName(const std::string& name) { m_debugName = name; }
void setKeepLastBlockOnFree(bool state) { m_keepLastBlock = state; }
// alignment will be BASE_ALIGNMENT byte at least
// alignment must be power of 2
Handle subAllocate(VkDeviceSize size, uint32_t alignment = BASE_ALIGNMENT);
void subFree(Handle sub);
struct Binding
{
VkBuffer buffer;
uint64_t offset;
uint64_t size;
VkDeviceAddress address;
};
// sub allocation was aligned to BASE_ALIGNMENT
Binding getSubBinding(Handle handle)
{
Binding binding;
binding.offset = handle.getOffset();
binding.size = handle.getSize();
binding.buffer = m_blocks[handle.getBlockIndex()].buffer;
binding.address = m_blocks[handle.getBlockIndex()].address + binding.offset;
return binding;
}
// sub allocation alignment was custom
Binding getSubBinding(Handle handle, uint32_t alignment)
{
Binding binding;
binding.offset = (handle.getOffset() + (uint64_t(alignment) - 1)) & ~(uint64_t(alignment) - 1);
binding.size = handle.getSize() - (binding.offset - handle.getOffset());
binding.buffer = m_blocks[handle.getBlockIndex()].buffer;
binding.address = m_blocks[handle.getBlockIndex()].address + binding.offset;
return binding;
}
void* getSubMapping(Handle handle, uint32_t alignment = BASE_ALIGNMENT) const
{
return m_blocks[handle.getBlockIndex()].mapping
+ ((handle.getOffset() + (uint64_t(alignment) - 1)) & ~(uint64_t(alignment) - 1));
}
uint32_t getSubBlockIndex(Handle handle) const { return handle.getBlockIndex(); }
VkBuffer getBlockBuffer(uint32_t blockIndex) const { return m_blocks[blockIndex].buffer; }
float getUtilization(VkDeviceSize& allocatedSize, VkDeviceSize& usedSize) const;
bool fitsInAllocated(VkDeviceSize size, uint32_t alignment = BASE_ALIGNMENT) const;
void free(bool onlyEmpty);
protected:
// - Block stores the VkBuffers that we sub-allocate the requested space from
// To recycle Block structures within the arrays
// we use a linked list of array indices. The "index" element
// in the struct refers to the next free list item, or itself
// when in use.
// A block is "dedicated" if it only holds a single allocation.
// This can happen if we cannot encode the offset/size into the
// bits that the Handle provides for this, or when the size
// of the allocation is bigger than our preferred block size.
struct Block
{
uint32_t index = INVALID_ID_INDEX;
VkDeviceSize size = 0;
VkBuffer buffer = VK_NULL_HANDLE;
nvh::TRangeAllocator<BASE_ALIGNMENT> range;
MemHandle memory = NullMemHandle;
uint8_t* mapping = nullptr;
VkDeviceAddress address = 0;
bool isDedicated = false;
};
MemAllocator* m_memAllocator = nullptr;
VkDevice m_device = VK_NULL_HANDLE;
uint32_t m_memoryTypeIndex;
VkDeviceSize m_blockSize;
VkBufferUsageFlags m_bufferUsageFlags;
VkMemoryPropertyFlags m_memoryPropFlags;
std::vector<uint32_t> m_sharingQueueFamilyIndices;
bool m_mapped;
bool m_keepLastBlock = false;
std::vector<Block> m_blocks;
uint32_t m_regularBlocks = 0;
uint32_t m_freeBlockIndex; // linked list to next free block
VkDeviceSize m_allocatedSize;
VkDeviceSize m_usedSize;
std::string m_debugName;
uint32_t setIndexValue(uint32_t& index, uint32_t newValue)
{
uint32_t oldValue = index;
index = newValue;
return oldValue;
}
Block& getBlock(uint32_t index)
{
Block& block = m_blocks[index];
assert(block.index == index);
return block;
}
void freeBlock(Block& block);
VkResult allocBlock(Block& block, uint32_t id, VkDeviceSize size);
};
} // namespace nvvk

@@ -0,0 +1,456 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include <algorithm>
#include <platform.h>
#include "commands_vk.hpp"
#include "error_vk.hpp"
namespace nvvk {
uint32_t makeAccessMaskPipelineStageFlags(uint32_t accessMask, VkPipelineStageFlags supportedShaderBits)
{
static const uint32_t accessPipes[] = {
VK_ACCESS_INDIRECT_COMMAND_READ_BIT,
VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
VK_ACCESS_INDEX_READ_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
VK_ACCESS_UNIFORM_READ_BIT,
supportedShaderBits,
VK_ACCESS_INPUT_ATTACHMENT_READ_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
VK_ACCESS_SHADER_READ_BIT,
supportedShaderBits,
VK_ACCESS_SHADER_WRITE_BIT,
supportedShaderBits,
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_ACCESS_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT,
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
VK_ACCESS_TRANSFER_READ_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_HOST_READ_BIT,
VK_PIPELINE_STAGE_HOST_BIT,
VK_ACCESS_HOST_WRITE_BIT,
VK_PIPELINE_STAGE_HOST_BIT,
VK_ACCESS_MEMORY_READ_BIT,
0,
VK_ACCESS_MEMORY_WRITE_BIT,
0,
#if VK_NV_device_generated_commands
VK_ACCESS_COMMAND_PREPROCESS_READ_BIT_NV,
VK_PIPELINE_STAGE_COMMAND_PREPROCESS_BIT_NV,
VK_ACCESS_COMMAND_PREPROCESS_WRITE_BIT_NV,
VK_PIPELINE_STAGE_COMMAND_PREPROCESS_BIT_NV,
#endif
#if VK_NV_ray_tracing
VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_NV,
VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_NV | supportedShaderBits | VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_NV,
VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_NV,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_NV,
#endif
};
if(!accessMask)
{
return VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
}
uint32_t pipes = 0;
for(uint32_t i = 0; i < NV_ARRAY_SIZE(accessPipes); i += 2)
{
if(accessPipes[i] & accessMask)
{
pipes |= accessPipes[i + 1];
}
}
assert(pipes != 0);
return pipes;
}
void cmdBegin(VkCommandBuffer cmd, VkCommandBufferUsageFlags flags)
{
VkCommandBufferBeginInfo beginInfo{VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO};
beginInfo.flags = flags;
VkResult res = vkBeginCommandBuffer(cmd, &beginInfo);
assert(res == VK_SUCCESS);
}
//////////////////////////////////////////////////////////////////////////
void CommandPool::init(VkDevice device, uint32_t familyIndex, VkCommandPoolCreateFlags flags, VkQueue defaultQueue)
{
assert(!m_device);
m_device = device;
VkCommandPoolCreateInfo info = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO};
info.flags = flags;
info.queueFamilyIndex = familyIndex;
vkCreateCommandPool(m_device, &info, nullptr, &m_commandPool);
if(defaultQueue)
{
m_queue = defaultQueue;
}
else
{
vkGetDeviceQueue(device, familyIndex, 0, &m_queue);
}
}
void CommandPool::deinit()
{
if(m_commandPool)
{
vkDestroyCommandPool(m_device, m_commandPool, nullptr);
m_commandPool = VK_NULL_HANDLE;
}
m_device = VK_NULL_HANDLE;
}
VkCommandBuffer CommandPool::createCommandBuffer(VkCommandBufferLevel level /*= VK_COMMAND_BUFFER_LEVEL_PRIMARY*/,
bool begin,
VkCommandBufferUsageFlags flags /*= VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT*/,
const VkCommandBufferInheritanceInfo* pInheritanceInfo /*= nullptr*/)
{
VkCommandBufferAllocateInfo allocInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO};
allocInfo.level = level;
allocInfo.commandPool = m_commandPool;
allocInfo.commandBufferCount = 1;
VkCommandBuffer cmd;
vkAllocateCommandBuffers(m_device, &allocInfo, &cmd);
if(begin)
{
VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = flags;
beginInfo.pInheritanceInfo = pInheritanceInfo;
vkBeginCommandBuffer(cmd, &beginInfo);
}
return cmd;
}
void CommandPool::destroy(size_t count, const VkCommandBuffer* cmds)
{
vkFreeCommandBuffers(m_device, m_commandPool, (uint32_t)count, cmds);
}
void CommandPool::submitAndWait(size_t count, const VkCommandBuffer* cmds, VkQueue queue)
{
submit(count, cmds, queue);
VkResult result = vkQueueWaitIdle(queue);
if(nvvk::checkResult(result, __FILE__, __LINE__))
{
exit(-1);
}
vkFreeCommandBuffers(m_device, m_commandPool, (uint32_t)count, cmds);
}
void CommandPool::submit(size_t count, const VkCommandBuffer* cmds, VkQueue queue, VkFence fence)
{
for(size_t i = 0; i < count; i++)
{
vkEndCommandBuffer(cmds[i]);
}
VkSubmitInfo submit = {VK_STRUCTURE_TYPE_SUBMIT_INFO};
submit.pCommandBuffers = cmds;
submit.commandBufferCount = (uint32_t)count;
vkQueueSubmit(queue, 1, &submit, fence);
}
void CommandPool::submit(size_t count, const VkCommandBuffer* cmds, VkFence fence)
{
submit(count, cmds, m_queue, fence);
}
void CommandPool::submit(const std::vector<VkCommandBuffer>& cmds, VkFence fence)
{
submit(cmds.size(), cmds.data(), m_queue, fence);
}
//////////////////////////////////////////////////////////////////////////
void RingFences::init(VkDevice device, uint32_t ringSize)
{
assert(!m_device);
m_device = device;
m_cycleIndex = 0;
m_cycleSize = ringSize;
m_fences.resize(ringSize);
for(uint32_t i = 0; i < m_cycleSize; i++)
{
VkFenceCreateInfo info = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO};
info.flags = 0;
NVVK_CHECK(vkCreateFence(device, &info, nullptr, &m_fences[i].fence));
m_fences[i].active = false;
}
}
void RingFences::deinit()
{
if(!m_device)
return;
for(uint32_t i = 0; i < m_cycleSize; i++)
{
vkDestroyFence(m_device, m_fences[i].fence, nullptr);
}
m_fences.clear();
m_device = VK_NULL_HANDLE;
}
VkFence RingFences::getFence()
{
m_fences[m_cycleIndex].active = true;
return m_fences[m_cycleIndex].fence;
}
void RingFences::setCycleAndWait(uint32_t cycle)
{
// set cycle
m_cycleIndex = cycle % m_cycleSize;
Entry& entry = m_fences[m_cycleIndex];
if(entry.active)
{
// ensure the cycle we will use now has completed
VkResult result = vkWaitForFences(m_device, 1, &entry.fence, VK_TRUE, ~0ULL);
if(nvvk::checkResult(result, __FILE__, __LINE__))
{
exit(-1);
}
entry.active = false;
}
vkResetFences(m_device, 1, &entry.fence);
}
//////////////////////////////////////////////////////////////////////////
void RingCommandPool::init(VkDevice device, uint32_t queueFamilyIndex, VkCommandPoolCreateFlags flags, uint32_t ringSize)
{
assert(!m_device);
m_device = device;
m_cycleIndex = 0;
m_cycleSize = ringSize;
m_flags = flags;
m_familyIndex = queueFamilyIndex;
m_pools.resize(ringSize);
for(uint32_t i = 0; i < m_cycleSize; i++)
{
VkCommandPoolCreateInfo info = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO};
info.queueFamilyIndex = queueFamilyIndex;
info.flags = flags;
NVVK_CHECK(vkCreateCommandPool(m_device, &info, nullptr, &m_pools[i].pool));
}
}
void RingCommandPool::deinit()
{
if(!m_device)
return;
for(uint32_t i = 0; i < m_cycleSize; i++)
{
Entry& entry = m_pools[i];
if(!entry.cmds.empty())
{
vkFreeCommandBuffers(m_device, entry.pool, uint32_t(entry.cmds.size()), entry.cmds.data());
vkResetCommandPool(m_device, entry.pool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
entry.cmds.clear();
}
vkDestroyCommandPool(m_device, entry.pool, nullptr);
}
m_device = VK_NULL_HANDLE;
}
void RingCommandPool::setCycle(uint32_t cycle)
{
m_cycleIndex = cycle % m_cycleSize;
Entry& entry = m_pools[m_cycleIndex];
if(!entry.cmds.empty())
{
vkFreeCommandBuffers(m_device, entry.pool, uint32_t(entry.cmds.size()), entry.cmds.data());
vkResetCommandPool(m_device, entry.pool, 0);
entry.cmds.clear();
}
}
VkCommandBuffer RingCommandPool::createCommandBuffer(VkCommandBufferLevel level /*= VK_COMMAND_BUFFER_LEVEL_PRIMARY*/,
bool begin,
VkCommandBufferUsageFlags flags /*= VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT*/,
const VkCommandBufferInheritanceInfo* pInheritanceInfo /*= nullptr*/)
{
Entry& cycle = m_pools[m_cycleIndex];
VkCommandBufferAllocateInfo info = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO};
info.commandBufferCount = 1;
info.commandPool = cycle.pool;
info.level = level;
VkCommandBuffer cmd;
vkAllocateCommandBuffers(m_device, &info, &cmd);
cycle.cmds.push_back(cmd);
if(begin)
{
VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = flags;
beginInfo.pInheritanceInfo = pInheritanceInfo;
vkBeginCommandBuffer(cmd, &beginInfo);
}
return cmd;
}
const VkCommandBuffer* RingCommandPool::createCommandBuffers(VkCommandBufferLevel level, uint32_t count)
{
Entry& cycle = m_pools[m_cycleIndex];
VkCommandBufferAllocateInfo info = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO};
info.commandBufferCount = count;
info.commandPool = cycle.pool;
info.level = level;
size_t begin = cycle.cmds.size();
cycle.cmds.resize(begin + count);
VkCommandBuffer* cmds = cycle.cmds.data() + begin;
vkAllocateCommandBuffers(m_device, &info, cmds);
return cmds;
}
//////////////////////////////////////////////////////////////////////////
void BatchSubmission::init(VkQueue queue)
{
assert(m_waits.empty() && m_waitFlags.empty() && m_signals.empty() && m_commands.empty());
m_queue = queue;
}
void BatchSubmission::enqueue(uint32_t num, const VkCommandBuffer* cmdbuffers)
{
for(uint32_t i = 0; i < num; i++)
{
m_commands.push_back(cmdbuffers[i]);
}
}
void BatchSubmission::enqueue(VkCommandBuffer cmdbuffer)
{
m_commands.push_back(cmdbuffer);
}
void BatchSubmission::enqueueSignal(VkSemaphore sem)
{
m_signals.push_back(sem);
}
void BatchSubmission::enqueueWait(VkSemaphore sem, VkPipelineStageFlags flag)
{
m_waits.push_back(sem);
m_waitFlags.push_back(flag);
}
VkResult BatchSubmission::execute(VkFence fence /*= nullptr*/, uint32_t deviceMask)
{
VkResult res = VK_SUCCESS;
if(m_queue && (fence || !m_commands.empty() || !m_signals.empty() || !m_waits.empty()))
{
VkSubmitInfo submitInfo = {VK_STRUCTURE_TYPE_SUBMIT_INFO};
submitInfo.commandBufferCount = uint32_t(m_commands.size());
submitInfo.signalSemaphoreCount = uint32_t(m_signals.size());
submitInfo.waitSemaphoreCount = uint32_t(m_waits.size());
submitInfo.pCommandBuffers = m_commands.data();
submitInfo.pSignalSemaphores = m_signals.data();
submitInfo.pWaitSemaphores = m_waits.data();
submitInfo.pWaitDstStageMask = m_waitFlags.data();
std::vector<uint32_t> deviceMasks;
std::vector<uint32_t> deviceIndices;
VkDeviceGroupSubmitInfo deviceGroupInfo = {VK_STRUCTURE_TYPE_DEVICE_GROUP_SUBMIT_INFO};
if(deviceMask != 0)
{
// Allocate an array big enough to hold the mask for all three parameters
deviceMasks.resize(m_commands.size(), deviceMask);
deviceIndices.resize(std::max(m_signals.size(), m_waits.size()), 0); // Only perform semaphore actions on device zero
submitInfo.pNext = &deviceGroupInfo;
deviceGroupInfo.commandBufferCount = submitInfo.commandBufferCount;
deviceGroupInfo.pCommandBufferDeviceMasks = deviceMasks.data();
deviceGroupInfo.signalSemaphoreCount = submitInfo.signalSemaphoreCount;
deviceGroupInfo.pSignalSemaphoreDeviceIndices = deviceIndices.data();
deviceGroupInfo.waitSemaphoreCount = submitInfo.waitSemaphoreCount;
deviceGroupInfo.pWaitSemaphoreDeviceIndices = deviceIndices.data();
}
res = vkQueueSubmit(m_queue, 1, &submitInfo, fence);
m_commands.clear();
m_waits.clear();
m_waitFlags.clear();
m_signals.clear();
}
return res;
}
void BatchSubmission::waitIdle() const
{
VkResult result = vkQueueWaitIdle(m_queue);
if(nvvk::checkResult(result, __FILE__, __LINE__))
{
exit(-1);
}
}
} // namespace nvvk

@@ -0,0 +1,568 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <platform.h>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace nvvk {
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# functions in nvvk
- makeAccessMaskPipelineStageFlags : depending on accessMask returns appropriate VkPipelineStageFlagBits
- cmdBegin : wraps vkBeginCommandBuffer with VkCommandBufferUsageFlags and implicitly handles VkCommandBufferBeginInfo setup
- makeSubmitInfo : VkSubmitInfo struct setup using provided arrays of signals and commandbuffers, leaving rest zeroed
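For example, a generic memory barrier can derive both stage masks from its access masks (a sketch; the chosen access masks and `cmd` are illustrative):
```cpp
VkMemoryBarrier barrier{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
vkCmdPipelineBarrier(cmd,
                     nvvk::makeAccessMaskPipelineStageFlags(barrier.srcAccessMask),
                     nvvk::makeAccessMaskPipelineStageFlags(barrier.dstAccessMask),
                     0, 1, &barrier, 0, nullptr, 0, nullptr);
```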
@DOC_END */
// useful for barriers, derive all compatible stage flags from an access mask
uint32_t makeAccessMaskPipelineStageFlags(uint32_t accessMask,
VkPipelineStageFlags supportedShaderBits = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT
| VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT
| VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT
| VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
void cmdBegin(VkCommandBuffer cmd, VkCommandBufferUsageFlags flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT);
inline VkSubmitInfo makeSubmitInfo(uint32_t numCmds, VkCommandBuffer* cmds, uint32_t numSignals, VkSemaphore* signals)
{
VkSubmitInfo submitInfo = {VK_STRUCTURE_TYPE_SUBMIT_INFO};
submitInfo.pCommandBuffers = cmds;
submitInfo.commandBufferCount = numCmds;
submitInfo.pSignalSemaphores = signals;
submitInfo.signalSemaphoreCount = numSignals;
return submitInfo;
}
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# class nvvk::CommandPool
nvvk::CommandPool stores a single VkCommandPool and provides utility functions
to create VkCommandBuffers from it.
Example:
```cpp
{
nvvk::CommandPool cmdPool;
cmdPool.init(...);
// some setup/one shot work
{
VkCommandBuffer cmd = cmdPool.createCommandBuffer();
... record commands ...
// trigger execution with a blocking operation
// not recommended for performance
// but useful for sample setup
cmdPool.submitAndWait(cmd, queue);
}
// other cmds you may batch, or recycle
std::vector<VkCommandBuffer> cmds;
{
VkCommandBuffer cmd = cmdPool.createCommandBuffer();
... record commands ...
cmds.push_back(cmd);
}
{
VkCommandBuffer cmd = cmdPool.createCommandBuffer();
... record commands ...
cmds.push_back(cmd);
}
// do some form of batched submission of cmds
// after completion destroy cmd
cmdPool.destroy(cmds.size(), cmds.data());
cmdPool.deinit();
}
```
@DOC_END */
class CommandPool
{
public:
CommandPool(CommandPool const&) = delete;
CommandPool& operator=(CommandPool const&) = delete;
CommandPool() {}
~CommandPool() { deinit(); }
// if defaultQueue is null, uses first queue from familyIndex as default
CommandPool(VkDevice device,
uint32_t familyIndex,
VkCommandPoolCreateFlags flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
VkQueue defaultQueue = VK_NULL_HANDLE)
{
init(device, familyIndex, flags, defaultQueue);
}
// if defaultQueue is null, uses first queue from familyIndex as default
void init(VkDevice device,
uint32_t familyIndex,
VkCommandPoolCreateFlags flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
VkQueue defaultQueue = VK_NULL_HANDLE);
void deinit();
VkCommandBuffer createCommandBuffer(VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
bool begin = true,
VkCommandBufferUsageFlags flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
const VkCommandBufferInheritanceInfo* pInheritanceInfo = nullptr);
// free cmdbuffers from this pool
void destroy(size_t count, const VkCommandBuffer* cmds);
void destroy(const std::vector<VkCommandBuffer>& cmds) { destroy(cmds.size(), cmds.data()); }
void destroy(VkCommandBuffer cmd) { destroy(1, &cmd); }
VkCommandPool getCommandPool() const { return m_commandPool; }
// Ends command buffer recording and submits to the queue. If 'fence' is not
// VK_NULL_HANDLE, it will be used to signal the completion of the command
// buffer execution. Does NOT destroy the command buffers! This is not an
// optimal pattern for queue submission, as it may lead to a large number of
// vkQueueSubmit() calls per frame. Consider batching submissions via the
// FencedCommandPools and BatchSubmission classes below.
void submit(size_t count, const VkCommandBuffer* cmds, VkQueue queue, VkFence fence = VK_NULL_HANDLE);
void submit(size_t count, const VkCommandBuffer* cmds, VkFence fence = VK_NULL_HANDLE);
void submit(const std::vector<VkCommandBuffer>& cmds, VkFence fence = VK_NULL_HANDLE);
// Non-optimal usage pattern that waits for queue idle; avoid in production use.
// Consider batching submissions via the FencedCommandPools and
// BatchSubmission classes below. Ends command buffer recording,
// submits to the queue, waits for queue idle, and destroys the cmds.
void submitAndWait(size_t count, const VkCommandBuffer* cmds, VkQueue queue);
void submitAndWait(const std::vector<VkCommandBuffer>& cmds, VkQueue queue)
{
submitAndWait(cmds.size(), cmds.data(), queue);
}
void submitAndWait(VkCommandBuffer cmd, VkQueue queue) { submitAndWait(1, &cmd, queue); }
// ends and submits to default queue, waits for queue idle and destroys cmds
void submitAndWait(size_t count, const VkCommandBuffer* cmds) { submitAndWait(count, cmds, m_queue); }
void submitAndWait(const std::vector<VkCommandBuffer>& cmds) { submitAndWait(cmds.size(), cmds.data(), m_queue); }
void submitAndWait(VkCommandBuffer cmd) { submitAndWait(1, &cmd, m_queue); }
protected:
VkDevice m_device = VK_NULL_HANDLE;
VkQueue m_queue = VK_NULL_HANDLE;
VkCommandPool m_commandPool = VK_NULL_HANDLE;
};
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# class nvvk::ScopeCommandBuffer
nvvk::ScopeCommandBuffer provides a single VkCommandBuffer that lives within the scope
and is directly submitted and deleted when the scope is left.
Not recommended for efficiency, since it results in a blocking
operation, but aids sample writing.
Example:
```cpp
{
ScopeCommandBuffer cmd(device, queueFamilyIndex, queue);
... do stuff
vkCmdCopyBuffer(cmd, ...);
}
```
@DOC_END */
class ScopeCommandBuffer : public CommandPool
{
public:
// if queue is null, uses first queue from familyIndex
ScopeCommandBuffer(VkDevice device, uint32_t familyIndex, VkQueue queue = VK_NULL_HANDLE)
{
CommandPool::init(device, familyIndex, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queue);
m_cmd = createCommandBuffer();
}
~ScopeCommandBuffer() { submitAndWait(m_cmd); }
operator VkCommandBuffer() const { return m_cmd; };
private:
VkCommandBuffer m_cmd;
};
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# class **nvvk::Ring...**
In real-time processing, the CPU typically generates commands
ahead of the GPU and sends them in batches for execution.
To avoid making the CPU wait for the GPU's completion and to let it "race ahead",
we make use of double- or triple-buffering techniques, where we cycle through
a pool of resources every frame. We know that those resources are currently
not in use by the GPU and can therefore manipulate them directly.
Especially in Vulkan, it is the developer's responsibility to avoid such
access to resources that are in flight.
The "Ring" classes cycle through a pool of resources. The default value
is set to allow two frames in-flight, assuming one fence is used per-frame.
@DOC_END */
// typically the driver will not let the CPU race ahead of the GPU
// by more than two frames during swapchain operations.
static const uint32_t DEFAULT_RING_SIZE = 3;
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# class nvvk::RingFences
nvvk::RingFences recycles a fixed number of fences, provides information in which cycle
we are currently at, and prevents accidental access to a cycle in-flight.
A typical frame would start by "setCycleAndWait", which waits for the
requested cycle to be available.
@DOC_END */
class RingFences
{
public:
RingFences(RingFences const&) = delete;
RingFences& operator=(RingFences const&) = delete;
RingFences() {}
RingFences(VkDevice device, uint32_t ringSize = DEFAULT_RING_SIZE) { init(device, ringSize); }
~RingFences() { deinit(); }
void init(VkDevice device, uint32_t ringSize = DEFAULT_RING_SIZE);
void deinit();
void reset()
{
VkDevice device = m_device;
uint32_t ringSize = m_cycleSize;
deinit();
init(device, ringSize);
}
// ensures the availability of the passed cycle
void setCycleAndWait(uint32_t cycle);
// get current cycle fence
VkFence getFence();
// query current cycle index
uint32_t getCycleIndex() const { return m_cycleIndex; }
uint32_t getCycleSize() const { return m_cycleSize; }
private:
struct Entry
{
VkFence fence;
bool active;
};
uint32_t m_cycleIndex{0};
uint32_t m_cycleSize{0};
std::vector<Entry> m_fences;
VkDevice m_device = VK_NULL_HANDLE;
};
//--------------------------------------------------------------------------------------------------
/** @DOC_START
## class nvvk::RingCommandPool
nvvk::RingCommandPool manages a fixed cycle set of VkCommandBufferPools and
one-shot command buffers allocated from them.
The usage of multiple command buffer pools also means we get nice allocation
behavior (linear allocation from frame start to frame end) without fragmentation.
If we were using a single command pool over multiple frames, it could fragment easily.
You must ensure the cycle is available manually, typically by keeping it in sync
with ring fences.
Example:
```cpp
{
frame++;
// wait until we can use the new cycle
// (very rare if we use the fence at the end once per frame)
ringFences.setCycleAndWait( frame );
// update cycle state, allows recycling of old resources
ringPool.setCycle( frame );
VkCommandBuffer cmd = ringPool.createCommandBuffer(...);
... do stuff / submit etc...
VkFence fence = ringFences.getFence();
// use this fence in the submit
vkQueueSubmit(...fence..);
}
```
@DOC_END */
class RingCommandPool
{
public:
RingCommandPool(RingCommandPool const&) = delete;
RingCommandPool& operator=(RingCommandPool const&) = delete;
RingCommandPool(VkDevice device,
uint32_t queueFamilyIndex,
VkCommandPoolCreateFlags flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
uint32_t ringSize = DEFAULT_RING_SIZE)
{
init(device, queueFamilyIndex, flags, ringSize);
}
RingCommandPool() {}
~RingCommandPool() { deinit(); }
void init(VkDevice device,
uint32_t queueFamilyIndex,
VkCommandPoolCreateFlags flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
uint32_t ringSize = DEFAULT_RING_SIZE);
void deinit();
void reset()
{
VkDevice device = m_device;
VkCommandPoolCreateFlags flags = m_flags;
uint32_t queueFamilyIndex = m_familyIndex;
uint32_t ringSize = m_cycleSize;
deinit();
init(device, queueFamilyIndex, flags, ringSize);
}
// call when cycle has changed, prior creating command buffers
// resets old pools etc.
void setCycle(uint32_t cycle);
// ensure proper cycle or frame is set prior these
VkCommandBuffer createCommandBuffer(VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
bool begin = true,
VkCommandBufferUsageFlags flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
const VkCommandBufferInheritanceInfo* pInheritanceInfo = nullptr);
// pointer is only valid until next create
const VkCommandBuffer* createCommandBuffers(VkCommandBufferLevel level, uint32_t count);
protected:
struct Entry
{
VkCommandPool pool{};
std::vector<VkCommandBuffer> cmds;
};
uint32_t m_cycleIndex{0};
uint32_t m_cycleSize{0};
std::vector<Entry> m_pools;
VkDevice m_device = VK_NULL_HANDLE;
VkCommandPoolCreateFlags m_flags{0};
uint32_t m_familyIndex{0};
};
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# class nvvk::BatchSubmission
nvvk::BatchSubmission batches the submission arguments of VkSubmitInfo for VkQueueSubmit.
vkQueueSubmit is a rather costly operation (depending on the OS)
and should not be issued too often (e.g. < 10 per frame). Therefore
this utility class allows adding command buffers, semaphores, etc. and
submitting them later in a batch.
When using manual locks, it can also be useful to feed command buffers
from different threads and then kick off the submission later.
Example
```cpp
// within upload logic
{
semTransfer = handleUpload(...);
// for example trigger async upload on transfer queue here
vkQueueSubmit(... semTransfer ...);
// tell next frame's batch submission
// that its commandbuffers should wait for transfer
// to be completed
graphicsSubmission.enqueueWait(semTransfer);
}
// within present logic
{
// for example ensure the next frame waits until proper present semaphore was triggered
graphicsSubmission.enqueueWait(presentSemaphore);
}
// within drawing logic
{
// enqueue some graphics work for submission
graphicsSubmission.enqueue(getSceneCmdBuffer());
graphicsSubmission.enqueue(getUiCmdBuffer());
graphicsSubmission.execute(frameFence);
}
```
@DOC_END */
class BatchSubmission
{
private:
VkQueue m_queue = nullptr;
std::vector<VkSemaphore> m_waits;
std::vector<VkPipelineStageFlags> m_waitFlags;
std::vector<VkSemaphore> m_signals;
std::vector<VkCommandBuffer> m_commands;
public:
BatchSubmission(BatchSubmission const&) = delete;
BatchSubmission& operator=(BatchSubmission const&) = delete;
BatchSubmission() {}
BatchSubmission(VkQueue queue) { init(queue); }
uint32_t getCommandBufferCount() const { return uint32_t(m_commands.size()); }
VkQueue getQueue() const { return m_queue; }
// can change queue if nothing is pending
void init(VkQueue queue);
void enqueue(uint32_t num, const VkCommandBuffer* cmdbuffers);
void enqueue(VkCommandBuffer cmdbuffer);
void enqueueSignal(VkSemaphore sem);
void enqueueWait(VkSemaphore sem, VkPipelineStageFlags flag);
// submits the work and resets internal state
VkResult execute(VkFence fence = nullptr, uint32_t deviceMask = 0);
void waitIdle() const;
};
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::FencedCommandPools
The nvvk::FencedCommandPools container class bundles the typical utilities for handling
command submission. It combines RingFences, RingCommandPool and BatchSubmission
behind a convenient interface.
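A per-frame usage sketch (the `device`, `queue`, `queueFamilyIndex` and `frame` variables are assumed to exist):
```cpp
nvvk::FencedCommandPools pools(device, queue, queueFamilyIndex);

frame++;
pools.setCycleAndWait(frame);   // waits on this cycle's fence and recycles its command pool
VkCommandBuffer cmd = pools.createCommandBuffer();
// ... record commands ...
vkEndCommandBuffer(cmd);
pools.enqueue(cmd);
pools.execute();                // submits the batch, signaling this cycle's fence
```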
@DOC_END */
class FencedCommandPools : protected RingFences, protected RingCommandPool, protected BatchSubmission
{
public:
FencedCommandPools(FencedCommandPools const&) = delete;
FencedCommandPools& operator=(FencedCommandPools const&) = delete;
FencedCommandPools() {}
~FencedCommandPools() { deinit(); }
FencedCommandPools(VkDevice device,
VkQueue queue,
uint32_t queueFamilyIndex,
VkCommandPoolCreateFlags flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
uint32_t ringSize = DEFAULT_RING_SIZE)
{
init(device, queue, queueFamilyIndex, flags, ringSize);
}
void init(VkDevice device,
VkQueue queue,
uint32_t queueFamilyIndex,
VkCommandPoolCreateFlags flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
uint32_t ringSize = DEFAULT_RING_SIZE)
{
RingFences::init(device, ringSize);
RingCommandPool::init(device, queueFamilyIndex, flags, ringSize);
BatchSubmission::init(queue);
}
void deinit()
{
RingFences::deinit();
RingCommandPool::deinit();
//BatchSubmission::deinit();
}
void reset()
{
waitIdle();
RingFences::reset();
RingCommandPool::reset();
}
void enqueue(uint32_t num, const VkCommandBuffer* cmdbuffers) { BatchSubmission::enqueue(num, cmdbuffers); }
void enqueue(VkCommandBuffer cmdbuffer) { BatchSubmission::enqueue(cmdbuffer); }
void enqueueSignal(VkSemaphore sem) { BatchSubmission::enqueueSignal(sem); }
void enqueueWait(VkSemaphore sem, VkPipelineStageFlags flag) { BatchSubmission::enqueueWait(sem, flag); }
VkResult execute(uint32_t deviceMask = 0) { return BatchSubmission::execute(getFence(), deviceMask); }
void waitIdle() const { BatchSubmission::waitIdle(); }
void setCycleAndWait(uint32_t cycle)
{
RingFences::setCycleAndWait(cycle);
RingCommandPool::setCycle(cycle);
}
// ensure the proper cycle is set prior to this
VkCommandBuffer createCommandBuffer(VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
bool begin = true,
VkCommandBufferUsageFlags flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
const VkCommandBufferInheritanceInfo* pInheritanceInfo = nullptr)
{
return RingCommandPool::createCommandBuffer(level, begin, flags, pInheritanceInfo);
}
// pointer is only valid until next create
const VkCommandBuffer* createCommandBuffers(VkCommandBufferLevel level, uint32_t count)
{
return RingCommandPool::createCommandBuffers(level, count);
}
struct ScopedCmd
{
FencedCommandPools* pCmdPools;
VkCommandBuffer cmd;
ScopedCmd(FencedCommandPools& cp)
{
pCmdPools = &cp;
cmd = cp.createCommandBuffer();
}
~ScopedCmd()
{
vkEndCommandBuffer(cmd);
pCmdPools->enqueue(cmd);
pCmdPools->execute();
pCmdPools->waitIdle();
}
operator VkCommandBuffer() { return cmd; }
};
};
} // namespace nvvk

View file

@ -0,0 +1,400 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <unordered_map>
#include <memory>
#include "vulkan/vulkan_core.h"
#include "descriptorsets_vk.hpp"
#define NVVK_COMPUTE_DEFAULT_BLOCK_SIZE 256
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::PushComputeDispatcher
nvvk::PushComputeDispatcher is a convenience structure for easily creating
compute-only pipelines by defining the bindings and providing SPV code.
The descriptor set updates are carried out using the KHR_push_descriptor
extension.
Example:
```cpp
enum BindingLocation
{
eMyBindingLocation = 0
};
struct PushConstant{
...
}
pushConstant;
nvvk::PushComputeDispatcher<PushConstant, BindingLocation> myCompute;
VkBuffer myFirstBuffer = createMyFirstBuffer(...);
VkBuffer mySecondBuffer = createMySecondBuffer(...);
VkDevice device = getMyVkDevice(...);
uint8_t* spvCode = getMyComputeShaderCode(...);
size_t spvCodeSize = getMyComputeShaderCodeSize(...);
myCompute.addBufferBinding(BindingLocation::eMyBindingLocation);
myCompute.updateBufferBinding(BindingLocation::eMyBindingLocation, myFirstBuffer);
myCompute.setCode(device, spvCode, spvCodeSize);
myCompute.finalizePipeline(device);
...
VkCommandBuffer cmd = getMyCommandBuffer(...);
myCompute.dispatchThreads(cmd, targetThreadCount, &pushConstant);
...
myCompute.updateBufferBinding(BindingLocation::eMyBindingLocation, mySecondBuffer);
myCompute.dispatchThreads(cmd, targetThreadCount, &pushConstant);
...
```
@DOC_END */
/// Barrier types usable before and after the shader dispatch.
/// These barriers cover SHADER_READ and SHADER_WRITE access, plus TRANSFER access when requested.
enum DispatcherBarrier
{
eNone = 0,
eCompute = 1,
eTransfer = 2,
eGraphics = 4,
eRaytracing = 8
};
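// Example (sketch, reusing `myCompute`, `cmd` and `pushConstant` from the class documentation above):
// make the dispatch wait on prior transfer work and make its results visible to later compute work
//   myCompute.dispatchThreads(cmd, targetThreadCount, &pushConstant, DispatcherBarrier::eCompute, DispatcherBarrier::eTransfer);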
template <typename TPushConstants, typename TBindingEnum, uint32_t pipelineCount = 1u>
struct PushComputeDispatcher
{
VkPipelineLayout layout{};
std::array<VkPipeline, pipelineCount> pipelines{};
VkDescriptorSetLayout dsetLayout{};
nvvk::DescriptorSetBindings bindings;
std::unordered_map<TBindingEnum, std::unique_ptr<VkDescriptorBufferInfo>> bufferInfos;
std::unordered_map<TBindingEnum, std::unique_ptr<VkWriteDescriptorSetAccelerationStructureKHR>> accelInfos;
std::unordered_map<TBindingEnum, std::unique_ptr<VkAccelerationStructureKHR>> accel;
std::unordered_map<TBindingEnum, std::unique_ptr<VkDescriptorImageInfo>> sampledImageInfos;
TPushConstants pushConstants{};
struct ShaderModule
{
VkShaderModule module{VK_NULL_HANDLE};
bool isLocal{false};
};
std::vector<VkWriteDescriptorSet> writes;
std::array<ShaderModule, pipelineCount> shaderModules;
bool addBufferBinding(TBindingEnum index)
{
if(bufferInfos.find(index) == bufferInfos.end())
{
bindings.addBinding(VkDescriptorSetLayoutBinding{uint32_t(index), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT});
bufferInfos[index] = std::make_unique<VkDescriptorBufferInfo>();
auto* info = bufferInfos[index].get();
*(info) = {VK_NULL_HANDLE, 0, VK_WHOLE_SIZE};
writes.emplace_back(bindings.makeWrite(0, index, info));
return true;
}
return false;
}
bool addAccelerationStructureBinding(TBindingEnum index)
{
if(accelInfos.find(index) == accelInfos.end())
{
bindings.addBinding(VkDescriptorSetLayoutBinding{uint32_t(index), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
1, VK_SHADER_STAGE_COMPUTE_BIT});
accelInfos[index] = std::make_unique<VkWriteDescriptorSetAccelerationStructureKHR>();
auto* info = accelInfos[index].get();
accel[index] = std::make_unique<VkAccelerationStructureKHR>();
auto* acc = accel[index].get();
info->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR;
info->pNext = nullptr;
info->accelerationStructureCount = 1;
info->pAccelerationStructures = acc;
writes.emplace_back(bindings.makeWrite(0, index, info));
return true;
}
return false;
}
bool addSampledImageBinding(TBindingEnum index)
{
if(sampledImageInfos.find(index) == sampledImageInfos.end())
{
bindings.addBinding(VkDescriptorSetLayoutBinding{uint32_t(index), VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1,
VK_SHADER_STAGE_COMPUTE_BIT});
sampledImageInfos[index] = std::make_unique<VkDescriptorImageInfo>();
auto* info = sampledImageInfos[index].get();
writes.emplace_back(bindings.makeWrite(0, index, info));
return true;
}
return false;
}
bool updateBufferBinding(TBindingEnum index, VkBuffer buffer)
{
auto it = bufferInfos.find(index);
if(it != bufferInfos.end())
{
it->second->buffer = buffer;
return true;
}
return false;
}
bool updateAccelerationStructureBinding(TBindingEnum index, VkAccelerationStructureKHR acc)
{
auto it = accel.find(index);
if(it != accel.end())
{
*(it->second.get()) = acc;
return true;
}
return false;
}
bool updateSampledImageBinding(TBindingEnum index,
VkSampler sampler = VK_NULL_HANDLE,
VkImageView view = VK_NULL_HANDLE,
VkImageLayout layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)
{
auto it = sampledImageInfos.find(index);
if(it != sampledImageInfos.end())
{
it->second->sampler = sampler;
it->second->imageView = view;
it->second->imageLayout = layout;
return true;
}
return false;
}
bool setCode(VkDevice device, void* shaderCode, size_t codeSize, uint32_t pipelineIndex = 0u)
{
VkShaderModuleCreateInfo moduleCreateInfo{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO};
moduleCreateInfo.codeSize = codeSize;
moduleCreateInfo.pCode = reinterpret_cast<uint32_t*>(shaderCode);
VkResult r = vkCreateShaderModule(device, &moduleCreateInfo, nullptr, &(shaderModules[pipelineIndex].module));
if(r != VK_SUCCESS || shaderModules[pipelineIndex].module == VK_NULL_HANDLE)
{
return false;
}
shaderModules[pipelineIndex].isLocal = true;
return true;
}
bool setCode(VkShaderModule m, uint32_t pipelineIndex = 0u)
{
shaderModules[pipelineIndex].module = m;
shaderModules[pipelineIndex].isLocal = false;
return m != VK_NULL_HANDLE;
}
bool finalizePipeline(VkDevice device)
{
dsetLayout = bindings.createLayout(device, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
pipelineLayoutCreateInfo.pSetLayouts = &dsetLayout;
pipelineLayoutCreateInfo.setLayoutCount = 1;
VkPushConstantRange pushConstantRange{VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(TPushConstants)};
pipelineLayoutCreateInfo.pushConstantRangeCount = 1;
pipelineLayoutCreateInfo.pPushConstantRanges = &pushConstantRange;
VkResult r = vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, nullptr, &layout);
if(r != VK_SUCCESS || layout == VK_NULL_HANDLE)
{
return false;
}
VkPipelineShaderStageCreateInfo stageCreateInfo = {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO};
stageCreateInfo.stage = VK_SHADER_STAGE_COMPUTE_BIT;
stageCreateInfo.pName = "main";
for(uint32_t i = 0; i < pipelineCount; i++)
{
stageCreateInfo.module = shaderModules[i].module;
VkComputePipelineCreateInfo createInfo{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
createInfo.stage = stageCreateInfo;
createInfo.layout = layout;
r = vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &createInfo, nullptr, &pipelines[i]);
if(r != VK_SUCCESS || pipelines[i] == VK_NULL_HANDLE)
{
return false;
}
if(shaderModules[i].isLocal)
{
vkDestroyShaderModule(device, shaderModules[i].module, nullptr);
}
}
return true;
}
uint32_t getBlockCount(uint32_t targetThreadCount, uint32_t blockSize)
{
return (targetThreadCount + blockSize - 1) / blockSize;
}
// Bind the pipeline, push constants and push descriptors. Used internally, or if the app issues a direct
// call to vkCmdDispatch instead of the dispatchThreads()/dispatchBlocks() methods, as sketched below
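// A minimal manual-dispatch sketch (`myCompute`, `cmd` and `pushConstant` as in the class documentation):
//   myCompute.bind(cmd, &pushConstant);
//   vkCmdDispatch(cmd, groupCountX, 1, 1);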
void bind(VkCommandBuffer cmd, const TPushConstants* constants = nullptr, uint32_t pipelineIndex = 0u)
{
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipelines[pipelineIndex]);
if(constants != nullptr)
{
vkCmdPushConstants(cmd, layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(TPushConstants), constants);
}
if(writes.size() > 0)
{
vkCmdPushDescriptorSetKHR(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, static_cast<uint32_t>(writes.size()),
writes.data());
}
}
void dispatchThreads(VkCommandBuffer cmd,
uint32_t threadCount,
const TPushConstants* constants = nullptr,
uint32_t postBarrier = DispatcherBarrier::eCompute,
uint32_t preBarrier = DispatcherBarrier::eNone,
uint32_t blockSize = NVVK_COMPUTE_DEFAULT_BLOCK_SIZE,
// If pipelineIndex == ~0u, all pipelines will be executed sequentially. Otherwise, only dispatch the requested pipeline
uint32_t pipelineIndex = ~0u)
{
uint32_t blockCount = getBlockCount(threadCount, blockSize);
dispatchBlocks(cmd, blockCount, constants, postBarrier, preBarrier, pipelineIndex);
}
void dispatchBlocks(VkCommandBuffer cmd,
uint32_t blockCount,
const TPushConstants* constants = nullptr,
uint32_t postBarrier = DispatcherBarrier::eCompute,
uint32_t preBarrier = DispatcherBarrier::eNone,
// If pipelineIndex == ~0u, all pipelines will be executed sequentially. Otherwise, only dispatch the requested pipeline
uint32_t pipelineIndex = ~0u)
{
dispatchBlocks(cmd, {blockCount, 1, 1}, constants, postBarrier, preBarrier, pipelineIndex);
}
void dispatchBlocks(VkCommandBuffer cmd,
glm::uvec3 blockCount,
const TPushConstants* constants = nullptr,
uint32_t postBarrier = DispatcherBarrier::eCompute,
uint32_t preBarrier = DispatcherBarrier::eNone,
// If pipelineIndex == ~0u, all pipelines will be executed sequentially. Otherwise, only dispatch the requested pipeline
uint32_t pipelineIndex = ~0u)
{
if(preBarrier != eNone)
{
VkMemoryBarrier mb{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
mb.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
VkPipelineStageFlags srcStage{};
if((preBarrier & eCompute) || (preBarrier & eGraphics) || (preBarrier & eRaytracing))
{
mb.srcAccessMask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
if(preBarrier & eCompute)
srcStage |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
if(preBarrier & eGraphics)
srcStage |= VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT;
if(preBarrier & eRaytracing)
srcStage |= VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR;
}
if(preBarrier & eTransfer)
{
mb.srcAccessMask |= VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
srcStage |= VK_PIPELINE_STAGE_TRANSFER_BIT;
}
vkCmdPipelineBarrier(cmd, srcStage, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &mb, 0, nullptr, 0, nullptr);
}
uint32_t currentPipeline = (pipelineIndex == ~0u) ? 0 : pipelineIndex;
uint32_t count = (pipelineIndex == ~0u) ? pipelineCount : 1;
for(uint32_t i = 0; i < count; i++)
{
bind(cmd, constants, currentPipeline + i);
vkCmdDispatch(cmd, blockCount.x, blockCount.y, blockCount.z);
if(postBarrier != eNone)
{
VkMemoryBarrier mb{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
mb.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
VkPipelineStageFlags dstStage{};
if((postBarrier & eCompute) || (postBarrier & eGraphics) || (postBarrier & eRaytracing))
{
mb.dstAccessMask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
if(postBarrier & eCompute)
dstStage |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
if(postBarrier & eGraphics)
dstStage |= VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT;
if(postBarrier & eRaytracing)
dstStage |= VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR;
}
if(postBarrier & eTransfer)
{
mb.dstAccessMask |= VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
dstStage |= VK_PIPELINE_STAGE_TRANSFER_BIT;
}
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, dstStage, 0, 1, &mb, 0, nullptr, 0, nullptr);
}
}
}
void destroy(VkDevice device)
{
vkDestroyPipelineLayout(device, layout, nullptr);
for(uint32_t i = 0; i < pipelineCount; i++)
{
vkDestroyPipeline(device, pipelines[i], nullptr);
}
vkDestroyDescriptorSetLayout(device, dsetLayout, nullptr);
bufferInfos.clear();
accelInfos.clear();
accel.clear();
sampledImageInfos.clear();
writes.clear();
bindings.clear();
}
};
} // namespace nvvk

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,522 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef NV_VK_DEVICEINSTANCE_INCLUDED
#define NV_VK_DEVICEINSTANCE_INCLUDED
#include <string> // std::string
#include <string.h> // memcpy
#include <unordered_set>
#include <vector>
#include <functional>
#include <vulkan/vulkan_core.h>
#include "nsight_aftermath_vk.hpp"
static_assert(VK_HEADER_VERSION >= 261, "Vulkan SDK version needs to be 1.3.261.0 or greater");
namespace nvvk {
/** @DOC_START
To run a Vulkan application, you need to create the Vulkan instance and device.
This is done using the `nvvk::Context`, which wraps the creation of `VkInstance`
and `VkDevice`.
First, any application needs to specify how instance and device should be created:
Version, layers, instance and device extensions influence the features available.
This is done through a temporary, intermediate class that gathers
all the required conditions for the device creation.
@DOC_END */
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# struct ContextCreateInfo
This structure allows the application to specify a set of features
that are expected for the creation of
- VkInstance
- VkDevice
It is consumed by the `nvvk::Context::init` function.
Example on how to populate information in it :
```cpp
nvvk::ContextCreateInfo ctxInfo;
ctxInfo.setVersion(1, 2);
ctxInfo.addInstanceExtension(VK_KHR_SURFACE_EXTENSION_NAME, false);
ctxInfo.addInstanceExtension(VK_KHR_WIN32_SURFACE_EXTENSION_NAME, false);
ctxInfo.addDeviceExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, false);
// adding an extension with a feature struct:
//
VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR pipePropFeatures = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR};
// Be aware of the lifetime of the pointer of the feature struct.
// ctxInfo stores the pointer directly and context init functions use it for read & write access.
ctxInfo.addDeviceExtension(VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME, true, &pipePropFeatures);
// disabling a feature:
//
// This callback is called after the feature structs were filled with physical device information
// and prior logical device creation.
// The callback iterates over all feature structs, including those from
// the vulkan versions.
ctxInfo.fnDisableFeatures = [](VkStructureType sType, void *pFeatureStruct)
{
switch(sType){
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES:
{
auto* features11 = reinterpret_cast<VkPhysicalDeviceVulkan11Features*>(pFeatureStruct);
// at this point the struct is populated with what the device supports
// and therefore it is only legal to disable features, not enable them.
// let's say we wanted to disable multiview
features11->multiview = VK_FALSE;
}
break;
default:
break;
}
};
```
then you are ready to create and initialize `nvvk::Context`
> Note: In debug builds, the extension `VK_EXT_DEBUG_UTILS_EXTENSION_NAME` and the layer `VK_LAYER_KHRONOS_validation` are added to help find issues early.
@DOC_END */
static const VkDeviceDiagnosticsConfigFlagsNV defaultAftermathFlags =
(VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_RESOURCE_TRACKING_BIT_NV // Additional information about the resource related to a GPU virtual address
| VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_AUTOMATIC_CHECKPOINTS_BIT_NV // Automatic checkpoints for all draw calls (ADD OVERHEAD)
| VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_DEBUG_INFO_BIT_NV); // instructs the shader compiler to generate debug information (ADD OVERHEAD)
struct ContextCreateInfo
{
// aftermathFlags != 0 will enable GPU crash dumps when Aftermath is available via SUPPORT_AFTERMATH
// No-op when Aftermath is not available.
ContextCreateInfo(bool bUseValidation = true);
void setVersion(uint32_t major, uint32_t minor);
void addInstanceExtension(const char* name, bool optional = false);
void addInstanceLayer(const char* name, bool optional = false);
// Add an extension to be enabled at context creation time. If 'optional' is
// false, context creation will fail if the extension is not supported by the
// device. If the extension requires a feature struct, pass the initialized
// struct to 'pFeatureStruct'. If 'version' = 0: don't care, otherwise check
// against equality (useful for provisional exts)
//
// IMPORTANT: The 'pFeatureStruct' pointer will be stored and the object will
// later be written to! Make sure the pointer is still valid when
// Context::Init() gets called with the ContextCreateInfo object. All
// pFeatureStruct objects will be chained together and filled out with the
// actual device capabilities during Context::Init().
void addDeviceExtension(const char* name, bool optional = false, void* pFeatureStruct = nullptr, uint32_t version = 0);
void removeInstanceExtension(const char* name);
void removeInstanceLayer(const char* name);
void removeDeviceExtension(const char* name);
// By default the constructor requests three queues;
// if you want more or different setups, manipulate the requestedQueues vector
// or use this function.
void addRequestedQueue(VkQueueFlags flags, uint32_t count = 1, float priority = 1.0f);
// this callback is run after extension and version related feature structs were queried for their support
// from the physical device and prior using them for device creation. It allows custom logic for disabling
// certain features.
// Be aware that enabling a feature is not legal within this function, only disabling.
std::function<void(VkStructureType sType, void* pFeatureStruct)> fnDisableFeatures = nullptr;
// Configure additional device creation with these variables and functions
// use device groups
bool useDeviceGroups = false;
// which compatible device or device group to pick
// only used by All-in-one Context::init(...)
uint32_t compatibleDeviceIndex = 0;
// instance properties
std::string appEngine = "nvpro-sample";
std::string appTitle = "nvpro-sample";
// may impact performance, hence disabled by default
bool disableRobustBufferAccess = true;
// Information printed at Context::init time
bool verboseCompatibleDevices = true;
bool verboseUsed = true; // Print what is used
bool verboseAvailable = // Print what is available
#ifndef NDEBUG
true;
#else
false;
#endif
// Will enable GPU crash dumps when Aftermath is available.
// No-op when Aftermath has not been made available via SUPPORT_AFTERMATH in CMakeLists.txt
bool enableAftermath = true;
VkDeviceDiagnosticsConfigFlagsNV aftermathFlags = defaultAftermathFlags;
struct Entry
{
Entry(const char* entryName, bool isOptional = false, void* pointerFeatureStruct = nullptr, uint32_t checkVersion = 0)
: name(entryName)
, optional(isOptional)
, pFeatureStruct(pointerFeatureStruct)
, version(checkVersion)
{
}
std::string name;
bool optional{false};
void* pFeatureStruct{nullptr};
uint32_t version{0};
};
uint32_t apiMajor{1};
uint32_t apiMinor{1};
using EntryArray = std::vector<Entry>;
EntryArray instanceLayers;
EntryArray instanceExtensions;
EntryArray deviceExtensions;
void* deviceCreateInfoExt{nullptr};
void* instanceCreateInfoExt{nullptr};
struct QueueSetup
{
VkQueueFlags requiredFlags = 0;
uint32_t count = 0;
float priority = 1.0;
};
using QueueArray = std::vector<QueueSetup>;
// this array defines how many queues are required for the provided queue flags
// reset / add new entries if changes are desired
//
// ContextCreateInfo constructor adds 1 queue per default queue flag below
QueueArray requestedQueues;
// leave 0 and no default queue will be created
VkQueueFlags defaultQueueGCT = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
VkQueueFlags defaultQueueT = VK_QUEUE_TRANSFER_BIT;
VkQueueFlags defaultQueueC = VK_QUEUE_COMPUTE_BIT;
float defaultPriorityGCT = 1.0f;
float defaultPriorityT = 1.0f;
float defaultPriorityC = 1.0f;
};
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::Context
The nvvk::Context class helps creating the Vulkan instance and choosing the logical device that supports the mandatory extensions. First fill the `ContextCreateInfo` structure, then call:
```cpp
// Creating the Vulkan instance and device
nvvk::ContextCreateInfo ctxInfo;
... see above ...
nvvk::Context vkctx;
vkctx.init(ctxInfo);
// after init the ctxInfo is no longer needed
```
At this point, the class will have created the `VkInstance` and `VkDevice` according to the information passed. It also keeps track of, or has queried, the following:
* Physical device information that you can later query: `PhysicalDeviceInfo`, in which many `VkPhysicalDevice...` structures are stored
* `VkInstance` : the one instance being used for the program
* `VkPhysicalDevice` : physical device(s) used for the logical device creation. In case of more than one physical device, we have a std::vector for this purpose...
* `VkDevice` : the logical device instantiated
* `VkQueue` : By default, 3 queues are created, one per family: Graphics-Compute-Transfer, Compute and Transfer.
Any additional queue needs to be requested with `ContextCreateInfo::addRequestedQueue()`. This only records the best suitable queues,
it does not create them. To create the additional queues,
`Context::createQueue()` **must be called after** creating the Vulkan context (see the sketch after this list).
<br/>The following queues are always created and can be directly accessed without calling createQueue:
* `Queue m_queueGCT` : Graphics/Compute/Transfer Queue + family index
* `Queue m_queueT` : async Transfer Queue + family index
* `Queue m_queueC` : async Compute Queue + family index
* keeps track of which extensions are finally available
* implicitly hooks up the debug callback
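A minimal sketch of requesting and then creating an extra async compute queue (variable names are illustrative):
```cpp
nvvk::ContextCreateInfo ctxInfo;
ctxInfo.addRequestedQueue(VK_QUEUE_COMPUTE_BIT, 1, 0.5f);  // record the request before init
nvvk::Context vkctx;
vkctx.init(ctxInfo);
// additional queues can only be created after init
nvvk::Context::Queue extraCompute = vkctx.createQueue(VK_QUEUE_COMPUTE_BIT, "extraCompute", 0.5f);
```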
## Choosing the device
When there are multiple devices, the `init` method picks the first compatible device available, but it is also possible to choose another one.
```cpp
vkctx.initInstance(deviceInfo);
// Find all compatible devices
auto compatibleDevices = vkctx.getCompatibleDevices(deviceInfo);
assert(!compatibleDevices.empty());
// Use first compatible device
vkctx.initDevice(compatibleDevices[0], deviceInfo);
```
## Multi-GPU
When multiple graphics cards should be used as a single device, `ContextCreateInfo::useDeviceGroups` needs to be set to `true`.
The above methods will then transparently create the `VkDevice` using `VkDeviceGroupDeviceCreateInfo`.
This is especially useful for NVLink-connected cards.
@DOC_END */
class Context
{
public:
Context(Context const&) = delete;
Context& operator=(Context const&) = delete;
Context() = default;
// Vulkan == 1.1 used individual structs
// Vulkan >= 1.2 has per-version structs
struct Features11Old
{
VkPhysicalDeviceMultiviewFeatures multiview{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES};
VkPhysicalDevice16BitStorageFeatures t16BitStorage{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES};
VkPhysicalDeviceSamplerYcbcrConversionFeatures samplerYcbcrConversion{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES};
VkPhysicalDeviceProtectedMemoryFeatures protectedMemory{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES};
VkPhysicalDeviceShaderDrawParameterFeatures drawParameters{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES};
VkPhysicalDeviceVariablePointerFeatures variablePointers{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES};
Features11Old()
{
multiview.pNext = &t16BitStorage;
t16BitStorage.pNext = &samplerYcbcrConversion;
samplerYcbcrConversion.pNext = &protectedMemory;
protectedMemory.pNext = &drawParameters;
drawParameters.pNext = &variablePointers;
variablePointers.pNext = nullptr;
}
void read(const VkPhysicalDeviceVulkan11Features& features11)
{
multiview.multiview = features11.multiview;
multiview.multiviewGeometryShader = features11.multiviewGeometryShader;
multiview.multiviewTessellationShader = features11.multiviewTessellationShader;
t16BitStorage.storageBuffer16BitAccess = features11.storageBuffer16BitAccess;
t16BitStorage.storageInputOutput16 = features11.storageInputOutput16;
t16BitStorage.storagePushConstant16 = features11.storagePushConstant16;
t16BitStorage.uniformAndStorageBuffer16BitAccess = features11.uniformAndStorageBuffer16BitAccess;
samplerYcbcrConversion.samplerYcbcrConversion = features11.samplerYcbcrConversion;
protectedMemory.protectedMemory = features11.protectedMemory;
drawParameters.shaderDrawParameters = features11.shaderDrawParameters;
variablePointers.variablePointers = features11.variablePointers;
variablePointers.variablePointersStorageBuffer = features11.variablePointersStorageBuffer;
}
void write(VkPhysicalDeviceVulkan11Features& features11)
{
features11.multiview = multiview.multiview;
features11.multiviewGeometryShader = multiview.multiviewGeometryShader;
features11.multiviewTessellationShader = multiview.multiviewTessellationShader;
features11.storageBuffer16BitAccess = t16BitStorage.storageBuffer16BitAccess;
features11.storageInputOutput16 = t16BitStorage.storageInputOutput16;
features11.storagePushConstant16 = t16BitStorage.storagePushConstant16;
features11.uniformAndStorageBuffer16BitAccess = t16BitStorage.uniformAndStorageBuffer16BitAccess;
features11.samplerYcbcrConversion = samplerYcbcrConversion.samplerYcbcrConversion;
features11.protectedMemory = protectedMemory.protectedMemory;
features11.shaderDrawParameters = drawParameters.shaderDrawParameters;
features11.variablePointers = variablePointers.variablePointers;
features11.variablePointersStorageBuffer = variablePointers.variablePointersStorageBuffer;
}
};
struct Properties11Old
{
VkPhysicalDeviceMaintenance3Properties maintenance3{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES};
VkPhysicalDeviceIDProperties deviceID{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES};
VkPhysicalDeviceMultiviewProperties multiview{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES};
VkPhysicalDeviceProtectedMemoryProperties protectedMemory{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES};
VkPhysicalDevicePointClippingProperties pointClipping{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES};
VkPhysicalDeviceSubgroupProperties subgroup{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES};
Properties11Old()
{
maintenance3.pNext = &deviceID;
deviceID.pNext = &multiview;
multiview.pNext = &protectedMemory;
protectedMemory.pNext = &pointClipping;
pointClipping.pNext = &subgroup;
subgroup.pNext = nullptr;
}
void write(VkPhysicalDeviceVulkan11Properties& properties11)
{
memcpy(properties11.deviceLUID, deviceID.deviceLUID, sizeof(properties11.deviceLUID));
memcpy(properties11.deviceUUID, deviceID.deviceUUID, sizeof(properties11.deviceUUID));
memcpy(properties11.driverUUID, deviceID.driverUUID, sizeof(properties11.driverUUID));
properties11.deviceLUIDValid = deviceID.deviceLUIDValid;
properties11.deviceNodeMask = deviceID.deviceNodeMask;
properties11.subgroupSize = subgroup.subgroupSize;
properties11.subgroupSupportedStages = subgroup.supportedStages;
properties11.subgroupSupportedOperations = subgroup.supportedOperations;
properties11.subgroupQuadOperationsInAllStages = subgroup.quadOperationsInAllStages;
properties11.pointClippingBehavior = pointClipping.pointClippingBehavior;
properties11.maxMultiviewViewCount = multiview.maxMultiviewViewCount;
properties11.maxMultiviewInstanceIndex = multiview.maxMultiviewInstanceIndex;
properties11.protectedNoFault = protectedMemory.protectedNoFault;
properties11.maxPerSetDescriptors = maintenance3.maxPerSetDescriptors;
properties11.maxMemoryAllocationSize = maintenance3.maxMemoryAllocationSize;
}
};
// This struct holds all core feature information for a physical device
struct PhysicalDeviceInfo
{
VkPhysicalDeviceMemoryProperties memoryProperties{};
std::vector<VkQueueFamilyProperties> queueProperties;
VkPhysicalDeviceFeatures features10{};
VkPhysicalDeviceVulkan11Features features11{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES};
VkPhysicalDeviceVulkan12Features features12{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES};
VkPhysicalDeviceVulkan13Features features13{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES};
VkPhysicalDeviceProperties properties10{};
VkPhysicalDeviceVulkan11Properties properties11{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES};
VkPhysicalDeviceVulkan12Properties properties12{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES};
VkPhysicalDeviceVulkan13Properties properties13{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES};
};
struct Queue
{
VkQueue queue = VK_NULL_HANDLE;
uint32_t familyIndex = ~0U;
uint32_t queueIndex = ~0U;
float priority = 1.0f;
operator VkQueue() const { return queue; }
operator uint32_t() const { return familyIndex; }
};
VkInstance m_instance{VK_NULL_HANDLE};
VkDevice m_device{VK_NULL_HANDLE};
VkPhysicalDevice m_physicalDevice{VK_NULL_HANDLE};
PhysicalDeviceInfo m_physicalInfo;
uint32_t m_apiMajor = 0;
uint32_t m_apiMinor = 0;
// following queues are automatically created if appropriate ContextCreateInfo.defaultQueue??? is set
// and ContextCreateInfo::requestedQueues contains a compatible config.
Queue m_queueGCT; // for Graphics/Compute/Transfer
Queue m_queueT; // for pure async Transfer Queue
Queue m_queueC; // for async Compute
// additional queues must be created once through this function
// returns new Queue and pops entry from available Queues that were requested via info.requestedQueues
Queue createQueue(VkQueueFlags requiredFlags, const std::string& debugName, float priority = 1.0f);
operator VkDevice() const { return m_device; }
// All-in-one instance and device creation
bool init(const ContextCreateInfo& info);
void deinit();
// Individual object creation
bool initInstance(const ContextCreateInfo& info);
// deviceIndex is an index either into getPhysicalDevices or getPhysicalDeviceGroups
// depending on info.useDeviceGroups
bool initDevice(uint32_t deviceIndex, const ContextCreateInfo& info);
// Helpers
std::vector<uint32_t> getCompatibleDevices(const ContextCreateInfo& info);
std::vector<VkPhysicalDevice> getPhysicalDevices();
std::vector<VkPhysicalDeviceGroupProperties> getPhysicalDeviceGroups();
std::vector<VkExtensionProperties> getInstanceExtensions();
std::vector<VkLayerProperties> getInstanceLayers();
std::vector<VkExtensionProperties> getDeviceExtensions(VkPhysicalDevice physicalDevice);
void printPhysicalDeviceProperties(const VkPhysicalDeviceProperties& properties);
bool hasMandatoryExtensions(VkPhysicalDevice physicalDevice, const ContextCreateInfo& info, bool bVerbose);
// Returns whether the GCT queue supports present
bool setGCTQueueWithPresent(VkSurfaceKHR surface);
// true if the context has the optional extension activated
bool hasDeviceExtension(const char* name) const;
bool hasInstanceExtension(const char* name) const;
void ignoreDebugMessage(int32_t msgID) { m_dbgIgnoreMessages.insert(msgID); }
void setDebugSeverityFilterMask(int32_t severity) { m_dbgSeverity = severity; }
private:
struct QueueScore
{
uint32_t score = 0; // the lower the score, the more 'specialized' it is
uint32_t familyIndex = ~0U;
uint32_t queueIndex = ~0U;
float priority = 1.0f;
};
using QueueScoreList = std::vector<QueueScore>;
// This list is created from ContextCreateInfo::requestedQueues.
// It contains the most specialized queues for compatible flags first.
// Each Context::createQueue call finds a compatible item in this list
// and removes it upon success.
QueueScoreList m_availableQueues;
// optional maxFamilyCounts overrides the device's max queue count per queue family
// optional priorities overrides default priority 1.0 and must be sized physical device's queue family count * maxQueueCount
void initQueueList(QueueScoreList& list, const uint32_t* maxFamilyCounts, const float* priorities, uint32_t maxQueueCount) const;
QueueScore removeQueueListItem(QueueScoreList& list, VkQueueFlags flags, float priority) const;
static VKAPI_ATTR VkBool32 VKAPI_CALL debugMessengerCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
VkDebugUtilsMessageTypeFlagsEXT messageType,
const VkDebugUtilsMessengerCallbackDataEXT* callbackData,
void* userData);
std::vector<std::string> m_usedInstanceLayers;
std::vector<std::string> m_usedInstanceExtensions;
std::vector<std::string> m_usedDeviceExtensions;
// New Debug system
PFN_vkCreateDebugUtilsMessengerEXT m_createDebugUtilsMessengerEXT = nullptr;
PFN_vkDestroyDebugUtilsMessengerEXT m_destroyDebugUtilsMessengerEXT = nullptr;
VkDebugUtilsMessengerEXT m_dbgMessenger = nullptr;
std::unordered_set<int32_t> m_dbgIgnoreMessages;
uint32_t m_dbgSeverity{VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT};
// nSight Aftermath
GpuCrashTracker m_gpuCrashTracker;
void initDebugUtils();
bool hasDebugUtils() const { return m_createDebugUtilsMessengerEXT != nullptr; }
VkResult fillFilteredNameArray(std::vector<std::string>& used,
const std::vector<VkLayerProperties>& properties,
const ContextCreateInfo::EntryArray& requested);
VkResult fillFilteredNameArray(std::vector<std::string>& used,
const std::vector<VkExtensionProperties>& properties,
const ContextCreateInfo::EntryArray& requested,
std::vector<void*>& featureStructs);
std::vector<std::string> checkEntryArray(const std::vector<VkExtensionProperties>& properties,
const ContextCreateInfo::EntryArray& requested);
static void initPhysicalInfo(PhysicalDeviceInfo& info, VkPhysicalDevice physicalDevice, uint32_t versionMajor, uint32_t versionMinor);
};
} // namespace nvvk
#endif

View file

@ -0,0 +1,27 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "debug_util_vk.hpp"
namespace nvvk {
bool DebugUtil::s_enabled = false;
} // namespace nvvk

View file

@ -0,0 +1,213 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
/// # class DebugUtil
/// This is a companion utility to add debug information to an application.
/// See https://vulkan.lunarg.com/doc/sdk/1.1.114.0/windows/chunked_spec/chap39.html
/// - User-defined names for objects
/// - Logically annotate regions of command buffers
/// - Scoped command buffer labels to make things simpler
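///
/// A minimal usage sketch (`device`, `cmd` and `myBuffer` are assumed to exist in the application):
///   nvvk::DebugUtil::setEnabled(true);
///   nvvk::DebugUtil debug(device);
///   debug.setObjectName(myBuffer, "scene vertices");
///   auto scope = debug.scopeLabel(cmd, "draw scene");  // label ends when `scope` is destroyed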
#pragma once
#include <algorithm>
#include <string.h>
#include <string>
#include <vulkan/vulkan_core.h>
#include "nvh/nvprint.hpp"
namespace nvvk {
class DebugUtil
{
public:
DebugUtil() = default;
DebugUtil(VkDevice device)
: m_device(device)
{
}
static void setEnabled(bool state) { s_enabled = state; }
void setup(VkDevice device) { m_device = device; }
void setObjectName(const uint64_t object, const std::string& name, VkObjectType t)
{
if(s_enabled)
{
VkDebugUtilsObjectNameInfoEXT s{VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, nullptr, t, object, name.c_str()};
vkSetDebugUtilsObjectNameEXT(m_device, &s);
}
}
// clang-format off
void setObjectName(VkBuffer object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_BUFFER); }
void setObjectName(VkBufferView object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_BUFFER_VIEW); }
void setObjectName(VkCommandBuffer object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_COMMAND_BUFFER ); }
void setObjectName(VkCommandPool object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_COMMAND_POOL ); }
void setObjectName(VkDescriptorPool object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_DESCRIPTOR_POOL); }
void setObjectName(VkDescriptorSet object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_DESCRIPTOR_SET); }
void setObjectName(VkDescriptorSetLayout object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT); }
void setObjectName(VkDevice object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_DEVICE); }
void setObjectName(VkDeviceMemory object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_DEVICE_MEMORY); }
void setObjectName(VkFramebuffer object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_FRAMEBUFFER); }
void setObjectName(VkImage object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_IMAGE); }
void setObjectName(VkImageView object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_IMAGE_VIEW); }
void setObjectName(VkPipeline object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_PIPELINE); }
void setObjectName(VkPipelineLayout object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_PIPELINE_LAYOUT); }
void setObjectName(VkQueryPool object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_QUERY_POOL); }
void setObjectName(VkQueue object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_QUEUE); }
void setObjectName(VkRenderPass object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_RENDER_PASS); }
void setObjectName(VkSampler object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_SAMPLER); }
void setObjectName(VkSemaphore object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_SEMAPHORE); }
void setObjectName(VkShaderModule object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_SHADER_MODULE); }
void setObjectName(VkSwapchainKHR object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_SWAPCHAIN_KHR); }
#if VK_NV_ray_tracing
void setObjectName(VkAccelerationStructureNV object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV); }
#endif
#if VK_KHR_acceleration_structure
void setObjectName(VkAccelerationStructureKHR object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR); }
#endif
// clang-format on
//
//---------------------------------------------------------------------------
//
void beginLabel(VkCommandBuffer cmdBuf, const std::string& label)
{
if(s_enabled)
{
VkDebugUtilsLabelEXT s{VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, nullptr, label.c_str(), {1.0f, 1.0f, 1.0f, 1.0f}};
vkCmdBeginDebugUtilsLabelEXT(cmdBuf, &s);
}
}
void endLabel(VkCommandBuffer cmdBuf)
{
if(s_enabled)
{
vkCmdEndDebugUtilsLabelEXT(cmdBuf);
}
}
void insertLabel(VkCommandBuffer cmdBuf, const std::string& label)
{
if(s_enabled)
{
VkDebugUtilsLabelEXT s{VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, nullptr, label.c_str(), {1.0f, 1.0f, 1.0f, 1.0f}};
vkCmdInsertDebugUtilsLabelEXT(cmdBuf, &s);
}
}
//
// Begin and End Command Label MUST be balanced, this helps as it will always close the opened label
//
struct ScopedCmdLabel
{
ScopedCmdLabel(VkCommandBuffer cmdBuf, const std::string& label)
: m_cmdBuf(cmdBuf)
{
if(s_enabled)
{
VkDebugUtilsLabelEXT s{VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, nullptr, label.c_str(), {1.0f, 1.0f, 1.0f, 1.0f}};
vkCmdBeginDebugUtilsLabelEXT(cmdBuf, &s);
}
}
~ScopedCmdLabel()
{
if(s_enabled)
{
vkCmdEndDebugUtilsLabelEXT(m_cmdBuf);
}
}
void setLabel(const std::string& label)
{
if(s_enabled)
{
VkDebugUtilsLabelEXT s{VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, nullptr, label.c_str(), {1.0f, 1.0f, 1.0f, 1.0f}};
vkCmdInsertDebugUtilsLabelEXT(m_cmdBuf, &s);
}
}
private:
VkCommandBuffer m_cmdBuf;
};
ScopedCmdLabel scopeLabel(VkCommandBuffer cmdBuf, const std::string& label) { return ScopedCmdLabel(cmdBuf, label); }
private:
VkDevice m_device{VK_NULL_HANDLE};
static bool s_enabled;
};
//////////////////////////////////////////////////////////////////////////
/// Macros to help automatically naming variables.
/// Names will be in the form of MyClass::m_myBuffer (in example.cpp:123)
///
/// To use:
/// - The debug utility member MUST be named 'm_debug'
/// - Individual name: NAME_VK(m_myBuffer.buffer) or with an index NAME_IDX_VK(m_texture.image, i)
/// - Create/associate and name, instead of
/// pipeline = createPipeline();
/// NAME_VK(pipeline)
/// call
/// CREATE_NAMED_VK(pipeline , createPipeline());
/// - Scope functions can also be automatically named, at the beginning of a function
/// call LABEL_SCOPE_VK( commandBuffer )
///
///
// clang-format off
inline const char* fileNameSplitter(const char* n) { return std::max<const char*>(n, std::max(strrchr(n, '\\') + 1, strrchr(n, '/') + 1)); }
inline const char* upToLastSpace(const char* n) { return std::max<const char*>(n, strrchr(n, ' ') + 1); }
#define CLASS_NAME nvvk::upToLastSpace(typeid(*this).name())
#define NAME_FILE_LOCATION std::string(" in ") + std::string(nvvk::fileNameSplitter(__FILE__)) + std::string(":" S__LINE__ ")")
// Individual naming
#define NAME_VK(_x) m_debug.setObjectName(_x, (std::string(CLASS_NAME) + std::string("::") + std::string(#_x " (") + NAME_FILE_LOCATION).c_str())
#define NAME2_VK(_x, _s) m_debug.setObjectName(_x, (std::string(_s) + std::string(" (" #_x) + NAME_FILE_LOCATION).c_str())
#define NAME_IDX_VK(_x, _i) m_debug.setObjectName(_x, \
(std::string(CLASS_NAME) + std::string("::") + std::string(#_x " (" #_i "=") + std::to_string(_i) + std::string(", ") + NAME_FILE_LOCATION).c_str())
// Name in creation
#define CREATE_NAMED_VK(_x, _c) \
_x = _c; \
NAME_VK(_x);
#define CREATE_NAMED_IDX_VK(_x, _i, _c) \
_x = _c; \
NAME_IDX_VK(_x, _i);
// Running scope
#define LABEL_SCOPE_VK(_cmd) \
auto _scopeLabel = m_debug.scopeLabel(_cmd, std::string(CLASS_NAME) + std::string("::") + std::string(__func__) + std::string(", in ") \
+ std::string(nvvk::fileNameSplitter(__FILE__)) + std::string(":" S__LINE__ ")"))
// Variants of the above macros for when the debug member is not named 'm_debug' (Ex: m_myDbg->DBG_NAME(vulkan_obj); )
#define DBG_NAME(_x) \
setObjectName(_x, (std::string(CLASS_NAME) + std::string("::") + std::string(#_x " (") + NAME_FILE_LOCATION).c_str())
#define DBG_NAME_IDX(_x, _i) \
setObjectName(_x, (std::string(CLASS_NAME) + std::string("::") + std::string(#_x " (" #_i "=") + std::to_string(_i) \
+ std::string(", ") + NAME_FILE_LOCATION) \
.c_str())
#define DBG_SCOPE(_cmd) \
scopeLabel(_cmd, std::string(CLASS_NAME) + std::string("::") + std::string(__func__) + std::string(", in ") \
+ std::string(nvvk::fileNameSplitter(__FILE__)) + std::string(":" S__LINE__ ")"))
// clang-format on
} // namespace nvvk

View file

@ -0,0 +1,467 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "descriptorsets_vk.hpp"
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
void DescriptorSetContainer::init(VkDevice device)
{
assert(m_device == VK_NULL_HANDLE);
m_device = device;
}
void DescriptorSetContainer::setBindings(const std::vector<VkDescriptorSetLayoutBinding>& bindings)
{
m_bindings.setBindings(bindings);
}
void DescriptorSetContainer::addBinding(uint32_t binding,
VkDescriptorType descriptorType,
uint32_t descriptorCount,
VkShaderStageFlags stageFlags,
const VkSampler* pImmutableSamplers /*= nullptr*/)
{
m_bindings.addBinding(binding, descriptorType, descriptorCount, stageFlags, pImmutableSamplers);
}
void DescriptorSetContainer::addBinding(VkDescriptorSetLayoutBinding binding)
{
m_bindings.addBinding(binding);
}
void DescriptorSetContainer::setBindingFlags(uint32_t binding, VkDescriptorBindingFlags bindingFlag)
{
m_bindings.setBindingFlags(binding, bindingFlag);
}
VkDescriptorSetLayout DescriptorSetContainer::initLayout(VkDescriptorSetLayoutCreateFlags flags /*= 0*/, DescriptorSupport supportFlags)
{
assert(m_layout == VK_NULL_HANDLE);
m_layout = m_bindings.createLayout(m_device, flags, supportFlags);
return m_layout;
}
VkDescriptorPool DescriptorSetContainer::initPool(uint32_t numAllocatedSets)
{
assert(m_pool == VK_NULL_HANDLE);
assert(m_layout);
m_pool = m_bindings.createPool(m_device, numAllocatedSets);
allocateDescriptorSets(m_device, m_pool, m_layout, numAllocatedSets, m_descriptorSets);
return m_pool;
}
VkPipelineLayout DescriptorSetContainer::initPipeLayout(uint32_t numRanges /*= 0*/,
const VkPushConstantRange* ranges /*= nullptr*/,
VkPipelineLayoutCreateFlags flags /*= 0*/)
{
assert(m_pipelineLayout == VK_NULL_HANDLE);
assert(m_layout);
VkResult result;
VkPipelineLayoutCreateInfo layoutCreateInfo = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
layoutCreateInfo.setLayoutCount = 1;
layoutCreateInfo.pSetLayouts = &m_layout;
layoutCreateInfo.pushConstantRangeCount = numRanges;
layoutCreateInfo.pPushConstantRanges = ranges;
layoutCreateInfo.flags = flags;
result = vkCreatePipelineLayout(m_device, &layoutCreateInfo, nullptr, &m_pipelineLayout);
assert(result == VK_SUCCESS);
return m_pipelineLayout;
}
void DescriptorSetContainer::deinitPool()
{
if(!m_descriptorSets.empty())
{
m_descriptorSets.clear();
}
if(m_pool)
{
vkDestroyDescriptorPool(m_device, m_pool, nullptr);
m_pool = VK_NULL_HANDLE;
}
}
void DescriptorSetContainer::deinitLayout()
{
if(m_pipelineLayout)
{
vkDestroyPipelineLayout(m_device, m_pipelineLayout, nullptr);
m_pipelineLayout = VK_NULL_HANDLE;
}
if(m_layout)
{
vkDestroyDescriptorSetLayout(m_device, m_layout, nullptr);
m_layout = VK_NULL_HANDLE;
}
}
void DescriptorSetContainer::deinit()
{
deinitLayout();
deinitPool();
m_bindings.clear();
m_device = VK_NULL_HANDLE;
}
VkDescriptorSet DescriptorSetContainer::getSet(uint32_t dstSetIdx /*= 0*/) const
{
if(m_descriptorSets.empty())
{
return {};
}
return m_descriptorSets[dstSetIdx];
}
//////////////////////////////////////////////////////////////////////////
VkDescriptorSetLayout DescriptorSetBindings::createLayout(VkDevice device, VkDescriptorSetLayoutCreateFlags flags, DescriptorSupport supportFlags)
{
VkResult result;
VkDescriptorSetLayoutBindingFlagsCreateInfo bindingsInfo = {
isSet(supportFlags, DescriptorSupport::CORE_1_2) ? VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO :
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT};
// Pad binding flags to match bindings if any exist
if(!m_bindingFlags.empty() && m_bindingFlags.size() <= m_bindings.size())
{
m_bindingFlags.resize(m_bindings.size(), 0);
}
bindingsInfo.bindingCount = uint32_t(m_bindingFlags.size());
bindingsInfo.pBindingFlags = m_bindingFlags.data();
VkDescriptorSetLayoutCreateInfo createInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
createInfo.bindingCount = uint32_t(m_bindings.size());
createInfo.pBindings = m_bindings.data();
createInfo.flags = flags;
createInfo.pNext =
m_bindingFlags.empty() && !(isAnySet(supportFlags, (DescriptorSupport::CORE_1_2 | DescriptorSupport::INDEXING_EXT))) ?
nullptr :
&bindingsInfo;
VkDescriptorSetLayout descriptorSetLayout;
result = vkCreateDescriptorSetLayout(device, &createInfo, nullptr, &descriptorSetLayout);
assert(result == VK_SUCCESS);
return descriptorSetLayout;
}
void DescriptorSetBindings::addRequiredPoolSizes(std::vector<VkDescriptorPoolSize>& poolSizes, uint32_t numSets) const
{
for(auto it = m_bindings.cbegin(); it != m_bindings.cend(); ++it)
{
// Bindings can have a zero descriptor count, used for the layout, but don't reserve storage for them.
if(it->descriptorCount == 0)
{
continue;
}
bool found = false;
for(auto itpool = poolSizes.begin(); itpool != poolSizes.end(); ++itpool)
{
if(itpool->type == it->descriptorType)
{
itpool->descriptorCount += it->descriptorCount * numSets;
found = true;
break;
}
}
if(!found)
{
VkDescriptorPoolSize poolSize{};
poolSize.type = it->descriptorType;
poolSize.descriptorCount = it->descriptorCount * numSets;
poolSizes.push_back(poolSize);
}
}
}
VkDescriptorPool DescriptorSetBindings::createPool(VkDevice device, uint32_t maxSets /*= 1*/, VkDescriptorPoolCreateFlags flags /*= 0*/) const
{
VkResult result;
// setup poolsizes for each descriptorType
std::vector<VkDescriptorPoolSize> poolSizes;
addRequiredPoolSizes(poolSizes, maxSets);
VkDescriptorPool descrPool;
VkDescriptorPoolCreateInfo descrPoolInfo = {};
descrPoolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
descrPoolInfo.pNext = nullptr;
descrPoolInfo.maxSets = maxSets;
descrPoolInfo.poolSizeCount = uint32_t(poolSizes.size());
descrPoolInfo.pPoolSizes = poolSizes.data();
descrPoolInfo.flags = flags;
// scene pool
result = vkCreateDescriptorPool(device, &descrPoolInfo, nullptr, &descrPool);
assert(result == VK_SUCCESS);
return descrPool;
}
void DescriptorSetBindings::setBindingFlags(uint32_t binding, VkDescriptorBindingFlags bindingFlag)
{
for(size_t i = 0; i < m_bindings.size(); i++)
{
if(m_bindings[i].binding == binding)
{
if(m_bindingFlags.size() <= m_bindings.size())
{
m_bindingFlags.resize(m_bindings.size(), 0);
}
m_bindingFlags[i] = bindingFlag;
return;
}
}
assert(0 && "binding not found");
}
VkDescriptorType DescriptorSetBindings::getType(uint32_t binding) const
{
for(size_t i = 0; i < m_bindings.size(); i++)
{
if(m_bindings[i].binding == binding)
{
return m_bindings[i].descriptorType;
}
}
assert(0 && "binding not found");
return VK_DESCRIPTOR_TYPE_MAX_ENUM;
}
uint32_t DescriptorSetBindings::getCount(uint32_t binding) const
{
for(size_t i = 0; i < m_bindings.size(); i++)
{
if(m_bindings[i].binding == binding)
{
return m_bindings[i].descriptorCount;
}
}
assert(0 && "binding not found");
return ~0;
}
VkWriteDescriptorSet DescriptorSetBindings::makeWrite(VkDescriptorSet dstSet, uint32_t dstBinding, uint32_t arrayElement) const
{
VkWriteDescriptorSet writeSet = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
writeSet.descriptorType = VK_DESCRIPTOR_TYPE_MAX_ENUM;
for(size_t i = 0; i < m_bindings.size(); i++)
{
if(m_bindings[i].binding == dstBinding)
{
writeSet.descriptorCount = 1;
writeSet.descriptorType = m_bindings[i].descriptorType;
writeSet.dstBinding = dstBinding;
writeSet.dstSet = dstSet;
writeSet.dstArrayElement = arrayElement;
return writeSet;
}
}
assert(0 && "binding not found");
return writeSet;
}
VkWriteDescriptorSet DescriptorSetBindings::makeWriteArray(VkDescriptorSet dstSet, uint32_t dstBinding) const
{
VkWriteDescriptorSet writeSet = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
writeSet.descriptorType = VK_DESCRIPTOR_TYPE_MAX_ENUM;
for(size_t i = 0; i < m_bindings.size(); i++)
{
if(m_bindings[i].binding == dstBinding)
{
writeSet.descriptorCount = m_bindings[i].descriptorCount;
writeSet.descriptorType = m_bindings[i].descriptorType;
writeSet.dstBinding = dstBinding;
writeSet.dstSet = dstSet;
writeSet.dstArrayElement = 0;
return writeSet;
}
}
assert(0 && "binding not found");
return writeSet;
}
VkWriteDescriptorSet DescriptorSetBindings::makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkDescriptorImageInfo* pImageInfo,
uint32_t arrayElement) const
{
VkWriteDescriptorSet writeSet = makeWrite(dstSet, dstBinding, arrayElement);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER || writeSet.descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER
|| writeSet.descriptorType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE || writeSet.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE
|| writeSet.descriptorType == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT);
writeSet.pImageInfo = pImageInfo;
return writeSet;
}
VkWriteDescriptorSet DescriptorSetBindings::makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkDescriptorBufferInfo* pBufferInfo,
uint32_t arrayElement) const
{
VkWriteDescriptorSet writeSet = makeWrite(dstSet, dstBinding, arrayElement);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER || writeSet.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC
|| writeSet.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER
|| writeSet.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
writeSet.pBufferInfo = pBufferInfo;
return writeSet;
}
VkWriteDescriptorSet DescriptorSetBindings::makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkBufferView* pTexelBufferView,
uint32_t arrayElement) const
{
VkWriteDescriptorSet writeSet = makeWrite(dstSet, dstBinding, arrayElement);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER
|| writeSet.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
writeSet.pTexelBufferView = pTexelBufferView;
return writeSet;
}
#if VK_NV_ray_tracing
VkWriteDescriptorSet DescriptorSetBindings::makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetAccelerationStructureNV* pAccel,
uint32_t arrayElement) const
{
VkWriteDescriptorSet writeSet = makeWrite(dstSet, dstBinding, arrayElement);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_NV);
writeSet.pNext = pAccel;
return writeSet;
}
#endif
#if VK_KHR_acceleration_structure
VkWriteDescriptorSet DescriptorSetBindings::makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetAccelerationStructureKHR* pAccel,
uint32_t arrayElement) const
{
VkWriteDescriptorSet writeSet = makeWrite(dstSet, dstBinding, arrayElement);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
writeSet.pNext = pAccel;
return writeSet;
}
#endif
#if VK_EXT_inline_uniform_block
VkWriteDescriptorSet DescriptorSetBindings::makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetInlineUniformBlockEXT* pInline,
uint32_t arrayElement) const
{
VkWriteDescriptorSet writeSet = makeWrite(dstSet, dstBinding, arrayElement);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT);
writeSet.pNext = pInline;
return writeSet;
}
#endif
VkWriteDescriptorSet DescriptorSetBindings::makeWriteArray(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkDescriptorImageInfo* pImageInfo) const
{
VkWriteDescriptorSet writeSet = makeWriteArray(dstSet, dstBinding);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER || writeSet.descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER
|| writeSet.descriptorType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE || writeSet.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE
|| writeSet.descriptorType == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT);
writeSet.pImageInfo = pImageInfo;
assert(writeSet.descriptorCount > 0);  // Can have zero descriptors in the descriptor set layout, but can't write zero items.
return writeSet;
}
VkWriteDescriptorSet DescriptorSetBindings::makeWriteArray(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkDescriptorBufferInfo* pBufferInfo) const
{
VkWriteDescriptorSet writeSet = makeWriteArray(dstSet, dstBinding);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER || writeSet.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC
|| writeSet.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER
|| writeSet.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
writeSet.pBufferInfo = pBufferInfo;
return writeSet;
}
VkWriteDescriptorSet DescriptorSetBindings::makeWriteArray(VkDescriptorSet dstSet, uint32_t dstBinding, const VkBufferView* pTexelBufferView) const
{
VkWriteDescriptorSet writeSet = makeWriteArray(dstSet, dstBinding);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
writeSet.pTexelBufferView = pTexelBufferView;
return writeSet;
}
#if VK_NV_ray_tracing
VkWriteDescriptorSet DescriptorSetBindings::makeWriteArray(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetAccelerationStructureNV* pAccel) const
{
VkWriteDescriptorSet writeSet = makeWriteArray(dstSet, dstBinding);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_NV);
writeSet.pNext = pAccel;
return writeSet;
}
#endif
#if VK_KHR_acceleration_structure
VkWriteDescriptorSet DescriptorSetBindings::makeWriteArray(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetAccelerationStructureKHR* pAccel) const
{
VkWriteDescriptorSet writeSet = makeWriteArray(dstSet, dstBinding);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
writeSet.pNext = pAccel;
return writeSet;
}
#endif
#if VK_EXT_inline_uniform_block
VkWriteDescriptorSet DescriptorSetBindings::makeWriteArray(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetInlineUniformBlockEXT* pInline) const
{
VkWriteDescriptorSet writeSet = makeWriteArray(dstSet, dstBinding);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT);
writeSet.pNext = pInline;
return writeSet;
}
#endif
} // namespace nvvk

View file

@ -0,0 +1,653 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <assert.h>
#include <platform.h>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace nvvk {
// utility for additional feature support
enum class DescriptorSupport : uint32_t
{
CORE_1_0 = 0, // VK Version 1.0
CORE_1_2 = 1, // VK Version 1.2 (adds descriptor_indexing)
INDEXING_EXT = 2, // VK_EXT_descriptor_indexing
};
using DescriptorSupport_t = std::underlying_type_t<DescriptorSupport>;
inline DescriptorSupport operator|(DescriptorSupport lhs, DescriptorSupport rhs)
{
return static_cast<DescriptorSupport>(static_cast<DescriptorSupport_t>(lhs) | static_cast<DescriptorSupport_t>(rhs));
}
inline DescriptorSupport operator&(DescriptorSupport lhs, DescriptorSupport rhs)
{
return static_cast<DescriptorSupport>(static_cast<DescriptorSupport_t>(lhs) & static_cast<DescriptorSupport_t>(rhs));
}
inline bool isSet(DescriptorSupport test, DescriptorSupport query)
{
return (test & query) == query;
}
inline bool isAnySet(DescriptorSupport test, DescriptorSupport query)
{
return (test & query) != DescriptorSupport::CORE_1_0;
}
/** @DOC_START
# functions in nvvk
- createDescriptorPool : wrappers for vkCreateDescriptorPool
- allocateDescriptorSet : allocates a single VkDescriptorSet
- allocateDescriptorSets : allocates multiple VkDescriptorSets
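A minimal usage sketch (a valid `device` and an existing `layout` are assumed; the pool sizes are illustrative):
```cpp
std::vector<VkDescriptorPoolSize> poolSizes = {
    {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 4},
    {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 8},
};
VkDescriptorPool pool = nvvk::createDescriptorPool(device, poolSizes, 4);

// one set, or several sets sharing the same layout
VkDescriptorSet              set = nvvk::allocateDescriptorSet(device, pool, layout);
std::vector<VkDescriptorSet> sets;
nvvk::allocateDescriptorSets(device, pool, layout, 3, sets);
```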
@DOC_END */
inline VkDescriptorPool createDescriptorPool(VkDevice device, size_t poolSizeCount, const VkDescriptorPoolSize* poolSizes, uint32_t maxSets)
{
VkResult result;
VkDescriptorPool descrPool;
VkDescriptorPoolCreateInfo descrPoolInfo = {};
descrPoolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
descrPoolInfo.pNext = nullptr;
descrPoolInfo.maxSets = maxSets;
descrPoolInfo.poolSizeCount = uint32_t(poolSizeCount);
descrPoolInfo.pPoolSizes = poolSizes;
// scene pool
result = vkCreateDescriptorPool(device, &descrPoolInfo, nullptr, &descrPool);
assert(result == VK_SUCCESS);
return descrPool;
}
inline VkDescriptorPool createDescriptorPool(VkDevice device, const std::vector<VkDescriptorPoolSize>& poolSizes, uint32_t maxSets)
{
return createDescriptorPool(device, poolSizes.size(), poolSizes.data(), maxSets);
}
inline VkDescriptorSet allocateDescriptorSet(VkDevice device, VkDescriptorPool pool, VkDescriptorSetLayout layout)
{
VkResult result;
VkDescriptorSetAllocateInfo allocInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
allocInfo.descriptorPool = pool;
allocInfo.descriptorSetCount = 1;
allocInfo.pSetLayouts = &layout;
VkDescriptorSet set;
result = vkAllocateDescriptorSets(device, &allocInfo, &set);
assert(result == VK_SUCCESS);
return set;
}
inline void allocateDescriptorSets(VkDevice device,
VkDescriptorPool pool,
VkDescriptorSetLayout layout,
uint32_t count,
std::vector<VkDescriptorSet>& sets)
{
sets.resize(count);
std::vector<VkDescriptorSetLayout> layouts(count, layout);
VkResult result;
VkDescriptorSetAllocateInfo allocInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
allocInfo.descriptorPool = pool;
allocInfo.descriptorSetCount = count;
allocInfo.pSetLayouts = layouts.data();
result = vkAllocateDescriptorSets(device, &allocInfo, sets.data());
assert(result == VK_SUCCESS);
}
/////////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::DescriptorSetBindings
nvvk::DescriptorSetBindings is a helper class that keeps a vector of `VkDescriptorSetLayoutBinding` for a single
`VkDescriptorSetLayout`. It provides helper functions to create the `VkDescriptorSetLayout` and a matching
`VkDescriptorPool` from this information, plus utilities to fill `VkWriteDescriptorSet` structures with the
binding information stored within the class.
As a convenience, when you make a VkWriteDescriptorSet you pass the binding slot rather than the
index of the binding's storage within this class. This costs a small linear search, but makes it
easy to change the content/order of bindings at creation time.
Example :
```cpp
DescriptorSetBindings binds;
binds.addBinding( VIEW_BINDING, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, VK_SHADER_STAGE_VERTEX_BIT);
binds.addBinding(XFORM_BINDING, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_VERTEX_BIT);
VkDescriptorSetLayout layout = binds.createLayout(device);
#if SINGLE_LAYOUT_POOL
// let's create a pool with 2 sets
VkDescriptorPool pool = binds.createPool(device, 2);
#else
// if you want to combine multiple layouts into a common pool
std::vector<VkDescriptorPoolSize> poolSizes;
bindsA.addRequiredPoolSizes(poolSizes, numSetsA);
bindsB.addRequiredPoolSizes(poolSizes, numSetsB);
VkDescriptorPool pool = nvvk::createDescriptorPool(device, poolSizes,
numSetsA + numSetsB);
#endif
// fill them
std::vector<VkWriteDescriptorSet> updates;
updates.push_back(binds.makeWrite(0, VIEW_BINDING, &view0BufferInfo));
updates.push_back(binds.makeWrite(1, VIEW_BINDING, &view1BufferInfo));
updates.push_back(binds.makeWrite(0, XFORM_BINDING, &xform0BufferInfo));
updates.push_back(binds.makeWrite(1, XFORM_BINDING, &xform1BufferInfo));
vkUpdateDescriptorSets(device, updates.size(), updates.data(), 0, nullptr);
```
@DOC_END */
class DescriptorSetBindings
{
public:
DescriptorSetBindings() = default;
DescriptorSetBindings(const std::vector<VkDescriptorSetLayoutBinding>& bindings)
: m_bindings(bindings)
{
}
// Add a binding to the descriptor set
void addBinding(uint32_t binding, // Slot to which the descriptor will be bound, corresponding to the layout
// binding index in the shader
VkDescriptorType type, // Type of the bound descriptor(s)
uint32_t count, // Number of descriptors
VkShaderStageFlags stageFlags, // Shader stages at which the bound resources will be available
const VkSampler* pImmutableSampler = nullptr // Corresponding sampler, in case of textures
)
{
m_bindings.push_back({binding, type, count, stageFlags, pImmutableSampler});
}
void addBinding(const VkDescriptorSetLayoutBinding& layoutBinding) { m_bindings.emplace_back(layoutBinding); }
void setBindings(const std::vector<VkDescriptorSetLayoutBinding>& bindings) { m_bindings = bindings; }
// requires DescriptorSupport::INDEXING_EXT or DescriptorSupport::CORE_1_2 on createLayout
void setBindingFlags(uint32_t binding, VkDescriptorBindingFlags bindingFlags);
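// e.g. a minimal sketch, assuming binding 2 is a variable-count array and the device supports Vulkan 1.2:
//   binds.setBindingFlags(2, VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT);
//   VkDescriptorSetLayout layout = binds.createLayout(device, 0, DescriptorSupport::CORE_1_2);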
void clear()
{
m_bindings.clear();
m_bindingFlags.clear();
}
bool empty() const { return m_bindings.empty(); }
size_t size() const { return m_bindings.size(); }
const VkDescriptorSetLayoutBinding* data() const { return m_bindings.data(); }
VkDescriptorType getType(uint32_t binding) const;
uint32_t getCount(uint32_t binding) const;
// Once the bindings have been added, this generates the descriptor layout corresponding to the
// bound resources.
VkDescriptorSetLayout createLayout(VkDevice device,
VkDescriptorSetLayoutCreateFlags flags = 0,
DescriptorSupport supportFlags = DescriptorSupport::CORE_1_0);
// Once the bindings have been added, this generates the descriptor pool with enough space to
// handle all the bound resources and allocate up to maxSets descriptor sets
VkDescriptorPool createPool(VkDevice device, uint32_t maxSets = 1, VkDescriptorPoolCreateFlags flags = 0) const;
// appends the required poolsizes for N sets
void addRequiredPoolSizes(std::vector<VkDescriptorPoolSize>& poolSizes, uint32_t numSets) const;
// provide single element
VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet, uint32_t dstBinding, uint32_t arrayElement = 0) const;
VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkDescriptorImageInfo* pImageInfo,
uint32_t arrayElement = 0) const;
VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkDescriptorBufferInfo* pBufferInfo,
uint32_t arrayElement = 0) const;
VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkBufferView* pTexelBufferView,
uint32_t arrayElement = 0) const;
#if VK_NV_ray_tracing
VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetAccelerationStructureNV* pAccel,
uint32_t arrayElement = 0) const;
#endif
#if VK_KHR_acceleration_structure
VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetAccelerationStructureKHR* pAccel,
uint32_t arrayElement = 0) const;
#endif
#if VK_EXT_inline_uniform_block
VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniform,
uint32_t arrayElement = 0) const;
#endif
// provide full array
VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet, uint32_t dstBinding) const;
VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet, uint32_t dstBinding, const VkDescriptorImageInfo* pImageInfo) const;
VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet, uint32_t dstBinding, const VkDescriptorBufferInfo* pBufferInfo) const;
VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet, uint32_t dstBinding, const VkBufferView* pTexelBufferView) const;
#if VK_NV_ray_tracing
VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetAccelerationStructureNV* pAccel) const;
#endif
#if VK_KHR_acceleration_structure
VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetAccelerationStructureKHR* pAccel) const;
#endif
#if VK_EXT_inline_uniform_block
VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetInlineUniformBlockEXT* pInline) const;
#endif
protected:
std::vector<VkDescriptorSetLayoutBinding> m_bindings;
std::vector<VkDescriptorBindingFlags> m_bindingFlags;
};
/////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::DescriptorSetContainer
nvvk::DescriptorSetContainer is a container class that stores allocated DescriptorSets
as well as reflection, layout and pool for a single
VkDescriptorSetLayout.
Example:
```cpp
container.init(device, allocator);
// setup dset layouts
container.addBinding(0, UBO...)
container.addBinding(1, SSBO...)
container.initLayout();
// allocate descriptorsets
container.initPool(17);
// update descriptorsets
writeUpdates.push_back( container.makeWrite(0, 0, &..) );
writeUpdates.push_back( container.makeWrite(0, 1, &..) );
writeUpdates.push_back( container.makeWrite(1, 0, &..) );
writeUpdates.push_back( container.makeWrite(1, 1, &..) );
writeUpdates.push_back( container.makeWrite(2, 0, &..) );
writeUpdates.push_back( container.makeWrite(2, 1, &..) );
...
// at render time
vkCmdBindDescriptorSets(cmd, GRAPHICS, pipeLayout, 1, 1, container.getSets(7));
```
@DOC_END */
class DescriptorSetContainer
{
public:
DescriptorSetContainer(DescriptorSetContainer const&) = delete;
DescriptorSetContainer& operator=(DescriptorSetContainer const&) = delete;
DescriptorSetContainer() {}
DescriptorSetContainer(VkDevice device) { init(device); }
void init(VkDevice device);
~DescriptorSetContainer() { deinit(); }
void setBindings(const std::vector<VkDescriptorSetLayoutBinding>& bindings);
void addBinding(VkDescriptorSetLayoutBinding layoutBinding);
void addBinding(uint32_t binding,
VkDescriptorType descriptorType,
uint32_t descriptorCount,
VkShaderStageFlags stageFlags,
const VkSampler* pImmutableSamplers = nullptr);
// requires DescriptorSupport::INDEXING_EXT or DescriptorSupport::CORE_1_2 on initLayout
void setBindingFlags(uint32_t binding, VkDescriptorBindingFlags bindingFlags);
VkDescriptorSetLayout initLayout(VkDescriptorSetLayoutCreateFlags flags = 0,
DescriptorSupport supportFlags = DescriptorSupport::CORE_1_0);
// inits the pool and immediately allocates numAllocatedSets-many DescriptorSets
VkDescriptorPool initPool(uint32_t numAllocatedSets);
// optionally generates a pipelinelayout for the descriptorsetlayout
VkPipelineLayout initPipeLayout(uint32_t numRanges = 0,
const VkPushConstantRange* ranges = nullptr,
VkPipelineLayoutCreateFlags flags = 0);
void deinitPool();
void deinitLayout();
void deinit();
//////////////////////////////////////////////////////////////////////////
VkDescriptorSet getSet(uint32_t dstSetIdx = 0) const;
const VkDescriptorSet* getSets(uint32_t dstSetIdx = 0) const { return m_descriptorSets.data() + dstSetIdx; }
uint32_t getSetsCount() const { return static_cast<uint32_t>(m_descriptorSets.size()); }
const VkDescriptorSetLayout& getLayout() const { return m_layout; }
const VkPipelineLayout& getPipeLayout() const { return m_pipelineLayout; }
const DescriptorSetBindings& getBindings() const { return m_bindings; }
VkDevice getDevice() const { return m_device; }
//////////////////////////////////////////////////////////////////////////
// provide single element
VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, const VkDescriptorImageInfo* pImageInfo, uint32_t arrayElement = 0) const
{
return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, pImageInfo, arrayElement);
}
VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, const VkDescriptorBufferInfo* pBufferInfo, uint32_t arrayElement = 0) const
{
return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, pBufferInfo, arrayElement);
}
VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, const VkBufferView* pTexelBufferView, uint32_t arrayElement = 0) const
{
return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, pTexelBufferView, arrayElement);
}
#if VK_NV_ray_tracing
VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx,
uint32_t dstBinding,
const VkWriteDescriptorSetAccelerationStructureNV* pAccel,
uint32_t arrayElement = 0) const
{
return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, pAccel, arrayElement);
}
#endif
#if VK_KHR_acceleration_structure
VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx,
uint32_t dstBinding,
const VkWriteDescriptorSetAccelerationStructureKHR* pAccel,
uint32_t arrayElement = 0) const
{
return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, pAccel, arrayElement);
}
#endif
#if VK_EXT_inline_uniform_block
VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx,
uint32_t dstBinding,
const VkWriteDescriptorSetInlineUniformBlockEXT* pInline,
uint32_t arrayElement = 0) const
{
return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, pInline, arrayElement);
}
#endif
// provide full array
VkWriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const VkDescriptorImageInfo* pImageInfo) const
{
return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, pImageInfo);
}
VkWriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const VkDescriptorBufferInfo* pBufferInfo) const
{
return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, pBufferInfo);
}
VkWriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const VkBufferView* pTexelBufferView) const
{
return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, pTexelBufferView);
}
#if VK_NV_ray_tracing
VkWriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const VkWriteDescriptorSetAccelerationStructureNV* pAccel) const
{
return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, pAccel);
}
#endif
#if VK_KHR_acceleration_structure
VkWriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const VkWriteDescriptorSetAccelerationStructureKHR* pAccel) const
{
return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, pAccel);
}
#endif
#if VK_EXT_inline_uniform_block
VkWriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const VkWriteDescriptorSetInlineUniformBlockEXT* pInline) const
{
return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, pInline);
}
#endif
protected:
VkDevice m_device = VK_NULL_HANDLE;
VkDescriptorSetLayout m_layout = VK_NULL_HANDLE;
VkDescriptorPool m_pool = VK_NULL_HANDLE;
VkPipelineLayout m_pipelineLayout = VK_NULL_HANDLE;
std::vector<VkDescriptorSet> m_descriptorSets = {};
DescriptorSetBindings m_bindings = {};
};
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::TDescriptorSetContainer<SETS,PIPES=1>
nvvk::TDescriptorSetContainer is a templated version of DescriptorSetContainer that stores:
- SETS-many DescriptorSetContainers
- PIPES-many VkPipelineLayouts
The pipeline layouts are stored separately; the class does
not use the pipeline layouts of the embedded DescriptorSetContainers.
Example :
```cpp
// Usage, e.g. SETS = 2, PIPES = 2
container.init(device, allocator);
// setup dset layouts
container.at(0).addBinding(0, UBO...)
container.at(0).addBinding(1, SSBO...)
container.at(0).initLayout();
container.at(1).addBinding(0, COMBINED_SAMPLER...)
container.at(1).initLayout();
// pipe 0 uses set 0 alone
container.initPipeLayout(0, 1);
// pipe 1 uses sets 0, 1
container.initPipeLayout(1, 2);
// allocate descriptorsets
container.at(0).initPool(1);
container.at(1).initPool(16);
// update descriptorsets
writeUpdates.push_back(container.at(0).makeWrite(0, 0, &..));
writeUpdates.push_back(container.at(0).makeWrite(0, 1, &..));
writeUpdates.push_back(container.at(1).makeWrite(0, 0, &..));
writeUpdates.push_back(container.at(1).makeWrite(1, 0, &..));
writeUpdates.push_back(container.at(1).makeWrite(2, 0, &..));
...
// at render time
vkCmdBindDescriptorSets(cmd, GRAPHICS, container.getPipeLayout(0), 0, 1, container.at(0).getSets());
..
vkCmdBindDescriptorSets(cmd, GRAPHICS, container.getPipeLayout(1), 1, 1, container.at(1).getSets(7));
```
@DOC_END */
template <int SETS, int PIPES = 1>
class TDescriptorSetContainer
{
public:
TDescriptorSetContainer() {}
TDescriptorSetContainer(VkDevice device) { init(device); }
~TDescriptorSetContainer() { deinit(); }
void init(VkDevice device);
void deinit();
void deinitLayouts();
void deinitPools();
// pipelayout uses range of m_sets[0.. first null or SETS[
VkPipelineLayout initPipeLayout(uint32_t pipe,
uint32_t numRanges = 0,
const VkPushConstantRange* ranges = nullptr,
VkPipelineLayoutCreateFlags flags = 0);
// pipelayout uses range of m_sets[0..numDsets[
VkPipelineLayout initPipeLayout(uint32_t pipe,
uint32_t numDsets,
uint32_t numRanges = 0,
const VkPushConstantRange* ranges = nullptr,
VkPipelineLayoutCreateFlags flags = 0);
DescriptorSetContainer& at(uint32_t set) { return m_sets[set]; }
const DescriptorSetContainer& at(uint32_t set) const { return m_sets[set]; }
DescriptorSetContainer& operator[](uint32_t set) { return m_sets[set]; }
const DescriptorSetContainer& operator[](uint32_t set) const { return m_sets[set]; }
VkPipelineLayout getPipeLayout(uint32_t pipe = 0) const
{
assert(pipe < PIPES);
return m_pipelayouts[pipe];
}
protected:
VkPipelineLayout m_pipelayouts[PIPES] = {};
DescriptorSetContainer m_sets[SETS];
};
//////////////////////////////////////////////////////////////////////////
template <int SETS, int PIPES>
VkPipelineLayout TDescriptorSetContainer<SETS, PIPES>::initPipeLayout(uint32_t pipe,
uint32_t numDsets,
uint32_t numRanges /*= 0*/,
const VkPushConstantRange* ranges /*= nullptr*/,
VkPipelineLayoutCreateFlags flags /*= 0*/)
{
assert(pipe < uint32_t(PIPES));
assert(numDsets <= uint32_t(SETS));
assert(m_pipelayouts[pipe] == VK_NULL_HANDLE);
VkDevice device = m_sets[0].getDevice();
VkDescriptorSetLayout setLayouts[SETS];
for(uint32_t d = 0; d < numDsets; d++)
{
setLayouts[d] = m_sets[d].getLayout();
assert(setLayouts[d]);
}
VkResult result;
VkPipelineLayoutCreateInfo layoutCreateInfo = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
layoutCreateInfo.setLayoutCount = numDsets;
layoutCreateInfo.pSetLayouts = setLayouts;
layoutCreateInfo.pushConstantRangeCount = numRanges;
layoutCreateInfo.pPushConstantRanges = ranges;
layoutCreateInfo.flags = flags;
result = vkCreatePipelineLayout(device, &layoutCreateInfo, nullptr, &m_pipelayouts[pipe]);
assert(result == VK_SUCCESS);
return m_pipelayouts[pipe];
}
template <int SETS, int PIPES>
VkPipelineLayout TDescriptorSetContainer<SETS, PIPES>::initPipeLayout(uint32_t pipe,
uint32_t numRanges /*= 0*/,
const VkPushConstantRange* ranges /*= nullptr*/,
VkPipelineLayoutCreateFlags flags /*= 0*/)
{
assert(pipe < uint32_t(PIPES));
assert(m_pipelayouts[pipe] == VK_NULL_HANDLE);
VkDevice device = m_sets[0].getDevice();
VkDescriptorSetLayout setLayouts[SETS];
int used;
for(used = 0; used < SETS; used++)
{
setLayouts[used] = m_sets[used].getLayout();
if(!setLayouts[used])
break;
}
VkResult result;
VkPipelineLayoutCreateInfo layoutCreateInfo = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
layoutCreateInfo.setLayoutCount = uint32_t(used);
layoutCreateInfo.pSetLayouts = setLayouts;
layoutCreateInfo.pushConstantRangeCount = numRanges;
layoutCreateInfo.pPushConstantRanges = ranges;
layoutCreateInfo.flags = flags;
result = vkCreatePipelineLayout(device, &layoutCreateInfo, nullptr, &m_pipelayouts[pipe]);
assert(result == VK_SUCCESS);
return m_pipelayouts[pipe];
}
template <int SETS, int PIPES>
void TDescriptorSetContainer<SETS, PIPES>::deinitPools()
{
for(int d = 0; d < SETS; d++)
{
m_sets[d].deinitPool();
}
}
template <int SETS, int PIPES>
void TDescriptorSetContainer<SETS, PIPES>::deinitLayouts()
{
VkDevice device = m_sets[0].getDevice();
for(int p = 0; p < PIPES; p++)
{
if(m_pipelayouts[p])
{
vkDestroyPipelineLayout(device, m_pipelayouts[p], nullptr);
m_pipelayouts[p] = VK_NULL_HANDLE;
}
}
for(int d = 0; d < SETS; d++)
{
m_sets[d].deinitLayout();
}
}
template <int SETS, int PIPES>
void TDescriptorSetContainer<SETS, PIPES>::deinit()
{
deinitPools();
deinitLayouts();
}
template <int SETS, int PIPES>
void TDescriptorSetContainer<SETS, PIPES>::init(VkDevice device)
{
for(int d = 0; d < SETS; d++)
{
m_sets[d].init(device);
}
}
} // namespace nvvk

View file

@ -0,0 +1,62 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "dynamicrendering_vk.hpp"
namespace nvvk {
#ifdef VK_KHR_dynamic_rendering
// Helper for VK_KHR_dynamic_rendering
createRenderingInfo::createRenderingInfo(VkRect2D renderArea,
const std::vector<VkImageView>& colorViews,
const VkImageView& depthView,
VkAttachmentLoadOp colorLoadOp /*= VK_ATTACHMENT_LOAD_OP_CLEAR*/,
VkAttachmentLoadOp depthLoadOp /*= VK_ATTACHMENT_LOAD_OP_CLEAR*/,
VkClearColorValue clearColorValue /*= {0.f, 0.f, 0.f, 0.f}*/,
VkClearDepthStencilValue clearDepthValue /*= {1.f, 0U}*/,
VkRenderingFlagsKHR flags /*= 0*/)
: VkRenderingInfoKHR{VK_STRUCTURE_TYPE_RENDERING_INFO_KHR}
{
for(auto& cv : colorViews)
{
VkRenderingAttachmentInfoKHR colorAttachment{VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR};
colorAttachment.clearValue.color = clearColorValue;
colorAttachment.imageView = cv;
colorAttachment.imageLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL_KHR;
colorAttachment.loadOp = colorLoadOp;
colorAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
colorAttachments.emplace_back(colorAttachment);
}
depthStencilAttachment.imageView = depthView;
depthStencilAttachment.imageLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL_KHR;
depthStencilAttachment.loadOp = depthLoadOp;
depthStencilAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
depthStencilAttachment.clearValue.depthStencil = clearDepthValue;
this->renderArea = renderArea;
this->layerCount = 1;
this->colorAttachmentCount = static_cast<uint32_t>(colorAttachments.size());
this->pColorAttachments = colorAttachments.data();
this->pDepthAttachment = &depthStencilAttachment;
this->pStencilAttachment = &depthStencilAttachment;
this->flags = flags;
}
#endif
} // namespace nvvk

View file

@ -0,0 +1,51 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <vulkan/vulkan_core.h>
#include <vector>
namespace nvvk {
/** @DOC_START
# struct nvvk::createRenderingInfo
> Wrapper for VkRenderingInfoKHR
This struct is a wrapper around the VkRenderingInfoKHR structure, used to begin dynamic rendering with the given color and depth attachments.
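A minimal usage sketch (a command buffer `cmd`, a `renderArea`, and existing `colorView`/`depthView` image views are assumed):
```cpp
nvvk::createRenderingInfo renderingInfo(renderArea, {colorView}, depthView);
vkCmdBeginRenderingKHR(cmd, &renderingInfo);
// ... draw calls ...
vkCmdEndRenderingKHR(cmd);
```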
@DOC_END */
#ifdef VK_KHR_dynamic_rendering
struct createRenderingInfo : public VkRenderingInfoKHR
{
createRenderingInfo(VkRect2D renderArea,
const std::vector<VkImageView>& colorViews,
const VkImageView& depthView,
VkAttachmentLoadOp colorLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
VkAttachmentLoadOp depthLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
VkClearColorValue clearColorValue = {{0.f, 0.f, 0.f, 0.f}},
VkClearDepthStencilValue clearDepthValue = {1.f, 0U},
VkRenderingFlagsKHR flags = 0);
VkRenderingAttachmentInfoKHR depthStencilAttachment{VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR};
std::vector<VkRenderingAttachmentInfoKHR> colorAttachments;
};
#endif
} // namespace nvvk

View file

@ -0,0 +1,132 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "error_vk.hpp"
#include <nvh/nvprint.hpp>
namespace nvvk {
CheckResultCallback g_checkResultCallback;
void setCheckResultHook(const CheckResultCallback& callback)
{
g_checkResultCallback = callback;
}
const char* getResultString(VkResult result)
{
const char* resultString = "unknown";
#define STR(a) \
case a: \
resultString = #a; \
break;
switch(result)
{
STR(VK_SUCCESS);
STR(VK_NOT_READY);
STR(VK_TIMEOUT);
STR(VK_EVENT_SET);
STR(VK_EVENT_RESET);
STR(VK_INCOMPLETE);
STR(VK_ERROR_OUT_OF_HOST_MEMORY);
STR(VK_ERROR_OUT_OF_DEVICE_MEMORY);
STR(VK_ERROR_INITIALIZATION_FAILED);
STR(VK_ERROR_DEVICE_LOST);
STR(VK_ERROR_MEMORY_MAP_FAILED);
STR(VK_ERROR_LAYER_NOT_PRESENT);
STR(VK_ERROR_EXTENSION_NOT_PRESENT);
STR(VK_ERROR_FEATURE_NOT_PRESENT);
STR(VK_ERROR_INCOMPATIBLE_DRIVER);
STR(VK_ERROR_TOO_MANY_OBJECTS);
STR(VK_ERROR_FORMAT_NOT_SUPPORTED);
STR(VK_ERROR_FRAGMENTED_POOL);
STR(VK_ERROR_OUT_OF_POOL_MEMORY);
STR(VK_ERROR_INVALID_EXTERNAL_HANDLE);
STR(VK_ERROR_SURFACE_LOST_KHR);
STR(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
STR(VK_SUBOPTIMAL_KHR);
STR(VK_ERROR_OUT_OF_DATE_KHR);
STR(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
STR(VK_ERROR_VALIDATION_FAILED_EXT);
STR(VK_ERROR_INVALID_SHADER_NV);
STR(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
STR(VK_ERROR_FRAGMENTATION_EXT);
STR(VK_ERROR_NOT_PERMITTED_EXT);
STR(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT);
STR(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
}
#undef STR
return resultString;
}
bool checkResult(VkResult result, const char* message)
{
if(g_checkResultCallback)
return g_checkResultCallback(result, nullptr, -1, message);
if(result == VK_SUCCESS)
{
return false;
}
if(result < 0)
{
if(message)
{
LOGE("VkResult %d - %s - %s\n", result, getResultString(result), message);
}
else
{
LOGE("VkResult %d - %s\n", result, getResultString(result));
}
assert(!"Critical Vulkan Error");
return true;
}
return false;
}
//--------------------------------------------------------------------------------------------------
// Check the result of Vulkan and in case of error, provide a string about what happened
//
bool checkResult(VkResult result, const char* file, int32_t line)
{
if(g_checkResultCallback)
return g_checkResultCallback(result, file, line, nullptr);
if(result == VK_SUCCESS)
{
return false;
}
if(result < 0)
{
LOGE("%s(%d): Vulkan Error : %s\n", file, line, getResultString(result));
assert(!"Critical Vulkan Error");
return true;
}
return false;
}
} // namespace nvvk

View file

@ -0,0 +1,51 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# Function nvvk::checkResult
> Returns true on critical error result, logs errors.
Use `NVVK_CHECK(result)` to automatically log filename/linenumber.
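A minimal usage sketch (assuming a valid `device`):
```cpp
VkResult result = vkDeviceWaitIdle(device);
if(NVVK_CHECK(result))
{
  // a critical error was already logged with file/line; handle or abort here
}
```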
@DOC_END */
#pragma once
#include <cassert>
#include <functional>
#include <vulkan/vulkan_core.h>
namespace nvvk {
bool checkResult(VkResult result, const char* message = nullptr);
bool checkResult(VkResult result, const char* file, int32_t line);
/** @DOC_START
# Function nvvk::setCheckResultHook
> Allow replacing nvvk::checkResult() calls. E.g. to catch
`VK_ERROR_DEVICE_LOST` and wait for aftermath to write the crash dump.
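A minimal sketch of installing such a hook (the device-lost handling is a hypothetical placeholder):
```cpp
nvvk::setCheckResultHook([](VkResult result, const char* file, int32_t line, const char* message) {
  if(result == VK_ERROR_DEVICE_LOST)
  {
    // e.g. wait here for a crash-dump writer to finish (hypothetical), then report as critical
    return true;
  }
  return result < 0;  // any other negative VkResult is critical
});
```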
@DOC_END */
using CheckResultCallback = std::function<bool(VkResult, const char*, int32_t, const char*)>;
void setCheckResultHook(const CheckResultCallback& callback);
#ifndef NVVK_CHECK
#define NVVK_CHECK(result) nvvk::checkResult(result, __FILE__, __LINE__)
#endif
} // namespace nvvk

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,589 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <vulkan/vulkan.h>
/** @DOC_START
# Function load_VK_EXTENSIONS
> load_VK_EXTENSIONS : Vulkan Extension Loader
The extensions_vk files take care of loading and providing the symbols of the Vulkan C API extensions.
They are generated by `extensions_vk.py`, which covers all extensions found in vk.xml. See the script for details.
The framework triggers this implicitly in the `nvvk::Context` class, immediately after creating the device.
```cpp
// loads all known extensions
load_VK_EXTENSIONS(instance, vkGetInstanceProcAddr, device, vkGetDeviceProcAddr);
```
@DOC_END */
/* Load all available extensions */
void load_VK_EXTENSIONS(VkInstance instance, PFN_vkGetInstanceProcAddr getInstanceProcAddr, VkDevice device, PFN_vkGetDeviceProcAddr getDeviceProcAddr);
/* NVVK_GENERATE_DEFINE */
#if defined(VK_AMDX_shader_enqueue)
#define NVVK_HAS_VK_AMDX_shader_enqueue
#endif /* VK_AMDX_shader_enqueue */
#if defined(VK_AMD_buffer_marker)
#define NVVK_HAS_VK_AMD_buffer_marker
#endif /* VK_AMD_buffer_marker */
#if defined(VK_AMD_display_native_hdr)
#define NVVK_HAS_VK_AMD_display_native_hdr
#endif /* VK_AMD_display_native_hdr */
#if defined(VK_AMD_draw_indirect_count)
#define NVVK_HAS_VK_AMD_draw_indirect_count
#endif /* VK_AMD_draw_indirect_count */
#if defined(VK_AMD_shader_info)
#define NVVK_HAS_VK_AMD_shader_info
#endif /* VK_AMD_shader_info */
#if defined(VK_ANDROID_external_memory_android_hardware_buffer)
#define NVVK_HAS_VK_ANDROID_external_memory_android_hardware_buffer
#endif /* VK_ANDROID_external_memory_android_hardware_buffer */
#if defined(VK_EXT_acquire_drm_display)
#define NVVK_HAS_VK_EXT_acquire_drm_display
#endif /* VK_EXT_acquire_drm_display */
#if defined(VK_EXT_acquire_xlib_display)
#define NVVK_HAS_VK_EXT_acquire_xlib_display
#endif /* VK_EXT_acquire_xlib_display */
#if defined(VK_EXT_attachment_feedback_loop_dynamic_state)
#define NVVK_HAS_VK_EXT_attachment_feedback_loop_dynamic_state
#endif /* VK_EXT_attachment_feedback_loop_dynamic_state */
#if defined(VK_EXT_buffer_device_address)
#define NVVK_HAS_VK_EXT_buffer_device_address
#endif /* VK_EXT_buffer_device_address */
#if defined(VK_EXT_calibrated_timestamps)
#define NVVK_HAS_VK_EXT_calibrated_timestamps
#endif /* VK_EXT_calibrated_timestamps */
#if defined(VK_EXT_color_write_enable)
#define NVVK_HAS_VK_EXT_color_write_enable
#endif /* VK_EXT_color_write_enable */
#if defined(VK_EXT_conditional_rendering)
#define NVVK_HAS_VK_EXT_conditional_rendering
#endif /* VK_EXT_conditional_rendering */
#if defined(VK_EXT_debug_marker)
#define NVVK_HAS_VK_EXT_debug_marker
#endif /* VK_EXT_debug_marker */
#if defined(VK_EXT_debug_report)
#define NVVK_HAS_VK_EXT_debug_report
#endif /* VK_EXT_debug_report */
#if defined(VK_EXT_debug_utils)
#define NVVK_HAS_VK_EXT_debug_utils
#endif /* VK_EXT_debug_utils */
#if defined(VK_EXT_depth_bias_control)
#define NVVK_HAS_VK_EXT_depth_bias_control
#endif /* VK_EXT_depth_bias_control */
#if defined(VK_EXT_descriptor_buffer)
#define NVVK_HAS_VK_EXT_descriptor_buffer
#endif /* VK_EXT_descriptor_buffer */
#if defined(VK_EXT_descriptor_buffer) && (defined(VK_KHR_acceleration_structure) || defined(VK_NV_ray_tracing))
#define NVVK_HAS_VK_EXT_descriptor_buffer
#endif /* VK_EXT_descriptor_buffer && (VK_KHR_acceleration_structure || VK_NV_ray_tracing) */
#if defined(VK_EXT_device_fault)
#define NVVK_HAS_VK_EXT_device_fault
#endif /* VK_EXT_device_fault */
#if defined(VK_EXT_direct_mode_display)
#define NVVK_HAS_VK_EXT_direct_mode_display
#endif /* VK_EXT_direct_mode_display */
#if defined(VK_EXT_directfb_surface)
#define NVVK_HAS_VK_EXT_directfb_surface
#endif /* VK_EXT_directfb_surface */
#if defined(VK_EXT_discard_rectangles)
#define NVVK_HAS_VK_EXT_discard_rectangles
#endif /* VK_EXT_discard_rectangles */
#if defined(VK_EXT_discard_rectangles) && VK_EXT_DISCARD_RECTANGLES_SPEC_VERSION >= 2
#define NVVK_HAS_VK_EXT_discard_rectangles
#endif /* VK_EXT_discard_rectangles && VK_EXT_DISCARD_RECTANGLES_SPEC_VERSION >= 2 */
#if defined(VK_EXT_display_control)
#define NVVK_HAS_VK_EXT_display_control
#endif /* VK_EXT_display_control */
#if defined(VK_EXT_display_surface_counter)
#define NVVK_HAS_VK_EXT_display_surface_counter
#endif /* VK_EXT_display_surface_counter */
#if defined(VK_EXT_extended_dynamic_state)
#define NVVK_HAS_VK_EXT_extended_dynamic_state
#endif /* VK_EXT_extended_dynamic_state */
#if defined(VK_EXT_extended_dynamic_state2)
#define NVVK_HAS_VK_EXT_extended_dynamic_state2
#endif /* VK_EXT_extended_dynamic_state2 */
#if defined(VK_EXT_extended_dynamic_state3)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_clip_space_w_scaling)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_clip_space_w_scaling */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_viewport_swizzle)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_viewport_swizzle */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_fragment_coverage_to_color)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_fragment_coverage_to_color */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_framebuffer_mixed_samples)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_framebuffer_mixed_samples */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_shading_rate_image)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_shading_rate_image */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_representative_fragment_test)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_representative_fragment_test */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_coverage_reduction_mode)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_coverage_reduction_mode */
#if defined(VK_EXT_external_memory_host)
#define NVVK_HAS_VK_EXT_external_memory_host
#endif /* VK_EXT_external_memory_host */
#if defined(VK_EXT_full_screen_exclusive)
#define NVVK_HAS_VK_EXT_full_screen_exclusive
#endif /* VK_EXT_full_screen_exclusive */
#if defined(VK_EXT_full_screen_exclusive) && defined(VK_KHR_device_group)
#define NVVK_HAS_VK_EXT_full_screen_exclusive
#endif /* VK_EXT_full_screen_exclusive && VK_KHR_device_group */
#if defined(VK_EXT_full_screen_exclusive) && defined(VK_VERSION_1_1)
#define NVVK_HAS_VK_EXT_full_screen_exclusive
#endif /* VK_EXT_full_screen_exclusive && VK_VERSION_1_1 */
#if defined(VK_EXT_hdr_metadata)
#define NVVK_HAS_VK_EXT_hdr_metadata
#endif /* VK_EXT_hdr_metadata */
#if defined(VK_EXT_headless_surface)
#define NVVK_HAS_VK_EXT_headless_surface
#endif /* VK_EXT_headless_surface */
#if defined(VK_EXT_host_image_copy)
#define NVVK_HAS_VK_EXT_host_image_copy
#endif /* VK_EXT_host_image_copy */
#if defined(VK_EXT_host_query_reset)
#define NVVK_HAS_VK_EXT_host_query_reset
#endif /* VK_EXT_host_query_reset */
#if defined(VK_EXT_image_compression_control)
#define NVVK_HAS_VK_EXT_image_compression_control
#endif /* VK_EXT_image_compression_control */
#if defined(VK_EXT_image_drm_format_modifier)
#define NVVK_HAS_VK_EXT_image_drm_format_modifier
#endif /* VK_EXT_image_drm_format_modifier */
#if defined(VK_EXT_line_rasterization)
#define NVVK_HAS_VK_EXT_line_rasterization
#endif /* VK_EXT_line_rasterization */
#if defined(VK_EXT_mesh_shader)
#define NVVK_HAS_VK_EXT_mesh_shader
#endif /* VK_EXT_mesh_shader */
#if defined(VK_EXT_metal_objects)
#define NVVK_HAS_VK_EXT_metal_objects
#endif /* VK_EXT_metal_objects */
#if defined(VK_EXT_metal_surface)
#define NVVK_HAS_VK_EXT_metal_surface
#endif /* VK_EXT_metal_surface */
#if defined(VK_EXT_multi_draw)
#define NVVK_HAS_VK_EXT_multi_draw
#endif /* VK_EXT_multi_draw */
#if defined(VK_EXT_opacity_micromap)
#define NVVK_HAS_VK_EXT_opacity_micromap
#endif /* VK_EXT_opacity_micromap */
#if defined(VK_EXT_pageable_device_local_memory)
#define NVVK_HAS_VK_EXT_pageable_device_local_memory
#endif /* VK_EXT_pageable_device_local_memory */
#if defined(VK_EXT_pipeline_properties)
#define NVVK_HAS_VK_EXT_pipeline_properties
#endif /* VK_EXT_pipeline_properties */
#if defined(VK_EXT_private_data)
#define NVVK_HAS_VK_EXT_private_data
#endif /* VK_EXT_private_data */
#if defined(VK_EXT_sample_locations)
#define NVVK_HAS_VK_EXT_sample_locations
#endif /* VK_EXT_sample_locations */
#if defined(VK_EXT_shader_module_identifier)
#define NVVK_HAS_VK_EXT_shader_module_identifier
#endif /* VK_EXT_shader_module_identifier */
#if defined(VK_EXT_shader_object)
#define NVVK_HAS_VK_EXT_shader_object
#endif /* VK_EXT_shader_object */
#if defined(VK_EXT_shader_object) && defined(VK_NV_clip_space_w_scaling)
#define NVVK_HAS_VK_EXT_shader_object
#endif /* VK_EXT_shader_object && VK_NV_clip_space_w_scaling */
#if defined(VK_EXT_shader_object) && defined(VK_NV_viewport_swizzle)
#define NVVK_HAS_VK_EXT_shader_object
#endif /* VK_EXT_shader_object && VK_NV_viewport_swizzle */
#if defined(VK_EXT_shader_object) && defined(VK_NV_fragment_coverage_to_color)
#define NVVK_HAS_VK_EXT_shader_object
#endif /* VK_EXT_shader_object && VK_NV_fragment_coverage_to_color */
#if defined(VK_EXT_shader_object) && defined(VK_NV_framebuffer_mixed_samples)
#define NVVK_HAS_VK_EXT_shader_object
#endif /* VK_EXT_shader_object && VK_NV_framebuffer_mixed_samples */
#if defined(VK_EXT_shader_object) && defined(VK_NV_shading_rate_image)
#define NVVK_HAS_VK_EXT_shader_object
#endif /* VK_EXT_shader_object && VK_NV_shading_rate_image */
#if defined(VK_EXT_shader_object) && defined(VK_NV_representative_fragment_test)
#define NVVK_HAS_VK_EXT_shader_object
#endif /* VK_EXT_shader_object && VK_NV_representative_fragment_test */
#if defined(VK_EXT_shader_object) && defined(VK_NV_coverage_reduction_mode)
#define NVVK_HAS_VK_EXT_shader_object
#endif /* VK_EXT_shader_object && VK_NV_coverage_reduction_mode */
#if defined(VK_EXT_swapchain_maintenance1)
#define NVVK_HAS_VK_EXT_swapchain_maintenance1
#endif /* VK_EXT_swapchain_maintenance1 */
#if defined(VK_EXT_tooling_info)
#define NVVK_HAS_VK_EXT_tooling_info
#endif /* VK_EXT_tooling_info */
#if defined(VK_EXT_transform_feedback)
#define NVVK_HAS_VK_EXT_transform_feedback
#endif /* VK_EXT_transform_feedback */
#if defined(VK_EXT_validation_cache)
#define NVVK_HAS_VK_EXT_validation_cache
#endif /* VK_EXT_validation_cache */
#if defined(VK_EXT_vertex_input_dynamic_state)
#define NVVK_HAS_VK_EXT_vertex_input_dynamic_state
#endif /* VK_EXT_vertex_input_dynamic_state */
#if defined(VK_FUCHSIA_buffer_collection)
#define NVVK_HAS_VK_FUCHSIA_buffer_collection
#endif /* VK_FUCHSIA_buffer_collection */
#if defined(VK_FUCHSIA_external_memory)
#define NVVK_HAS_VK_FUCHSIA_external_memory
#endif /* VK_FUCHSIA_external_memory */
#if defined(VK_FUCHSIA_external_semaphore)
#define NVVK_HAS_VK_FUCHSIA_external_semaphore
#endif /* VK_FUCHSIA_external_semaphore */
#if defined(VK_FUCHSIA_imagepipe_surface)
#define NVVK_HAS_VK_FUCHSIA_imagepipe_surface
#endif /* VK_FUCHSIA_imagepipe_surface */
#if defined(VK_GGP_stream_descriptor_surface)
#define NVVK_HAS_VK_GGP_stream_descriptor_surface
#endif /* VK_GGP_stream_descriptor_surface */
#if defined(VK_GOOGLE_display_timing)
#define NVVK_HAS_VK_GOOGLE_display_timing
#endif /* VK_GOOGLE_display_timing */
#if defined(VK_HUAWEI_cluster_culling_shader)
#define NVVK_HAS_VK_HUAWEI_cluster_culling_shader
#endif /* VK_HUAWEI_cluster_culling_shader */
#if defined(VK_HUAWEI_invocation_mask)
#define NVVK_HAS_VK_HUAWEI_invocation_mask
#endif /* VK_HUAWEI_invocation_mask */
#if defined(VK_HUAWEI_subpass_shading)
#define NVVK_HAS_VK_HUAWEI_subpass_shading
#endif /* VK_HUAWEI_subpass_shading */
#if defined(VK_INTEL_performance_query)
#define NVVK_HAS_VK_INTEL_performance_query
#endif /* VK_INTEL_performance_query */
#if defined(VK_KHR_acceleration_structure)
#define NVVK_HAS_VK_KHR_acceleration_structure
#endif /* VK_KHR_acceleration_structure */
#if defined(VK_KHR_android_surface)
#define NVVK_HAS_VK_KHR_android_surface
#endif /* VK_KHR_android_surface */
#if defined(VK_KHR_bind_memory2)
#define NVVK_HAS_VK_KHR_bind_memory2
#endif /* VK_KHR_bind_memory2 */
#if defined(VK_KHR_buffer_device_address)
#define NVVK_HAS_VK_KHR_buffer_device_address
#endif /* VK_KHR_buffer_device_address */
#if defined(VK_KHR_calibrated_timestamps)
#define NVVK_HAS_VK_KHR_calibrated_timestamps
#endif /* VK_KHR_calibrated_timestamps */
#if defined(VK_KHR_cooperative_matrix)
#define NVVK_HAS_VK_KHR_cooperative_matrix
#endif /* VK_KHR_cooperative_matrix */
#if defined(VK_KHR_copy_commands2)
#define NVVK_HAS_VK_KHR_copy_commands2
#endif /* VK_KHR_copy_commands2 */
#if defined(VK_KHR_create_renderpass2)
#define NVVK_HAS_VK_KHR_create_renderpass2
#endif /* VK_KHR_create_renderpass2 */
#if defined(VK_KHR_deferred_host_operations)
#define NVVK_HAS_VK_KHR_deferred_host_operations
#endif /* VK_KHR_deferred_host_operations */
#if defined(VK_KHR_descriptor_update_template)
#define NVVK_HAS_VK_KHR_descriptor_update_template
#endif /* VK_KHR_descriptor_update_template */
#if defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)
#define NVVK_HAS_VK_KHR_descriptor_update_template
#endif /* VK_KHR_descriptor_update_template && VK_KHR_push_descriptor */
#if defined(VK_KHR_device_group)
#define NVVK_HAS_VK_KHR_device_group
#endif /* VK_KHR_device_group */
#if defined(VK_KHR_device_group) && defined(VK_KHR_surface)
#define NVVK_HAS_VK_KHR_device_group
#endif /* VK_KHR_device_group && VK_KHR_surface */
#if defined(VK_KHR_device_group) && defined(VK_KHR_swapchain)
#define NVVK_HAS_VK_KHR_device_group
#endif /* VK_KHR_device_group && VK_KHR_swapchain */
#if defined(VK_KHR_device_group_creation)
#define NVVK_HAS_VK_KHR_device_group_creation
#endif /* VK_KHR_device_group_creation */
#if defined(VK_KHR_draw_indirect_count)
#define NVVK_HAS_VK_KHR_draw_indirect_count
#endif /* VK_KHR_draw_indirect_count */
#if defined(VK_KHR_dynamic_rendering)
#define NVVK_HAS_VK_KHR_dynamic_rendering
#endif /* VK_KHR_dynamic_rendering */
#if defined(VK_KHR_external_fence_capabilities)
#define NVVK_HAS_VK_KHR_external_fence_capabilities
#endif /* VK_KHR_external_fence_capabilities */
#if defined(VK_KHR_external_fence_fd)
#define NVVK_HAS_VK_KHR_external_fence_fd
#endif /* VK_KHR_external_fence_fd */
#if defined(VK_KHR_external_fence_win32)
#define NVVK_HAS_VK_KHR_external_fence_win32
#endif /* VK_KHR_external_fence_win32 */
#if defined(VK_KHR_external_memory_capabilities)
#define NVVK_HAS_VK_KHR_external_memory_capabilities
#endif /* VK_KHR_external_memory_capabilities */
#if defined(VK_KHR_external_memory_fd)
#define NVVK_HAS_VK_KHR_external_memory_fd
#endif /* VK_KHR_external_memory_fd */
#if defined(VK_KHR_external_memory_win32)
#define NVVK_HAS_VK_KHR_external_memory_win32
#endif /* VK_KHR_external_memory_win32 */
#if defined(VK_KHR_external_semaphore_capabilities)
#define NVVK_HAS_VK_KHR_external_semaphore_capabilities
#endif /* VK_KHR_external_semaphore_capabilities */
#if defined(VK_KHR_external_semaphore_fd)
#define NVVK_HAS_VK_KHR_external_semaphore_fd
#endif /* VK_KHR_external_semaphore_fd */
#if defined(VK_KHR_external_semaphore_win32)
#define NVVK_HAS_VK_KHR_external_semaphore_win32
#endif /* VK_KHR_external_semaphore_win32 */
#if defined(VK_KHR_fragment_shading_rate)
#define NVVK_HAS_VK_KHR_fragment_shading_rate
#endif /* VK_KHR_fragment_shading_rate */
#if defined(VK_KHR_get_memory_requirements2)
#define NVVK_HAS_VK_KHR_get_memory_requirements2
#endif /* VK_KHR_get_memory_requirements2 */
#if defined(VK_KHR_get_physical_device_properties2)
#define NVVK_HAS_VK_KHR_get_physical_device_properties2
#endif /* VK_KHR_get_physical_device_properties2 */
#if defined(VK_KHR_maintenance1)
#define NVVK_HAS_VK_KHR_maintenance1
#endif /* VK_KHR_maintenance1 */
#if defined(VK_KHR_maintenance3)
#define NVVK_HAS_VK_KHR_maintenance3
#endif /* VK_KHR_maintenance3 */
#if defined(VK_KHR_maintenance4)
#define NVVK_HAS_VK_KHR_maintenance4
#endif /* VK_KHR_maintenance4 */
#if defined(VK_KHR_maintenance5)
#define NVVK_HAS_VK_KHR_maintenance5
#endif /* VK_KHR_maintenance5 */
#if defined(VK_KHR_maintenance6)
#define NVVK_HAS_VK_KHR_maintenance6
#endif /* VK_KHR_maintenance6 */
#if defined(VK_KHR_maintenance6) && defined(VK_KHR_push_descriptor)
#define NVVK_HAS_VK_KHR_maintenance6
#endif /* VK_KHR_maintenance6 && VK_KHR_push_descriptor */
#if defined(VK_KHR_maintenance6) && defined(VK_EXT_descriptor_buffer)
#define NVVK_HAS_VK_KHR_maintenance6
#endif /* VK_KHR_maintenance6 && VK_EXT_descriptor_buffer */
#if defined(VK_KHR_map_memory2)
#define NVVK_HAS_VK_KHR_map_memory2
#endif /* VK_KHR_map_memory2 */
#if defined(VK_KHR_performance_query)
#define NVVK_HAS_VK_KHR_performance_query
#endif /* VK_KHR_performance_query */
#if defined(VK_KHR_pipeline_executable_properties)
#define NVVK_HAS_VK_KHR_pipeline_executable_properties
#endif /* VK_KHR_pipeline_executable_properties */
#if defined(VK_KHR_present_wait)
#define NVVK_HAS_VK_KHR_present_wait
#endif /* VK_KHR_present_wait */
#if defined(VK_KHR_push_descriptor)
#define NVVK_HAS_VK_KHR_push_descriptor
#endif /* VK_KHR_push_descriptor */
#if defined(VK_KHR_push_descriptor) && defined(VK_VERSION_1_1)
#define NVVK_HAS_VK_KHR_push_descriptor
#endif /* VK_KHR_push_descriptor && VK_VERSION_1_1 */
#if defined(VK_KHR_push_descriptor) && defined(VK_KHR_descriptor_update_template)
#define NVVK_HAS_VK_KHR_push_descriptor
#endif /* VK_KHR_push_descriptor && VK_KHR_descriptor_update_template */
#if defined(VK_KHR_ray_tracing_maintenance1) && defined(VK_KHR_ray_tracing_pipeline)
#define NVVK_HAS_VK_KHR_ray_tracing_maintenance1
#endif /* VK_KHR_ray_tracing_maintenance1 && VK_KHR_ray_tracing_pipeline */
#if defined(VK_KHR_ray_tracing_pipeline)
#define NVVK_HAS_VK_KHR_ray_tracing_pipeline
#endif /* VK_KHR_ray_tracing_pipeline */
#if defined(VK_KHR_sampler_ycbcr_conversion)
#define NVVK_HAS_VK_KHR_sampler_ycbcr_conversion
#endif /* VK_KHR_sampler_ycbcr_conversion */
#if defined(VK_KHR_shared_presentable_image)
#define NVVK_HAS_VK_KHR_shared_presentable_image
#endif /* VK_KHR_shared_presentable_image */
#if defined(VK_KHR_swapchain) && defined(VK_VERSION_1_1)
#define NVVK_HAS_VK_KHR_swapchain
#endif /* VK_KHR_swapchain && VK_VERSION_1_1 */
#if defined(VK_KHR_synchronization2)
#define NVVK_HAS_VK_KHR_synchronization2
#endif /* VK_KHR_synchronization2 */
#if defined(VK_KHR_synchronization2) && defined(VK_AMD_buffer_marker)
#define NVVK_HAS_VK_KHR_synchronization2
#endif /* VK_KHR_synchronization2 && VK_AMD_buffer_marker */
#if defined(VK_KHR_synchronization2) && defined(VK_NV_device_diagnostic_checkpoints)
#define NVVK_HAS_VK_KHR_synchronization2
#endif /* VK_KHR_synchronization2 && VK_NV_device_diagnostic_checkpoints */
#if defined(VK_KHR_timeline_semaphore)
#define NVVK_HAS_VK_KHR_timeline_semaphore
#endif /* VK_KHR_timeline_semaphore */
#if defined(VK_KHR_video_decode_queue)
#define NVVK_HAS_VK_KHR_video_decode_queue
#endif /* VK_KHR_video_decode_queue */
#if defined(VK_KHR_video_encode_queue)
#define NVVK_HAS_VK_KHR_video_encode_queue
#endif /* VK_KHR_video_encode_queue */
#if defined(VK_KHR_video_queue)
#define NVVK_HAS_VK_KHR_video_queue
#endif /* VK_KHR_video_queue */
#if defined(VK_MVK_ios_surface)
#define NVVK_HAS_VK_MVK_ios_surface
#endif /* VK_MVK_ios_surface */
#if defined(VK_MVK_macos_surface)
#define NVVK_HAS_VK_MVK_macos_surface
#endif /* VK_MVK_macos_surface */
#if defined(VK_NN_vi_surface)
#define NVVK_HAS_VK_NN_vi_surface
#endif /* VK_NN_vi_surface */
#if defined(VK_NVX_binary_import)
#define NVVK_HAS_VK_NVX_binary_import
#endif /* VK_NVX_binary_import */
#if defined(VK_NVX_image_view_handle)
#define NVVK_HAS_VK_NVX_image_view_handle
#endif /* VK_NVX_image_view_handle */
#if defined(VK_NV_acquire_winrt_display)
#define NVVK_HAS_VK_NV_acquire_winrt_display
#endif /* VK_NV_acquire_winrt_display */
#if defined(VK_NV_clip_space_w_scaling)
#define NVVK_HAS_VK_NV_clip_space_w_scaling
#endif /* VK_NV_clip_space_w_scaling */
#if defined(VK_NV_cooperative_matrix)
#define NVVK_HAS_VK_NV_cooperative_matrix
#endif /* VK_NV_cooperative_matrix */
#if defined(VK_NV_copy_memory_indirect)
#define NVVK_HAS_VK_NV_copy_memory_indirect
#endif /* VK_NV_copy_memory_indirect */
#if defined(VK_NV_coverage_reduction_mode)
#define NVVK_HAS_VK_NV_coverage_reduction_mode
#endif /* VK_NV_coverage_reduction_mode */
#if defined(VK_NV_cuda_kernel_launch)
#define NVVK_HAS_VK_NV_cuda_kernel_launch
#endif /* VK_NV_cuda_kernel_launch */
#if defined(VK_NV_device_diagnostic_checkpoints)
#define NVVK_HAS_VK_NV_device_diagnostic_checkpoints
#endif /* VK_NV_device_diagnostic_checkpoints */
#if defined(VK_NV_device_generated_commands)
#define NVVK_HAS_VK_NV_device_generated_commands
#endif /* VK_NV_device_generated_commands */
#if defined(VK_NV_device_generated_commands_compute)
#define NVVK_HAS_VK_NV_device_generated_commands_compute
#endif /* VK_NV_device_generated_commands_compute */
#if defined(VK_NV_external_memory_capabilities)
#define NVVK_HAS_VK_NV_external_memory_capabilities
#endif /* VK_NV_external_memory_capabilities */
#if defined(VK_NV_external_memory_rdma)
#define NVVK_HAS_VK_NV_external_memory_rdma
#endif /* VK_NV_external_memory_rdma */
#if defined(VK_NV_external_memory_win32)
#define NVVK_HAS_VK_NV_external_memory_win32
#endif /* VK_NV_external_memory_win32 */
#if defined(VK_NV_fragment_shading_rate_enums)
#define NVVK_HAS_VK_NV_fragment_shading_rate_enums
#endif /* VK_NV_fragment_shading_rate_enums */
#if defined(VK_NV_low_latency2)
#define NVVK_HAS_VK_NV_low_latency2
#endif /* VK_NV_low_latency2 */
#if defined(VK_NV_memory_decompression)
#define NVVK_HAS_VK_NV_memory_decompression
#endif /* VK_NV_memory_decompression */
#if defined(VK_NV_mesh_shader)
#define NVVK_HAS_VK_NV_mesh_shader
#endif /* VK_NV_mesh_shader */
#if defined(VK_NV_optical_flow)
#define NVVK_HAS_VK_NV_optical_flow
#endif /* VK_NV_optical_flow */
#if defined(VK_NV_ray_tracing)
#define NVVK_HAS_VK_NV_ray_tracing
#endif /* VK_NV_ray_tracing */
#if defined(VK_NV_scissor_exclusive) && VK_NV_SCISSOR_EXCLUSIVE_SPEC_VERSION >= 2
#define NVVK_HAS_VK_NV_scissor_exclusive
#endif /* VK_NV_scissor_exclusive && VK_NV_SCISSOR_EXCLUSIVE_SPEC_VERSION >= 2 */
#if defined(VK_NV_scissor_exclusive)
#define NVVK_HAS_VK_NV_scissor_exclusive
#endif /* VK_NV_scissor_exclusive */
#if defined(VK_NV_shading_rate_image)
#define NVVK_HAS_VK_NV_shading_rate_image
#endif /* VK_NV_shading_rate_image */
#if defined(VK_QCOM_tile_properties)
#define NVVK_HAS_VK_QCOM_tile_properties
#endif /* VK_QCOM_tile_properties */
#if defined(VK_QNX_external_memory_screen_buffer)
#define NVVK_HAS_VK_QNX_external_memory_screen_buffer
#endif /* VK_QNX_external_memory_screen_buffer */
#if defined(VK_QNX_screen_surface)
#define NVVK_HAS_VK_QNX_screen_surface
#endif /* VK_QNX_screen_surface */
#if defined(VK_VALVE_descriptor_set_host_mapping)
#define NVVK_HAS_VK_VALVE_descriptor_set_host_mapping
#endif /* VK_VALVE_descriptor_set_host_mapping */
#if defined(VK_EXT_extended_dynamic_state) || defined(VK_EXT_shader_object)
#define NVVK_HAS_VK_EXT_extended_dynamic_state
#endif /* VK_EXT_extended_dynamic_state || VK_EXT_shader_object */
#if defined(VK_EXT_extended_dynamic_state2) || defined(VK_EXT_shader_object)
#define NVVK_HAS_VK_EXT_extended_dynamic_state2
#endif /* VK_EXT_extended_dynamic_state2 || VK_EXT_shader_object */
#if defined(VK_EXT_extended_dynamic_state3) || defined(VK_EXT_shader_object)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 || VK_EXT_shader_object */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_clip_space_w_scaling) || defined(VK_EXT_shader_object) && defined(VK_NV_clip_space_w_scaling)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_clip_space_w_scaling || VK_EXT_shader_object && VK_NV_clip_space_w_scaling */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_viewport_swizzle) || defined(VK_EXT_shader_object) && defined(VK_NV_viewport_swizzle)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_viewport_swizzle || VK_EXT_shader_object && VK_NV_viewport_swizzle */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_fragment_coverage_to_color) || defined(VK_EXT_shader_object) && defined(VK_NV_fragment_coverage_to_color)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_fragment_coverage_to_color || VK_EXT_shader_object && VK_NV_fragment_coverage_to_color */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_framebuffer_mixed_samples) || defined(VK_EXT_shader_object) && defined(VK_NV_framebuffer_mixed_samples)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_framebuffer_mixed_samples || VK_EXT_shader_object && VK_NV_framebuffer_mixed_samples */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_shading_rate_image) || defined(VK_EXT_shader_object) && defined(VK_NV_shading_rate_image)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_shading_rate_image || VK_EXT_shader_object && VK_NV_shading_rate_image */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_representative_fragment_test) || defined(VK_EXT_shader_object) && defined(VK_NV_representative_fragment_test)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_representative_fragment_test || VK_EXT_shader_object && VK_NV_representative_fragment_test */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_coverage_reduction_mode) || defined(VK_EXT_shader_object) && defined(VK_NV_coverage_reduction_mode)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_coverage_reduction_mode || VK_EXT_shader_object && VK_NV_coverage_reduction_mode */
#if defined(VK_EXT_full_screen_exclusive) && defined(VK_KHR_device_group) || defined(VK_EXT_full_screen_exclusive) && defined(VK_VERSION_1_1)
#define NVVK_HAS_VK_EXT_full_screen_exclusive
#endif /* VK_EXT_full_screen_exclusive && VK_KHR_device_group || VK_EXT_full_screen_exclusive && VK_VERSION_1_1 */
#if defined(VK_EXT_host_image_copy) || defined(VK_EXT_image_compression_control)
#define NVVK_HAS_VK_EXT_host_image_copy
#endif /* VK_EXT_host_image_copy || VK_EXT_image_compression_control */
#if defined(VK_EXT_shader_object) || defined(VK_EXT_vertex_input_dynamic_state)
#define NVVK_HAS_VK_EXT_shader_object
#endif /* VK_EXT_shader_object || VK_EXT_vertex_input_dynamic_state */
#if defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor) || defined(VK_KHR_push_descriptor) && defined(VK_VERSION_1_1) || defined(VK_KHR_push_descriptor) && defined(VK_KHR_descriptor_update_template)
#define NVVK_HAS_VK_KHR_descriptor_update_template
#endif /* VK_KHR_descriptor_update_template && VK_KHR_push_descriptor || VK_KHR_push_descriptor && VK_VERSION_1_1 || VK_KHR_push_descriptor && VK_KHR_descriptor_update_template */
#if defined(VK_KHR_device_group) && defined(VK_KHR_surface) || defined(VK_KHR_swapchain) && defined(VK_VERSION_1_1)
#define NVVK_HAS_VK_KHR_device_group
#endif /* VK_KHR_device_group && VK_KHR_surface || VK_KHR_swapchain && VK_VERSION_1_1 */
#if defined(VK_KHR_device_group) && defined(VK_KHR_swapchain) || defined(VK_KHR_swapchain) && defined(VK_VERSION_1_1)
#define NVVK_HAS_VK_KHR_device_group
#endif /* VK_KHR_device_group && VK_KHR_swapchain || VK_KHR_swapchain && VK_VERSION_1_1 */
/* NVVK_GENERATE_DEFINE */
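/*
 Illustrative usage sketch (not part of the generated output): application code can use these
 NVVK_HAS_* defines to compile optional paths only when the corresponding extension is present
 in the Vulkan headers in use. `cmd` and `taskCount` below are assumptions; the command itself
 is the standard VK_NV_mesh_shader entry point.

   #ifdef NVVK_HAS_VK_NV_mesh_shader
     vkCmdDrawMeshTasksNV(cmd, taskCount, 0);
   #endif
*/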

View file

@ -0,0 +1,445 @@
#
# Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION
# SPDX-License-Identifier: Apache-2.0
#
#
# The following script originates from Volk (https://github.com/zeux/volk) and was adapted to the needs
# of the nvpro-core samples.
#
# Copyright (c) 2018-2023 Arseny Kapoulkine
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
#!/usr/bin/python3
# This script generates the entry points for all Vulkan extensions.
# Code blocks are created and replace the content between paired
# 'NVVK_GENERATE_'<BLOCK_NAME> markers.
import argparse
import os.path
import urllib
import urllib.request
import xml.etree.ElementTree as etree
import re
from collections import OrderedDict
# Ignore these extensions because their commands are already exported by vulkan-1.lib
ExcludeList = [
"defined(VK_KHR_surface)",
"defined(VK_KHR_win32_surface)",
"defined(VK_KHR_xlib_surface)",
"defined(VK_KHR_wayland_surface)",
"defined(VK_KHR_xcb_surface)",
"defined(VK_KHR_display)",
"defined(VK_KHR_swapchain)",
"defined(VK_KHR_get_surface_capabilities2)",
"defined(VK_KHR_get_display_properties2)",
"defined(VK_KHR_display_swapchain)",
"VK_VERSION_1_0",
"VK_VERSION_1_1",
"VK_VERSION_1_2",
"VK_VERSION_1_3",
]
# Debugging - To be sure that the exclude list excludes all commands
# exported by vulkan-1, populate the list here. If a duplicate is found,
# the command name and the extension name will be printed out.
ExportedCommands = [] # dumpbin /EXPORTS vulkan-1.lib
# Commands that were added in newer extension revisions.
# Extensions such as VK_EXT_discard_rectangles have had specification revisions
# that added new commands. Since these commands should only be used if the
# extension's `VkExtensionProperties::specVersion` is high enough, this table
# tracks the first `specVersion` in which each newer command was introduced
# (as this information is not currently contained in vk.xml).
cmdversions = {
"vkCmdSetDiscardRectangleEnableEXT": 2,
"vkCmdSetDiscardRectangleModeEXT": 2,
"vkCmdSetExclusiveScissorEnableNV": 2,
}
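# Illustrative example (this mirrors the guard construction done further below): a command listed
# here, e.g. vkCmdSetDiscardRectangleEnableEXT, ends up behind a stricter guard such as
#   #if defined(VK_EXT_discard_rectangles) && VK_EXT_DISCARD_RECTANGLES_SPEC_VERSION >= 2
# instead of the plain extension guard used for the other commands of that extension.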
def parse_xml(path):
# Parsing the Vulkan 'vk.xml' document
file = urllib.request.urlopen(path) if path.startswith("http") else open(path, "r")
with file:
tree = etree.parse(file)
return tree
def patch_file(fileName, blocks):
# Find each section of NVVK_GENERATE_ and replace with block of text
result = []
block = None
scriptDir = os.path.dirname(os.path.realpath(__file__))
path = os.path.join(scriptDir, fileName)
with open(path, "r") as file:
for line in file.readlines():
if block:
if line == block:
result.append(line)
block = None
else:
result.append(line)
# C comment marker
if line.strip().startswith("/* NVVK_GENERATE_"):
block = line
result.append(blocks[line.strip()[17:-3]])
with open(path, "w", newline="\n") as file:
for line in result:
file.write(line)
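# For reference, the patched files contain paired, identical marker lines; everything between a
# pair is replaced with the generated block of the same name. A sketch of what patch_file expects:
#   /* NVVK_GENERATE_STATIC_PFN */
#   ...previously generated content, discarded on regeneration...
#   /* NVVK_GENERATE_STATIC_PFN */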
def is_descendant_type(types, name, base):
# Finding the base type of each type:
# <type category="handle" parent="VkPhysicalDevice"
# objtypeenum="VK_OBJECT_TYPE_DEVICE"><type>VK_DEFINE_HANDLE</type>(<name>VkDevice</name>)</type>
# <type category="handle" parent="VkDevice"
# objtypeenum="VK_OBJECT_TYPE_QUEUE"><type>VK_DEFINE_HANDLE</type>(<name>VkQueue</name>)</type>
if name == base:
return True
type = types.get(name)
if len(type) == 0:
return False
parents = type.get("parent")
if not parents:
return False
return any(
[is_descendant_type(types, parent, base) for parent in parents.split(",")]
)
def defined(key):
return "defined(" + key + ")"
def cdepends(key):
return (
re.sub(r"[a-zA-Z0-9_]+", lambda m: defined(m.group(0)), key)
.replace(",", " || ")
.replace("+", " && ")
)
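# Illustrative example: vk.xml uses ',' for OR and '+' for AND in its 'depends' shorthand, so
#   cdepends("VK_KHR_get_physical_device_properties2,VK_VERSION_1_1")
# returns "defined(VK_KHR_get_physical_device_properties2) || defined(VK_VERSION_1_1)".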
# Remove "defined(..)"
def remove_defined(input_string):
return re.sub(r"defined\((.*?)\)", r"\1", input_string)
def toStr(txt):
# Return the string if it exists, or '' if None
if txt:
return txt
return ""
def get_function(rtype, name, params):
# Build and return the wrapper function definition
fct_args = [] # incoming arguments
call_args = [] # call arguments
for p in params:
ptype = p.find("type")
pname = p.find("name")
papi = p.attrib.get("api")
# Avoid `vulkansc`
if not papi or papi == "vulkan":
fct_args.append(
"".join(
[
toStr(p.text),
ptype.text,
ptype.tail,
pname.text,
toStr(pname.tail),
]
)
) # 'const', 'vkSome', '*', 'some', '[2]'
call_args.append(pname.text)
# Function signature
fct = "VKAPI_ATTR " + rtype + " VKAPI_CALL " + name + "(\n"
# Arguments of the function
fct += "\t" + ", \n\t".join(fct_args) + ") \n"
fct += "{ \n "
# fct += ' assert(pfn_'+name+');\n'
# Check if the function is returning a value
if rtype != "void":
fct += "return "
fct += "pfn_" + name + "(" + ", ".join(call_args) + "); \n"
fct += "}\n"
return fct
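# For illustration (whitespace aside), the kind of trampoline this emits, here for the
# standard Vulkan command vkCmdSetCheckpointNV:
#   VKAPI_ATTR void VKAPI_CALL vkCmdSetCheckpointNV(
#       VkCommandBuffer commandBuffer,
#       const void* pCheckpointMarker)
#   {
#     pfn_vkCmdSetCheckpointNV(commandBuffer, pCheckpointMarker);
#   }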
def get_vk_xml_path(spec_arg):
"""
Find the Vulkan specification XML file by looking for (highest priority to
lowest) an incoming `spec` argument, a local copy within the Vulkan SDK,
or by downloading it from KhronosGroup/Vulkan-Docs.
"""
if spec_arg is not None:
return spec_arg
# VULKAN_SDK is a newer version of VK_SDK_PATH. The Linux Tarball Vulkan SDK
# instructions only say to set VULKAN_SDK - so VULKAN_SDK might exist while
# VK_SDK_PATH might not.
vulkan_sdk_env = os.getenv("VULKAN_SDK")
if vulkan_sdk_env is not None:
local_spec_path = os.path.normpath(
vulkan_sdk_env + "/share/vulkan/registry/vk.xml"
)
if os.path.isfile(local_spec_path):
return local_spec_path
# Ubuntu installations might not have VULKAN_SDK set, but have vk.xml in /usr.
if os.path.isfile("/usr/share/vulkan/registry/vk.xml"):
return "/usr/share/vulkan/registry/vk.xml"
print(
"Warning: no `spec` parameter was provided, and vk.xml could not be "
"found in the path given by the VULKAN_SDK environment variable or in "
"system folders. This script will download the latest copy of vk.xml "
"online, which may be incompatible with an installed Vulkan installation."
)
return "https://raw.githubusercontent.com/KhronosGroup/Vulkan-Docs/main/xml/vk.xml"
#
# MAIN Entry
#
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Generates entry points for Vulkan extensions in extensions_vk.cpp."
)
parser.add_argument(
"--beta",
action="store_true",
help="Includes provisional Vulkan extensions; these extensions are not guaranteed to be consistent across Vulkan SDK versions.",
)
parser.add_argument(
"spec",
type=str,
nargs="?",
help="Optional path to a vk.xml file to use to generate extensions. Otherwise, uses the vk.xml in the Vulkan SDK distribution specified in the VULKAN_SDK environment variable.",
)
args = parser.parse_args()
# Retrieving the XML file
specpath = get_vk_xml_path(args.spec)
spec = parse_xml(specpath)
print("Using " + specpath)
# CODE BLOCKS
blocks = {}
# CODE BLOCKS for generated code
block_keys = ("STATIC_PFN", "LOAD_PROC", "DECLARE", "DEFINE")
for key in block_keys:
blocks[key] = ""
# Retrieving the version of the Vulkan specification
version = spec.find('types/type[name="VK_HEADER_VERSION"]')
blocks["VERSION_INFO"] = (
"// Generated using Vulkan " + version.find("name").tail.strip() + "\n"
)
# Patching the files
patch_file("extensions_vk.cpp", blocks)
# Ordered list of commands per extension group
command_groups = OrderedDict()
instance_commands = set()
for feature in spec.findall("feature"):
api = feature.get("api")
if "vulkan" not in api.split(","):
continue
key = feature.get("name")
cmdrefs = feature.findall("require/command")
command_groups[key] = [cmdref.get("name") for cmdref in cmdrefs]
# Retrieve all extensions, sorted alphabetically
for ext in sorted(
spec.findall("extensions/extension"), key=lambda ext: ext.get("name")
):
# Only add the extension if 'vulkan' is part of the support attribute
supported = ext.get("supported")
if "vulkan" not in supported.split(","):
continue
# Discard beta extensions
if ext.get("provisional") == "true" and not args.beta:
continue
name = ext.get("name")
type = ext.get("type") # device or instance
for req in ext.findall("require"):
# Adding all commands for this extension
key = defined(name)
if req.get("feature"): # old-style XML depends specification
for i in req.get("feature").split(","):
key += " && " + defined(i)
if req.get("extension"): # old-style XML depends specification
for i in req.get("extension").split(","):
key += " && " + defined(i)
if req.get("depends"): # new-style XML depends specification
dep = cdepends(req.get("depends"))
key += " && " + ("(" + dep + ")" if "||" in dep else dep)
cmdrefs = req.findall("command")
# Add ifdef section and split commands with high version
for cmdref in cmdrefs:
ver = cmdversions.get(cmdref.get("name"))
if ver:
command_groups.setdefault(
key + " && " + name.upper() + "_SPEC_VERSION >= " + str(ver), []
).append(cmdref.get("name"))
else:
command_groups.setdefault(key, []).append(cmdref.get("name"))
# Adding commands that are 'instance' instead of 'device'
if type == "instance":
for cmdref in cmdrefs:
instance_commands.add(cmdref.get("name"))
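# At this point the keys of command_groups are either bare core-version names collected above
# (e.g. "VK_VERSION_1_2") or C preprocessor expressions built here, e.g.
# "defined(VK_NV_mesh_shader)", possibly extended with "&& defined(...)" dependency terms.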
# From a command, find which groups it belongs to
commands_to_groups = OrderedDict()
for group, cmdnames in command_groups.items():
for name in cmdnames:
commands_to_groups.setdefault(name, []).append(group)
for group, cmdnames in command_groups.items():
command_groups[group] = [
name for name in cmdnames if len(commands_to_groups[name]) == 1
]
for name, groups in commands_to_groups.items():
if len(groups) == 1:
continue
key = " || ".join([g for g in groups])
command_groups.setdefault(key, []).append(name)
# Finding the alias name for a function: <command
# name="vkGetPhysicalDeviceExternalBufferPropertiesKHR"
# alias="vkGetPhysicalDeviceExternalBufferProperties"/>
commands = {}
for cmd in spec.findall("commands/command"):
if not cmd.get("alias"):
name = cmd.findtext("proto/name")
commands[name] = cmd
for cmd in spec.findall("commands/command"):
if cmd.get("alias"):
name = cmd.get("name")
commands[name] = commands[cmd.get("alias")]
# Find all Vulkan types to be used by is_descendant_type
types = {}
for type in spec.findall("types/type"):
name = type.findtext("name")
if name:
types[name] = type
for key in block_keys:
blocks[key] = ""
# For each group, get the list of all commands
for group, cmdnames in command_groups.items():
# Skipping some extensions
if group in ExcludeList:
continue
ifdef = "#if " + group + "\n"
for key in block_keys:
blocks[key] += ifdef
# Name the NVVK_HAS_ define after the first part of the group
ext_name = group
if "&&" in group:
ext_name = group.split("&&")[0].strip()
elif "||" in group:
ext_name = group.split("||")[0].strip()
if ext_name is not None:
blocks["DEFINE"] += "#define NVVK_HAS_" + remove_defined(ext_name) + "\n"
# Getting all commands within the group
for name in sorted(cmdnames):
# Finding the 'alias' command
cmd = commands[name]
if name in ExportedCommands:
print("Command " + name + " from group " + group)
# Get the first argument type, which defines if it is an instance
# function
type = cmd.findtext("param[1]/type")
# Create the function declaration block
params = cmd.findall("param")
return_type = cmd.findtext("proto/type")
blocks["DECLARE"] += get_function(return_type, name, params)
# Loading proc address can be device or instance
if (
is_descendant_type(types, type, "VkDevice")
and name not in instance_commands
):
blocks["LOAD_PROC"] += (
" pfn_"
+ name
+ " = (PFN_"
+ name
+ ')getDeviceProcAddr(device, "'
+ name
+ '");\n'
)
elif is_descendant_type(types, type, "VkInstance"):
blocks["LOAD_PROC"] += (
" pfn_"
+ name
+ " = (PFN_"
+ name
+ ')getInstanceProcAddr(instance, "'
+ name
+ '");\n'
)
# Create the block of static function pointers
blocks["STATIC_PFN"] += "static PFN_" + name + " pfn_" + name + "= 0;\n"
# Adding the #endif or removing empty blocks
for key in block_keys:
if blocks[key].endswith(ifdef):
blocks[key] = blocks[key][: -len(ifdef)]
else:
blocks[key] += "#endif /* " + remove_defined(group) + " */\n"
# Patching the files
patch_file("extensions_vk.hpp", blocks)
patch_file("extensions_vk.cpp", blocks)

View file

@ -0,0 +1,327 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2020-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "gizmos_vk.hpp"
#include <glm/gtc/type_ptr.hpp>
namespace nvvk {
//#include "E:\temp\glsl\axis.vert.h"
static const uint32_t s_vert_spv[] = {
0x07230203, 0x00010500, 0x0008000a, 0x0000006e, 0x00000000, 0x00020011, 0x00000001, 0x0006000b, 0x00000001,
0x4c534c47, 0x6474732e, 0x3035342e, 0x00000000, 0x0003000e, 0x00000000, 0x00000001, 0x000b000f, 0x00000000,
0x00000004, 0x6e69616d, 0x00000000, 0x0000000c, 0x0000002e, 0x00000032, 0x0000003b, 0x00000041, 0x00000045,
0x00030003, 0x00000002, 0x000001c2, 0x00040005, 0x00000004, 0x6e69616d, 0x00000000, 0x00050005, 0x0000000c,
0x6f727261, 0x65765f77, 0x00007472, 0x00030005, 0x0000002c, 0x00736f70, 0x00060005, 0x0000002e, 0x565f6c67,
0x65747265, 0x646e4978, 0x00007865, 0x00070005, 0x00000032, 0x495f6c67, 0x6174736e, 0x4965636e, 0x7865646e,
0x00000000, 0x00050005, 0x00000039, 0x65746e69, 0x6c6f7072, 0x00746e61, 0x00050006, 0x00000039, 0x00000000,
0x6f6c6f43, 0x00000072, 0x00030005, 0x0000003b, 0x0074754f, 0x00060005, 0x0000003f, 0x505f6c67, 0x65567265,
0x78657472, 0x00000000, 0x00060006, 0x0000003f, 0x00000000, 0x505f6c67, 0x7469736f, 0x006e6f69, 0x00030005,
0x00000041, 0x00000000, 0x00060005, 0x00000043, 0x73755075, 0x6e6f4368, 0x6e617473, 0x00000074, 0x00060006,
0x00000043, 0x00000000, 0x6e617274, 0x726f6673, 0x0000006d, 0x00030005, 0x00000045, 0x00006370, 0x00040047,
0x0000002e, 0x0000000b, 0x0000002a, 0x00040047, 0x00000032, 0x0000000b, 0x0000002b, 0x00030047, 0x00000039,
0x00000002, 0x00040047, 0x0000003b, 0x0000001e, 0x00000000, 0x00050048, 0x0000003f, 0x00000000, 0x0000000b,
0x00000000, 0x00030047, 0x0000003f, 0x00000002, 0x00040048, 0x00000043, 0x00000000, 0x00000005, 0x00050048,
0x00000043, 0x00000000, 0x00000023, 0x00000000, 0x00050048, 0x00000043, 0x00000000, 0x00000007, 0x00000010,
0x00030047, 0x00000043, 0x00000002, 0x00020013, 0x00000002, 0x00030021, 0x00000003, 0x00000002, 0x00030016,
0x00000006, 0x00000020, 0x00040017, 0x00000007, 0x00000006, 0x00000003, 0x00040015, 0x00000008, 0x00000020,
0x00000000, 0x0004002b, 0x00000008, 0x00000009, 0x00000008, 0x0004001c, 0x0000000a, 0x00000007, 0x00000009,
0x00040020, 0x0000000b, 0x00000006, 0x0000000a, 0x0004003b, 0x0000000b, 0x0000000c, 0x00000006, 0x00040015,
0x0000000d, 0x00000020, 0x00000001, 0x0004002b, 0x0000000d, 0x0000000e, 0x00000000, 0x0004002b, 0x00000006,
0x0000000f, 0x3f800000, 0x0004002b, 0x00000006, 0x00000010, 0x00000000, 0x0006002c, 0x00000007, 0x00000011,
0x0000000f, 0x00000010, 0x00000010, 0x00040020, 0x00000012, 0x00000006, 0x00000007, 0x0004002b, 0x0000000d,
0x00000014, 0x00000001, 0x0004002b, 0x00000006, 0x00000015, 0x3f400000, 0x0004002b, 0x00000006, 0x00000016,
0x3dcccccd, 0x0006002c, 0x00000007, 0x00000017, 0x00000015, 0x00000016, 0x00000016, 0x0004002b, 0x0000000d,
0x00000019, 0x00000002, 0x0004002b, 0x00000006, 0x0000001a, 0xbdcccccd, 0x0006002c, 0x00000007, 0x0000001b,
0x00000015, 0x00000016, 0x0000001a, 0x0004002b, 0x0000000d, 0x0000001d, 0x00000003, 0x0006002c, 0x00000007,
0x0000001e, 0x00000015, 0x0000001a, 0x0000001a, 0x0004002b, 0x0000000d, 0x00000020, 0x00000004, 0x0006002c,
0x00000007, 0x00000021, 0x00000015, 0x0000001a, 0x00000016, 0x0004002b, 0x0000000d, 0x00000023, 0x00000005,
0x0004002b, 0x0000000d, 0x00000025, 0x00000006, 0x0006002c, 0x00000007, 0x00000026, 0x00000010, 0x00000010,
0x00000010, 0x0004002b, 0x0000000d, 0x00000028, 0x00000007, 0x0006002c, 0x00000007, 0x00000029, 0x00000015,
0x00000010, 0x00000010, 0x00040020, 0x0000002b, 0x00000007, 0x00000007, 0x00040020, 0x0000002d, 0x00000001,
0x0000000d, 0x0004003b, 0x0000002d, 0x0000002e, 0x00000001, 0x0004003b, 0x0000002d, 0x00000032, 0x00000001,
0x00020014, 0x00000034, 0x00040017, 0x00000038, 0x00000006, 0x00000004, 0x0003001e, 0x00000039, 0x00000038,
0x00040020, 0x0000003a, 0x00000003, 0x00000039, 0x0004003b, 0x0000003a, 0x0000003b, 0x00000003, 0x0007002c,
0x00000038, 0x0000003c, 0x0000000f, 0x00000010, 0x00000010, 0x0000000f, 0x00040020, 0x0000003d, 0x00000003,
0x00000038, 0x0003001e, 0x0000003f, 0x00000038, 0x00040020, 0x00000040, 0x00000003, 0x0000003f, 0x0004003b,
0x00000040, 0x00000041, 0x00000003, 0x00040018, 0x00000042, 0x00000038, 0x00000004, 0x0003001e, 0x00000043,
0x00000042, 0x00040020, 0x00000044, 0x00000009, 0x00000043, 0x0004003b, 0x00000044, 0x00000045, 0x00000009,
0x00040020, 0x00000046, 0x00000009, 0x00000042, 0x0007002c, 0x00000038, 0x00000055, 0x00000010, 0x0000000f,
0x00000010, 0x0000000f, 0x0007002c, 0x00000038, 0x00000062, 0x00000010, 0x00000010, 0x0000000f, 0x0000000f,
0x00050036, 0x00000002, 0x00000004, 0x00000000, 0x00000003, 0x000200f8, 0x00000005, 0x0004003b, 0x0000002b,
0x0000002c, 0x00000007, 0x00050041, 0x00000012, 0x00000013, 0x0000000c, 0x0000000e, 0x0003003e, 0x00000013,
0x00000011, 0x00050041, 0x00000012, 0x00000018, 0x0000000c, 0x00000014, 0x0003003e, 0x00000018, 0x00000017,
0x00050041, 0x00000012, 0x0000001c, 0x0000000c, 0x00000019, 0x0003003e, 0x0000001c, 0x0000001b, 0x00050041,
0x00000012, 0x0000001f, 0x0000000c, 0x0000001d, 0x0003003e, 0x0000001f, 0x0000001e, 0x00050041, 0x00000012,
0x00000022, 0x0000000c, 0x00000020, 0x0003003e, 0x00000022, 0x00000021, 0x00050041, 0x00000012, 0x00000024,
0x0000000c, 0x00000023, 0x0003003e, 0x00000024, 0x00000017, 0x00050041, 0x00000012, 0x00000027, 0x0000000c,
0x00000025, 0x0003003e, 0x00000027, 0x00000026, 0x00050041, 0x00000012, 0x0000002a, 0x0000000c, 0x00000028,
0x0003003e, 0x0000002a, 0x00000029, 0x0004003d, 0x0000000d, 0x0000002f, 0x0000002e, 0x00050041, 0x00000012,
0x00000030, 0x0000000c, 0x0000002f, 0x0004003d, 0x00000007, 0x00000031, 0x00000030, 0x0003003e, 0x0000002c,
0x00000031, 0x0004003d, 0x0000000d, 0x00000033, 0x00000032, 0x000500aa, 0x00000034, 0x00000035, 0x00000033,
0x0000000e, 0x000300f7, 0x00000037, 0x00000000, 0x000400fa, 0x00000035, 0x00000036, 0x00000050, 0x000200f8,
0x00000036, 0x00050041, 0x0000003d, 0x0000003e, 0x0000003b, 0x0000000e, 0x0003003e, 0x0000003e, 0x0000003c,
0x00050041, 0x00000046, 0x00000047, 0x00000045, 0x0000000e, 0x0004003d, 0x00000042, 0x00000048, 0x00000047,
0x0004003d, 0x00000007, 0x00000049, 0x0000002c, 0x00050051, 0x00000006, 0x0000004a, 0x00000049, 0x00000000,
0x00050051, 0x00000006, 0x0000004b, 0x00000049, 0x00000001, 0x00050051, 0x00000006, 0x0000004c, 0x00000049,
0x00000002, 0x00070050, 0x00000038, 0x0000004d, 0x0000004a, 0x0000004b, 0x0000004c, 0x0000000f, 0x00050091,
0x00000038, 0x0000004e, 0x00000048, 0x0000004d, 0x00050041, 0x0000003d, 0x0000004f, 0x00000041, 0x0000000e,
0x0003003e, 0x0000004f, 0x0000004e, 0x000200f9, 0x00000037, 0x000200f8, 0x00000050, 0x0004003d, 0x0000000d,
0x00000051, 0x00000032, 0x000500aa, 0x00000034, 0x00000052, 0x00000051, 0x00000014, 0x000300f7, 0x00000054,
0x00000000, 0x000400fa, 0x00000052, 0x00000053, 0x00000061, 0x000200f8, 0x00000053, 0x00050041, 0x0000003d,
0x00000056, 0x0000003b, 0x0000000e, 0x0003003e, 0x00000056, 0x00000055, 0x00050041, 0x00000046, 0x00000057,
0x00000045, 0x0000000e, 0x0004003d, 0x00000042, 0x00000058, 0x00000057, 0x0004003d, 0x00000007, 0x00000059,
0x0000002c, 0x0008004f, 0x00000007, 0x0000005a, 0x00000059, 0x00000059, 0x00000001, 0x00000000, 0x00000002,
0x00050051, 0x00000006, 0x0000005b, 0x0000005a, 0x00000000, 0x00050051, 0x00000006, 0x0000005c, 0x0000005a,
0x00000001, 0x00050051, 0x00000006, 0x0000005d, 0x0000005a, 0x00000002, 0x00070050, 0x00000038, 0x0000005e,
0x0000005b, 0x0000005c, 0x0000005d, 0x0000000f, 0x00050091, 0x00000038, 0x0000005f, 0x00000058, 0x0000005e,
0x00050041, 0x0000003d, 0x00000060, 0x00000041, 0x0000000e, 0x0003003e, 0x00000060, 0x0000005f, 0x000200f9,
0x00000054, 0x000200f8, 0x00000061, 0x00050041, 0x0000003d, 0x00000063, 0x0000003b, 0x0000000e, 0x0003003e,
0x00000063, 0x00000062, 0x00050041, 0x00000046, 0x00000064, 0x00000045, 0x0000000e, 0x0004003d, 0x00000042,
0x00000065, 0x00000064, 0x0004003d, 0x00000007, 0x00000066, 0x0000002c, 0x0008004f, 0x00000007, 0x00000067,
0x00000066, 0x00000066, 0x00000001, 0x00000002, 0x00000000, 0x00050051, 0x00000006, 0x00000068, 0x00000067,
0x00000000, 0x00050051, 0x00000006, 0x00000069, 0x00000067, 0x00000001, 0x00050051, 0x00000006, 0x0000006a,
0x00000067, 0x00000002, 0x00070050, 0x00000038, 0x0000006b, 0x00000068, 0x00000069, 0x0000006a, 0x0000000f,
0x00050091, 0x00000038, 0x0000006c, 0x00000065, 0x0000006b, 0x00050041, 0x0000003d, 0x0000006d, 0x00000041,
0x0000000e, 0x0003003e, 0x0000006d, 0x0000006c, 0x000200f9, 0x00000054, 0x000200f8, 0x00000054, 0x000200f9,
0x00000037, 0x000200f8, 0x00000037, 0x000100fd, 0x00010038};
//#include "E:\temp\glsl\axis.frag.h"
static const uint32_t s_frag_spv[] = {
0x07230203, 0x00010500, 0x0008000a, 0x00000012, 0x00000000, 0x00020011, 0x00000001, 0x0006000b, 0x00000001,
0x4c534c47, 0x6474732e, 0x3035342e, 0x00000000, 0x0003000e, 0x00000000, 0x00000001, 0x0007000f, 0x00000004,
0x00000004, 0x6e69616d, 0x00000000, 0x00000009, 0x0000000c, 0x00030010, 0x00000004, 0x00000007, 0x00030003,
0x00000002, 0x000001c2, 0x00040005, 0x00000004, 0x6e69616d, 0x00000000, 0x00040005, 0x00000009, 0x6c6f4366,
0x0000726f, 0x00050005, 0x0000000a, 0x65746e69, 0x6c6f7072, 0x00746e61, 0x00050006, 0x0000000a, 0x00000000,
0x6f6c6f43, 0x00000072, 0x00030005, 0x0000000c, 0x00006e49, 0x00040047, 0x00000009, 0x0000001e, 0x00000000,
0x00030047, 0x0000000a, 0x00000002, 0x00040047, 0x0000000c, 0x0000001e, 0x00000000, 0x00020013, 0x00000002,
0x00030021, 0x00000003, 0x00000002, 0x00030016, 0x00000006, 0x00000020, 0x00040017, 0x00000007, 0x00000006,
0x00000004, 0x00040020, 0x00000008, 0x00000003, 0x00000007, 0x0004003b, 0x00000008, 0x00000009, 0x00000003,
0x0003001e, 0x0000000a, 0x00000007, 0x00040020, 0x0000000b, 0x00000001, 0x0000000a, 0x0004003b, 0x0000000b,
0x0000000c, 0x00000001, 0x00040015, 0x0000000d, 0x00000020, 0x00000001, 0x0004002b, 0x0000000d, 0x0000000e,
0x00000000, 0x00040020, 0x0000000f, 0x00000001, 0x00000007, 0x00050036, 0x00000002, 0x00000004, 0x00000000,
0x00000003, 0x000200f8, 0x00000005, 0x00050041, 0x0000000f, 0x00000010, 0x0000000c, 0x0000000e, 0x0004003d,
0x00000007, 0x00000011, 0x00000010, 0x0003003e, 0x00000009, 0x00000011, 0x000100fd, 0x00010038};
//--------------------------------------------------------------------------------------------------
//
//
void AxisVK::display(VkCommandBuffer cmdBuf, const glm::mat4& transform, const VkExtent2D& screenSize)
{
vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineTriangleFan);
// Setup viewport:
VkViewport viewport{};
viewport.width = float(screenSize.width);
viewport.height = float(screenSize.height);
viewport.minDepth = 0;
viewport.maxDepth = 1;
VkRect2D rect;
rect.offset = VkOffset2D{0, 0};
rect.extent = VkExtent2D{screenSize.width, screenSize.height};
vkCmdSetViewport(cmdBuf, 0, 1, &viewport);
vkCmdSetScissor(cmdBuf, 0, 1, &rect);
// Set the orthographic matrix in the bottom left corner
{
const float pixelW = m_axisSize / screenSize.width;
const float pixelH = m_axisSize / screenSize.height;
const glm::mat4 matOrtho = {pixelW * .8f, 0.0f, 0.0f, 0.0f, //
0.0f, -pixelH * .8f, 0.0f, 0.0f, //
0.0f, 0.0f, -0.5f, 0.0f, //
-1.f + pixelW, 1.f - pixelH, 0.5f, 1.0f};
glm::mat4 modelView = transform;
modelView[3] = glm::vec4(0, 0, 0, 1);
modelView = matOrtho * modelView;
// Push the matrix to the shader
vkCmdPushConstants(cmdBuf, m_pipelineLayout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(glm::mat4), glm::value_ptr(modelView));
}
// Draw the tip of the arrow 3 times (one instance per axis); the shader flips the orientation and sets the color
vkCmdDraw(cmdBuf, 6, 3, 0, 0);
// Now draw the shaft of the arrow using the last 2 vertices of the buffer (offset 6)
vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineLines);
vkCmdDraw(cmdBuf, 2, 3, 6, 0);
}
void AxisVK::createAxisObject(CreateAxisInfo& info)
{
// The shader needs push constants: the transformation matrix
const VkPushConstantRange push_constants{VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(glm::mat4)};
VkPipelineLayoutCreateInfo layout_info{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
layout_info.pushConstantRangeCount = 1;
layout_info.pPushConstantRanges = &push_constants;
vkCreatePipelineLayout(m_device, &layout_info, nullptr, &m_pipelineLayout);
// Creation of the pipeline
VkShaderModule smVertex;
VkShaderModule smFrag;
VkShaderModuleCreateInfo createInfo{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO};
createInfo.codeSize = sizeof(s_vert_spv);
createInfo.pCode = s_vert_spv;
vkCreateShaderModule(m_device, &createInfo, nullptr, &smVertex);
createInfo.codeSize = sizeof(s_frag_spv);
createInfo.pCode = s_frag_spv;
vkCreateShaderModule(m_device, &createInfo, nullptr, &smFrag);
// Pipeline state
nvvk::GraphicsPipelineState gps;
gps.inputAssemblyState.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
gps.rasterizationState.cullMode = VK_CULL_MODE_NONE;
gps.depthStencilState.depthTestEnable = VK_TRUE;
gps.depthStencilState.stencilTestEnable = VK_FALSE;
gps.depthStencilState.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL;
// Creating the tips
nvvk::GraphicsPipelineGenerator gpg(m_device, m_pipelineLayout, info.renderPass, gps);
gpg.addShader(smVertex, VK_SHADER_STAGE_VERTEX_BIT);
gpg.addShader(smFrag, VK_SHADER_STAGE_FRAGMENT_BIT);
// Dynamic Rendering
VkPipelineRenderingCreateInfoKHR rfInfo{VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR};
if(info.renderPass == VK_NULL_HANDLE)
{
rfInfo.colorAttachmentCount = static_cast<uint32_t>(info.colorFormat.size());
rfInfo.pColorAttachmentFormats = info.colorFormat.data();
rfInfo.depthAttachmentFormat = info.depthFormat;
rfInfo.stencilAttachmentFormat = info.stencilFormat;
gpg.createInfo.pNext = &rfInfo;
}
m_pipelineTriangleFan = gpg.createPipeline();
// Creating the lines
gps.inputAssemblyState.topology = VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
m_pipelineLines = gpg.createPipeline();
vkDestroyShaderModule(m_device, smVertex, nullptr);
vkDestroyShaderModule(m_device, smFrag, nullptr);
}
// glsl_shader.vert, compiled with: (see comment)
/*************************************************
#version 450 core
// glslangValidator.exe --target-env vulkan1.2 --vn s_vert_spv -o axis.vert.h axis.vert
layout(push_constant) uniform uPushConstant
{
mat4 transform;
}
pc;
out gl_PerVertex
{
vec4 gl_Position;
};
layout(location = 0) out interpolant
{
vec4 Color;
}
Out;
// Arrow along the x axis
const float asize = 1.0f; // length of arrow
const float atip = 0.1f; // width of arrow tip
const float abase = 0.66f; // 0.25 == tip length
vec3 arrow_vert[8];
void main()
{
arrow_vert[0] = vec3(asize, 0, 0); // Tip
arrow_vert[1] = vec3(abase, atip, atip);
arrow_vert[2] = vec3(abase, atip, -atip);
arrow_vert[3] = vec3(abase, -atip, -atip);
arrow_vert[4] = vec3(abase, -atip, atip);
arrow_vert[5] = vec3(abase, atip, atip);
arrow_vert[6] = vec3(0, 0, 0); // To draw the line
arrow_vert[7] = vec3(abase, 0, 0); // To draw the line
// const float t = 0.04f;
// arrow_vert[6] = vec3(0, t, t); // To draw the line
// arrow_vert[7] = vec3(abase, t, t); // To draw the line
// arrow_vert[8] = vec3(0, -t, t); // To draw the line
// arrow_vert[9] = vec3(abase, -t, t); // To draw the line
// //
// arrow_vert[10] = vec3(0, -t, -t); // To draw the line
// arrow_vert[11] = vec3(abase, -t, -t); // To draw the line
// //
// arrow_vert[12] = vec3(0, t, -t); // To draw the line
// arrow_vert[13] = vec3(abase, t, -t); // To draw the line
//
// arrow_vert[14] = vec3(0, t, t); // To draw the line
// arrow_vert[15] = vec3(abase, t, t); // To draw the line
vec3 pos = arrow_vert[gl_VertexIndex];
// Out.Color = aColor;
if (gl_InstanceIndex == 0)
{
Out.Color = vec4(1, 0, 0, 1);
gl_Position = pc.transform * vec4(pos.xyz, 1);
}
else if (gl_InstanceIndex == 1)
{
Out.Color = vec4(0, 1, 0, 1);
gl_Position = pc.transform * vec4(pos.yxz, 1);
}
else
{
Out.Color = vec4(0, 0, 1, 1);
gl_Position = pc.transform * vec4(pos.yzx, 1);
}
}
*********************/
// glsl_shader.frag
/*************************************************
#version 450 core
// glslangValidator.exe --target-env vulkan1.2 --vn s_frag_spv -o axis.frag.h axis.frag
layout(location = 0) out vec4 fColor;
layout(location = 0) in interpolant
{
vec4 Color;
}
In;
void main()
{
fColor = In.Color;
}
*********************/
} // namespace nvvk

View file

@ -0,0 +1,101 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <array>
#include <vector>
#include <vulkan/vulkan_core.h>
#include <glm/glm.hpp>
#include "nvvk/pipeline_vk.hpp" // Using the Pipeline Generator Utility
namespace nvvk {
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# class nvvk::AxisVK
nvvk::AxisVK displays an axis gizmo representing the orientation of the camera in the bottom-left corner of the window.
- Initialize the axis using `init()`
- Call `display()` inside an inline rendering pass, as one of the last commands
Example:
```cpp
m_axis.display(cmdBuf, CameraManip.getMatrix(), windowSize);
```
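A fuller sketch (illustrative; `m_axis`, `m_device`, `renderPass`, `cmdBuf` and `windowSize` are assumptions):
```cpp
nvvk::AxisVK m_axis;
m_axis.init(m_device, renderPass);                            // once, at setup time
// per frame, inside the render pass, as one of the last commands:
m_axis.display(cmdBuf, CameraManip.getMatrix(), windowSize);
// at shutdown, before the device is destroyed:
m_axis.deinit();
```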
@DOC_END */
class AxisVK
{
public:
struct CreateAxisInfo
{
VkRenderPass renderPass{VK_NULL_HANDLE};
uint32_t subpass{0};
std::vector<VkFormat> colorFormat;
VkFormat depthFormat{};
VkFormat stencilFormat{};
float axisSize{50.f};
};
void init(VkDevice device, VkRenderPass renderPass, uint32_t subpass = 0, float axisSize = 50.f)
{
m_device = device;
m_axisSize = axisSize;
CreateAxisInfo info;
info.renderPass = renderPass;
info.subpass = subpass;
createAxisObject(info);
}
void init(VkDevice device, CreateAxisInfo info)
{
m_device = device;
m_axisSize = info.axisSize;
createAxisObject(info);
}
void deinit()
{
vkDestroyPipeline(m_device, m_pipelineTriangleFan, nullptr);
vkDestroyPipeline(m_device, m_pipelineLines, nullptr);
vkDestroyPipelineLayout(m_device, m_pipelineLayout, nullptr);
}
void display(VkCommandBuffer cmdBuf, const glm::mat4& transform, const VkExtent2D& screenSize);
void setAxisSize(float s) { m_axisSize = s; }
private:
void createAxisObject(CreateAxisInfo& info);
VkPipeline m_pipelineTriangleFan = {};
VkPipeline m_pipelineLines = {};
VkPipelineLayout m_pipelineLayout = {};
float m_axisSize = 50.f; // Size in pixel
VkDevice m_device{VK_NULL_HANDLE};
};
} // namespace nvvk

View file

@ -0,0 +1,318 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "images_vk.hpp"
#include <cassert>
namespace nvvk {
VkImageMemoryBarrier makeImageMemoryBarrier(VkImage img,
VkAccessFlags srcAccess,
VkAccessFlags dstAccess,
VkImageLayout oldLayout,
VkImageLayout newLayout,
VkImageAspectFlags aspectMask)
{
VkImageMemoryBarrier barrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
barrier.srcAccessMask = srcAccess;
barrier.dstAccessMask = dstAccess;
barrier.oldLayout = oldLayout;
barrier.newLayout = newLayout;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = img;
barrier.subresourceRange = {0};
barrier.subresourceRange.aspectMask = aspectMask;
barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS;
barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS;
return barrier;
}
///////////////////////////////////////////////////////////////////////////////
// Return the access flag for an image layout
VkAccessFlags accessFlagsForImageLayout(VkImageLayout layout)
{
switch(layout)
{
case VK_IMAGE_LAYOUT_PREINITIALIZED:
return VK_ACCESS_HOST_WRITE_BIT;
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
return VK_ACCESS_TRANSFER_WRITE_BIT;
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
return VK_ACCESS_TRANSFER_READ_BIT;
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
return VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
return VK_ACCESS_SHADER_READ_BIT;
default:
return VkAccessFlags();
}
}
VkPipelineStageFlags pipelineStageForLayout(VkImageLayout layout)
{
switch(layout)
{
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
return VK_PIPELINE_STAGE_TRANSFER_BIT;
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
return VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
return VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; // We do this to allow queues other than graphics
// return VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
return VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; // We do this to allow queues other than graphics
// return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
case VK_IMAGE_LAYOUT_PREINITIALIZED:
return VK_PIPELINE_STAGE_HOST_BIT;
case VK_IMAGE_LAYOUT_UNDEFINED:
return VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
default:
return VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
}
}
void cmdBarrierImageLayout(VkCommandBuffer cmdbuffer,
VkImage image,
VkImageLayout oldImageLayout,
VkImageLayout newImageLayout,
const VkImageSubresourceRange& subresourceRange)
{
// Create an image barrier to change the layout
VkImageMemoryBarrier imageMemoryBarrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
imageMemoryBarrier.oldLayout = oldImageLayout;
imageMemoryBarrier.newLayout = newImageLayout;
imageMemoryBarrier.image = image;
imageMemoryBarrier.subresourceRange = subresourceRange;
imageMemoryBarrier.srcAccessMask = accessFlagsForImageLayout(oldImageLayout);
imageMemoryBarrier.dstAccessMask = accessFlagsForImageLayout(newImageLayout);
// Fix for a validation issue - should be needed when VkImage sharing mode is VK_SHARING_MODE_EXCLUSIVE
// and the values of srcQueueFamilyIndex and dstQueueFamilyIndex are equal, no ownership transfer is performed,
// and the barrier operates as if they were both set to VK_QUEUE_FAMILY_IGNORED.
imageMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
imageMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
VkPipelineStageFlags srcStageMask = pipelineStageForLayout(oldImageLayout);
VkPipelineStageFlags destStageMask = pipelineStageForLayout(newImageLayout);
vkCmdPipelineBarrier(cmdbuffer, srcStageMask, destStageMask, 0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier);
}
void cmdBarrierImageLayout(VkCommandBuffer cmdbuffer, VkImage image, VkImageLayout oldImageLayout, VkImageLayout newImageLayout, VkImageAspectFlags aspectMask)
{
VkImageSubresourceRange subresourceRange;
subresourceRange.aspectMask = aspectMask;
subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS;
subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS;
subresourceRange.baseMipLevel = 0;
subresourceRange.baseArrayLayer = 0;
cmdBarrierImageLayout(cmdbuffer, image, oldImageLayout, newImageLayout, subresourceRange);
}
VkImageCreateInfo makeImage2DCreateInfo(const VkExtent2D& size, VkFormat format, VkImageUsageFlags usage, bool mipmaps)
{
VkImageCreateInfo icInfo = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
icInfo.imageType = VK_IMAGE_TYPE_2D;
icInfo.format = format;
icInfo.samples = VK_SAMPLE_COUNT_1_BIT;
icInfo.mipLevels = mipmaps ? mipLevels(size) : 1;
icInfo.arrayLayers = 1;
icInfo.extent.width = size.width;
icInfo.extent.height = size.height;
icInfo.extent.depth = 1;
icInfo.usage = usage | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
return icInfo;
}
VkImageViewCreateInfo makeImage2DViewCreateInfo(VkImage image,
VkFormat format /*= VK_FORMAT_R8G8B8A8_UNORM*/,
VkImageAspectFlags aspectFlags /*= VK_IMAGE_ASPECT_COLOR_BIT*/,
uint32_t levels /*= 1*/,
const void* pNextImageView /*= nullptr*/)
{
VkImageViewCreateInfo viewInfo{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
viewInfo.pNext = pNextImageView;
viewInfo.image = image;
viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
viewInfo.format = format;
viewInfo.subresourceRange.aspectMask = aspectFlags;
viewInfo.subresourceRange.baseMipLevel = 0;
viewInfo.subresourceRange.levelCount = levels;
viewInfo.subresourceRange.baseArrayLayer = 0;
viewInfo.subresourceRange.layerCount = 1;
return viewInfo;
}
VkImageViewCreateInfo makeImageViewCreateInfo(VkImage image, const VkImageCreateInfo& imageInfo, bool isCube)
{
VkImageViewCreateInfo viewInfo{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
viewInfo.pNext = nullptr;
viewInfo.image = image;
switch(imageInfo.imageType)
{
case VK_IMAGE_TYPE_1D:
viewInfo.viewType = VK_IMAGE_VIEW_TYPE_1D;
break;
case VK_IMAGE_TYPE_2D:
viewInfo.viewType = isCube ? VK_IMAGE_VIEW_TYPE_CUBE : VK_IMAGE_VIEW_TYPE_2D;
break;
case VK_IMAGE_TYPE_3D:
viewInfo.viewType = VK_IMAGE_VIEW_TYPE_3D;
break;
default:
assert(0);
}
viewInfo.format = imageInfo.format;
viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
viewInfo.subresourceRange.baseMipLevel = 0;
viewInfo.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS;
viewInfo.subresourceRange.baseArrayLayer = 0;
viewInfo.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS;
return viewInfo;
}
VkImageCreateInfo makeImage3DCreateInfo(const VkExtent3D& size, VkFormat format, VkImageUsageFlags usage, bool mipmaps)
{
VkImageCreateInfo icInfo = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
icInfo.imageType = VK_IMAGE_TYPE_3D;
icInfo.format = format;
icInfo.mipLevels = mipmaps ? mipLevels(size) : 1;
icInfo.arrayLayers = 1;
icInfo.samples = VK_SAMPLE_COUNT_1_BIT;
icInfo.extent.width = size.width;
icInfo.extent.height = size.height;
icInfo.extent.depth = size.depth;
icInfo.usage = usage | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
return icInfo;
}
VkImageCreateInfo makeImageCubeCreateInfo(const VkExtent2D& size, VkFormat format, VkImageUsageFlags usage, bool mipmaps)
{
VkImageCreateInfo icInfo{VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
icInfo.imageType = VK_IMAGE_TYPE_2D;
icInfo.format = format;
icInfo.mipLevels = mipmaps ? mipLevels(size) : 1;
icInfo.arrayLayers = 6;
icInfo.samples = VK_SAMPLE_COUNT_1_BIT;
icInfo.extent.width = size.width;
icInfo.extent.height = size.height;
icInfo.extent.depth = 1;
icInfo.usage = usage | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
icInfo.flags = VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
return icInfo;
}
// This mipmap generation relies on blitting.
// A more sophisticated version could be done with a compute shader;
// we will publish how to do this in the future.
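// Illustrative usage (a sketch; `cmdBuf`, `image` and `size` are assumptions): record into a
// command buffer after uploading mip level 0, e.g.
//   nvvk::cmdGenerateMipmaps(cmdBuf, image, VK_FORMAT_R8G8B8A8_UNORM, size, nvvk::mipLevels(size));
// The image is expected to be in the 'currentLayout' passed as the last parameter
// (VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL by default) and is returned to that layout afterwards.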
void cmdGenerateMipmaps(VkCommandBuffer cmdBuf, VkImage image, VkFormat imageFormat, const VkExtent2D& size, uint32_t levelCount, uint32_t layerCount, VkImageLayout currentLayout)
{
// Transition the top mip level to VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL and its access to VK_ACCESS_TRANSFER_READ_BIT
VkImageMemoryBarrier barrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
barrier.subresourceRange.baseArrayLayer = 0;
barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
barrier.subresourceRange.baseMipLevel = 0;
barrier.subresourceRange.layerCount = layerCount;
barrier.subresourceRange.levelCount = 1;
barrier.image = image;
barrier.oldLayout = currentLayout;
barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
barrier.srcAccessMask = accessFlagsForImageLayout(currentLayout);
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
vkCmdPipelineBarrier(cmdBuf, pipelineStageForLayout(currentLayout), VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0,
nullptr, 1, &barrier);
if(levelCount > 1)
{
// transfer remaining mips to DST optimal
barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.subresourceRange.baseMipLevel = 1;
barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS;
vkCmdPipelineBarrier(cmdBuf, pipelineStageForLayout(currentLayout), VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr,
0, nullptr, 1, &barrier);
}
int32_t mipWidth = size.width;
int32_t mipHeight = size.height;
for(uint32_t i = 1; i < levelCount; i++)
{
VkImageBlit blit;
blit.srcOffsets[0] = {0, 0, 0};
blit.srcOffsets[1] = {mipWidth, mipHeight, 1};
blit.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
blit.srcSubresource.mipLevel = i - 1;
blit.srcSubresource.baseArrayLayer = 0;
blit.srcSubresource.layerCount = layerCount;
blit.dstOffsets[0] = {0, 0, 0};
blit.dstOffsets[1] = {mipWidth > 1 ? mipWidth / 2 : 1, mipHeight > 1 ? mipHeight / 2 : 1, 1};
blit.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
blit.dstSubresource.mipLevel = i;
blit.dstSubresource.baseArrayLayer = 0;
blit.dstSubresource.layerCount = layerCount;
vkCmdBlitImage(cmdBuf, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
&blit, VK_FILTER_LINEAR);
// Next
{
// Transition the current mip level to VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, to be used as the source for the next one.
barrier.subresourceRange.baseMipLevel = i;
barrier.subresourceRange.levelCount = 1;
barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0,
nullptr, 1, &barrier);
}
if(mipWidth > 1)
mipWidth /= 2;
if(mipHeight > 1)
mipHeight /= 2;
}
// Transition all miplevels (now in VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) back to currentLayout
barrier.subresourceRange.baseMipLevel = 0;
barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS;
barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
barrier.newLayout = currentLayout;
barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
barrier.dstAccessMask = accessFlagsForImageLayout(currentLayout);
vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, pipelineStageForLayout(currentLayout), 0, 0, nullptr, 0,
nullptr, 1, &barrier);
}
} // namespace nvvk

View file

@ -0,0 +1,123 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <vulkan/vulkan_core.h>
#include <algorithm>
#include <cmath>
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# functions in nvvk
- makeImageMemoryBarrier : returns VkImageMemoryBarrier for an image based on provided layouts and access flags.
- mipLevels : returns the number of mip levels for a 2d/3d extent
- accessFlagsForImageLayout : helps resource transitions
- pipelineStageForLayout : helps resource transitions
- cmdBarrierImageLayout : inserts a barrier for an image layout transition
- cmdGenerateMipmaps : basic mipmap creation for images (meant for one-shot operations)
- makeImage2DCreateInfo : aids 2d image creation
- makeImage3DCreateInfo : aids 3d image creation
- makeImageCubeCreateInfo : aids cube image creation
- makeImageViewCreateInfo : aids common image view creation, derives info from VkImageCreateInfo
- makeImage2DViewCreateInfo : aids 2d image view creation
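Example (an illustrative sketch; `image`, `cmdBuf` and `size` are assumptions, and the actual
image creation/memory binding is done elsewhere, e.g. by a resource allocator):
```cpp
VkImageCreateInfo info = nvvk::makeImage2DCreateInfo(size, VK_FORMAT_R8G8B8A8_UNORM,
                                                     VK_IMAGE_USAGE_SAMPLED_BIT, true /*mipmaps*/);
// ... create `image` from `info`, bind memory and upload mip level 0 ...
nvvk::cmdBarrierImageLayout(cmdBuf, image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
nvvk::cmdGenerateMipmaps(cmdBuf, image, info.format, size, info.mipLevels);
```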
@DOC_END */
VkImageMemoryBarrier makeImageMemoryBarrier(VkImage image,
VkAccessFlags srcAccess,
VkAccessFlags dstAccess,
VkImageLayout oldLayout,
VkImageLayout newLayout,
VkImageAspectFlags aspectMask = VK_IMAGE_ASPECT_COLOR_BIT);
//--------------------------------------------------------------------------------------------------
inline uint32_t mipLevels(VkExtent2D extent)
{
return static_cast<uint32_t>(std::floor(std::log2(std::max(extent.width, extent.height)))) + 1;
}
inline uint32_t mipLevels(VkExtent3D extent)
{
return static_cast<uint32_t>(std::floor(std::log2(std::max(extent.width, extent.height)))) + 1;
}
//--------------------------------------------------------------------------------------------------
// Transition Pipeline Layout tools
VkAccessFlags accessFlagsForImageLayout(VkImageLayout layout);
VkPipelineStageFlags pipelineStageForLayout(VkImageLayout layout);
void cmdBarrierImageLayout(VkCommandBuffer cmdbuffer,
VkImage image,
VkImageLayout oldImageLayout,
VkImageLayout newImageLayout,
const VkImageSubresourceRange& subresourceRange);
void cmdBarrierImageLayout(VkCommandBuffer cmdbuffer, VkImage image, VkImageLayout oldImageLayout, VkImageLayout newImageLayout, VkImageAspectFlags aspectMask);
inline void cmdBarrierImageLayout(VkCommandBuffer cmdbuffer, VkImage image, VkImageLayout oldImageLayout, VkImageLayout newImageLayout)
{
cmdBarrierImageLayout(cmdbuffer, image, oldImageLayout, newImageLayout, VK_IMAGE_ASPECT_COLOR_BIT);
}
VkImageCreateInfo makeImage3DCreateInfo(const VkExtent3D& size,
VkFormat format = VK_FORMAT_R8G8B8A8_UNORM,
VkImageUsageFlags usage = VK_IMAGE_USAGE_SAMPLED_BIT,
bool mipmaps = false);
VkImageCreateInfo makeImage2DCreateInfo(const VkExtent2D& size,
VkFormat format = VK_FORMAT_R8G8B8A8_UNORM,
VkImageUsageFlags usage = VK_IMAGE_USAGE_SAMPLED_BIT,
bool mipmaps = false);
VkImageCreateInfo makeImageCubeCreateInfo(const VkExtent2D& size,
VkFormat format = VK_FORMAT_R8G8B8A8_UNORM,
VkImageUsageFlags usage = VK_IMAGE_USAGE_SAMPLED_BIT,
bool mipmaps = false);
// derives format and view type from imageInfo, special case for IMAGE_2D to treat as cube
// view enables all mips and layers
VkImageViewCreateInfo makeImageViewCreateInfo(VkImage image, const VkImageCreateInfo& imageInfo, bool isCube = false);
VkImageViewCreateInfo makeImage2DViewCreateInfo(VkImage image,
VkFormat format = VK_FORMAT_R8G8B8A8_UNORM,
VkImageAspectFlags aspectFlags = VK_IMAGE_ASPECT_COLOR_BIT,
uint32_t levels = VK_REMAINING_MIP_LEVELS,
const void* pNextImageView = nullptr);
void cmdGenerateMipmaps(VkCommandBuffer cmdBuf,
VkImage image,
VkFormat imageFormat,
const VkExtent2D& size,
uint32_t levelCount,
uint32_t layerCount = 1,
VkImageLayout currentLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
} // namespace nvvk

View file

@ -0,0 +1,186 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "memallocator_dedicated_vk.hpp"
#include "error_vk.hpp"
#include "debug_util_vk.hpp"
#include <cassert>
namespace nvvk {
class DedicatedMemoryHandle : public MemHandleBase
{
public:
DedicatedMemoryHandle() = default;
DedicatedMemoryHandle(const DedicatedMemoryHandle&) = default;
DedicatedMemoryHandle(DedicatedMemoryHandle&&) = default;
DedicatedMemoryHandle& operator=(const DedicatedMemoryHandle&) = default;
DedicatedMemoryHandle& operator=(DedicatedMemoryHandle&&) = default;
VkDeviceMemory getMemory() const { return m_memory; }
VkDeviceSize getSize() const { return m_size; }
private:
friend class DedicatedMemoryAllocator;
DedicatedMemoryHandle(VkDeviceMemory memory, VkDeviceSize size)
: m_memory(memory)
, m_size(size)
{
}
VkDeviceMemory m_memory;
VkDeviceSize m_size;
};
DedicatedMemoryHandle* castDedicatedMemoryHandle(MemHandle memHandle)
{
if(!memHandle)
return nullptr;
#ifndef NDEBUG
  auto dedicatedMemHandle = dynamic_cast<DedicatedMemoryHandle*>(memHandle);
  assert(dedicatedMemHandle);
#else
  auto dedicatedMemHandle = static_cast<DedicatedMemoryHandle*>(memHandle);
#endif
return dedicatedMemHandle;
}
DedicatedMemoryAllocator::DedicatedMemoryAllocator(VkDevice device, VkPhysicalDevice physDevice)
{
init(device, physDevice);
}
DedicatedMemoryAllocator::~DedicatedMemoryAllocator()
{
deinit();
}
bool DedicatedMemoryAllocator::init(VkDevice device, VkPhysicalDevice physDevice)
{
m_device = device;
m_physicalDevice = physDevice;
vkGetPhysicalDeviceMemoryProperties(m_physicalDevice, &m_physicalMemoryProperties);
return true;
}
void DedicatedMemoryAllocator::deinit()
{
m_device = NULL;
}
MemHandle DedicatedMemoryAllocator::allocMemory(const MemAllocateInfo& allocInfo, VkResult* pResult)
{
MemAllocateInfo localInfo(allocInfo);
localInfo.setAllocationFlags(allocInfo.getAllocationFlags() | m_flags);
BakedAllocateInfo bakedInfo;
fillBakedAllocateInfo(m_physicalMemoryProperties, localInfo, bakedInfo);
VkDeviceMemory memory = VK_NULL_HANDLE;
VkResult result = vkAllocateMemory(m_device, &bakedInfo.memAllocInfo, nullptr, &memory);
NVVK_CHECK(result);
if(pResult)
{
*pResult = result;
}
if(result == VK_SUCCESS)
{
auto dedicatedMemHandle = new DedicatedMemoryHandle(memory, bakedInfo.memAllocInfo.allocationSize);
if(!allocInfo.getDebugName().empty())
{
const MemInfo& memInfo = getMemoryInfo(dedicatedMemHandle);
nvvk::DebugUtil(m_device).setObjectName(memInfo.memory, localInfo.getDebugName());
}
return dedicatedMemHandle;
}
else
{
return NullMemHandle;
}
}
void DedicatedMemoryAllocator::freeMemory(MemHandle memHandle)
{
if(!memHandle)
return;
auto dedicatedHandle = castDedicatedMemoryHandle(memHandle);
vkFreeMemory(m_device, dedicatedHandle->getMemory(), nullptr);
delete dedicatedHandle;
return;
}
MemAllocator::MemInfo DedicatedMemoryAllocator::getMemoryInfo(MemHandle memHandle) const
{
auto dedicatedHandle = castDedicatedMemoryHandle(memHandle);
return MemInfo{dedicatedHandle->getMemory(), 0, dedicatedHandle->getSize()};
}
void* DedicatedMemoryAllocator::map(MemHandle memHandle, VkDeviceSize offset, VkDeviceSize size, VkResult* pResult)
{
auto dedicatedHandle = castDedicatedMemoryHandle(memHandle);
void* ptr = nullptr;
VkResult result = vkMapMemory(m_device, dedicatedHandle->getMemory(), offset, size, 0 /*VkMemoryFlags*/, &ptr);
NVVK_CHECK(result);
if(pResult)
{
*pResult = result;
}
return ptr;
}
void DedicatedMemoryAllocator::unmap(MemHandle memHandle)
{
auto dedicatedHandle = castDedicatedMemoryHandle(memHandle);
vkUnmapMemory(m_device, dedicatedHandle->getMemory());
}
VkDevice DedicatedMemoryAllocator::getDevice() const
{
return m_device;
}
VkPhysicalDevice DedicatedMemoryAllocator::getPhysicalDevice() const
{
return m_physicalDevice;
}
void DedicatedMemoryAllocator::setAllocateFlags(VkMemoryAllocateFlags flags)
{
m_flags = flags;
}
} // namespace nvvk

View file

@ -0,0 +1,62 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include "memallocator_vk.hpp"
namespace nvvk {
/** @DOC_START
# class nvvk::DedicatedMemoryAllocator
nvvk::DedicatedMemoryAllocator is a simple implementation of the MemAllocator interface, using
one VkDeviceMemory allocation per allocMemory() call. This simplicity comes at the cost of
speed (vkAllocateMemory tends to be very slow) and of operating system resources,
as some OSs limit the number of physical memory allocations per process.
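A minimal usage sketch (it assumes `device`, `physicalDevice` and a created `buffer` already exist):
```cpp
nvvk::DedicatedMemoryAllocator memAllocator(device, physicalDevice);
nvvk::MemAllocateInfo          allocInfo(device, buffer);  // derive size/alignment/type from the buffer
nvvk::MemHandle                handle = memAllocator.allocMemory(allocInfo);
nvvk::MemAllocator::MemInfo    info   = memAllocator.getMemoryInfo(handle);
vkBindBufferMemory(device, buffer, info.memory, info.offset);
// ... use the buffer ...
memAllocator.freeMemory(handle);
```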
@DOC_END */
class DedicatedMemoryAllocator : public MemAllocator
{
public:
DedicatedMemoryAllocator() = default;
explicit DedicatedMemoryAllocator(VkDevice device, VkPhysicalDevice physDevice);
virtual ~DedicatedMemoryAllocator();
bool init(VkDevice device, VkPhysicalDevice physDevice);
void deinit();
virtual MemHandle allocMemory(const MemAllocateInfo& allocInfo, VkResult* pResult = nullptr) override;
virtual void freeMemory(MemHandle memHandle) override;
virtual MemInfo getMemoryInfo(MemHandle memHandle) const override;
virtual void* map(MemHandle memHandle, VkDeviceSize offset = 0, VkDeviceSize size = VK_WHOLE_SIZE, VkResult* pResult = nullptr) override;
virtual void unmap(MemHandle memHandle) override;
virtual VkDevice getDevice() const override;
virtual VkPhysicalDevice getPhysicalDevice() const override;
void setAllocateFlags(VkMemoryAllocateFlags flags);
private:
VkDevice m_device{NULL};
VkPhysicalDevice m_physicalDevice{NULL};
VkPhysicalDeviceMemoryProperties m_physicalMemoryProperties;
VkMemoryAllocateFlags m_flags{0};
};
} // namespace nvvk

View file

@ -0,0 +1,71 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include "memallocator_vk.hpp"
#include "memorymanagement_vk.hpp"
namespace nvvk {
class DeviceMemoryAllocator;
/** @DOC_START
# class nvvk::DMAMemoryAllocator
nvvk::DMAMemoryAllocator uses nvvk::DeviceMemoryAllocator internally.
nvvk::DeviceMemoryAllocator derives from nvvk::MemAllocator as well, so this class is for those who prefer the reduced wrapper interface.
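A minimal sketch wrapping an already initialized nvvk::DeviceMemoryAllocator (here assumed as `deviceAllocator`, with `device` and `image` existing elsewhere):
```cpp
nvvk::DMAMemoryAllocator memAllocator(&deviceAllocator);
nvvk::MemAllocateInfo    allocInfo(device, image);  // tiling-optimal image allocation
nvvk::MemHandle          handle = memAllocator.allocMemory(allocInfo);
nvvk::AllocationID       id     = memAllocator.getAllocationID(handle);  // DMA-specific utility
// ... bind and use ...
memAllocator.freeMemory(handle);
```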
@DOC_END */
class DMAMemoryAllocator : public MemAllocator
{
public:
DMAMemoryAllocator() = default;
explicit DMAMemoryAllocator(nvvk::DeviceMemoryAllocator* dma) { init(dma); }
virtual ~DMAMemoryAllocator() { deinit(); }
bool init(nvvk::DeviceMemoryAllocator* dma)
{
m_dma = dma;
return m_dma != nullptr;
}
void deinit() { m_dma = nullptr; }
// Implement MemAllocator interface
virtual MemHandle allocMemory(const MemAllocateInfo& allocInfo, VkResult* pResult = nullptr) override
{
return m_dma->allocMemory(allocInfo, pResult);
}
virtual void freeMemory(MemHandle memHandle) override { return m_dma->freeMemory(memHandle); }
virtual MemInfo getMemoryInfo(MemHandle memHandle) const override { return m_dma->getMemoryInfo(memHandle); }
virtual void* map(MemHandle memHandle, VkDeviceSize offset = 0, VkDeviceSize size = VK_WHOLE_SIZE, VkResult* pResult = nullptr) override
{
return m_dma->map(memHandle, offset, size, pResult);
}
virtual void unmap(MemHandle memHandle) override { return m_dma->unmap(memHandle); }
virtual VkDevice getDevice() const override { return m_dma->getDevice(); }
virtual VkPhysicalDevice getPhysicalDevice() const override { return m_dma->getPhysicalDevice(); }
// Utility function
AllocationID getAllocationID(MemHandle memHandle) const { return m_dma->getAllocationID(memHandle); }
private:
nvvk::DeviceMemoryAllocator* m_dma;
};
} // namespace nvvk

View file

@ -0,0 +1,201 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "memallocator_vk.hpp"
#include <cassert>
namespace nvvk {
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
MemAllocateInfo::MemAllocateInfo(const VkMemoryRequirements& memReqs, VkMemoryPropertyFlags memProps, bool isTilingOptimal)
: m_memReqs(memReqs)
, m_memProps(memProps)
, m_isTilingOptimal(isTilingOptimal)
{
}
MemAllocateInfo::MemAllocateInfo(VkDevice device, VkBuffer buffer, VkMemoryPropertyFlags memProps)
{
VkBufferMemoryRequirementsInfo2 bufferReqs = {VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, nullptr, buffer};
VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS};
VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, &dedicatedRegs};
vkGetBufferMemoryRequirements2(device, &bufferReqs, &memReqs);
m_memReqs = memReqs.memoryRequirements;
m_memProps = memProps;
if(dedicatedRegs.requiresDedicatedAllocation)
{
setDedicatedBuffer(buffer);
}
setTilingOptimal(false);
}
MemAllocateInfo::MemAllocateInfo(VkDevice device, VkImage image, VkMemoryPropertyFlags memProps, bool allowDedicatedAllocation)
{
VkImageMemoryRequirementsInfo2 imageReqs = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, nullptr, image};
VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS};
VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, &dedicatedRegs};
vkGetImageMemoryRequirements2(device, &imageReqs, &memReqs);
m_memReqs = memReqs.memoryRequirements;
m_memProps = memProps;
if(dedicatedRegs.requiresDedicatedAllocation || (dedicatedRegs.prefersDedicatedAllocation && allowDedicatedAllocation))
{
setDedicatedImage(image);
}
setTilingOptimal(true);
}
MemAllocateInfo& MemAllocateInfo::setDedicatedImage(VkImage image)
{
assert(!m_dedicatedBuffer);
m_dedicatedImage = image;
return *this;
}
MemAllocateInfo& MemAllocateInfo::setDedicatedBuffer(VkBuffer buffer)
{
assert(!m_dedicatedImage);
m_dedicatedBuffer = buffer;
return *this;
}
MemAllocateInfo& MemAllocateInfo::setAllocationFlags(VkMemoryAllocateFlags flags)
{
m_allocateFlags |= flags;
return *this;
}
MemAllocateInfo& MemAllocateInfo::setDeviceMask(uint32_t mask)
{
m_deviceMask = mask;
return *this;
}
MemAllocateInfo& MemAllocateInfo::setDebugName(const std::string& name)
{
m_debugName = name;
return *this;
}
MemAllocateInfo& MemAllocateInfo::setExportable(bool exportable)
{
m_isExportable = exportable;
return *this;
}
// Determines which heap to allocate from
MemAllocateInfo& MemAllocateInfo::setMemoryProperties(VkMemoryPropertyFlags flags)
{
m_memProps = flags;
return *this;
}
// Determines size and alignment
MemAllocateInfo& MemAllocateInfo::setMemoryRequirements(VkMemoryRequirements requirements)
{
m_memReqs = requirements;
return *this;
}
MemAllocateInfo& MemAllocateInfo::setTilingOptimal(bool isTilingOptimal)
{
m_isTilingOptimal = isTilingOptimal;
return *this;
}
MemAllocateInfo& MemAllocateInfo::setPriority(const float priority /*= 0.5f*/)
{
m_priority = priority;
return *this;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
uint32_t getMemoryType(const VkPhysicalDeviceMemoryProperties& memoryProperties, uint32_t typeBits, const VkMemoryPropertyFlags& properties)
{
for(uint32_t i = 0; i < memoryProperties.memoryTypeCount; i++)
{
if(((typeBits & (1 << i)) > 0) && (memoryProperties.memoryTypes[i].propertyFlags & properties) == properties)
{
return i;
}
}
assert(0);
return ~0u;
}
bool fillBakedAllocateInfo(const VkPhysicalDeviceMemoryProperties& physMemProps, const MemAllocateInfo& info, BakedAllocateInfo& baked)
{
baked.memAllocInfo.allocationSize = info.getMemoryRequirements().size;
baked.memAllocInfo.memoryTypeIndex =
getMemoryType(physMemProps, info.getMemoryRequirements().memoryTypeBits, info.getMemoryProperties());
// Put it last in the chain, so we can directly pass it into the DeviceMemoryAllocator::alloc function
if(info.getDedicatedBuffer() || info.getDedicatedImage())
{
baked.dedicatedInfo.pNext = baked.memAllocInfo.pNext;
baked.memAllocInfo.pNext = &baked.dedicatedInfo;
baked.dedicatedInfo.buffer = info.getDedicatedBuffer();
baked.dedicatedInfo.image = info.getDedicatedImage();
}
if(info.getExportable())
{
baked.exportInfo.pNext = baked.memAllocInfo.pNext;
baked.memAllocInfo.pNext = &baked.exportInfo;
#ifdef WIN32
baked.exportInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
#else
baked.exportInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
#endif
}
if(info.getDeviceMask() || info.getAllocationFlags())
{
baked.flagsInfo.pNext = baked.memAllocInfo.pNext;
baked.memAllocInfo.pNext = &baked.flagsInfo;
baked.flagsInfo.flags = info.getAllocationFlags();
baked.flagsInfo.deviceMask = info.getDeviceMask();
if(baked.flagsInfo.deviceMask)
{
baked.flagsInfo.flags |= VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT;
}
}
return true;
}
} // namespace nvvk

View file

@ -0,0 +1,203 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <vulkan/vulkan_core.h>
#include <string>
namespace nvvk {
class MemHandleBase;
typedef MemHandleBase* MemHandle;
static const MemHandle NullMemHandle = nullptr;
/** @DOC_START
# class nvvk::MemHandle
nvvk::MemHandle represents a memory allocation or sub-allocation from the
generic nvvk::MemAllocator interface. Prefer `nvvk::NullMemHandle` over plain
'NULL' when resetting a handle, as MemHandle may change to a non-pointer type in the future.
# class nvvk::MemAllocateInfo
nvvk::MemAllocateInfo collects almost all parameters a Vulkan allocation could potentially need.
This keeps MemAllocator's interface simple and extensible.
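A short sketch of the builder-style setters (it assumes `memReqs` was queried beforehand; the debug name is just an example):
```cpp
nvvk::MemAllocateInfo allocInfo(memReqs, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
allocInfo.setAllocationFlags(VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)
    .setPriority(1.0f)
    .setDebugName("scene vertex buffer");
```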
@DOC_END */
class MemAllocateInfo
{
public:
MemAllocateInfo(const VkMemoryRequirements& memReqs, // determine size, alignment and memory type
VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, // determine device_local, host_visible, host coherent etc...
                  bool isTilingOptimal = false  // determine if the allocation is going to be used for a VK_IMAGE_TILING_OPTIMAL image
);
// Convenience constructor that infers the allocation information directly from the buffer object
MemAllocateInfo(VkDevice device, VkBuffer buffer, VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
// Convenience constructor that infers the allocation information directly from the image object.
// If the driver _prefers_ a dedicated allocation for this particular image and allowDedicatedAllocation is true, a dedicated allocation will be requested.
// If the driver _requires_ a dedicated allocation, a dedicated allocation will be requested regardless of 'allowDedicatedAllocation'.
MemAllocateInfo(VkDevice device,
VkImage image,
VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
bool allowDedicatedAllocation = true);
// Determines which heap to allocate from
MemAllocateInfo& setMemoryProperties(VkMemoryPropertyFlags flags);
// Determines size and alignment
MemAllocateInfo& setMemoryRequirements(VkMemoryRequirements requirements);
// TilingOptimal should be set for images. The allocator may choose to separate linear and tiling allocations
MemAllocateInfo& setTilingOptimal(bool isTilingOptimal);
// The allocation will be dedicated for the given image
MemAllocateInfo& setDedicatedImage(VkImage image);
// The allocation will be dedicated for the given buffer
MemAllocateInfo& setDedicatedBuffer(VkBuffer buffer);
// Set additional allocation flags
MemAllocateInfo& setAllocationFlags(VkMemoryAllocateFlags flags);
// Set the device mask for the allocation, redirect allocations to specific device(s) in the device group
MemAllocateInfo& setDeviceMask(uint32_t mask);
// Set a name for the allocation (only useful for dedicated allocations or allocators)
MemAllocateInfo& setDebugName(const std::string& name);
// Make the allocation exportable
MemAllocateInfo& setExportable(bool exportable);
// Prioritize the allocation (values 0.0 - 1.0); this may guide eviction strategies
MemAllocateInfo& setPriority(const float priority = 0.5f);
VkImage getDedicatedImage() const { return m_dedicatedImage; }
VkBuffer getDedicatedBuffer() const { return m_dedicatedBuffer; }
VkMemoryAllocateFlags getAllocationFlags() const { return m_allocateFlags; }
uint32_t getDeviceMask() const { return m_deviceMask; }
bool getTilingOptimal() const { return m_isTilingOptimal; }
const VkMemoryRequirements& getMemoryRequirements() const { return m_memReqs; }
const VkMemoryPropertyFlags& getMemoryProperties() const { return m_memProps; }
std::string getDebugName() const { return m_debugName; }
bool getExportable() const { return m_isExportable; }
float getPriority() const { return m_priority; }
private:
VkBuffer m_dedicatedBuffer{VK_NULL_HANDLE};
VkImage m_dedicatedImage{VK_NULL_HANDLE};
VkMemoryAllocateFlags m_allocateFlags{0};
uint32_t m_deviceMask{0};
VkMemoryRequirements m_memReqs{0, 0, 0};
VkMemoryPropertyFlags m_memProps{0};
float m_priority{0.5f};
std::string m_debugName;
bool m_isTilingOptimal{false};
bool m_isExportable{false};
};
// BakedAllocateInfo is a group of allocation relevant Vulkan allocation structures,
// which will be filled out and linked via pNext-> to be used directly via vkAllocateMemory.
struct BakedAllocateInfo
{
BakedAllocateInfo() = default;
  // Copy and move operations are deleted on purpose: the struct stores addresses of its own
  // members in other members (via the pNext chain), so a copied or moved object would point
  // to wrong or out-of-scope addresses.
  BakedAllocateInfo(BakedAllocateInfo&& other) = delete;
  BakedAllocateInfo& operator=(BakedAllocateInfo&& other) = delete;
  BakedAllocateInfo(const BakedAllocateInfo&) = delete;
  BakedAllocateInfo& operator=(const BakedAllocateInfo&) = delete;
VkMemoryAllocateInfo memAllocInfo{VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
VkMemoryAllocateFlagsInfo flagsInfo{VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO};
VkMemoryDedicatedAllocateInfo dedicatedInfo{VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO};
VkExportMemoryAllocateInfo exportInfo{VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO};
};
bool fillBakedAllocateInfo(const VkPhysicalDeviceMemoryProperties& physMemProps, const MemAllocateInfo& info, BakedAllocateInfo& baked);
uint32_t getMemoryType(const VkPhysicalDeviceMemoryProperties& memoryProperties, uint32_t typeBits, const VkMemoryPropertyFlags& properties);
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::MemAllocator
nvvk::MemAllocator is a Vulkan memory allocator interface extensively used by ResourceAllocator.
It provides means to allocate, free, map and unmap pieces of Vulkan device memory.
Concrete implementations derive from nvvk::MemAllocator.
They can implement the allocator functionality themselves or act as an adapter to another
memory allocator implementation.
A nvvk::MemAllocator hands out opaque 'MemHandles'. The implementation of the MemAllocator interface
may choose any type of payload to store in a MemHandle. A MemHandle's relevant information can be
retrieved via getMemoryInfo().
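The interface is used the same way for every concrete allocator; a minimal sketch (assuming `memAllocator` references some MemAllocator implementation and `allocInfo` was filled as above):
```cpp
nvvk::MemHandle handle = memAllocator.allocMemory(allocInfo);
if(handle != nvvk::NullMemHandle)
{
  float* data = memAllocator.mapT<float>(handle);  // typed convenience mapping
  // ... fill data ...
  memAllocator.unmap(handle);
  memAllocator.freeMemory(handle);
}
```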
@DOC_END */
class MemAllocator
{
public:
struct MemInfo
{
VkDeviceMemory memory;
VkDeviceSize offset;
VkDeviceSize size;
};
// Allocate a piece of memory according to the requirements of allocInfo.
// may return NullMemHandle on error (provide pResult for details)
virtual MemHandle allocMemory(const MemAllocateInfo& allocInfo, VkResult* pResult = nullptr) = 0;
// Free the memory backing 'memHandle'.
// memHandle may be nullptr;
virtual void freeMemory(MemHandle memHandle) = 0;
// Retrieve detailed information about 'memHandle'
virtual MemInfo getMemoryInfo(MemHandle memHandle) const = 0;
// Maps device memory to system memory.
// If 'memHandle' already refers to a suballocation 'offset' will be applied on top of the
// suballocation's offset inside the device memory.
// may return nullptr on error (provide pResult for details)
virtual void* map(MemHandle memHandle, VkDeviceSize offset = 0, VkDeviceSize size = VK_WHOLE_SIZE, VkResult* pResult = nullptr) = 0;
// Unmap memHandle
virtual void unmap(MemHandle memHandle) = 0;
// Convenience function to allow mapping straight to a typed pointer.
template <class T>
T* mapT(MemHandle memHandle, VkResult* pResult = nullptr)
{
return (T*)map(memHandle, 0, VK_WHOLE_SIZE, pResult);
}
virtual VkDevice getDevice() const = 0;
virtual VkPhysicalDevice getPhysicalDevice() const = 0;
// Make sure the dtor is virtual
virtual ~MemAllocator() = default;
};
// Base class for memory handles
// Individual allocators will derive from it and fill the handles with their own data.
class MemHandleBase
{
public:
virtual ~MemHandleBase() = default; // force the class to become virtual
};
} // namespace nvvk

View file

@ -0,0 +1,97 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "vk_mem_alloc.h"
#ifndef MEMALLOCATOR_VMA_H_INCLUDED
#define MEMALLOCATOR_VMA_H_INCLUDED
#include "memallocator_vk.hpp"
#include "resourceallocator_vk.hpp"
namespace nvvk {
/** @DOC_START
# class nvvk::VMAMemoryAllocator
nvvk::VMAMemoryAllocator using the GPUOpen [Vulkan Memory Allocator](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator) underneath.
As VMA comes as a header-only library, when using it you'll have to:
1) provide _add_package_VMA() in your CMakeLists.txt
2) put these lines into one of your compilation units:
```cpp
#define VMA_IMPLEMENTATION
#include "vk_mem_alloc.h"
```
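Once VMA is available, the allocator simply adapts an existing VmaAllocator; a minimal sketch (assuming `vma` was created with vmaCreateAllocator and `allocInfo` is a filled nvvk::MemAllocateInfo):
```cpp
nvvk::VMAMemoryAllocator memAllocator(device, physicalDevice, vma);
nvvk::MemHandle          handle = memAllocator.allocMemory(allocInfo);
```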
@DOC_END */
class VMAMemoryAllocator : public MemAllocator
{
public:
VMAMemoryAllocator() = default;
inline explicit VMAMemoryAllocator(VkDevice device, VkPhysicalDevice physicalDevice, VmaAllocator vma);
inline virtual ~VMAMemoryAllocator();
inline bool init(VkDevice device, VkPhysicalDevice physicalDevice, VmaAllocator vma);
inline void deinit();
inline MemHandle allocMemory(const MemAllocateInfo& allocInfo, VkResult* pResult = nullptr) override;
inline void freeMemory(MemHandle memHandle) override;
inline MemInfo getMemoryInfo(MemHandle memHandle) const override;
inline void* map(MemHandle memHandle, VkDeviceSize offset = 0, VkDeviceSize size = VK_WHOLE_SIZE, VkResult* pResult = nullptr) override;
inline void unmap(MemHandle memHandle) override;
inline VkDevice getDevice() const override;
inline VkPhysicalDevice getPhysicalDevice() const override;
inline void findLeak(uint64_t leakID) { m_leakID = leakID; }
private:
VmaAllocator m_vma{0};
VkDevice m_device{nullptr};
VkPhysicalDevice m_physicalDevice{nullptr};
uint64_t m_leakID{~0U};
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::ResourceAllocatorVma
nvvk::ResourceAllocatorVma is a convenience class that creates, initializes and owns a VmaAllocator
and an associated nvvk::MemAllocator object.
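A minimal setup sketch (assuming `instance`, `device` and `physicalDevice` already exist):
```cpp
nvvk::ResourceAllocatorVma allocator;
allocator.init(instance, device, physicalDevice);
// ... create buffers and images through the ResourceAllocator interface ...
allocator.deinit();
```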
@DOC_END */
class ResourceAllocatorVma : public ResourceAllocator
{
public:
ResourceAllocatorVma() = default;
ResourceAllocatorVma(VkInstance instance, VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
virtual ~ResourceAllocatorVma();
void init(VkInstance instance, VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
void deinit();
protected:
VmaAllocator m_vma{nullptr};
std::unique_ptr<MemAllocator> m_memAlloc;
};
} // namespace nvvk
#include "memallocator_vma_vk.inl"
#endif

View file

@ -0,0 +1,249 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "vk_mem_alloc.h"
#include "error_vk.hpp"
#if defined(LINUX)
#include <signal.h> // LINUX SIGTRAP
#endif
namespace nvvk {
//--------------------------------------------------------------------------------------------------
// Converter utility from Vulkan memory property to VMA
//
static inline VmaMemoryUsage vkToVmaMemoryUsage(VkMemoryPropertyFlags flags)
{
if((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
return VMA_MEMORY_USAGE_GPU_ONLY;
else if((flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) == VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
return VMA_MEMORY_USAGE_CPU_ONLY;
else if((flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
return VMA_MEMORY_USAGE_CPU_TO_GPU;
return VMA_MEMORY_USAGE_UNKNOWN;
}
class VMAMemoryHandle : public MemHandleBase
{
public:
VMAMemoryHandle() = default;
VMAMemoryHandle(const VMAMemoryHandle&) = default;
VMAMemoryHandle(VMAMemoryHandle&&) = default;
VmaAllocation getAllocation() const { return m_allocation; }
private:
friend class VMAMemoryAllocator;
VMAMemoryHandle(VmaAllocation allocation)
: m_allocation(allocation)
{
}
VmaAllocation m_allocation;
};
inline VMAMemoryHandle* castVMAMemoryHandle(MemHandle memHandle)
{
if(!memHandle)
return nullptr;
#ifndef NDEBUG
  auto vmaMemHandle = dynamic_cast<VMAMemoryHandle*>(memHandle);
  assert(vmaMemHandle);
#else
  auto vmaMemHandle = static_cast<VMAMemoryHandle*>(memHandle);
#endif
return vmaMemHandle;
}
inline VMAMemoryAllocator::VMAMemoryAllocator(VkDevice device, VkPhysicalDevice physicalDevice, VmaAllocator vma)
{
init(device, physicalDevice, vma);
}
inline VMAMemoryAllocator::~VMAMemoryAllocator()
{
deinit();
}
inline bool VMAMemoryAllocator::init(VkDevice device, VkPhysicalDevice physicalDevice, VmaAllocator vma)
{
m_device = device;
m_physicalDevice = physicalDevice;
m_vma = vma;
return true;
}
inline void VMAMemoryAllocator::deinit()
{
m_vma = 0;
}
inline MemHandle VMAMemoryAllocator::allocMemory(const MemAllocateInfo& allocInfo, VkResult* pResult)
{
VmaAllocationCreateInfo vmaAllocInfo = {};
vmaAllocInfo.usage = vkToVmaMemoryUsage(allocInfo.getMemoryProperties());
if(allocInfo.getDedicatedBuffer() || allocInfo.getDedicatedImage())
{
vmaAllocInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
}
vmaAllocInfo.priority = allocInfo.getPriority();
// Not supported by VMA
assert(!allocInfo.getExportable());
assert(!allocInfo.getDeviceMask());
VmaAllocationInfo allocationDetail;
VmaAllocation allocation = nullptr;
VkResult result = vmaAllocateMemory(m_vma, &allocInfo.getMemoryRequirements(), &vmaAllocInfo, &allocation, &allocationDetail);
#ifndef NDEBUG
// !! VMA leaks finder!!
// Call findLeak with the value showing in the leak report.
// Add : #define VMA_DEBUG_LOG(format, ...) do { printf(format, __VA_ARGS__); printf("\n"); } while(false)
// - in the app where VMA_IMPLEMENTATION is defined, to have a leak report
static uint64_t counter{0};
if(counter == m_leakID)
{
bool stop_here = true;
#if defined(_MSVC_LANG)
__debugbreak();
#elif defined(LINUX)
raise(SIGTRAP);
#endif
}
if (result == VK_SUCCESS)
{
std::string allocID = std::to_string(counter++);
vmaSetAllocationName(m_vma, allocation, allocID.c_str());
}
#endif // !NDEBUG
NVVK_CHECK(result);
if(pResult)
{
*pResult = result;
}
if(result == VK_SUCCESS)
{
return new VMAMemoryHandle(allocation);
}
else
{
return NullMemHandle;
}
}
inline void VMAMemoryAllocator::freeMemory(MemHandle memHandle)
{
if(!memHandle)
return;
auto vmaHandle = castVMAMemoryHandle(memHandle);
vmaFreeMemory(m_vma, vmaHandle->getAllocation());
}
inline MemAllocator::MemInfo VMAMemoryAllocator::getMemoryInfo(MemHandle memHandle) const
{
auto vmaHandle = castVMAMemoryHandle(memHandle);
VmaAllocationInfo allocInfo;
vmaGetAllocationInfo(m_vma, vmaHandle->getAllocation(), &allocInfo);
MemInfo memInfo;
memInfo.memory = allocInfo.deviceMemory;
memInfo.offset = allocInfo.offset;
memInfo.size = allocInfo.size;
return memInfo;
}
inline void* VMAMemoryAllocator::map(MemHandle memHandle, VkDeviceSize offset, VkDeviceSize size, VkResult* pResult)
{
auto vmaHandle = castVMAMemoryHandle(memHandle);
void* ptr;
VkResult result = vmaMapMemory(m_vma, vmaHandle->getAllocation(), &ptr);
NVVK_CHECK(result);
if(pResult)
{
*pResult = result;
}
return ptr;
}
inline void VMAMemoryAllocator::unmap(MemHandle memHandle)
{
auto vmaHandle = castVMAMemoryHandle(memHandle);
vmaUnmapMemory(m_vma, vmaHandle->getAllocation());
}
inline VkDevice VMAMemoryAllocator::getDevice() const
{
return m_device;
}
inline VkPhysicalDevice VMAMemoryAllocator::getPhysicalDevice() const
{
return m_physicalDevice;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
inline ResourceAllocatorVma::ResourceAllocatorVma(VkInstance instance, VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize)
{
  init(instance, device, physicalDevice, stagingBlockSize);
}
inline ResourceAllocatorVma::~ResourceAllocatorVma()
{
deinit();
}
inline void ResourceAllocatorVma::init(VkInstance instance, VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize)
{
VmaAllocatorCreateInfo allocatorInfo = {};
allocatorInfo.physicalDevice = physicalDevice;
allocatorInfo.device = device;
allocatorInfo.instance = instance;
allocatorInfo.flags = VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT;
vmaCreateAllocator(&allocatorInfo, &m_vma);
m_memAlloc.reset(new VMAMemoryAllocator(device, physicalDevice, m_vma));
ResourceAllocator::init(device, physicalDevice, m_memAlloc.get(), stagingBlockSize);
}
inline void ResourceAllocatorVma::deinit()
{
ResourceAllocator::deinit();
m_memAlloc.reset();
vmaDestroyAllocator(m_vma);
m_vma = nullptr;
}
} // namespace nvvk

View file

@ -0,0 +1,887 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include <algorithm>
#include <string>
#include "debug_util_vk.hpp"
#include "error_vk.hpp"
#include "memorymanagement_vk.hpp"
#include "nvh/nvprint.hpp"
namespace nvvk {
bool getMemoryInfo(const VkPhysicalDeviceMemoryProperties& memoryProperties,
const VkMemoryRequirements& memReqs,
VkMemoryPropertyFlags properties,
VkMemoryAllocateInfo& memInfo,
bool preferDevice)
{
memInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
memInfo.pNext = nullptr;
if(!memReqs.size)
{
memInfo.allocationSize = 0;
memInfo.memoryTypeIndex = ~0;
return true;
}
// Find an available memory type that satisfies the requested properties.
for(uint32_t memoryTypeIndex = 0; memoryTypeIndex < memoryProperties.memoryTypeCount; ++memoryTypeIndex)
{
    if((memReqs.memoryTypeBits & (1 << memoryTypeIndex))
       // either the requested properties are non-zero and this memory type provides all of them
       && ((properties && (memoryProperties.memoryTypes[memoryTypeIndex].propertyFlags & properties) == properties)
           // or this memory type's flags match the requested properties exactly (covers the zero case)
           || (memoryProperties.memoryTypes[memoryTypeIndex].propertyFlags == properties)))
{
memInfo.allocationSize = memReqs.size;
memInfo.memoryTypeIndex = memoryTypeIndex;
return true;
}
}
// special case zero flag logic
if(properties == 0)
{
// prefer something with host visible
return getMemoryInfo(memoryProperties, memReqs,
preferDevice ? VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, memInfo);
}
return false;
}
//////////////////////////////////////////////////////////////////////////
class DMAMemoryAllocator;
class DMAMemoryHandle : public nvvk::MemHandleBase
{
public:
DMAMemoryHandle() = default;
DMAMemoryHandle(const DMAMemoryHandle&) = default;
DMAMemoryHandle(DMAMemoryHandle&&) = default;
DMAMemoryHandle& operator=(const DMAMemoryHandle&) = default;
DMAMemoryHandle& operator=(DMAMemoryHandle&&) = default;
const AllocationID& getAllocationID() const { return m_allocation; };
private:
friend class nvvk::DeviceMemoryAllocator;
DMAMemoryHandle(const AllocationID& allocation)
: m_allocation(allocation)
{
}
AllocationID m_allocation;
};
DMAMemoryHandle* castDMAMemoryHandle(MemHandle memHandle)
{
if(!memHandle)
return nullptr;
#ifndef NDEBUG
  auto dmaMemHandle = dynamic_cast<DMAMemoryHandle*>(memHandle);
  assert(dmaMemHandle);
#else
  auto dmaMemHandle = static_cast<DMAMemoryHandle*>(memHandle);
#endif
return dmaMemHandle;
}
MemHandle DeviceMemoryAllocator::allocMemory(const MemAllocateInfo& allocInfo, VkResult* pResult)
{
BakedAllocateInfo bakedInfo;
fillBakedAllocateInfo(getMemoryProperties(), allocInfo, bakedInfo);
State state = m_defaultState;
state.allocateDeviceMask |= bakedInfo.flagsInfo.deviceMask;
state.allocateFlags |= bakedInfo.flagsInfo.flags;
state.priority = allocInfo.getPriority();
VkResult result;
bool isDedicatedAllocation = allocInfo.getDedicatedBuffer() || allocInfo.getDedicatedImage();
auto dmaHandle = allocInternal(allocInfo.getMemoryRequirements(), allocInfo.getMemoryProperties(),
!allocInfo.getTilingOptimal() /*isLinear*/,
isDedicatedAllocation ? &bakedInfo.dedicatedInfo : nullptr, result, true, state);
if(pResult)
{
*pResult = result;
}
if(dmaHandle)
{
DMAMemoryHandle* dmaMemHandle = new DMAMemoryHandle(dmaHandle);
// Cannot do this, it would override the DeviceMemoryManager's chosen block buffer name
// if(!allocInfo.getDebugName().empty())
// {
// const MemInfo& memInfo = getMemoryInfo(dmaMemHandle);
// nvvk::DebugUtil(m_dma.getDevice()).setObjectName(memInfo.memory, allocInfo.getDebugName());
// }
return dmaMemHandle;
}
else
{
return NullMemHandle;
}
}
void DeviceMemoryAllocator::freeMemory(MemHandle memHandle)
{
if(!memHandle)
return;
auto dmaHandle = castDMAMemoryHandle(memHandle);
assert(dmaHandle);
free(dmaHandle->getAllocationID());
delete dmaHandle;
return;
}
MemAllocator::MemInfo DeviceMemoryAllocator::getMemoryInfo(MemHandle memHandle) const
{
MemInfo info;
auto dmaHandle = castDMAMemoryHandle(memHandle);
assert(dmaHandle);
auto& allocInfo = getAllocation(dmaHandle->getAllocationID());
info.memory = allocInfo.mem;
info.offset = allocInfo.offset;
info.size = allocInfo.size;
return info;
};
nvvk::AllocationID DeviceMemoryAllocator::getAllocationID(MemHandle memHandle) const
{
auto dmaHandle = castDMAMemoryHandle(memHandle);
assert(dmaHandle);
return dmaHandle->getAllocationID();
}
void* DeviceMemoryAllocator::map(MemHandle memHandle, VkDeviceSize offset, VkDeviceSize size, VkResult* pResult)
{
auto dmaHandle = castDMAMemoryHandle(memHandle);
assert(dmaHandle);
void* ptr = map(dmaHandle->getAllocationID(), pResult);
return ptr;
}
void DeviceMemoryAllocator::unmap(MemHandle memHandle)
{
auto dmaHandle = castDMAMemoryHandle(memHandle);
assert(dmaHandle);
unmap(dmaHandle->getAllocationID());
}
const VkMemoryDedicatedAllocateInfo* DeviceMemoryAllocator::DEDICATED_PROXY =
(const VkMemoryDedicatedAllocateInfo*)&DeviceMemoryAllocator::DEDICATED_PROXY;
int DeviceMemoryAllocator::s_allocDebugBias = 0;
//#define DEBUG_ALLOCID 8
nvvk::AllocationID DeviceMemoryAllocator::createID(Allocation& allocation, BlockID block, uint32_t blockOffset, uint32_t blockSize)
{
// find free slot
if(m_freeAllocationIndex != INVALID_ID_INDEX)
{
uint32_t index = m_freeAllocationIndex;
m_freeAllocationIndex = m_allocations[index].id.instantiate((uint32_t)index);
m_allocations[index].allocation = allocation;
m_allocations[index].block = block;
m_allocations[index].blockOffset = blockOffset;
m_allocations[index].blockSize = blockSize;
#if DEBUG_ALLOCID
// debug some specific id, useful to track allocation leaks
if(index == DEBUG_ALLOCID)
{
int breakHere = 0;
breakHere = breakHere;
}
#endif
return m_allocations[index].id;
}
// otherwise push to end
AllocationInfo info;
info.allocation = allocation;
info.id.instantiate((uint32_t)m_allocations.size());
info.block = block;
info.blockOffset = blockOffset;
info.blockSize = blockSize;
m_allocations.push_back(info);
#if DEBUG_ALLOCID
// debug some specific id, useful to track allocation leaks
if(info.id.index == DEBUG_ALLOCID)
{
int breakHere = 0;
breakHere = breakHere;
}
#endif
return info.id;
}
void DeviceMemoryAllocator::destroyID(AllocationID id)
{
assert(m_allocations[id.index].id.isEqual(id));
#if DEBUG_ALLOCID
// debug some specific id, useful to track allocation leaks
if(id.index == DEBUG_ALLOCID)
{
int breakHere = 0;
breakHere = breakHere;
}
#endif
// setup for free list
m_allocations[id.index].id.instantiate(m_freeAllocationIndex);
m_freeAllocationIndex = id.index;
}
const float DeviceMemoryAllocator::DEFAULT_PRIORITY = 0.5f;
void DeviceMemoryAllocator::init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize blockSize, VkDeviceSize maxSize)
{
assert(!m_device);
m_device = device;
m_physicalDevice = physicalDevice;
// always default to NVVK_DEFAULT_MEMORY_BLOCKSIZE
m_blockSize = blockSize ? blockSize : NVVK_DEFAULT_MEMORY_BLOCKSIZE;
vkGetPhysicalDeviceMemoryProperties(physicalDevice, &m_memoryProperties);
// Retrieving the max allocation size, can be lowered with maxSize
VkPhysicalDeviceProperties2 prop2{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2};
VkPhysicalDeviceMaintenance3Properties vkProp{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES};
prop2.pNext = &vkProp;
vkGetPhysicalDeviceProperties2(physicalDevice, &prop2);
m_maxAllocationSize = maxSize > 0 ? std::min(maxSize, vkProp.maxMemoryAllocationSize) : vkProp.maxMemoryAllocationSize;
assert(m_blocks.empty());
assert(m_allocations.empty());
}
void DeviceMemoryAllocator::freeAll()
{
for(const auto& it : m_blocks)
{
if(!it.mem)
continue;
if(it.mapped)
{
vkUnmapMemory(m_device, it.mem);
}
vkFreeMemory(m_device, it.mem, nullptr);
}
m_allocations.clear();
m_blocks.clear();
resizeBlocks(0);
m_freeBlockIndex = INVALID_ID_INDEX;
m_freeAllocationIndex = INVALID_ID_INDEX;
}
void DeviceMemoryAllocator::deinit()
{
if(!m_device)
return;
for(const auto& it : m_blocks)
{
if(it.mapped)
{
assert("not all blocks were unmapped properly");
if(it.mem)
{
vkUnmapMemory(m_device, it.mem);
}
}
if(it.mem)
{
if(it.isFirst && m_keepFirst)
{
vkFreeMemory(m_device, it.mem, nullptr);
}
else
{
assert("not all blocks were freed properly");
}
}
}
for(size_t i = 0; i < m_allocations.size(); i++)
{
if(m_allocations[i].id.index == (uint32_t)i)
{
assert(0 && i && "AllocationID not freed");
// set DEBUG_ALLOCID define further up to trace this id
}
}
m_allocations.clear();
m_blocks.clear();
resizeBlocks(0);
m_freeBlockIndex = INVALID_ID_INDEX;
m_freeAllocationIndex = INVALID_ID_INDEX;
m_device = VK_NULL_HANDLE;
}
VkDeviceSize DeviceMemoryAllocator::getMaxAllocationSize() const
{
return m_maxAllocationSize;
}
float DeviceMemoryAllocator::getUtilization(VkDeviceSize& allocatedSize, VkDeviceSize& usedSize) const
{
allocatedSize = m_allocatedSize;
usedSize = m_usedSize;
return float(double(usedSize) / double(allocatedSize));
}
void DeviceMemoryAllocator::nvprintReport() const
{
VkDeviceSize used[VK_MAX_MEMORY_HEAPS] = {0};
VkDeviceSize allocated[VK_MAX_MEMORY_HEAPS] = {0};
uint32_t active[VK_MAX_MEMORY_HEAPS] = {0};
uint32_t dedicated[VK_MAX_MEMORY_HEAPS] = {0};
uint32_t linear[VK_MAX_MEMORY_HEAPS] = {0};
uint32_t dedicatedSum = 0;
uint32_t linearSum = 0;
for(const auto& block : m_blocks)
{
if(block.mem)
{
uint32_t heapIndex = m_memoryProperties.memoryTypes[block.memoryTypeIndex].heapIndex;
used[heapIndex] += block.usedSize;
allocated[heapIndex] += block.allocationSize;
active[heapIndex]++;
linear[heapIndex] += block.isLinear ? 1 : 0;
dedicated[heapIndex] += block.isDedicated ? 1 : 0;
linearSum += block.isLinear ? 1 : 0;
dedicatedSum += block.isDedicated ? 1 : 0;
}
}
LOGI("nvvk::DeviceMemoryAllocator %p\n", this);
{
LOGI(" count : dedicated, linear, all (device-local)\n");
}
for(uint32_t i = 0; i < m_memoryProperties.memoryHeapCount; i++)
{
LOGI(" heap%d : %9d, %6d, %4d (%d)\n", i, dedicated[i], linear[i], active[i],
(m_memoryProperties.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) ? 1 : 0);
}
{
LOGI(" total : %9d, %6d, %4d\n", dedicatedSum, linearSum, m_activeBlockCount);
LOGI(" size : used / allocated / available KB (device-local)\n");
}
for(uint32_t i = 0; i < m_memoryProperties.memoryHeapCount; i++)
{
LOGI(" heap%d : %9d / %9d / %9d (%d)\n", i, uint32_t((used[i] + 1023) / 1024),
uint32_t((allocated[i] + 1023) / 1024), uint32_t((m_memoryProperties.memoryHeaps[i].size + 1023) / 1024),
(m_memoryProperties.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) ? 1 : 0);
}
{
LOGI(" total : %9d / %9d KB (%d percent)\n\n", uint32_t((m_usedSize + 1023) / 1024),
uint32_t((m_allocatedSize + 1023) / 1024), uint32_t(double(m_usedSize) * 100.0 / double(m_allocatedSize)));
}
}
void DeviceMemoryAllocator::getTypeStats(uint32_t count[VK_MAX_MEMORY_TYPES],
VkDeviceSize used[VK_MAX_MEMORY_TYPES],
VkDeviceSize allocated[VK_MAX_MEMORY_TYPES]) const
{
  memset(count, 0, sizeof(count[0]) * VK_MAX_MEMORY_TYPES);
  memset(used, 0, sizeof(used[0]) * VK_MAX_MEMORY_TYPES);
  memset(allocated, 0, sizeof(allocated[0]) * VK_MAX_MEMORY_TYPES);
for(const auto& block : m_blocks)
{
if(block.mem)
{
count[block.memoryTypeIndex]++;
used[block.memoryTypeIndex] += block.usedSize;
allocated[block.memoryTypeIndex] += block.allocationSize;
}
}
}
VkDevice DeviceMemoryAllocator::getDevice() const
{
return m_device;
}
VkPhysicalDevice DeviceMemoryAllocator::getPhysicalDevice() const
{
return m_physicalDevice;
}
const nvvk::Allocation& DeviceMemoryAllocator::getAllocation(AllocationID id) const
{
assert(m_allocations[id.index].id.isEqual(id));
return m_allocations[id.index].allocation;
}
const VkPhysicalDeviceMemoryProperties& DeviceMemoryAllocator::getMemoryProperties() const
{
return m_memoryProperties;
}
AllocationID DeviceMemoryAllocator::allocInternal(const VkMemoryRequirements& memReqs,
VkMemoryPropertyFlags memProps,
bool isLinear,
const VkMemoryDedicatedAllocateInfo* dedicated,
VkResult& result,
bool preferDevice,
const State& state)
{
VkMemoryAllocateInfo memInfo;
result = VK_SUCCESS;
// Fill out allocation info structure
if(memReqs.size > m_maxAllocationSize || !nvvk::getMemoryInfo(m_memoryProperties, memReqs, memProps, memInfo, preferDevice))
{
result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
return AllocationID();
}
float priority = m_supportsPriority ? state.priority : DEFAULT_PRIORITY;
bool isFirst = !dedicated;
bool mappable = (memProps & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0;
if(!dedicated)
{
// First try to find an existing memory block that we can use
for(uint32_t i = 0; i < (uint32_t)m_blocks.size(); i++)
{
Block& block = m_blocks[i];
// Ignore invalid or blocks with the wrong memory type
if(!block.mem || block.memoryTypeIndex != memInfo.memoryTypeIndex || isLinear != block.isLinear
|| block.priority != priority || block.allocateFlags != state.allocateFlags
|| block.allocateDeviceMask != state.allocateDeviceMask || (!block.mappable && mappable))
{
continue;
}
// if there is a compatible block, we are not "first" of a kind
isFirst = false;
uint32_t blockSize;
uint32_t blockOffset;
uint32_t offset;
// Look for a block which has enough free space available
if(block.range.subAllocate((uint32_t)memReqs.size, (uint32_t)memReqs.alignment, blockOffset, offset, blockSize))
{
block.allocationCount++;
block.usedSize += blockSize;
Allocation allocation;
allocation.mem = block.mem;
allocation.offset = offset;
allocation.size = memReqs.size;
m_usedSize += blockSize;
return createID(allocation, block.id, blockOffset, blockSize);
}
}
}
// find available blockID or create new one
BlockID id;
if(m_freeBlockIndex != INVALID_ID_INDEX)
{
Block& block = m_blocks[m_freeBlockIndex];
m_freeBlockIndex = block.id.instantiate(m_freeBlockIndex);
id = block.id;
}
else
{
uint32_t newIndex = (uint32_t)m_blocks.size();
m_blocks.resize(m_blocks.size() + 1);
resizeBlocks(newIndex + 1);
Block& block = m_blocks[newIndex];
block.id.instantiate(newIndex);
id = block.id;
}
Block& block = m_blocks[id.index];
// enforce custom block under certain conditions
if(dedicated == DEDICATED_PROXY || memReqs.size > ((m_blockSize * 2) / 3))
{
block.allocationSize = memReqs.size;
}
else if(dedicated)
{
block.allocationSize = memReqs.size;
memInfo.pNext = dedicated;
}
else
{
block.allocationSize = std::max(m_blockSize, memReqs.size);
}
VkMemoryPriorityAllocateInfoEXT memPriority = {VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT};
if(priority != DEFAULT_PRIORITY)
{
memPriority.pNext = memInfo.pNext;
memPriority.priority = priority;
memInfo.pNext = &memPriority;
}
VkMemoryAllocateFlagsInfo memFlags = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO};
if(state.allocateFlags)
{
memFlags.pNext = memInfo.pNext;
memFlags.deviceMask = state.allocateDeviceMask;
memFlags.flags = state.allocateFlags;
memInfo.pNext = &memFlags;
}
block.allocationSize = block.range.alignedSize((uint32_t)block.allocationSize);
block.priority = priority;
block.memoryTypeIndex = memInfo.memoryTypeIndex;
block.range.init((uint32_t)block.allocationSize);
block.isLinear = isLinear;
block.isFirst = isFirst;
block.isDedicated = dedicated != nullptr;
block.allocateFlags = state.allocateFlags;
block.allocateDeviceMask = state.allocateDeviceMask;
// set allocationSize from aligned block.allocationSize
memInfo.allocationSize = block.allocationSize;
result = allocBlockMemory(id, memInfo, block.mem);
if(result == VK_SUCCESS)
{
nvvk::DebugUtil(m_device).setObjectName(block.mem, m_debugName);
m_allocatedSize += block.allocationSize;
uint32_t offset;
uint32_t blockSize;
uint32_t blockOffset;
block.range.subAllocate((uint32_t)memReqs.size, (uint32_t)memReqs.alignment, blockOffset, offset, blockSize);
block.allocationCount = 1;
block.usedSize = blockSize;
block.mapCount = 0;
block.mapped = nullptr;
block.mappable = mappable;
Allocation allocation;
allocation.mem = block.mem;
allocation.offset = offset;
allocation.size = memReqs.size;
m_usedSize += blockSize;
m_activeBlockCount++;
return createID(allocation, id, blockOffset, blockSize);
}
else
{
// make block free
m_freeBlockIndex = block.id.instantiate(m_freeBlockIndex);
if(result == VK_ERROR_OUT_OF_DEVICE_MEMORY
&& ((memProps == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) || (memProps == 0 && preferDevice)))
{
// downgrade memory property to zero and/or not preferDevice
LOGW("downgrade memory\n");
return allocInternal(memReqs, 0, isLinear, dedicated, result, !preferDevice, state);
}
else
{
LOGE("could not allocate memory: VkResult %d\n", result);
return AllocationID();
}
}
}
void DeviceMemoryAllocator::free(AllocationID allocationID)
{
const AllocationInfo& info = getInfo(allocationID);
Block& block = getBlock(info.block);
destroyID(allocationID);
m_usedSize -= info.blockSize;
block.range.subFree(info.blockOffset, info.blockSize);
block.allocationCount--;
block.usedSize -= info.blockSize;
if(block.allocationCount == 0 && !(block.isFirst && m_keepFirst))
{
assert(block.usedSize == 0);
assert(!block.mapped);
freeBlockMemory(info.block, block.mem);
block.mem = VK_NULL_HANDLE;
block.isFirst = false;
m_allocatedSize -= block.allocationSize;
block.range.deinit();
m_freeBlockIndex = block.id.instantiate(m_freeBlockIndex);
m_activeBlockCount--;
}
}
void* DeviceMemoryAllocator::map(AllocationID allocationID, VkResult* pResult)
{
const AllocationInfo& info = getInfo(allocationID);
Block& block = getBlock(info.block);
assert(block.mappable);
block.mapCount++;
if(!block.mapped)
{
VkResult result = vkMapMemory(m_device, block.mem, 0, block.allocationSize, 0, (void**)&block.mapped);
if(pResult)
{
*pResult = result;
}
}
return block.mapped + info.allocation.offset;
}
void DeviceMemoryAllocator::unmap(AllocationID allocationID)
{
const AllocationInfo& info = getInfo(allocationID);
Block& block = getBlock(info.block);
assert(block.mapped);
if(--block.mapCount == 0)
{
block.mapped = nullptr;
vkUnmapMemory(m_device, block.mem);
}
}
VkImage DeviceMemoryAllocator::createImage(const VkImageCreateInfo& createInfo,
AllocationID& allocationID,
VkMemoryPropertyFlags memProps,
VkResult& result)
{
VkImage image;
assert(createInfo.extent.width && createInfo.extent.height && createInfo.extent.depth);
result = createImageInternal(m_device, &createInfo, &image);
if(result != VK_SUCCESS)
return VK_NULL_HANDLE;
VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS};
VkImageMemoryRequirementsInfo2 imageReqs = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2};
imageReqs.image = image;
memReqs.pNext = &dedicatedRegs;
vkGetImageMemoryRequirements2(m_device, &imageReqs, &memReqs);
VkBool32 useDedicated = m_forceDedicatedAllocation || dedicatedRegs.prefersDedicatedAllocation;
VkMemoryDedicatedAllocateInfo dedicatedInfo = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO};
dedicatedInfo.image = image;
allocationID = alloc(memReqs.memoryRequirements, memProps, createInfo.tiling == VK_IMAGE_TILING_LINEAR,
useDedicated ? &dedicatedInfo : nullptr);
Allocation allocation = allocationID.isValid() ? getAllocation(allocationID) : Allocation();
if(allocation.mem == VK_NULL_HANDLE)
{
vkDestroyImage(m_device, image, nullptr);
result = VK_ERROR_OUT_OF_POOL_MEMORY;
return VK_NULL_HANDLE;
}
VkBindImageMemoryInfo bindInfos = {VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO};
bindInfos.image = image;
bindInfos.memory = allocation.mem;
bindInfos.memoryOffset = allocation.offset;
result = vkBindImageMemory2(m_device, 1, &bindInfos);
if(result != VK_SUCCESS)
{
vkDestroyImage(m_device, image, nullptr);
return VK_NULL_HANDLE;
}
return image;
}
VkBuffer DeviceMemoryAllocator::createBuffer(const VkBufferCreateInfo& createInfo,
AllocationID& allocationID,
VkMemoryPropertyFlags memProps,
VkResult& result)
{
VkBuffer buffer;
assert(createInfo.size);
result = createBufferInternal(m_device, &createInfo, &buffer);
if(result != VK_SUCCESS)
{
return VK_NULL_HANDLE;
}
VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS};
VkBufferMemoryRequirementsInfo2 bufferReqs = {VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2};
bufferReqs.buffer = buffer;
memReqs.pNext = &dedicatedRegs;
vkGetBufferMemoryRequirements2(m_device, &bufferReqs, &memReqs);
// for buffers don't use "preferred", but only requires
VkBool32 useDedicated = m_forceDedicatedAllocation || dedicatedRegs.requiresDedicatedAllocation;
VkMemoryDedicatedAllocateInfo dedicatedInfo = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO};
dedicatedInfo.buffer = buffer;
allocationID = alloc(memReqs.memoryRequirements, memProps, true, useDedicated ? &dedicatedInfo : nullptr);
Allocation allocation = allocationID.isValid() ? getAllocation(allocationID) : Allocation();
if(allocation.mem == VK_NULL_HANDLE)
{
vkDestroyBuffer(m_device, buffer, nullptr);
result = VK_ERROR_OUT_OF_POOL_MEMORY;
return VK_NULL_HANDLE;
}
VkBindBufferMemoryInfo bindInfos = {VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO};
bindInfos.buffer = buffer;
bindInfos.memory = allocation.mem;
bindInfos.memoryOffset = allocation.offset;
result = vkBindBufferMemory2(m_device, 1, &bindInfos);
if(result != VK_SUCCESS)
{
vkDestroyBuffer(m_device, buffer, nullptr);
return VK_NULL_HANDLE;
}
return buffer;
}
VkBuffer DeviceMemoryAllocator::createBuffer(VkDeviceSize size,
VkBufferUsageFlags usage,
AllocationID& allocationID,
VkMemoryPropertyFlags memProps,
VkResult& result)
{
VkBufferCreateInfo createInfo = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
createInfo.usage = usage | m_defaultBufferUsageFlags | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
createInfo.size = size;
return createBuffer(createInfo, allocationID, memProps, result);
}
#if VK_NV_ray_tracing
VkAccelerationStructureNV DeviceMemoryAllocator::createAccStructure(const VkAccelerationStructureCreateInfoNV& createInfo,
AllocationID& allocationID,
VkMemoryPropertyFlags memProps,
VkResult& result)
{
VkAccelerationStructureNV accel;
result = vkCreateAccelerationStructureNV(m_device, &createInfo, nullptr, &accel);
if(result != VK_SUCCESS)
{
return VK_NULL_HANDLE;
}
VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
VkAccelerationStructureMemoryRequirementsInfoNV memInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV};
memInfo.accelerationStructure = accel;
vkGetAccelerationStructureMemoryRequirementsNV(m_device, &memInfo, &memReqs);
allocationID = alloc(memReqs.memoryRequirements, memProps, true, m_forceDedicatedAllocation ? DEDICATED_PROXY : nullptr);
Allocation allocation = allocationID.isValid() ? getAllocation(allocationID) : Allocation();
if(allocation.mem == VK_NULL_HANDLE)
{
vkDestroyAccelerationStructureNV(m_device, accel, nullptr);
result = VK_ERROR_OUT_OF_POOL_MEMORY;
return VK_NULL_HANDLE;
}
VkBindAccelerationStructureMemoryInfoNV bind = {VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV};
bind.accelerationStructure = accel;
bind.memory = allocation.mem;
bind.memoryOffset = allocation.offset;
assert(allocation.offset % memReqs.memoryRequirements.alignment == 0);
result = vkBindAccelerationStructureMemoryNV(m_device, 1, &bind);
if(result != VK_SUCCESS)
{
vkDestroyAccelerationStructureNV(m_device, accel, nullptr);
free(allocationID);
allocationID = AllocationID();
return VK_NULL_HANDLE;
}
return accel;
}
#endif
} // namespace nvvk

View file

@ -0,0 +1,549 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <cassert>
#include <vector>
#include <string>
#include <nvvk/memallocator_vk.hpp>
#include <nvh/trangeallocator.hpp>
#include <vulkan/vulkan_core.h>
namespace nvvk {
#define NVVK_DEFAULT_MEMORY_BLOCKSIZE (VkDeviceSize(128) * 1024 * 1024)
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
This framework assumes that memory heaps exist that support:
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
for uploading data to the device
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & VK_MEMORY_PROPERTY_HOST_CACHED_BIT
for downloading data from the device
This is typical on all major desktop platforms and vendors.
See http://vulkan.gpuinfo.org for information on various devices and platforms.
# functions in nvvk
* getMemoryInfo : fills the VkMemoryAllocateInfo based on device's memory properties and memory requirements and property flags. Returns `true` on success.
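A hedged usage sketch (assuming an existing `device`, `buffer` and queried `memoryProperties`; these names are placeholders, not part of the library):
```cpp
VkMemoryRequirements memReqs;
vkGetBufferMemoryRequirements(device, buffer, &memReqs);

VkMemoryAllocateInfo memInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
if(nvvk::getMemoryInfo(memoryProperties, memReqs, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, memInfo))
{
  VkDeviceMemory mem = VK_NULL_HANDLE;
  vkAllocateMemory(device, &memInfo, nullptr, &mem);
  vkBindBufferMemory(device, buffer, mem, 0);
}
```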
@DOC_END */
// returns true on success
bool getMemoryInfo(const VkPhysicalDeviceMemoryProperties& memoryProperties,
const VkMemoryRequirements& memReqs,
VkMemoryPropertyFlags properties,
VkMemoryAllocateInfo& memInfo,
bool preferDevice = true); // special case when the requested properties are unsupported: prefer device-local memory if true, otherwise fall back to host memory
//////////////////////////////////////////////////////////////////////////
static const uint32_t INVALID_ID_INDEX = ~0;
struct Allocation
{
VkDeviceMemory mem = VK_NULL_HANDLE;
VkDeviceSize offset = 0;
VkDeviceSize size = 0;
};
class AllocationID
{
friend class DeviceMemoryAllocator;
private:
uint32_t index = INVALID_ID_INDEX;
uint32_t generation = 0;
void invalidate() { index = INVALID_ID_INDEX; }
uint32_t instantiate(uint32_t newIndex)
{
uint32_t oldIndex = index;
index = newIndex;
generation++;
return oldIndex;
}
public:
bool isValid() const { return index != INVALID_ID_INDEX; }
bool isEqual(const AllocationID& other) const { return index == other.index && generation == other.generation; }
operator bool() const { return isValid(); }
friend bool operator==(const AllocationID& lhs, const AllocationID& rhs) { return rhs.isEqual(lhs); }
};
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::DeviceMemoryAllocator
The nvvk::DeviceMemoryAllocator allocates and manages device memory in fixed-size memory blocks.
It implements the nvvk::MemAllocator interface.
It sub-allocates from the blocks, and can re-use memory if it finds empty
regions. Because of the fixed-block usage, you can directly create resources
and don't need a phase to compute the allocation sizes first.
It will create compatible chunks according to the memory requirements and
usage flags. Therefore you can easily create mappable host allocations
and delete them after usage, without interfering with device-side allocations.
An `AllocationID` is returned rather than the allocation details directly, which
one can query separately.
Several utility functions are provided to handle the binding of memory
directly with the resource creation of buffers, images and acceleration
structures. These utilities also make implicit use of Vulkan's dedicated
allocation mechanism.
We recommend the use of the nvvk::ResourceAllocator class,
rather than the various create functions provided here, as we may deprecate them.
> **WARNING** : The memory manager serves as proof of concept for some key concepts
> however it is not meant for production use and it currently lacks de-fragmentation logic
> as well. You may want to look at [VMA](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator)
> for a more production-focused solution.
You can derive from this class and overload a few functions to alter the
chunk allocation behavior.
Example :
```cpp
nvvk::DeviceMemoryAllocator memAllocator;
memAllocator.init(device, physicalDevice);
// low-level
aid = memAllocator.alloc(memRequirements,...);
...
memAllocator.free(aid);
// utility wrapper
buffer = memAllocator.createBuffer(bufferSize, bufferUsage, bufferAid);
...
memAllocator.free(bufferAid);
// It is also possible to not track individual resources
// and free everything in one go. However, this is
// not recommended for general purpose use.
bufferA = memAllocator.createBuffer(sizeA, usageA);
bufferB = memAllocator.createBuffer(sizeB, usageB);
...
memAllocator.freeAll();
```
@DOC_END */
class DeviceMemoryAllocator : public MemAllocator
{
public:
static const float DEFAULT_PRIORITY;
DeviceMemoryAllocator(DeviceMemoryAllocator const&) = delete;
DeviceMemoryAllocator& operator=(DeviceMemoryAllocator const&) = delete;
virtual ~DeviceMemoryAllocator()
{
#ifndef NDEBUG
// If all memory was released properly, no blocks should be alive at this point
assert(m_blocks.empty() || m_keepFirst);
#endif
deinit();
}
// system related
DeviceMemoryAllocator() { m_debugName = "nvvk::DeviceMemoryAllocator:" + std::to_string((uint64_t)this); }
DeviceMemoryAllocator(VkDevice device,
VkPhysicalDevice physicalDevice,
VkDeviceSize blockSize = NVVK_DEFAULT_MEMORY_BLOCKSIZE,
VkDeviceSize maxSize = 0)
{
init(device, physicalDevice, blockSize, maxSize);
}
void init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize blockSize = NVVK_DEFAULT_MEMORY_BLOCKSIZE, VkDeviceSize maxSize = 0);
void setDebugName(const std::string& name) { m_debugName = name; }
// requires VK_EXT_memory_priority, default is false
void setPrioritySupported(bool state) { m_supportsPriority = state; }
// frees all blocks independent of individual allocations
// use only if you know the lifetime of all resources from this allocator.
void freeAll();
// asserts on all resources being freed properly
void deinit();
// get utilization of block allocations
float getUtilization(VkDeviceSize& allocatedSize, VkDeviceSize& usedSize) const;
// get total amount of active blocks / VkDeviceMemory allocations
uint32_t getActiveBlockCount() const { return m_activeBlockCount; }
// dump detailed stats via nvprintfLevel(LOGLEVEL_INFO, ...)
void nvprintReport() const;
void getTypeStats(uint32_t count[VK_MAX_MEMORY_TYPES],
VkDeviceSize used[VK_MAX_MEMORY_TYPES],
VkDeviceSize allocated[VK_MAX_MEMORY_TYPES]) const;
const VkPhysicalDeviceMemoryProperties& getMemoryProperties() const;
VkDeviceSize getMaxAllocationSize() const;
//////////////////////////////////////////////////////////////////////////
// Implement MemAllocator interface
virtual MemHandle allocMemory(const MemAllocateInfo& allocInfo, VkResult* pResult = nullptr) override;
virtual void freeMemory(MemHandle memHandle) override;
virtual MemInfo getMemoryInfo(MemHandle memHandle) const override;
virtual void* map(MemHandle memHandle, VkDeviceSize offset = 0, VkDeviceSize size = VK_WHOLE_SIZE, VkResult* pResult = nullptr) override;
virtual void unmap(MemHandle memHandle) override;
virtual VkDevice getDevice() const override;
virtual VkPhysicalDevice getPhysicalDevice() const override;
AllocationID getAllocationID(MemHandle memHandle) const;
//////////////////////////////////////////////////////////////////////////
struct State
{
float priority = DEFAULT_PRIORITY;
VkMemoryAllocateFlags allocateFlags = 0;
uint32_t allocateDeviceMask = 0;
};
// subsequent allocations (and creates) will use the provided priority
// ignored if setPrioritySupported is not enabled
float setPriority(float priority = DEFAULT_PRIORITY)
{
float old = m_defaultState.priority;
m_defaultState.priority = priority;
return old;
}
float getPriority() const { return m_defaultState.priority; }
// subsequent allocations (and creates) will use the provided flags
void setAllocateFlags(VkMemoryAllocateFlags flags, bool enabled)
{
if(enabled)
{
m_defaultState.allocateFlags |= flags;
}
else
{
m_defaultState.allocateFlags &= ~flags;
}
}
void setAllocateDeviceMask(uint32_t allocateDeviceMask, bool enabled)
{
if(enabled)
{
m_defaultState.allocateDeviceMask |= allocateDeviceMask;
}
else
{
m_defaultState.allocateDeviceMask &= ~allocateDeviceMask;
}
}
VkMemoryAllocateFlags getAllocateFlags() const { return m_defaultState.allocateFlags; }
uint32_t getAllocateDeviceMask() const { return m_defaultState.allocateDeviceMask; }
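// Usage sketch (illustrative assumption, not a fixed recipe): request device addresses and a
// higher priority only for the allocations that follow, then restore the defaults, e.g.
//   memAllocator.setAllocateFlags(VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, true);
//   memAllocator.setPriority(1.0f);
//   buffer = memAllocator.createBuffer(size, usage, bufferAid);
//   memAllocator.setAllocateFlags(VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, false);
//   memAllocator.setPriority();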
// make individual raw allocations.
// there are also utilities below that combine the creation of buffers/images etc.
// with binding their memory.
AllocationID alloc(const VkMemoryRequirements& memReqs,
VkMemoryPropertyFlags memProps,
bool isLinear, // buffers are linear, optimal tiling textures are not
const VkMemoryDedicatedAllocateInfo* dedicated,
VkResult& result)
{
return allocInternal(memReqs, memProps, isLinear, dedicated, result, true, m_defaultState);
}
// make individual raw allocations.
// there are also utilities below that combine the creation of buffers/images etc.
// with binding their memory.
AllocationID alloc(const VkMemoryRequirements& memReqs,
VkMemoryPropertyFlags memProps,
bool isLinear, // buffers are linear, optimal tiling textures are not
const VkMemoryDedicatedAllocateInfo* dedicated,
State& state,
VkResult& result)
{
return allocInternal(memReqs, memProps, isLinear, dedicated, result, true, state);
}
AllocationID alloc(const VkMemoryRequirements& memReqs,
VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
bool isLinear = true, // buffers are linear, optimal tiling textures are not
const VkMemoryDedicatedAllocateInfo* dedicated = nullptr)
{
VkResult result;
return allocInternal(memReqs, memProps, isLinear, dedicated, result, true, m_defaultState);
}
// unless you use the freeAll mechanism, each allocation must be freed individually
void free(AllocationID allocationID);
// returns the detailed information from an allocationID
const Allocation& getAllocation(AllocationID id) const;
// map/unmap calls can be nested, but must be paired;
// internally the Vulkan mapping is kept active as long as at least one map is outstanding
void* map(AllocationID allocationID, VkResult* pResult = nullptr);
void unmap(AllocationID allocationID);
template <class T>
T* mapT(AllocationID allocationID, VkResult* pResult = nullptr)
{
return (T*)map(allocationID, pResult);
}
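// Usage sketch (assumes a host-visible allocation `aid` and source data `src`/`srcSize`):
//   float* ptr = memAllocator.mapT<float>(aid);
//   memcpy(ptr, src, srcSize);
//   memAllocator.unmap(aid);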
//////////////////////////////////////////////////////////////////////////
// utility functions to create resources and bind their memory directly
// subsequent creates will use dedicated allocations (mostly for debugging purposes)
inline void setForceDedicatedAllocation(bool state) { m_forceDedicatedAllocation = state; }
// subsequent createBuffers will also use these flags
inline void setDefaultBufferUsageFlags(VkBufferUsageFlags usage) { m_defaultBufferUsageFlags = usage; }
VkImage createImage(const VkImageCreateInfo& createInfo, AllocationID& allocationID, VkMemoryPropertyFlags memProps, VkResult& result);
VkImage createImage(const VkImageCreateInfo& createInfo, AllocationID& allocationID, VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
{
VkResult result;
return createImage(createInfo, allocationID, memProps, result);
}
VkImage createImage(const VkImageCreateInfo& createInfo, VkMemoryPropertyFlags memProps, VkResult& result)
{
AllocationID id;
return createImage(createInfo, id, memProps, result);
}
VkImage createImage(const VkImageCreateInfo& createInfo, VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
{
AllocationID id;
return createImage(createInfo, id, memProps);
}
VkBuffer createBuffer(const VkBufferCreateInfo& createInfo, AllocationID& allocationID, VkMemoryPropertyFlags memProps, VkResult& result);
VkBuffer createBuffer(const VkBufferCreateInfo& createInfo, AllocationID& allocationID, VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
{
VkResult result;
return createBuffer(createInfo, allocationID, memProps, result);
}
VkBuffer createBuffer(const VkBufferCreateInfo& createInfo, VkMemoryPropertyFlags memProps, VkResult& result)
{
AllocationID id;
return createBuffer(createInfo, id, memProps, result);
}
VkBuffer createBuffer(const VkBufferCreateInfo& createInfo, VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
{
AllocationID id;
return createBuffer(createInfo, id, memProps);
}
VkBuffer createBuffer(VkDeviceSize size,
VkBufferUsageFlags usage, // combined with m_defaultBufferUsageFlags and VK_BUFFER_USAGE_TRANSFER_DST_BIT
AllocationID& allocationID,
VkMemoryPropertyFlags memProps,
VkResult& result);
VkBuffer createBuffer(VkDeviceSize size,
VkBufferUsageFlags usage, // combined with m_defaultBufferUsageFlags and VK_BUFFER_USAGE_TRANSFER_DST_BIT
AllocationID& allocationID,
VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
{
VkResult result;
return createBuffer(size, usage, allocationID, memProps, result);
}
#if VK_NV_ray_tracing
VkAccelerationStructureNV createAccStructure(const VkAccelerationStructureCreateInfoNV& createInfo,
AllocationID& allocationID,
VkMemoryPropertyFlags memProps,
VkResult& result);
VkAccelerationStructureNV createAccStructure(const VkAccelerationStructureCreateInfoNV& createInfo,
AllocationID& allocationID,
VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
{
VkResult result;
return createAccStructure(createInfo, allocationID, memProps, result);
}
#endif
protected:
static const VkMemoryDedicatedAllocateInfo* DEDICATED_PROXY;
static int s_allocDebugBias;
struct BlockID
{
uint32_t index = INVALID_ID_INDEX;
uint32_t generation = 0;
bool isEqual(const BlockID& other) const { return index == other.index && generation == other.generation; }
uint32_t instantiate(uint32_t newIndex)
{
uint32_t oldIndex = index;
index = newIndex;
generation++;
return oldIndex;
}
friend bool operator==(const BlockID& lhs, const BlockID& rhs) { return rhs.isEqual(lhs); }
};
struct Block
{
BlockID id{}; // index to self, or next free item
VkDeviceMemory mem = VK_NULL_HANDLE;
nvh::TRangeAllocator<256> range;
VkDeviceSize allocationSize = 0;
VkDeviceSize usedSize = 0;
// to avoid management of pages via limits::bufferImageGranularity,
// a memory block is either fully linear, or non-linear
bool isLinear = false;
bool isDedicated = false;
bool isFirst = false; // first memory block of a type
float priority = 0.0f;
VkMemoryAllocateFlags allocateFlags{};
uint32_t allocateDeviceMask = 0;
uint32_t memoryTypeIndex = 0;
uint32_t allocationCount = 0;
uint32_t mapCount = 0;
uint32_t mappable = 0;
uint8_t* mapped = nullptr;
Block& operator=(Block&&) = default;
Block(Block&&) = default;
Block(const Block&) = default;
Block() = default;
};
struct AllocationInfo
{
AllocationID id{}; // index to self, or next free item
Allocation allocation{};
uint32_t blockOffset = 0;
uint32_t blockSize = 0;
BlockID block{};
};
VkDevice m_device = VK_NULL_HANDLE;
VkDeviceSize m_blockSize = 0;
VkDeviceSize m_allocatedSize = 0;
VkDeviceSize m_usedSize = 0;
VkDeviceSize m_maxAllocationSize = 0;
std::vector<Block> m_blocks;
std::vector<AllocationInfo> m_allocations;
// linked-list to next free allocation
uint32_t m_freeAllocationIndex = INVALID_ID_INDEX;
// linked-list to next free block
uint32_t m_freeBlockIndex = INVALID_ID_INDEX;
uint32_t m_activeBlockCount = 0;
VkPhysicalDeviceMemoryProperties m_memoryProperties;
VkPhysicalDevice m_physicalDevice = VK_NULL_HANDLE;
State m_defaultState;
VkBufferUsageFlags m_defaultBufferUsageFlags = 0;
bool m_forceDedicatedAllocation = false;
bool m_supportsPriority = false;
// heuristic that doesn't immediately free the first memory block of a specific memory type
bool m_keepFirst = true;
std::string m_debugName;
AllocationID allocInternal(const VkMemoryRequirements& memReqs,
VkMemoryPropertyFlags memProps,
bool isLinear, // buffers are linear, optimal tiling textures are not
const VkMemoryDedicatedAllocateInfo* dedicated,
VkResult& result,
bool preferDevice,
const State& state);
AllocationID createID(Allocation& allocation, BlockID block, uint32_t blockOffset, uint32_t blockSize);
void destroyID(AllocationID id);
const AllocationInfo& getInfo(AllocationID id) const
{
assert(m_allocations[id.index].id.isEqual(id));
return m_allocations[id.index];
}
Block& getBlock(BlockID id)
{
Block& block = m_blocks[id.index];
assert(block.id.isEqual(id));
return block;
}
//////////////////////////////////////////////////////////////////////////
// For derived memory allocators you can do special purpose operations via overloading these functions.
// A typical use-case would be exporting/importing the memory to another API.
virtual VkResult allocBlockMemory(BlockID id, VkMemoryAllocateInfo& memInfo, VkDeviceMemory& deviceMemory)
{
//s_allocDebugBias++;
return vkAllocateMemory(m_device, &memInfo, nullptr, &deviceMemory);
}
virtual void freeBlockMemory(BlockID id, VkDeviceMemory deviceMemory)
{
//s_allocDebugBias--;
vkFreeMemory(m_device, deviceMemory, nullptr);
}
virtual void resizeBlocks(uint32_t count) {}
virtual VkResult createBufferInternal(VkDevice device, const VkBufferCreateInfo* info, VkBuffer* buffer)
{
return vkCreateBuffer(device, info, nullptr, buffer);
}
virtual VkResult createImageInternal(VkDevice device, const VkImageCreateInfo* info, VkImage* image)
{
return vkCreateImage(device, info, nullptr, image);
}
};
} // namespace nvvk

View file

@ -0,0 +1,162 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#if NVP_SUPPORTS_OPENGL
#include <vulkan/vulkan.h>
#include "memorymanagement_vkgl.hpp"
#ifdef LINUX
#include <unistd.h>
#endif
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
VkExternalMemoryHandleTypeFlags DeviceMemoryAllocatorGL::getExternalMemoryHandleTypeFlags()
{
#ifdef VK_USE_PLATFORM_WIN32_KHR
return VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
#else
return VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
#endif
}
VkResult DeviceMemoryAllocatorGL::allocBlockMemory(BlockID id, VkMemoryAllocateInfo& memInfo, VkDeviceMemory& deviceMemory)
{
BlockGL& blockGL = m_blockGLs[id.index];
bool isDedicated = false;
const StructChain* extChain = (const StructChain*)memInfo.pNext;
while(extChain)
{
if(extChain->sType == VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO)
{
isDedicated = true;
break;
}
extChain = extChain->pNext;
}
// prepare memory allocation for export
VkExportMemoryAllocateInfo exportInfo = {VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO};
exportInfo.handleTypes = getExternalMemoryHandleTypeFlags();
exportInfo.pNext = memInfo.pNext;
memInfo.pNext = &exportInfo;
VkResult result = vkAllocateMemory(m_device, &memInfo, nullptr, &deviceMemory);
if(result != VK_SUCCESS)
{
return result;
}
// get the OS handle (warning: we must not forget to close it later)
#ifdef VK_USE_PLATFORM_WIN32_KHR
VkMemoryGetWin32HandleInfoKHR memGetHandle = {VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR};
memGetHandle.memory = deviceMemory;
memGetHandle.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
result = vkGetMemoryWin32HandleKHR(m_device, &memGetHandle, &blockGL.handle);
#else
VkMemoryGetFdInfoKHR memGetHandle = {VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR};
memGetHandle.memory = deviceMemory;
memGetHandle.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
result = vkGetMemoryFdKHR(m_device, &memGetHandle, &blockGL.handle);
#endif
if(result != VK_SUCCESS)
{
return result;
}
// import into GL
GLint param = isDedicated ? GL_TRUE : GL_FALSE;
glCreateMemoryObjectsEXT(1, &blockGL.memoryObject);
glMemoryObjectParameterivEXT(blockGL.memoryObject, GL_DEDICATED_MEMORY_OBJECT_EXT, &param);
#ifdef VK_USE_PLATFORM_WIN32_KHR
glImportMemoryWin32HandleEXT(blockGL.memoryObject, memInfo.allocationSize, GL_HANDLE_TYPE_OPAQUE_WIN32_EXT, blockGL.handle);
#else
glImportMemoryFdEXT(blockGL.memoryObject, memInfo.allocationSize, GL_HANDLE_TYPE_OPAQUE_FD_EXT, blockGL.handle);
// the Fd got consumed
blockGL.handle = -1;
#endif
return result;
}
void DeviceMemoryAllocatorGL::freeBlockMemory(BlockID id, VkDeviceMemory deviceMemory)
{
BlockGL& blockGL = m_blockGLs[id.index];
// free vulkan memory
vkFreeMemory(m_device, deviceMemory, nullptr);
glDeleteMemoryObjectsEXT(1, &blockGL.memoryObject);
blockGL.memoryObject = 0;
// don't forget the OS handle: it is ref-counted and can leak memory!
#ifdef VK_USE_PLATFORM_WIN32_KHR
CloseHandle(blockGL.handle);
blockGL.handle = NULL;
#else
if(blockGL.handle != -1)
{
close(blockGL.handle);
blockGL.handle = -1;
}
#endif
}
void DeviceMemoryAllocatorGL::resizeBlocks(uint32_t count)
{
if(count == 0)
{
m_blockGLs.clear();
}
else
{
m_blockGLs.resize(count);
}
}
VkResult DeviceMemoryAllocatorGL::createBufferInternal(VkDevice device, const VkBufferCreateInfo* info, VkBuffer* buffer)
{
VkBufferCreateInfo infoNew = *info;
VkExternalMemoryBufferCreateInfo external = {VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO};
external.handleTypes = getExternalMemoryHandleTypeFlags();
external.pNext = infoNew.pNext;
infoNew.pNext = &external;
return vkCreateBuffer(device, &infoNew, nullptr, buffer);
}
VkResult DeviceMemoryAllocatorGL::createImageInternal(VkDevice device, const VkImageCreateInfo* info, VkImage* image)
{
VkImageCreateInfo infoNew = *info;
VkExternalMemoryImageCreateInfo external = {VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO};
external.handleTypes = getExternalMemoryHandleTypeFlags();
external.pNext = infoNew.pNext;
infoNew.pNext = &external;
return vkCreateImage(device, &infoNew, nullptr, image);
}
} // namespace nvvk
#endif

View file

@ -0,0 +1,112 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#if NVP_SUPPORTS_OPENGL
/** @DOC_START
This file contains helpers for resource interoperability between OpenGL and Vulkan.
They only exist if the nvpro_core project is compiled with both Vulkan AND OpenGL support.
@DOC_END */
#pragma once
#include <nvgl/extensions_gl.hpp>
#include <nvvk/images_vk.hpp>
#include <nvvk/memorymanagement_vk.hpp>
#include <vulkan/vulkan_core.h>
namespace nvvk {
struct AllocationGL
{
GLuint memoryObject = 0;
GLuint64 offset = 0;
GLuint64 size = 0;
};
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::DeviceMemoryAllocatorGL
nvvk::DeviceMemoryAllocatorGL is derived from nvvk::DeviceMemoryAllocator. It uses Vulkan memory that is
exported and directly imported into OpenGL. Requires GL_EXT_memory_object.
It is used just like the base class; in addition, the function `getAllocationGL` returns the
imported GL memory object for a given allocation.
Look at the source of nvvk::AllocatorDmaGL for usage.
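A minimal usage sketch (the buffer `size`, `usage` flags and the `memAllocatorGL` instance are assumed placeholders; the GL calls come from GL_EXT_memory_object):
```cpp
nvvk::AllocationID aid;
VkBuffer buffer = memAllocatorGL.createBuffer(size, usage, aid);

// import the exported Vulkan memory into a GL buffer object
nvvk::AllocationGL allocGL = memAllocatorGL.getAllocationGL(aid);
GLuint glBuffer = 0;
glCreateBuffers(1, &glBuffer);
glNamedBufferStorageMemEXT(glBuffer, GLsizeiptr(allocGL.size), allocGL.memoryObject, allocGL.offset);
```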
@DOC_END */
class DeviceMemoryAllocatorGL : public DeviceMemoryAllocator
{
public:
DeviceMemoryAllocatorGL() {}
DeviceMemoryAllocatorGL(VkDevice device,
VkPhysicalDevice physicalDevice,
VkDeviceSize blockSize = NVVK_DEFAULT_MEMORY_BLOCKSIZE,
VkDeviceSize maxSize = 0)
: DeviceMemoryAllocator(device, physicalDevice, blockSize, maxSize)
{
}
AllocationGL getAllocationGL(AllocationID aid) const
{
AllocationGL alloc;
const AllocationInfo& info = getInfo(aid);
alloc.memoryObject = m_blockGLs[info.block.index].memoryObject;
alloc.offset = info.allocation.offset;
alloc.size = info.allocation.size;
return alloc;
}
static VkExternalMemoryHandleTypeFlags getExternalMemoryHandleTypeFlags();
protected:
struct BlockGL
{
#ifdef WIN32
void* handle = nullptr;
#else
int handle = -1;
#endif
GLuint memoryObject = 0;
};
std::vector<BlockGL> m_blockGLs;
struct StructChain
{
VkStructureType sType;
const StructChain* pNext;
};
VkResult allocBlockMemory(BlockID id, VkMemoryAllocateInfo& memInfo, VkDeviceMemory& deviceMemory) override;
void freeBlockMemory(BlockID id, VkDeviceMemory deviceMemory) override;
void resizeBlocks(uint32_t count) override;
VkResult createBufferInternal(VkDevice device, const VkBufferCreateInfo* info, VkBuffer* buffer) override;
VkResult createImageInternal(VkDevice device, const VkImageCreateInfo* info, VkImage* image) override;
};
} // namespace nvvk
#endif

View file

@ -0,0 +1,691 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "nsight_aftermath_vk.hpp"
#if defined(NVVK_SUPPORTS_AFTERMATH) && defined(NVP_SUPPORTS_VULKANSDK)
#include <vulkan/vulkan.h> // needed so GFSDK_Aftermath_SpirvCode gets declared
#include "nvh/nvprint.hpp"
#include "nvp/perproject_globals.hpp"
#include "nvp/nvpsystem.hpp"
#include "GFSDK_Aftermath.h"
#include "GFSDK_Aftermath_GpuCrashDump.h"
#include "GFSDK_Aftermath_GpuCrashDumpDecoding.h"
#include <array>
#include <cassert>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <sstream>
#include <string>
#include <vector>
//--------------------------------------------------------------------------------------------------
// Some std::to_string overloads for some Nsight Aftermath API types.
//
namespace std {
template <typename T>
inline std::string to_hex_string(T n)
{
std::stringstream stream;
stream << std::setfill('0') << std::setw(2 * sizeof(T)) << std::hex << n;
return stream.str();
}
inline std::string to_string(GFSDK_Aftermath_Result result)
{
return std::string("0x") + to_hex_string(static_cast<uint32_t>(result));
}
inline std::string to_string(const GFSDK_Aftermath_ShaderDebugInfoIdentifier& identifier)
{
return to_hex_string(identifier.id[0]) + "-" + to_hex_string(identifier.id[1]);
}
inline std::string to_string(const GFSDK_Aftermath_ShaderBinaryHash& hash)
{
return to_hex_string(hash.hash);
}
} // namespace std
//*********************************************************
// Helper for comparing shader hashes and debug info identifier.
//
// Helper for comparing GFSDK_Aftermath_ShaderDebugInfoIdentifier.
inline bool operator<(const GFSDK_Aftermath_ShaderDebugInfoIdentifier& lhs, const GFSDK_Aftermath_ShaderDebugInfoIdentifier& rhs)
{
if(lhs.id[0] == rhs.id[0])
{
return lhs.id[1] < rhs.id[1];
}
return lhs.id[0] < rhs.id[0];
}
// Helper for comparing GFSDK_Aftermath_ShaderBinaryHash.
inline bool operator<(const GFSDK_Aftermath_ShaderBinaryHash& lhs, const GFSDK_Aftermath_ShaderBinaryHash& rhs)
{
return lhs.hash < rhs.hash;
}
// Helper for comparing GFSDK_Aftermath_ShaderDebugName.
inline bool operator<(const GFSDK_Aftermath_ShaderDebugName& lhs, const GFSDK_Aftermath_ShaderDebugName& rhs)
{
return strncmp(lhs.name, rhs.name, sizeof(lhs.name)) < 0;
}
//*********************************************************
// Helper for checking Nsight Aftermath failures.
//
inline std::string AftermathErrorMessage(GFSDK_Aftermath_Result result)
{
switch(result)
{
case GFSDK_Aftermath_Result_FAIL_DriverVersionNotSupported:
return "Unsupported driver version - requires an NVIDIA R495 display driver or newer.";
default:
return "Aftermath Error 0x" + std::to_hex_string(result);
}
}
// Helper macro for checking Nsight Aftermath results and throwing exception
// in case of a failure.
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#define VC_EXTRALEAN
#include <windows.h>
#define AFTERMATH_CHECK_ERROR(FC) \
[&]() { \
GFSDK_Aftermath_Result _result = FC; \
if(!GFSDK_Aftermath_SUCCEED(_result)) \
{ \
MessageBoxA(0, AftermathErrorMessage(_result).c_str(), "Aftermath Error", MB_OK); \
exit(1); \
} \
}()
#else
#define AFTERMATH_CHECK_ERROR(FC) \
[&]() { \
GFSDK_Aftermath_Result _result = FC; \
if(!GFSDK_Aftermath_SUCCEED(_result)) \
{ \
printf("%s\n", AftermathErrorMessage(_result).c_str()); \
fflush(stdout); \
exit(1); \
} \
}()
#endif
namespace nvvk {
//*********************************************************
// Implements GPU crash dump tracking using the Nsight
// Aftermath API.
//
class GpuCrashTrackerImpl
{
public:
// keep four frames worth of marker history
const static unsigned int c_markerFrameHistory = 4;
typedef std::array<std::map<uint64_t, std::string>, c_markerFrameHistory> MarkerMap;
GpuCrashTrackerImpl(const MarkerMap& markerMap);
~GpuCrashTrackerImpl();
// Initialize the GPU crash dump tracker.
void initialize();
// Track a shader compiled with -g
void addShaderBinary(std::vector<uint32_t>& data);
// Track an optimized shader with additional debug information
void addShaderBinaryWithDebugInfo(std::vector<uint32_t>& data, std::vector<uint32_t>& strippedData);
private:
//*********************************************************
// Callback handlers for GPU crash dumps and related data.
//
// Handler for GPU crash dump callbacks.
void onCrashDump(const void* pGpuCrashDump, const uint32_t gpuCrashDumpSize);
// Handler for shader debug information callbacks.
void onShaderDebugInfo(const void* pShaderDebugInfo, const uint32_t shaderDebugInfoSize);
// Handler for GPU crash dump description callbacks.
static void onDescription(PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription addDescription);
// Handler for app-managed marker resolve callback
void onResolveMarker(const void* pMarker, void** resolvedMarkerData, uint32_t* markerSize);
//*********************************************************
// Helpers for writing a GPU crash dump and debug information
// data to files.
//
// Helper for writing a GPU crash dump to a file.
void writeGpuCrashDumpToFile(const void* pGpuCrashDump, const uint32_t gpuCrashDumpSize);
// Helper for writing shader debug information to a file
static void writeShaderDebugInformationToFile(GFSDK_Aftermath_ShaderDebugInfoIdentifier identifier,
const void* pShaderDebugInfo,
const uint32_t shaderDebugInfoSize);
//*********************************************************
// Helpers for decoding GPU crash dump to JSON.
//
// Handler for shader debug info lookup callbacks.
void onShaderDebugInfoLookup(const GFSDK_Aftermath_ShaderDebugInfoIdentifier& identifier,
PFN_GFSDK_Aftermath_SetData setShaderDebugInfo) const;
// Handler for shader lookup callbacks.
void onShaderLookup(const GFSDK_Aftermath_ShaderBinaryHash& shaderHash, PFN_GFSDK_Aftermath_SetData setShaderBinary) const;
// Handler for shader source debug info lookup callbacks.
void onShaderSourceDebugInfoLookup(const GFSDK_Aftermath_ShaderDebugName& shaderDebugName,
PFN_GFSDK_Aftermath_SetData setShaderBinary) const;
//*********************************************************
// Static callback wrappers.
//
// GPU crash dump callback.
static void gpuCrashDumpCallback(const void* pGpuCrashDump, const uint32_t gpuCrashDumpSize, void* pUserData);
// Shader debug information callback.
static void shaderDebugInfoCallback(const void* pShaderDebugInfo, const uint32_t shaderDebugInfoSize, void* pUserData);
// GPU crash dump description callback.
static void crashDumpDescriptionCallback(PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription addDescription, void* pUserData);
// App-managed marker resolve callback
static void resolveMarkerCallback(const void* pMarker, const uint32_t markerDataSize, void* pUserData, void** resolvedMarkerData, uint32_t* markerSize);
// Shader debug information lookup callback.
static void shaderDebugInfoLookupCallback(const GFSDK_Aftermath_ShaderDebugInfoIdentifier* pIdentifier,
PFN_GFSDK_Aftermath_SetData setShaderDebugInfo,
void* pUserData);
// Shader lookup callback.
static void shaderLookupCallback(const GFSDK_Aftermath_ShaderBinaryHash* pShaderHash,
PFN_GFSDK_Aftermath_SetData setShaderBinary,
void* pUserData);
// Shader source debug info lookup callback.
static void shaderSourceDebugInfoLookupCallback(const GFSDK_Aftermath_ShaderDebugName* pShaderDebugName,
PFN_GFSDK_Aftermath_SetData setShaderBinary,
void* pUserData);
//*********************************************************
// GPU crash tracker state.
//
// Is the GPU crash dump tracker initialized?
bool m_initialized;
// For thread-safe access of GPU crash tracker state.
mutable std::mutex m_mutex;
// List of Shader Debug Information by ShaderDebugInfoIdentifier.
std::map<GFSDK_Aftermath_ShaderDebugInfoIdentifier, std::vector<uint8_t>> m_shaderDebugInfo;
// App-managed marker tracking
const MarkerMap& m_markerMap;
//*********************************************************
// Shader database.
//
// Find a shader bytecode binary by shader hash.
bool findShaderBinary(const GFSDK_Aftermath_ShaderBinaryHash& shaderHash, std::vector<uint32_t>& shader) const;
// Find a source shader debug info by shader debug name generated by the DXC compiler.
bool findShaderBinaryWithDebugData(const GFSDK_Aftermath_ShaderDebugName& shaderDebugName, std::vector<uint32_t>& shader) const;
// List of shader binaries by ShaderBinaryHash.
std::map<GFSDK_Aftermath_ShaderBinaryHash, std::vector<uint32_t>> m_shaderBinaries;
// List of available shader binaries with source debug information by ShaderDebugName.
std::map<GFSDK_Aftermath_ShaderDebugName, std::vector<uint32_t>> m_shaderBinariesWithDebugInfo;
};
//*********************************************************
// GpuCrashTrackerImpl implementation
//*********************************************************
GpuCrashTrackerImpl::GpuCrashTrackerImpl(const MarkerMap& markerMap)
: m_initialized(false)
, m_markerMap(markerMap)
{
}
GpuCrashTrackerImpl::~GpuCrashTrackerImpl()
{
// If initialized, disable GPU crash dumps
if(m_initialized)
{
GFSDK_Aftermath_DisableGpuCrashDumps();
}
}
// Initialize the GPU Crash Dump Tracker
void GpuCrashTrackerImpl::initialize()
{
// Enable GPU crash dumps and set up the callbacks for crash dump notifications,
// shader debug information notifications, and providing additional crash
// dump description data. Only the crash dump callback is mandatory. The other two
// callbacks are optional and can be omitted, by passing nullptr, if the corresponding
// functionality is not used.
// The DeferDebugInfoCallbacks flag enables caching of shader debug information data
// in memory. If the flag is set, ShaderDebugInfoCallback will be called only
// in the event of a crash, right before GpuCrashDumpCallback. If the flag is not set,
// ShaderDebugInfoCallback will be called for every shader that is compiled.
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_EnableGpuCrashDumps(
GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan,
GFSDK_Aftermath_GpuCrashDumpFeatureFlags_DeferDebugInfoCallbacks, // Let the Nsight Aftermath library cache shader debug information.
gpuCrashDumpCallback, // Register callback for GPU crash dumps.
shaderDebugInfoCallback, // Register callback for shader debug information.
crashDumpDescriptionCallback, // Register callback for GPU crash dump description.
resolveMarkerCallback, // Register callback for resolving application-managed markers.
this)); // Set the GpuCrashTrackerImpl object as user data for the above callbacks.
m_initialized = true;
}
// Handler for GPU crash dump callbacks from Nsight Aftermath
void GpuCrashTrackerImpl::onCrashDump(const void* pGpuCrashDump, const uint32_t gpuCrashDumpSize)
{
// Make sure only one thread at a time...
std::lock_guard<std::mutex> lock(m_mutex);
// Write to file for later in-depth analysis with Nsight Graphics.
writeGpuCrashDumpToFile(pGpuCrashDump, gpuCrashDumpSize);
}
// Handler for shader debug information callbacks
void GpuCrashTrackerImpl::onShaderDebugInfo(const void* pShaderDebugInfo, const uint32_t shaderDebugInfoSize)
{
// Make sure only one thread at a time...
std::lock_guard<std::mutex> lock(m_mutex);
// Get shader debug information identifier
GFSDK_Aftermath_ShaderDebugInfoIdentifier identifier = {};
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_GetShaderDebugInfoIdentifier(GFSDK_Aftermath_Version_API, pShaderDebugInfo,
shaderDebugInfoSize, &identifier));
// Store information for decoding of GPU crash dumps with shader address mapping
// from within the application.
std::vector<uint8_t> data((uint8_t*)pShaderDebugInfo, (uint8_t*)pShaderDebugInfo + shaderDebugInfoSize);
m_shaderDebugInfo[identifier].swap(data);
// Write to file for later in-depth analysis of crash dumps with Nsight Graphics
writeShaderDebugInformationToFile(identifier, pShaderDebugInfo, shaderDebugInfoSize);
}
// Handler for GPU crash dump description callbacks
void GpuCrashTrackerImpl::onDescription(PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription addDescription)
{
// Add some basic description about the crash. This is called after the GPU crash happens, but before
// the actual GPU crash dump callback. The provided data is included in the crash dump and can be
// retrieved using GFSDK_Aftermath_GpuCrashDump_GetDescription().
addDescription(GFSDK_Aftermath_GpuCrashDumpDescriptionKey_ApplicationName, getProjectName().c_str());
addDescription(GFSDK_Aftermath_GpuCrashDumpDescriptionKey_ApplicationVersion, "v1.0");
}
// Handler for app-managed marker resolve callback
void GpuCrashTrackerImpl::onResolveMarker(const void* pMarker, void** resolvedMarkerData, uint32_t* markerSize)
{
// Important: the pointer passed back via resolvedMarkerData must remain valid after this function returns.
// Using references for all of the m_markerMap accesses ensures that the pointers refer to the persistent data.
for(const auto& map : m_markerMap)
{
const auto& found_marker = map.find((uint64_t)pMarker);
if(found_marker != map.end())
{
const std::string& marker_data = found_marker->second;
// std::string::data() will return a valid pointer until the string is next modified
// we don't modify the string after calling data() here, so the pointer should remain valid
*resolvedMarkerData = (void*)marker_data.data();
*markerSize = static_cast<uint32_t>(marker_data.length());
return;
}
}
}
// Helper for writing a GPU crash dump to a file
void GpuCrashTrackerImpl::writeGpuCrashDumpToFile(const void* pGpuCrashDump, const uint32_t gpuCrashDumpSize)
{
// Create a GPU crash dump decoder object for the GPU crash dump.
GFSDK_Aftermath_GpuCrashDump_Decoder decoder = {};
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_GpuCrashDump_CreateDecoder(GFSDK_Aftermath_Version_API, pGpuCrashDump,
gpuCrashDumpSize, &decoder));
// Use the decoder object to read basic information, like application
// name, PID, etc. from the GPU crash dump.
GFSDK_Aftermath_GpuCrashDump_BaseInfo baseInfo = {};
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_GpuCrashDump_GetBaseInfo(decoder, &baseInfo));
// Use the decoder object to query the application name that was set
// in the GPU crash dump description.
uint32_t applicationNameLength = 0;
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_GpuCrashDump_GetDescriptionSize(decoder, GFSDK_Aftermath_GpuCrashDumpDescriptionKey_ApplicationName,
&applicationNameLength));
std::vector<char> applicationName(applicationNameLength, '\0');
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_GpuCrashDump_GetDescription(decoder, GFSDK_Aftermath_GpuCrashDumpDescriptionKey_ApplicationName,
uint32_t(applicationName.size()), applicationName.data()));
// Create a unique file name for writing the crash dump data to a file.
// Note: due to an Nsight Aftermath bug (will be fixed in an upcoming
// driver release) we may see redundant crash dumps. As a workaround,
// attach a unique count to each generated file name.
static int count = 0;
const std::string base_file_name =
std::string(applicationName.data()) + "-" + std::to_string(baseInfo.pid) + "-" + std::to_string(++count);
// Write the crash dump data to a file using the .nv-gpudmp extension
// registered with Nsight Graphics.
const std::string crash_dump_file_name = base_file_name + ".nv-gpudmp";
const std::filesystem::path crash_dump_file_path(std::filesystem::absolute(crash_dump_file_name));
LOGE("\n--------------------------------------------------------------\n");
LOGE("Writing Aftermath dump file to:\n %s", crash_dump_file_path.string().c_str());
LOGE("\n--------------------------------------------------------------\n");
std::ofstream dump_file(crash_dump_file_path, std::ios::out | std::ios::binary);
if(dump_file)
{
dump_file.write(static_cast<const char*>(pGpuCrashDump), gpuCrashDumpSize);
dump_file.close();
}
// Decode the crash dump to a JSON string.
// Step 1: Generate the JSON and get the size.
uint32_t jsonSize = 0;
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_GpuCrashDump_GenerateJSON(
decoder, GFSDK_Aftermath_GpuCrashDumpDecoderFlags_ALL_INFO, GFSDK_Aftermath_GpuCrashDumpFormatterFlags_NONE,
shaderDebugInfoLookupCallback, shaderLookupCallback, shaderSourceDebugInfoLookupCallback, this, &jsonSize));
// Step 2: Allocate a buffer and fetch the generated JSON.
std::vector<char> json(jsonSize);
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_GpuCrashDump_GetJSON(decoder, uint32_t(json.size()), json.data()));
// Write the crash dump data as JSON to a file.
const std::string json_file_name = crash_dump_file_name + ".json";
const std::filesystem::path json_file_path(std::filesystem::absolute(json_file_name));
LOGE("\n--------------------------------------------------------------\n");
LOGE("Writing JSON dump file to:\n %s", json_file_path.string().c_str());
LOGE("\n--------------------------------------------------------------\n");
std::ofstream json_file(json_file_path, std::ios::out | std::ios::binary);
if(json_file)
{
// Write the JSON to the file (excluding string termination)
json_file.write(json.data(), json.size() - 1);
json_file.close();
}
// Destroy the GPU crash dump decoder object.
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_GpuCrashDump_DestroyDecoder(decoder));
}
// Helper for writing shader debug information to a file
void GpuCrashTrackerImpl::writeShaderDebugInformationToFile(GFSDK_Aftermath_ShaderDebugInfoIdentifier identifier,
const void* pShaderDebugInfo,
const uint32_t shaderDebugInfoSize)
{
// Create a unique file name.
const std::string file_path = "shader-" + std::to_string(identifier) + ".nvdbg";
std::ofstream f(file_path, std::ios::out | std::ios::binary);
if(f)
{
f.write(static_cast<const char*>(pShaderDebugInfo), shaderDebugInfoSize);
}
}
// Handler for shader debug information lookup callbacks.
// This is used by the JSON decoder for mapping shader instruction
// addresses to SPIR-V IL lines or GLSL source lines.
void GpuCrashTrackerImpl::onShaderDebugInfoLookup(const GFSDK_Aftermath_ShaderDebugInfoIdentifier& identifier,
PFN_GFSDK_Aftermath_SetData setShaderDebugInfo) const
{
// Search the list of shader debug information blobs received earlier.
auto i_debug_info = m_shaderDebugInfo.find(identifier);
if(i_debug_info == m_shaderDebugInfo.end())
{
// Early exit, nothing found. No need to call setShaderDebugInfo.
return;
}
// Let the GPU crash dump decoder know about the shader debug information
// that was found.
setShaderDebugInfo(i_debug_info->second.data(), static_cast<uint32_t>(i_debug_info->second.size()));
}
// Handler for shader lookup callbacks.
// This is used by the JSON decoder for mapping shader instruction
// addresses to SPIR-V IL lines or GLSL source lines.
// NOTE: If the application loads stripped shader binaries (i.e. --strip-all in spirv-remap),
// Aftermath will require access to both the stripped and the not stripped
// shader binaries.
void GpuCrashTrackerImpl::onShaderLookup(const GFSDK_Aftermath_ShaderBinaryHash& shaderHash, PFN_GFSDK_Aftermath_SetData setShaderBinary) const
{
// Find shader binary data for the shader hash in the shader database.
std::vector<uint32_t> shader_binary;
if(!findShaderBinary(shaderHash, shader_binary))
{
// Early exit, nothing found. No need to call setShaderBinary.
return;
}
// Let the GPU crash dump decoder know about the shader data
// that was found.
setShaderBinary(shader_binary.data(), sizeof(uint32_t) * static_cast<uint32_t>(shader_binary.size()));
}
// Handler for shader source debug info lookup callbacks.
// This is used by the JSON decoder for mapping shader instruction addresses to
// GLSL source lines, if the shaders used by the application were compiled with
// separate debug info data files.
void GpuCrashTrackerImpl::onShaderSourceDebugInfoLookup(const GFSDK_Aftermath_ShaderDebugName& shaderDebugName,
PFN_GFSDK_Aftermath_SetData setShaderBinary) const
{
// Find source debug info for the shader DebugName in the shader database.
std::vector<uint32_t> shader_binary;
if(!findShaderBinaryWithDebugData(shaderDebugName, shader_binary))
{
// Early exit, nothing found. No need to call setShaderBinary.
return;
}
// Let the GPU crash dump decoder know about the shader debug data that was
// found.
setShaderBinary(shader_binary.data(), sizeof(uint32_t) * static_cast<uint32_t>(shader_binary.size()));
}
// Static callback wrapper for OnCrashDump
void GpuCrashTrackerImpl::gpuCrashDumpCallback(const void* pGpuCrashDump, const uint32_t gpuCrashDumpSize, void* pUserData)
{
auto* p_gpu_crash_tracker = reinterpret_cast<GpuCrashTrackerImpl*>(pUserData);
p_gpu_crash_tracker->onCrashDump(pGpuCrashDump, gpuCrashDumpSize);
}
// Static callback wrapper for OnShaderDebugInfo
void GpuCrashTrackerImpl::shaderDebugInfoCallback(const void* pShaderDebugInfo, const uint32_t shaderDebugInfoSize, void* pUserData)
{
auto* p_gpu_crash_tracker = reinterpret_cast<GpuCrashTrackerImpl*>(pUserData);
p_gpu_crash_tracker->onShaderDebugInfo(pShaderDebugInfo, shaderDebugInfoSize);
}
// Static callback wrapper for OnDescription
void GpuCrashTrackerImpl::crashDumpDescriptionCallback(PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription addDescription, void* pUserData)
{
auto* p_gpu_crash_tracker = reinterpret_cast<GpuCrashTrackerImpl*>(pUserData);
p_gpu_crash_tracker->onDescription(addDescription);
}
// Static callback wrapper for OnResolveMarker
void GpuCrashTrackerImpl::resolveMarkerCallback(const void* pMarker,
const uint32_t markerDataSize,
void* pUserData,
void** resolvedMarkerData,
uint32_t* markerSize)
{
auto* p_gpu_crash_tracker = reinterpret_cast<GpuCrashTrackerImpl*>(pUserData);
p_gpu_crash_tracker->onResolveMarker(pMarker, resolvedMarkerData, markerSize);
}
// Static callback wrapper for OnShaderDebugInfoLookup
void GpuCrashTrackerImpl::shaderDebugInfoLookupCallback(const GFSDK_Aftermath_ShaderDebugInfoIdentifier* pIdentifier,
PFN_GFSDK_Aftermath_SetData setShaderDebugInfo,
void* pUserData)
{
auto* p_gpu_crash_tracker = reinterpret_cast<GpuCrashTrackerImpl*>(pUserData);
p_gpu_crash_tracker->onShaderDebugInfoLookup(*pIdentifier, setShaderDebugInfo);
}
// Static callback wrapper for OnShaderLookup
void GpuCrashTrackerImpl::shaderLookupCallback(const GFSDK_Aftermath_ShaderBinaryHash* pShaderHash,
PFN_GFSDK_Aftermath_SetData setShaderBinary,
void* pUserData)
{
auto* p_gpu_crash_tracker = reinterpret_cast<GpuCrashTrackerImpl*>(pUserData);
p_gpu_crash_tracker->onShaderLookup(*pShaderHash, setShaderBinary);
}
// Static callback wrapper for OnShaderSourceDebugInfoLookup
void GpuCrashTrackerImpl::shaderSourceDebugInfoLookupCallback(const GFSDK_Aftermath_ShaderDebugName* pShaderDebugName,
PFN_GFSDK_Aftermath_SetData setShaderBinary,
void* pUserData)
{
auto* p_gpu_crash_tracker = reinterpret_cast<GpuCrashTrackerImpl*>(pUserData);
p_gpu_crash_tracker->onShaderSourceDebugInfoLookup(*pShaderDebugName, setShaderBinary);
}
void GpuCrashTrackerImpl::addShaderBinary(std::vector<uint32_t>& data)
{
// Create shader hash for the shader
const GFSDK_Aftermath_SpirvCode shader{data.data(), static_cast<uint32_t>(data.size())};
GFSDK_Aftermath_ShaderBinaryHash shaderHash{};
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_GetShaderHashSpirv(GFSDK_Aftermath_Version_API, &shader, &shaderHash));
// Store the data for shader mapping when decoding GPU crash dumps.
// cf. FindShaderBinary()
m_shaderBinaries[shaderHash] = data;
}
void GpuCrashTrackerImpl::addShaderBinaryWithDebugInfo(std::vector<uint32_t>& data, std::vector<uint32_t>& strippedData)
{
// Generate shader debug name.
GFSDK_Aftermath_ShaderDebugName debugName{};
const GFSDK_Aftermath_SpirvCode shader{data.data(), static_cast<uint32_t>(data.size())};
const GFSDK_Aftermath_SpirvCode strippedShader{strippedData.data(), static_cast<uint32_t>(strippedData.size())};
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_GetShaderDebugNameSpirv(GFSDK_Aftermath_Version_API, &shader, &strippedShader, &debugName));
// Store the data for shader instruction address mapping when decoding GPU crash dumps.
// cf. FindShaderBinaryWithDebugData()
m_shaderBinariesWithDebugInfo[debugName] = data;
}
// Find a shader binary by shader hash.
bool GpuCrashTrackerImpl::findShaderBinary(const GFSDK_Aftermath_ShaderBinaryHash& shaderHash, std::vector<uint32_t>& shader) const
{
// Find shader binary data for the shader hash
auto i_shader = m_shaderBinaries.find(shaderHash);
if(i_shader == m_shaderBinaries.end())
{
// Nothing found.
return false;
}
shader = i_shader->second;
return true;
}
// Find a shader binary with debug information by shader debug name.
bool GpuCrashTrackerImpl::findShaderBinaryWithDebugData(const GFSDK_Aftermath_ShaderDebugName& shaderDebugName,
std::vector<uint32_t>& shader) const
{
// Find shader binary for the shader debug name.
auto i_shader = m_shaderBinariesWithDebugInfo.find(shaderDebugName);
if(i_shader == m_shaderBinariesWithDebugInfo.end())
{
// Nothing found.
return false;
}
shader = i_shader->second;
return true;
}
// Global marker map
static GpuCrashTrackerImpl::MarkerMap g_marker_map;
GpuCrashTracker::GpuCrashTracker()
: m_pimpl(new GpuCrashTrackerImpl(g_marker_map))
{
}
GpuCrashTracker::~GpuCrashTracker()
{
delete m_pimpl;
}
void GpuCrashTracker::initialize()
{
m_pimpl->initialize();
}
} // namespace nvvk
#else
namespace nvvk {
GpuCrashTracker::GpuCrashTracker()
: m_pimpl(nullptr)
{
}
GpuCrashTracker::~GpuCrashTracker() {}
void GpuCrashTracker::initialize() {}
} // namespace nvvk
#endif

View file

@ -0,0 +1,35 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
namespace nvvk {
//// @DOC_SKIP
class GpuCrashTracker
{
public:
GpuCrashTracker();
~GpuCrashTracker();
void initialize(); // Initialize the GPU crash dump tracker.
private:
class GpuCrashTrackerImpl* m_pimpl;
};
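// Usage sketch (an assumption of typical integration, not mandated by this header):
// construct the tracker and call initialize() early, typically before creating the VkDevice,
// so the Nsight Aftermath crash-dump callbacks are registered for the whole run, e.g.
//   nvvk::GpuCrashTracker crashTracker;
//   crashTracker.initialize();
//   // ... create instance/device and render; dump files are written if the GPU crashes ...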
} //namespace nvvk

View file

@ -0,0 +1,222 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "pipeline_vk.hpp"
#include <inttypes.h>
#include <nvh/nvprint.hpp>
namespace nvvk {
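// Note (assumption derived from VK_KHR_pipeline_executable_properties): statistics and internal
// representations are only available if the pipeline was created with
// VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR (and, for dumpPipelineInternals,
// VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) and the extension is enabled, e.g.
//   pipelineCreateInfo.flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;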
void nvprintPipelineStats(VkDevice device, VkPipeline pipeline, const char* name, bool verbose /*= false*/)
{
VkPipelineInfoKHR pipeInfo = {VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR};
pipeInfo.pipeline = pipeline;
if(!pipeline)
return;
std::vector<VkPipelineExecutablePropertiesKHR> props;
uint32_t executableCount = 0;
vkGetPipelineExecutablePropertiesKHR(device, &pipeInfo, &executableCount, nullptr);
props.resize(executableCount, {VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR});
vkGetPipelineExecutablePropertiesKHR(device, &pipeInfo, &executableCount, props.data());
nvprintfLevel(LOGLEVEL_STATS, "VkPipeline stats for %p, %s\n", pipeline, name);
nvprintfLevel(LOGLEVEL_STATS, "---------------------------\n");
for(uint32_t i = 0; i < executableCount; i++)
{
const VkPipelineExecutablePropertiesKHR& prop = props[i];
nvprintfLevel(LOGLEVEL_STATS, "- Executable: %s\n", prop.name);
if(verbose)
nvprintfLevel(LOGLEVEL_STATS, " (%s)\n", prop.description);
nvprintfLevel(LOGLEVEL_STATS, " - stages: 0x%08X\n", prop.stages);
nvprintfLevel(LOGLEVEL_STATS, " - subgroupSize: %2d\n", prop.subgroupSize);
VkPipelineExecutableInfoKHR execInfo = {VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR};
execInfo.pipeline = pipeline;
execInfo.executableIndex = i;
uint32_t statsCount = 0;
std::vector<VkPipelineExecutableStatisticKHR> stats;
vkGetPipelineExecutableStatisticsKHR(device, &execInfo, &statsCount, nullptr);
stats.resize(statsCount, {VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_STATISTIC_KHR});
vkGetPipelineExecutableStatisticsKHR(device, &execInfo, &statsCount, stats.data());
for(uint32_t s = 0; s < statsCount; s++)
{
const VkPipelineExecutableStatisticKHR& stat = stats[s];
switch(stat.format)
{
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR:
nvprintfLevel(LOGLEVEL_STATS, " - %s: %d\n", stat.name, stat.value.b32);
break;
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
nvprintfLevel(LOGLEVEL_STATS, " - %s: %" PRIi64 "\n", stat.name, stat.value.i64);
break;
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
nvprintfLevel(LOGLEVEL_STATS, " - %s: %" PRIu64 "\n", stat.name, stat.value.u64);
break;
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
nvprintfLevel(LOGLEVEL_STATS, " - %s: %f\n", stat.name, stat.value.f64);
break;
}
if(verbose)
nvprintfLevel(LOGLEVEL_STATS, " (%s)\n", stat.description);
}
}
nvprintfLevel(LOGLEVEL_STATS, "\n");
}
void dumpPipelineStats(VkDevice device, VkPipeline pipeline, const char* fileName)
{
VkPipelineInfoKHR pipeInfo = {VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR};
pipeInfo.pipeline = pipeline;
if(!pipeline)
return;
FILE* fdump = fopen(fileName, "wt");
if(!fdump)
return;
std::vector<VkPipelineExecutablePropertiesKHR> props;
uint32_t executableCount = 0;
vkGetPipelineExecutablePropertiesKHR(device, &pipeInfo, &executableCount, nullptr);
props.resize(executableCount, {VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR});
vkGetPipelineExecutablePropertiesKHR(device, &pipeInfo, &executableCount, props.data());
fprintf(fdump, "VkPipeline stats for %p\n", pipeline);
fprintf(fdump, "-----------------------\n");
for(uint32_t i = 0; i < executableCount; i++)
{
const VkPipelineExecutablePropertiesKHR& prop = props[i];
fprintf(fdump, "- Executable: %s\n", prop.name);
fprintf(fdump, " (%s)\n", prop.description);
fprintf(fdump, " - stages: 0x%08X\n", prop.stages);
fprintf(fdump, " - subgroupSize: %2d\n", prop.subgroupSize);
VkPipelineExecutableInfoKHR execInfo = {VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR};
execInfo.pipeline = pipeline;
execInfo.executableIndex = i;
uint32_t statsCount = 0;
std::vector<VkPipelineExecutableStatisticKHR> stats;
vkGetPipelineExecutableStatisticsKHR(device, &execInfo, &statsCount, nullptr);
stats.resize(statsCount, {VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_STATISTIC_KHR});
vkGetPipelineExecutableStatisticsKHR(device, &execInfo, &statsCount, stats.data());
for(uint32_t s = 0; s < statsCount; s++)
{
const VkPipelineExecutableStatisticKHR& stat = stats[s];
switch(stat.format)
{
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR:
fprintf(fdump, " - %s: %d\n", stat.name, stat.value.b32);
break;
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
fprintf(fdump, " - %s: %" PRIi64 "\n", stat.name, stat.value.i64);
break;
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
fprintf(fdump, " - %s: %" PRIu64 "\n", stat.name, stat.value.u64);
break;
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
fprintf(fdump, " - %s: %f\n", stat.name, stat.value.f64);
break;
}
fprintf(fdump, " (%s)\n", stat.description);
}
}
fprintf(fdump, "\n");
fclose(fdump);
}
static inline std::string stringFormat(const char* msg, ...)
{
char text[1024];
va_list list;
if(msg == 0)
return std::string();
va_start(list, msg);
vsnprintf(text, sizeof(text), msg, list);
va_end(list);
return std::string(text);
}
void dumpPipelineInternals(VkDevice device, VkPipeline pipeline, const char* baseFileName)
{
VkPipelineInfoKHR pipeInfo = {VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR};
pipeInfo.pipeline = pipeline;
if(!pipeline)
return;
std::vector<VkPipelineExecutablePropertiesKHR> props;
uint32_t executableCount = 0;
vkGetPipelineExecutablePropertiesKHR(device, &pipeInfo, &executableCount, nullptr);
props.resize(executableCount, {VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR});
vkGetPipelineExecutablePropertiesKHR(device, &pipeInfo, &executableCount, props.data());
for(uint32_t e = 0; e < executableCount; e++)
{
const VkPipelineExecutablePropertiesKHR& prop = props[e];
VkPipelineExecutableInfoKHR execInfo = {VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR};
execInfo.pipeline = pipeline;
execInfo.executableIndex = e;
uint32_t internalCount = 0;
vkGetPipelineExecutableInternalRepresentationsKHR(device, &execInfo, &internalCount, nullptr);
if(internalCount)
{
std::vector<VkPipelineExecutableInternalRepresentationKHR> internals(
internalCount, {VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INTERNAL_REPRESENTATION_KHR});
vkGetPipelineExecutableInternalRepresentationsKHR(device, &execInfo, &internalCount, internals.data());
size_t offset = 0;
for(uint32_t i = 0; i < internalCount; i++)
{
offset += internals[i].dataSize;
}
std::vector<uint8_t> rawBytes(offset);
offset = 0;
for(uint32_t i = 0; i < internalCount; i++)
{
internals[i].pData = &rawBytes[offset];
offset += internals[i].dataSize;
}
vkGetPipelineExecutableInternalRepresentationsKHR(device, &execInfo, &internalCount, internals.data());
for(uint32_t i = 0; i < internalCount; i++)
{
bool isText = strstr(internals[i].name, "text") != nullptr;
std::string fileName = std::string(baseFileName) + "." + std::string(prop.name) + stringFormat(".%d.", e)
+ internals[i].name + stringFormat(".%d.%s", i, isText ? "txt" : "bin");
FILE* f = fopen(fileName.c_str(), "wb");
if(f)
{
fwrite(internals[i].pData, internals[i].dataSize, 1, f);
fclose(f);
}
}
}
}
}
} // namespace nvvk

View file

@@ -0,0 +1,767 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <cassert>
#include <iterator>
#include <string>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace nvvk {
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# functions in nvvk
- nvprintPipelineStats : prints stats of the pipeline using VK_KHR_pipeline_executable_properties (don't forget to enable extension and set VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR)
- dumpPipelineStats : dumps stats of the pipeline using VK_KHR_pipeline_executable_properties to a text file (don't forget to enable extension and set VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR)
- dumpPipelineInternals : dumps the pipeline's internal representations (e.g. shader binaries/ISA) using VK_KHR_pipeline_executable_properties to multiple files (don't forget to enable extension and set VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)
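Example of usage (a minimal sketch; `device` and `pipeline` are placeholders for your own handles, and the extension plus the capture flags must have been enabled at device/pipeline creation):
```cpp
// Device creation: enable VK_KHR_pipeline_executable_properties.
// Pipeline creation: add VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR
// (plus VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR for dumpPipelineInternals).
nvvk::nvprintPipelineStats(device, pipeline, "my pipeline");
nvvk::dumpPipelineStats(device, pipeline, "pipeline_stats.txt");
nvvk::dumpPipelineInternals(device, pipeline, "pipeline_internals");
```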
@DOC_END */
// nvprints stats to LOGLEVEL_STATS stream
void nvprintPipelineStats(VkDevice device, VkPipeline pipeline, const char* name, bool verbose = false);
// writes stats into single file
void dumpPipelineStats(VkDevice device, VkPipeline pipeline, const char* fileName);
// creates multiple files, one for each pipe executable and representation.
// The baseFilename will get appended along the lines of ".some details.bin"
void dumpPipelineInternals(VkDevice device, VkPipeline pipeline, const char* baseFileName);
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# struct nvvk::GraphicsPipelineState
Most graphics pipelines share similar state, so the helper `GraphicsPipelineState` holds all the elements and
initializes the structures with sensible defaults: triangle-list topology, depth test enabled with a
less-or-equal compare, dynamic state for viewport and scissor, one color attachment with a full write mask,
and a line width of 1 pixel.
Example of usage :
```cpp
nvvk::GraphicsPipelineState pipelineState;
pipelineState.depthStencilState.depthTestEnable = VK_TRUE;
pipelineState.rasterizationState.cullMode = VK_CULL_MODE_NONE;
pipelineState.addBindingDescription({0, sizeof(Vertex)});
pipelineState.addAttributeDescriptions({
{0, 0, VK_FORMAT_R32G32B32_SFLOAT, static_cast<uint32_t>(offsetof(Vertex, pos))},
{1, 0, VK_FORMAT_R32G32B32_SFLOAT, static_cast<uint32_t>(offsetof(Vertex, nrm))},
{2, 0, VK_FORMAT_R32G32B32_SFLOAT, static_cast<uint32_t>(offsetof(Vertex, col))}});
```
@DOC_END */
struct GraphicsPipelineState
{
// Initialize the state to common values: triangle list topology, depth test enabled,
// dynamic viewport and scissor, one render target, blending disabled
GraphicsPipelineState()
{
rasterizationState.flags = {};
rasterizationState.depthClampEnable = {};
rasterizationState.rasterizerDiscardEnable = {};
setValue(rasterizationState.polygonMode, VK_POLYGON_MODE_FILL);
setValue(rasterizationState.cullMode, VK_CULL_MODE_BACK_BIT);
setValue(rasterizationState.frontFace, VK_FRONT_FACE_COUNTER_CLOCKWISE);
rasterizationState.depthBiasEnable = {};
rasterizationState.depthBiasConstantFactor = {};
rasterizationState.depthBiasClamp = {};
rasterizationState.depthBiasSlopeFactor = {};
rasterizationState.lineWidth = 1.f;
inputAssemblyState.flags = {};
setValue(inputAssemblyState.topology, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST);
inputAssemblyState.primitiveRestartEnable = {};
colorBlendState.flags = {};
colorBlendState.logicOpEnable = {};
setValue(colorBlendState.logicOp, VK_LOGIC_OP_CLEAR);
colorBlendState.attachmentCount = {};
colorBlendState.pAttachments = {};
for(int i = 0; i < 4; i++)
{
colorBlendState.blendConstants[i] = 0.f;
}
dynamicState.flags = {};
dynamicState.dynamicStateCount = {};
dynamicState.pDynamicStates = {};
vertexInputState.flags = {};
vertexInputState.vertexBindingDescriptionCount = {};
vertexInputState.pVertexBindingDescriptions = {};
vertexInputState.vertexAttributeDescriptionCount = {};
vertexInputState.pVertexAttributeDescriptions = {};
viewportState.flags = {};
viewportState.viewportCount = {};
viewportState.pViewports = {};
viewportState.scissorCount = {};
viewportState.pScissors = {};
depthStencilState.flags = {};
depthStencilState.depthTestEnable = VK_TRUE;
depthStencilState.depthWriteEnable = VK_TRUE;
setValue(depthStencilState.depthCompareOp, VK_COMPARE_OP_LESS_OR_EQUAL);
depthStencilState.depthBoundsTestEnable = {};
depthStencilState.stencilTestEnable = {};
setValue(depthStencilState.front, VkStencilOpState());
setValue(depthStencilState.back, VkStencilOpState());
depthStencilState.minDepthBounds = {};
depthStencilState.maxDepthBounds = {};
setValue(multisampleState.rasterizationSamples, VK_SAMPLE_COUNT_1_BIT);
}
GraphicsPipelineState(const GraphicsPipelineState& src) = default;
// Attach the pointer values of the structures to the internal arrays
void update()
{
colorBlendState.attachmentCount = (uint32_t)blendAttachmentStates.size();
colorBlendState.pAttachments = blendAttachmentStates.data();
dynamicState.dynamicStateCount = (uint32_t)dynamicStateEnables.size();
dynamicState.pDynamicStates = dynamicStateEnables.data();
vertexInputState.vertexAttributeDescriptionCount = static_cast<uint32_t>(attributeDescriptions.size());
vertexInputState.vertexBindingDescriptionCount = static_cast<uint32_t>(bindingDescriptions.size());
vertexInputState.pVertexBindingDescriptions = bindingDescriptions.data();
vertexInputState.pVertexAttributeDescriptions = attributeDescriptions.data();
if(viewports.empty())
{
viewportState.viewportCount = 1;
viewportState.pViewports = nullptr;
}
else
{
viewportState.viewportCount = (uint32_t)viewports.size();
viewportState.pViewports = viewports.data();
}
if(scissors.empty())
{
viewportState.scissorCount = 1;
viewportState.pScissors = nullptr;
}
else
{
viewportState.scissorCount = (uint32_t)scissors.size();
viewportState.pScissors = scissors.data();
}
}
static inline VkPipelineColorBlendAttachmentState makePipelineColorBlendAttachmentState(
VkColorComponentFlags colorWriteMask_ = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT
| VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
VkBool32 blendEnable_ = 0,
VkBlendFactor srcColorBlendFactor_ = VK_BLEND_FACTOR_ZERO,
VkBlendFactor dstColorBlendFactor_ = VK_BLEND_FACTOR_ZERO,
VkBlendOp colorBlendOp_ = VK_BLEND_OP_ADD,
VkBlendFactor srcAlphaBlendFactor_ = VK_BLEND_FACTOR_ZERO,
VkBlendFactor dstAlphaBlendFactor_ = VK_BLEND_FACTOR_ZERO,
VkBlendOp alphaBlendOp_ = VK_BLEND_OP_ADD)
{
VkPipelineColorBlendAttachmentState res;
res.blendEnable = blendEnable_;
res.srcColorBlendFactor = srcColorBlendFactor_;
res.dstColorBlendFactor = dstColorBlendFactor_;
res.colorBlendOp = colorBlendOp_;
res.srcAlphaBlendFactor = srcAlphaBlendFactor_;
res.dstAlphaBlendFactor = dstAlphaBlendFactor_;
res.alphaBlendOp = alphaBlendOp_;
res.colorWriteMask = colorWriteMask_;
return res;
}
static inline VkVertexInputBindingDescription makeVertexInputBinding(uint32_t binding, uint32_t stride, VkVertexInputRate rate = VK_VERTEX_INPUT_RATE_VERTEX)
{
VkVertexInputBindingDescription vertexBinding;
vertexBinding.binding = binding;
vertexBinding.inputRate = rate;
vertexBinding.stride = stride;
return vertexBinding;
}
static inline VkVertexInputAttributeDescription makeVertexInputAttribute(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset)
{
VkVertexInputAttributeDescription attrib;
attrib.binding = binding;
attrib.location = location;
attrib.format = format;
attrib.offset = offset;
return attrib;
}
void clearBlendAttachmentStates() { blendAttachmentStates.clear(); }
void setBlendAttachmentCount(uint32_t attachmentCount) { blendAttachmentStates.resize(attachmentCount); }
void setBlendAttachmentState(uint32_t attachment, const VkPipelineColorBlendAttachmentState& blendState)
{
assert(attachment < blendAttachmentStates.size());
if(attachment < blendAttachmentStates.size())
{
blendAttachmentStates[attachment] = blendState;
}
}
void setBlendAttachmentColorMask(uint32_t attachment, VkColorComponentFlags mask)
{
assert(attachment < blendAttachmentStates.size());
if(attachment < blendAttachmentStates.size())
{
blendAttachmentStates[attachment].colorWriteMask = mask;
}
}
uint32_t addBlendAttachmentState(const VkPipelineColorBlendAttachmentState& blendState)
{
blendAttachmentStates.push_back(blendState);
return (uint32_t)(blendAttachmentStates.size() - 1);
}
void clearDynamicStateEnables() { dynamicStateEnables.clear(); }
void setDynamicStateEnablesCount(uint32_t dynamicStateCount) { dynamicStateEnables.resize(dynamicStateCount); }
void setDynamicStateEnable(uint32_t state, VkDynamicState dynamicState)
{
assert(state < dynamicStateEnables.size());
if(state < dynamicStateEnables.size())
{
dynamicStateEnables[state] = dynamicState;
}
}
uint32_t addDynamicStateEnable(VkDynamicState dynamicState)
{
dynamicStateEnables.push_back(dynamicState);
return (uint32_t)(dynamicStateEnables.size() - 1);
}
void clearBindingDescriptions() { bindingDescriptions.clear(); }
void setBindingDescriptionsCount(uint32_t bindingDescriptionCount)
{
bindingDescriptions.resize(bindingDescriptionCount);
}
void setBindingDescription(uint32_t binding, VkVertexInputBindingDescription bindingDescription)
{
assert(binding < bindingDescriptions.size());
if(binding < bindingDescriptions.size())
{
bindingDescriptions[binding] = bindingDescription;
}
}
uint32_t addBindingDescription(const VkVertexInputBindingDescription& bindingDescription)
{
bindingDescriptions.push_back(bindingDescription);
return (uint32_t)(bindingDescriptions.size() - 1);
}
void addBindingDescriptions(const std::vector<VkVertexInputBindingDescription>& bindingDescriptions_)
{
bindingDescriptions.insert(bindingDescriptions.end(), bindingDescriptions_.begin(), bindingDescriptions_.end());
}
void clearAttributeDescriptions() { attributeDescriptions.clear(); }
void setAttributeDescriptionsCount(uint32_t attributeDescriptionCount)
{
attributeDescriptions.resize(attributeDescriptionCount);
}
void setAttributeDescription(uint32_t attribute, const VkVertexInputAttributeDescription& attributeDescription)
{
assert(attribute < attributeDescriptions.size());
if(attribute < attributeDescriptions.size())
{
attributeDescriptions[attribute] = attributeDescription;
}
}
uint32_t addAttributeDescription(const VkVertexInputAttributeDescription& attributeDescription)
{
attributeDescriptions.push_back(attributeDescription);
return (uint32_t)(attributeDescriptions.size() - 1);
}
void addAttributeDescriptions(const std::vector<VkVertexInputAttributeDescription>& attributeDescriptions_)
{
attributeDescriptions.insert(attributeDescriptions.end(), attributeDescriptions_.begin(), attributeDescriptions_.end());
}
void clearViewports() { viewports.clear(); }
void setViewportsCount(uint32_t viewportCount) { viewports.resize(viewportCount); }
void setViewport(uint32_t attribute, VkViewport viewport)
{
assert(attribute < viewports.size());
if(attribute < viewports.size())
{
viewports[attribute] = viewport;
}
}
uint32_t addViewport(VkViewport viewport)
{
viewports.push_back(viewport);
return (uint32_t)(viewports.size() - 1);
}
void clearScissors() { scissors.clear(); }
void setScissorsCount(uint32_t scissorCount) { scissors.resize(scissorCount); }
void setScissor(uint32_t attribute, VkRect2D scissor)
{
assert(attribute < scissors.size());
if(attribute < scissors.size())
{
scissors[attribute] = scissor;
}
}
uint32_t addScissor(VkRect2D scissor)
{
scissors.push_back(scissor);
return (uint32_t)(scissors.size() - 1);
}
VkPipelineInputAssemblyStateCreateInfo inputAssemblyState{VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO};
VkPipelineRasterizationStateCreateInfo rasterizationState{VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO};
VkPipelineMultisampleStateCreateInfo multisampleState{VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO};
VkPipelineDepthStencilStateCreateInfo depthStencilState{VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO};
VkPipelineViewportStateCreateInfo viewportState{VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO};
VkPipelineDynamicStateCreateInfo dynamicState{VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO};
VkPipelineColorBlendStateCreateInfo colorBlendState{VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO};
VkPipelineVertexInputStateCreateInfo vertexInputState{VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO};
protected:
std::vector<VkPipelineColorBlendAttachmentState> blendAttachmentStates{makePipelineColorBlendAttachmentState()};
std::vector<VkDynamicState> dynamicStateEnables = {VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR};
std::vector<VkVertexInputBindingDescription> bindingDescriptions;
std::vector<VkVertexInputAttributeDescription> attributeDescriptions;
std::vector<VkViewport> viewports;
std::vector<VkRect2D> scissors;
// Helper that casts values so the same code works with either the C or C++ (vulkan.hpp) struct types
template <class T, class U>
void setValue(T& target, const U& val)
{
target = (T)(val);
}
};
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# struct nvvk::GraphicsPipelineGenerator
The graphics pipeline generator takes a GraphicsPipelineState object and pipeline-specific information such as
the render pass and pipeline layout to generate the final pipeline.
Example of usage :
```cpp
nvvk::GraphicsPipelineState pipelineState;
...
nvvk::GraphicsPipelineGenerator pipelineGenerator(m_device, m_pipelineLayout, m_renderPass, pipelineState);
pipelineGenerator.addShader(readFile("spv/vert_shader.vert.spv"), VkShaderStageFlagBits::eVertex);
pipelineGenerator.addShader(readFile("spv/frag_shader.frag.spv"), VkShaderStageFlagBits::eFragment);
m_pipeline = pipelineGenerator.createPipeline();
```
@DOC_END */
struct GraphicsPipelineGenerator
{
public:
GraphicsPipelineGenerator(GraphicsPipelineState& pipelineState_)
: pipelineState(pipelineState_)
{
init();
}
GraphicsPipelineGenerator(const GraphicsPipelineGenerator& src)
: createInfo(src.createInfo)
, device(src.device)
, pipelineCache(src.pipelineCache)
, pipelineState(src.pipelineState)
{
init();
}
GraphicsPipelineGenerator(VkDevice device_, const VkPipelineLayout& layout, const VkRenderPass& renderPass, GraphicsPipelineState& pipelineState_)
: device(device_)
, pipelineState(pipelineState_)
{
createInfo.layout = layout;
createInfo.renderPass = renderPass;
init();
}
// For VK_KHR_dynamic_rendering
using PipelineRenderingCreateInfo = VkPipelineRenderingCreateInfo;
GraphicsPipelineGenerator(VkDevice device_,
const VkPipelineLayout& layout,
const PipelineRenderingCreateInfo& pipelineRenderingCreateInfo,
GraphicsPipelineState& pipelineState_)
: device(device_)
, pipelineState(pipelineState_)
{
createInfo.layout = layout;
setPipelineRenderingCreateInfo(pipelineRenderingCreateInfo);
init();
}
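// Minimal sketch of the dynamic-rendering path (hypothetical `device`, `pipelineLayout`, `pipelineState`,
// `colorFormat` and `depthFormat`; assumes the dynamicRendering feature is enabled on the device):
//   VkPipelineRenderingCreateInfo renderingInfo{VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO};
//   renderingInfo.colorAttachmentCount    = 1;
//   renderingInfo.pColorAttachmentFormats = &colorFormat;
//   renderingInfo.depthAttachmentFormat   = depthFormat;
//   nvvk::GraphicsPipelineGenerator gen(device, pipelineLayout, renderingInfo, pipelineState);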
const GraphicsPipelineGenerator& operator=(const GraphicsPipelineGenerator& src)
{
device = src.device;
pipelineState = src.pipelineState;
createInfo = src.createInfo;
pipelineCache = src.pipelineCache;
init();
return *this;
}
void setDevice(VkDevice device_) { device = device_; }
void setRenderPass(VkRenderPass renderPass)
{
createInfo.renderPass = renderPass;
createInfo.pNext = nullptr;
}
void setPipelineRenderingCreateInfo(const PipelineRenderingCreateInfo& pipelineRenderingCreateInfo)
{
// Deep copy
assert(pipelineRenderingCreateInfo.pNext == nullptr); // Update deep copy if needed.
dynamicRenderingInfo = pipelineRenderingCreateInfo;
if(dynamicRenderingInfo.colorAttachmentCount != 0)
{
dynamicRenderingColorFormats.assign(dynamicRenderingInfo.pColorAttachmentFormats,
dynamicRenderingInfo.pColorAttachmentFormats + dynamicRenderingInfo.colorAttachmentCount);
dynamicRenderingInfo.pColorAttachmentFormats = dynamicRenderingColorFormats.data();
}
// Set VkGraphicsPipelineCreateInfo::pNext to point to deep copy of extension struct.
// NB: Will have to change if more than 1 extension struct needs to be supported.
createInfo.pNext = &dynamicRenderingInfo;
}
void setLayout(VkPipelineLayout layout) { createInfo.layout = layout; }
~GraphicsPipelineGenerator() { destroyShaderModules(); }
VkPipelineShaderStageCreateInfo& addShader(const std::string& code, VkShaderStageFlagBits stage, const char* entryPoint = "main")
{
std::vector<char> v;
std::copy(code.begin(), code.end(), std::back_inserter(v));
return addShader(v, stage, entryPoint);
}
template <typename T>
VkPipelineShaderStageCreateInfo& addShader(const std::vector<T>& code, VkShaderStageFlagBits stage, const char* entryPoint = "main")
{
VkShaderModuleCreateInfo createInfo{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO};
createInfo.codeSize = sizeof(T) * code.size();
createInfo.pCode = reinterpret_cast<const uint32_t*>(code.data());
VkShaderModule shaderModule;
vkCreateShaderModule(device, &createInfo, nullptr, &shaderModule);
temporaryModules.push_back(shaderModule);
return addShader(shaderModule, stage, entryPoint);
}
VkPipelineShaderStageCreateInfo& addShader(VkShaderModule shaderModule, VkShaderStageFlagBits stage, const char* entryPoint = "main")
{
VkPipelineShaderStageCreateInfo shaderStage{VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO};
shaderStage.stage = (VkShaderStageFlagBits)stage;
shaderStage.module = shaderModule;
shaderStage.pName = entryPoint;
shaderStages.push_back(shaderStage);
return shaderStages.back();
}
void clearShaders()
{
shaderStages.clear();
destroyShaderModules();
}
VkShaderModule getShaderModule(size_t index) const
{
if(index < shaderStages.size())
return shaderStages[index].module;
return VK_NULL_HANDLE;
}
VkPipeline createPipeline(const VkPipelineCache& cache)
{
update();
VkPipeline pipeline;
vkCreateGraphicsPipelines(device, cache, 1, (VkGraphicsPipelineCreateInfo*)&createInfo, nullptr, &pipeline);
return pipeline;
}
VkPipeline createPipeline() { return createPipeline(pipelineCache); }
void destroyShaderModules()
{
for(const auto& shaderModule : temporaryModules)
{
vkDestroyShaderModule(device, shaderModule, nullptr);
}
temporaryModules.clear();
}
void update()
{
createInfo.stageCount = static_cast<uint32_t>(shaderStages.size());
createInfo.pStages = shaderStages.data();
pipelineState.update();
}
VkGraphicsPipelineCreateInfo createInfo{VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO};
private:
VkDevice device;
VkPipelineCache pipelineCache{};
std::vector<VkPipelineShaderStageCreateInfo> shaderStages;
std::vector<VkShaderModule> temporaryModules;
std::vector<VkFormat> dynamicRenderingColorFormats;
GraphicsPipelineState& pipelineState;
PipelineRenderingCreateInfo dynamicRenderingInfo{VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO};
void init()
{
createInfo.pRasterizationState = &pipelineState.rasterizationState;
createInfo.pInputAssemblyState = &pipelineState.inputAssemblyState;
createInfo.pColorBlendState = &pipelineState.colorBlendState;
createInfo.pMultisampleState = &pipelineState.multisampleState;
createInfo.pViewportState = &pipelineState.viewportState;
createInfo.pDepthStencilState = &pipelineState.depthStencilState;
createInfo.pDynamicState = &pipelineState.dynamicState;
createInfo.pVertexInputState = &pipelineState.vertexInputState;
}
// Helper that casts values so the same code works with either the C or C++ (vulkan.hpp) struct types
template <class T, class U>
void setValue(T& target, const U& val)
{
target = (T)(val);
}
};
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# class nvvk::GraphicsPipelineGeneratorCombined
In many cases the application creates one state object per pipeline. For convenience,
nvvk::GraphicsPipelineGeneratorCombined combines both the state and the generator into a single object.
Example of usage :
```cpp
nvvk::GraphicsPipelineGeneratorCombined pipelineGenerator(m_device, m_pipelineLayout, m_renderPass);
pipelineGenerator.depthStencilState.depthTestEnable = VK_TRUE;
pipelineGenerator.rasterizationState.cullMode = VK_CULL_MODE_NONE;
pipelineGenerator.addBindingDescription({0, sizeof(Vertex)});
pipelineGenerator.addAttributeDescriptions({
{0, 0, VK_FORMAT_R32G32B32_SFLOAT, static_cast<uint32_t>(offsetof(Vertex, pos))},
{1, 0, VK_FORMAT_R32G32B32_SFLOAT, static_cast<uint32_t>(offsetof(Vertex, nrm))},
{2, 0, VK_FORMAT_R32G32B32_SFLOAT, static_cast<uint32_t>(offsetof(Vertex, col))}});
pipelineGenerator.addShader(readFile("spv/vert_shader.vert.spv"), VK_SHADER_STAGE_VERTEX_BIT);
pipelineGenerator.addShader(readFile("spv/frag_shader.frag.spv"), VK_SHADER_STAGE_FRAGMENT_BIT);
m_pipeline = pipelineGenerator.createPipeline();
```
@DOC_END */
struct GraphicsPipelineGeneratorCombined : public GraphicsPipelineState, public GraphicsPipelineGenerator
{
GraphicsPipelineGeneratorCombined(VkDevice device_, const VkPipelineLayout& layout, const VkRenderPass& renderPass)
: GraphicsPipelineState()
, GraphicsPipelineGenerator(device_, layout, renderPass, *this)
{
}
};
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# struct nvvk::GraphicShaderObjectPipeline
This is a helper to set the full dynamic graphics pipeline state when using shader objects
- Set the pipeline state as you would do for a regular pipeline
- Call cmdSetPipelineState to set the pipeline state in the command buffer
Example of usage :
```cpp
// Member of the class
nvvk::GraphicShaderObjectPipeline m_shaderObjPipeline;
// Creation of the dynamic graphic pipeline
m_shaderObjPipeline.rasterizationState.cullMode = VK_CULL_MODE_NONE;
m_shaderObjPipeline.addBindingDescriptions({{0, sizeof(nvh::PrimitiveVertex)}});
m_shaderObjPipeline.addAttributeDescriptions({
{0, 0, VK_FORMAT_R32G32B32_SFLOAT, static_cast<uint32_t>(offsetof(nvh::PrimitiveVertex, p))}, // Position
{1, 0, VK_FORMAT_R32G32B32_SFLOAT, static_cast<uint32_t>(offsetof(nvh::PrimitiveVertex, n))}, // Normal
});
m_shaderObjPipeline.update();
// In the drawing
m_shaderObjPipeline.setViewportScissor(m_app->getViewportSize());
m_shaderObjPipeline.cmdSetPipelineState(cmd);
```
@DOC_END */
struct GraphicShaderObjectPipeline : GraphicsPipelineState
{
VkSampleMask sampleMask{~0U};
std::vector<VkVertexInputBindingDescription2EXT> vertexBindingDescriptions2;
std::vector<VkColorBlendEquationEXT> colorBlendEquationState;
std::vector<VkBool32> colorBlendEnables;
std::vector<VkColorComponentFlags> colorWriteMasks;
std::vector<VkVertexInputAttributeDescription2EXT> vertexAttributeDescriptions2;
GraphicShaderObjectPipeline()
{
viewports.resize(1); // There should be at least one viewport
scissors.resize(1); //
}
// Set the viewport and scissor to the full extent
void setViewportScissor(const VkExtent2D& extent)
{
viewports[0].x = 0;
viewports[0].y = 0;
viewports[0].width = float(extent.width);
viewports[0].height = float(extent.height);
viewports[0].minDepth = 0;
viewports[0].maxDepth = 1;
scissors[0].offset = {0, 0};
scissors[0].extent = extent;
}
// Update the internal state
void update()
{
GraphicsPipelineState::update();
multisampleState.pSampleMask = &sampleMask;
vertexBindingDescriptions2.resize(vertexInputState.vertexBindingDescriptionCount);
for(uint32_t i = 0; i < vertexInputState.vertexBindingDescriptionCount; i++)
{
vertexBindingDescriptions2[i].sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT;
vertexBindingDescriptions2[i].binding = vertexInputState.pVertexBindingDescriptions[i].binding;
vertexBindingDescriptions2[i].inputRate = vertexInputState.pVertexBindingDescriptions[i].inputRate;
vertexBindingDescriptions2[i].stride = vertexInputState.pVertexBindingDescriptions[i].stride;
vertexBindingDescriptions2[i].divisor = 1;
}
vertexAttributeDescriptions2.resize(vertexInputState.vertexAttributeDescriptionCount);
for(uint32_t i = 0; i < vertexInputState.vertexAttributeDescriptionCount; i++)
{
vertexAttributeDescriptions2[i].sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT;
vertexAttributeDescriptions2[i].binding = vertexInputState.pVertexAttributeDescriptions[i].binding;
vertexAttributeDescriptions2[i].format = vertexInputState.pVertexAttributeDescriptions[i].format;
vertexAttributeDescriptions2[i].location = vertexInputState.pVertexAttributeDescriptions[i].location;
vertexAttributeDescriptions2[i].offset = vertexInputState.pVertexAttributeDescriptions[i].offset;
}
colorBlendEquationState.resize(colorBlendState.attachmentCount);
colorBlendEnables.resize(colorBlendState.attachmentCount);
colorWriteMasks.resize(colorBlendState.attachmentCount);
for(uint32_t i = 0; i < colorBlendState.attachmentCount; i++)
{
colorBlendEquationState[i].srcColorBlendFactor = colorBlendState.pAttachments[i].srcColorBlendFactor;
colorBlendEquationState[i].dstColorBlendFactor = colorBlendState.pAttachments[i].dstColorBlendFactor;
colorBlendEquationState[i].colorBlendOp = colorBlendState.pAttachments[i].colorBlendOp;
colorBlendEquationState[i].srcAlphaBlendFactor = colorBlendState.pAttachments[i].srcAlphaBlendFactor;
colorBlendEquationState[i].dstAlphaBlendFactor = colorBlendState.pAttachments[i].dstAlphaBlendFactor;
colorBlendEquationState[i].alphaBlendOp = colorBlendState.pAttachments[i].alphaBlendOp;
colorBlendEnables[i] = colorBlendState.pAttachments[i].blendEnable;
colorWriteMasks[i] = colorBlendState.pAttachments[i].colorWriteMask;
}
}
// Set the pipeline state in the command buffer
void cmdSetPipelineState(VkCommandBuffer cmd)
{
vkCmdSetViewportWithCount(cmd, viewportState.viewportCount, viewportState.pViewports);
vkCmdSetScissorWithCount(cmd, viewportState.scissorCount, viewportState.pScissors);
vkCmdSetLineWidth(cmd, rasterizationState.lineWidth);
vkCmdSetDepthBias(cmd, rasterizationState.depthBiasConstantFactor, rasterizationState.depthBiasClamp,
rasterizationState.depthBiasSlopeFactor);
vkCmdSetCullMode(cmd, rasterizationState.cullMode);
vkCmdSetFrontFace(cmd, rasterizationState.frontFace);
vkCmdSetDepthBiasEnable(cmd, rasterizationState.depthBiasEnable);
vkCmdSetRasterizerDiscardEnable(cmd, rasterizationState.rasterizerDiscardEnable);
vkCmdSetDepthClampEnableEXT(cmd, rasterizationState.depthClampEnable);
vkCmdSetPolygonModeEXT(cmd, rasterizationState.polygonMode);
vkCmdSetBlendConstants(cmd, colorBlendState.blendConstants);
vkCmdSetDepthBounds(cmd, depthStencilState.minDepthBounds, depthStencilState.maxDepthBounds);
vkCmdSetDepthBoundsTestEnable(cmd, depthStencilState.depthBoundsTestEnable);
vkCmdSetDepthCompareOp(cmd, depthStencilState.depthCompareOp);
vkCmdSetDepthTestEnable(cmd, depthStencilState.depthTestEnable);
vkCmdSetDepthWriteEnable(cmd, depthStencilState.depthWriteEnable);
vkCmdSetStencilTestEnable(cmd, depthStencilState.stencilTestEnable);
vkCmdSetPrimitiveRestartEnable(cmd, inputAssemblyState.primitiveRestartEnable);
vkCmdSetPrimitiveTopology(cmd, inputAssemblyState.topology);
vkCmdSetRasterizationSamplesEXT(cmd, multisampleState.rasterizationSamples);
vkCmdSetSampleMaskEXT(cmd, multisampleState.rasterizationSamples, multisampleState.pSampleMask);
vkCmdSetAlphaToCoverageEnableEXT(cmd, multisampleState.alphaToCoverageEnable);
vkCmdSetAlphaToOneEnableEXT(cmd, multisampleState.alphaToOneEnable);
vkCmdSetVertexInputEXT(cmd, vertexInputState.vertexBindingDescriptionCount, vertexBindingDescriptions2.data(),
vertexInputState.vertexAttributeDescriptionCount, vertexAttributeDescriptions2.data());
vkCmdSetColorBlendEquationEXT(cmd, 0, colorBlendState.attachmentCount, colorBlendEquationState.data());
vkCmdSetColorBlendEnableEXT(cmd, 0, colorBlendState.attachmentCount, colorBlendEnables.data());
vkCmdSetColorWriteMaskEXT(cmd, 0, colorBlendState.attachmentCount, colorWriteMasks.data());
vkCmdSetLogicOpEnableEXT(cmd, colorBlendState.logicOpEnable);
}
};
} // namespace nvvk

View file

@@ -0,0 +1,200 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "profiler_vk.hpp"
#include "debug_util_vk.hpp"
#include "error_vk.hpp"
#include <assert.h>
//////////////////////////////////////////////////////////////////////////
namespace nvvk {
void ProfilerVK::init(VkDevice device, VkPhysicalDevice physicalDevice, int queueFamilyIndex)
{
assert(!m_device);
m_device = device;
#if 0
m_useCoreHostReset = supportsCoreHostReset;
#endif
VkPhysicalDeviceProperties properties;
vkGetPhysicalDeviceProperties(physicalDevice, &properties);
m_frequency = properties.limits.timestampPeriod;
std::vector<VkQueueFamilyProperties> queueProperties;
uint32_t queueFamilyCount = 0;
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, nullptr);
queueProperties.resize(queueFamilyCount);
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, queueProperties.data());
uint32_t validBits = queueProperties[queueFamilyIndex].timestampValidBits;
m_queueFamilyMask = validBits == 64 ? uint64_t(-1) : ((uint64_t(1) << validBits) - uint64_t(1));
resize();
}
void ProfilerVK::deinit()
{
if(m_queryPool)
{
vkDestroyQueryPool(m_device, m_queryPool, nullptr);
m_queryPool = VK_NULL_HANDLE;
}
m_device = VK_NULL_HANDLE;
}
void ProfilerVK::setLabelUsage(bool state)
{
m_useLabels = state;
}
void ProfilerVK::resize()
{
if(getRequiredTimers() < m_queryPoolSize)
return;
if(m_queryPool)
{
// FIXME we may lose results this way
// not exactly efficient, but when the timer count changes a lot we have a slow frame anyway
// cleaner would be allocating more pools
VkResult result = vkDeviceWaitIdle(m_device);
if(nvvk::checkResult(result, __FILE__, __LINE__))
{
exit(-1);
}
vkDestroyQueryPool(m_device, m_queryPool, nullptr);
}
VkQueryPoolCreateInfo createInfo = {VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO};
createInfo.queryType = VK_QUERY_TYPE_TIMESTAMP;
createInfo.queryCount = getRequiredTimers();
m_queryPoolSize = createInfo.queryCount;
VkResult res = vkCreateQueryPool(m_device, &createInfo, nullptr, &m_queryPool);
assert(res == VK_SUCCESS);
nvvk::DebugUtil(m_device).setObjectName(m_queryPool, m_debugName);
}
nvh::Profiler::SectionID ProfilerVK::beginSection(const char* name, VkCommandBuffer cmd, bool singleShot, bool useHostReset)
{
nvh::Profiler::gpuTimeProvider_fn fnProvider = [&](SectionID i, uint32_t queryFrame, double& gpuTime) {
return getSectionTime(i, queryFrame, gpuTime);
};
SectionID slot = Profiler::beginSection(name, "VK ", fnProvider, singleShot);
if(getRequiredTimers() > m_queryPoolSize)
{
resize();
}
if(m_useLabels)
{
VkDebugUtilsLabelEXT label = {VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT};
label.pLabelName = name;
label.color[1] = 1.0f;
vkCmdBeginDebugUtilsLabelEXT(cmd, &label);
}
#if 0
else if(m_useMarkers)
{
VkDebugMarkerMarkerInfoEXT marker = {VK_STRUCTURE_TYPE_DEBUG_MARKER_MARKER_INFO_EXT};
marker.pMarkerName = name;
vkCmdDebugMarkerBeginEXT(cmd, &marker);
}
#endif
uint32_t idx = getTimerIdx(slot, getSubFrame(slot), true);
if(useHostReset)
{
#if 0
if(m_useCoreHostReset)
{
vkResetQueryPool(m_device, m_queryPool, idx, 2);
}
else
#endif
{
vkResetQueryPoolEXT(m_device, m_queryPool, idx, 2);
}
}
else
{
// not ideal to do this per query
vkCmdResetQueryPool(cmd, m_queryPool, idx, 2);
}
// log timestamp
vkCmdWriteTimestamp(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_queryPool, idx);
return slot;
}
void ProfilerVK::endSection(SectionID slot, VkCommandBuffer cmd)
{
uint32_t idx = getTimerIdx(slot, getSubFrame(slot), false);
vkCmdWriteTimestamp(cmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_queryPool, idx);
if(m_useLabels)
{
vkCmdEndDebugUtilsLabelEXT(cmd);
}
#if 0
else if(m_useMarkers)
{
vkCmdDebugMarkerEndEXT(cmd);
}
#endif
Profiler::endSection(slot);
}
bool ProfilerVK::getSectionTime(SectionID i, uint32_t queryFrame, double& gpuTime)
{
bool isRecurring = isSectionRecurring(i);
uint32_t idxBegin = getTimerIdx(i, queryFrame, true);
uint32_t idxEnd = getTimerIdx(i, queryFrame, false);
assert(idxEnd == idxBegin + 1);
uint64_t times[2];
VkResult result = vkGetQueryPoolResults(m_device, m_queryPool, idxBegin, 2, sizeof(uint64_t) * 2, times, sizeof(uint64_t),
VK_QUERY_RESULT_64_BIT | (isRecurring ? VK_QUERY_RESULT_WAIT_BIT : 0));
// Validation layer bug: it complains if VK_QUERY_RESULT_WAIT_BIT is not provided, even if we
// already wait on another fence for the work that wrote the query.
// Fixed with VK SDK 1.1.126, but we keep the old logic here.
if(result == VK_SUCCESS)
{
uint64_t mask = m_queueFamilyMask;
gpuTime = (double((times[1] & mask) - (times[0] & mask)) * double(m_frequency)) / double(1000);
return true;
}
else
{
return false;
}
}
} // namespace nvvk

View file

@@ -0,0 +1,168 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include "nvh/profiler.hpp"
#include <string>
#include <vulkan/vulkan_core.h>
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::ProfilerVK
nvvk::ProfilerVK derives from nvh::Profiler and uses vkCmdWriteTimestamp
to measure the gpu time within a section.
If profiler.setLabelUsage(true) was used, it will wrap each section in a
vkCmdBeginDebugUtilsLabelEXT / vkCmdEndDebugUtilsLabelEXT pair so that the
sections show up in tools like Nsight Graphics and RenderDoc.
Currently the command buffers must support vkCmdResetQueryPool as well.
When multiple queues are used there could be problems with the "nesting"
of sections. In that case multiple profilers, one per queue, are most
likely better.
Example:
```cpp
nvvk::ProfilerVK profiler;
std::string profilerStats;
profiler.init(device, physicalDevice, queueFamilyIndex);
profiler.setLabelUsage(true); // depends on VK_EXT_debug_utils
while(true)
{
profiler.beginFrame();
... setup frame ...
{
// use the Section class to time the scope
auto sec = profiler.timeRecurring("draw", cmd);
vkCmdDraw(cmd, ...);
}
... submit cmd buffer ...
profiler.endFrame();
// generic print to string
profiler.print(profilerStats);
// or access data directly
nvh::Profiler::TimerInfo info;
if( profiler.getTimerInfo("draw", info)) {
// do some updates
updateProfilerUi("draw", info.gpu.average);
}
}
```
@DOC_END */
class ProfilerVK : public nvh::Profiler
{
public:
// hostReset usage depends on VK_EXT_host_query_reset
// mandatory for transfer-only queues
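// e.g. (sketch, assuming the extension is enabled): auto sec = profiler.timeRecurring("copy", transferCmd, /*hostReset=*/true);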
//////////////////////////////////////////////////////////////////////////
// utility class to call begin/end within local scope
class Section
{
public:
Section(ProfilerVK& profiler, const char* name, VkCommandBuffer cmd, bool singleShot = false, bool hostReset = false)
: m_profiler(profiler)
{
m_id = profiler.beginSection(name, cmd, singleShot, hostReset);
m_cmd = cmd;
}
~Section() { m_profiler.endSection(m_id, m_cmd); }
private:
SectionID m_id;
VkCommandBuffer m_cmd;
ProfilerVK& m_profiler;
};
// recurring, must be within beginFrame/endFrame
Section timeRecurring(const char* name, VkCommandBuffer cmd, bool hostReset = false)
{
return Section(*this, name, cmd, false, hostReset);
}
// singleShot, results are available after FRAME_DELAY many endFrame
Section timeSingle(const char* name, VkCommandBuffer cmd, bool hostReset = false)
{
return Section(*this, name, cmd, true, hostReset);
}
//////////////////////////////////////////////////////////////////////////
ProfilerVK(nvh::Profiler* masterProfiler = nullptr)
: Profiler(masterProfiler)
{
m_debugName = "nvvk::ProfilerVK:" + std::to_string((uint64_t)this);
}
ProfilerVK(VkDevice device, VkPhysicalDevice physicalDevice, nvh::Profiler* masterProfiler = nullptr)
: Profiler(masterProfiler)
{
init(device, physicalDevice);
}
~ProfilerVK() { deinit(); }
void init(VkDevice device, VkPhysicalDevice physicalDevice, int queueFamilyIndex = 0);
void deinit();
void setDebugName(const std::string& name) { m_debugName = name; }
// enable debug label per section, requires VK_EXT_debug_utils
void setLabelUsage(bool state);
SectionID beginSection(const char* name, VkCommandBuffer cmd, bool singleShot = false, bool hostReset = false);
void endSection(SectionID slot, VkCommandBuffer cmd);
bool getSectionTime(SectionID i, uint32_t queryFrame, double& gpuTime);
private:
void resize();
bool m_useLabels = false;
#if 0
bool m_useCoreHostReset = false;
#endif
VkDevice m_device = VK_NULL_HANDLE;
VkQueryPool m_queryPool = VK_NULL_HANDLE;
uint32_t m_queryPoolSize = 0;
float m_frequency = 1.0f;
uint64_t m_queueFamilyMask = ~0;
std::string m_debugName;
};
} // namespace nvvk

View file

@@ -0,0 +1,417 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
/** @DOC_START
# class nvvk::RayPickerKHR
nvvk::RayPickerKHR is a utility to get hit information under a screen coordinate.
The information returned is:
- origin and direction in world space
- hitT, the distance of the hit along the ray direction
- primitiveID, instanceID and instanceCustomIndex
- the barycentric coordinates in the triangle
Setting up:
- call setup() once with the Vulkan device, physical device, queue family index and allocator
- call setTlas with the TLAS previously built
Getting results, for example, on mouse down:
- fill the PickInfo structure
- call run()
- call getResult() to get all the information above
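Example of issuing a pick (a minimal sketch; `cmdBuf`, the camera matrices and the normalized mouse
coordinates are assumed to come from the application):
```cpp
nvvk::RayPickerKHR::PickInfo pickInfo;
pickInfo.modelViewInv   = glm::inverse(viewMatrix);
pickInfo.perspectiveInv = glm::inverse(projMatrix);
pickInfo.pickX          = mouseX / float(viewportWidth);   // normalized [0,1]
pickInfo.pickY          = mouseY / float(viewportHeight);
m_picker.run(cmdBuf, pickInfo);  // record into a command buffer, then submit it
```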
Example to set the camera interest point
```cpp
RayPickerKHR::PickResult pr = m_picker.getResult();
if(pr.instanceID != ~0) // Hit something
{
glm::vec3 worldPos = pr.worldRayOrigin + pr.worldRayDirection * pr.hitT;
glm::vec3 eye, center, up;
CameraManip.getLookat(eye, center, up);
CameraManip.setLookat(eye, worldPos, up, false); // Nice with CameraManip.updateAnim();
}
```
@DOC_END */
#include <glm/glm.hpp>
#include "nvvk/commands_vk.hpp"
#include "nvvk/debug_util_vk.hpp"
#include "nvvk/descriptorsets_vk.hpp"
#include "nvvk/resourceallocator_vk.hpp"
#include "nvvk/shaders_vk.hpp"
#include "nvvk/context_vk.hpp"
namespace nvvk {
struct RayPickerKHR
{
public:
struct PickInfo
{
glm::mat4 modelViewInv; // inverse model view matrix
glm::mat4 perspectiveInv; // inverse perspective matrix
float pickX{0}; // normalized X position
float pickY{0}; // normalized Y position
} m_pickInfo;
struct PickResult
{
glm::vec4 worldRayOrigin{0, 0, 0, 0};
glm::vec4 worldRayDirection{0, 0, 0, 0};
float hitT{0};
int primitiveID{0};
int instanceID{~0};
int instanceCustomIndex{0};
glm::vec3 baryCoord{0, 0, 0};
};
RayPickerKHR() = default;
RayPickerKHR(nvvk::Context* ctx, nvvk::ResourceAllocator* allocator, uint32_t queueFamilyIndex = 0)
{
setup(ctx->m_device, ctx->m_physicalDevice, queueFamilyIndex, allocator);
}
void setup(const VkDevice& device, const VkPhysicalDevice& physicalDevice, uint32_t queueFamilyIndex, nvvk::ResourceAllocator* allocator)
{
m_physicalDevice = physicalDevice;
m_device = device;
m_queueFamilyIndex = queueFamilyIndex;
m_debug.setup(device);
m_alloc = allocator;
createOutputResult();
createDescriptorSet();
createPipeline();
}
// tlas : top acceleration structure
void setTlas(const VkAccelerationStructureKHR& tlas)
{
VkWriteDescriptorSetAccelerationStructureKHR descAsInfo{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR};
descAsInfo.accelerationStructureCount = 1;
descAsInfo.pAccelerationStructures = &tlas;
VkDescriptorBufferInfo pickDesc{m_pickResult.buffer, 0, VK_WHOLE_SIZE};
std::vector<VkWriteDescriptorSet> writes;
writes.emplace_back(m_binding.makeWrite(m_descSet, 0, &descAsInfo));
writes.emplace_back(m_binding.makeWrite(m_descSet, 1, &pickDesc));
vkUpdateDescriptorSets(m_device, static_cast<uint32_t>(writes.size()), writes.data(), 0, nullptr);
}
bool isValid() { return m_pipeline != VK_NULL_HANDLE; }
void run(const VkCommandBuffer& cmdBuf, const PickInfo& pickInfo)
{
m_pickInfo = pickInfo;
vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipeline);
vkCmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipelineLayout, 0, 1, &m_descSet, 0, nullptr);
vkCmdPushConstants(cmdBuf, m_pipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(PickInfo), &m_pickInfo);
vkCmdDispatch(cmdBuf, 1, 1, 1); // one pixel
// Wait for result
VkBufferMemoryBarrier bmb{VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER};
bmb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
bmb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
bmb.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
bmb.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
bmb.buffer = m_pickResult.buffer;
bmb.size = VK_WHOLE_SIZE;
vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_DEPENDENCY_DEVICE_GROUP_BIT, 0, nullptr, 1, &bmb, 0, nullptr);
}
PickResult getResult()
{
PickResult pr;
void* mapped = m_alloc->map(m_pickResult);
memcpy(&pr, mapped, sizeof(PickResult));
m_alloc->unmap(m_pickResult);
return pr;
}
void destroy()
{
m_alloc->destroy(m_pickResult);
m_alloc->destroy(m_sbtBuffer);
vkDestroyDescriptorSetLayout(m_device, m_descSetLayout, nullptr);
vkDestroyDescriptorPool(m_device, m_descPool, nullptr);
vkDestroyPipeline(m_device, m_pipeline, nullptr);
vkDestroyPipelineLayout(m_device, m_pipelineLayout, nullptr);
m_pickResult = nvvk::Buffer();
m_descSetLayout = VK_NULL_HANDLE;
m_descSet = VK_NULL_HANDLE;
m_pipelineLayout = VK_NULL_HANDLE;
m_pipeline = VK_NULL_HANDLE;
m_descPool = VK_NULL_HANDLE;
}
private:
nvvk::Buffer m_pickResult;
nvvk::Buffer m_sbtBuffer;
nvvk::DescriptorSetBindings m_binding;
VkDescriptorPool m_descPool{VK_NULL_HANDLE};
VkDescriptorSetLayout m_descSetLayout{VK_NULL_HANDLE};
VkDescriptorSet m_descSet{VK_NULL_HANDLE};
VkPipelineLayout m_pipelineLayout{VK_NULL_HANDLE};
VkPipeline m_pipeline{VK_NULL_HANDLE};
VkPhysicalDevice m_physicalDevice{VK_NULL_HANDLE};
VkDevice m_device{VK_NULL_HANDLE};
uint32_t m_queueFamilyIndex{0};
nvvk::ResourceAllocator* m_alloc{nullptr};
nvvk::DebugUtil m_debug;
void createOutputResult()
{
nvvk::CommandPool sCmd(m_device, m_queueFamilyIndex);
VkCommandBuffer cmdBuf = sCmd.createCommandBuffer();
PickResult presult{};
m_pickResult = m_alloc->createBuffer(cmdBuf, sizeof(PickResult), &presult,
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
sCmd.submitAndWait(cmdBuf);
m_alloc->finalizeAndReleaseStaging();
NAME_VK(m_pickResult.buffer);
}
void createDescriptorSet()
{
vkDestroyDescriptorSetLayout(m_device, m_descSetLayout, nullptr);
vkDestroyDescriptorPool(m_device, m_descPool, nullptr);
m_binding.clear();
m_binding.addBinding(0, VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, 1, VK_SHADER_STAGE_COMPUTE_BIT);
m_binding.addBinding(1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT);
m_descPool = m_binding.createPool(m_device);
m_descSetLayout = m_binding.createLayout(m_device);
VkDescriptorSetAllocateInfo allocateInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
allocateInfo.descriptorPool = m_descPool;
allocateInfo.descriptorSetCount = 1;
allocateInfo.pSetLayouts = &m_descSetLayout;
vkAllocateDescriptorSets(m_device, &allocateInfo, &m_descSet);
}
void createPipeline()
{
vkDestroyPipeline(m_device, m_pipeline, nullptr);
vkDestroyPipelineLayout(m_device, m_pipelineLayout, nullptr);
VkPushConstantRange pushConstant{VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(PickInfo)};
VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
pipelineLayoutCreateInfo.setLayoutCount = 1;
pipelineLayoutCreateInfo.pSetLayouts = &m_descSetLayout;
pipelineLayoutCreateInfo.pushConstantRangeCount = 1;
pipelineLayoutCreateInfo.pPushConstantRanges = &pushConstant;
vkCreatePipelineLayout(m_device, &pipelineLayoutCreateInfo, nullptr, &m_pipelineLayout);
NAME_VK(m_pipelineLayout);
VkComputePipelineCreateInfo computePipelineCreateInfo{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
computePipelineCreateInfo.layout = m_pipelineLayout;
computePipelineCreateInfo.stage = nvvk::createShaderStageInfo(m_device, getSpirV(), VK_SHADER_STAGE_COMPUTE_BIT);
vkCreateComputePipelines(m_device, {}, 1, &computePipelineCreateInfo, nullptr, &m_pipeline);
NAME_VK(m_pipeline);
vkDestroyShaderModule(m_device, computePipelineCreateInfo.stage.module, nullptr);
}
const std::vector<uint32_t> getSpirV()
{ // glslangValidator.exe --target-env vulkan1.2 --variable-name pick
//const uint32_t pick[] =
return {0x07230203, 0x00010500, 0x0008000a, 0x00000089, 0x00000000, 0x00020011, 0x00000001, 0x00020011, 0x00001178,
0x0006000a, 0x5f565053, 0x5f52484b, 0x5f796172, 0x72657571, 0x00000079, 0x0006000b, 0x00000001, 0x4c534c47,
0x6474732e, 0x3035342e, 0x00000000, 0x0003000e, 0x00000000, 0x00000001, 0x0008000f, 0x00000005, 0x00000004,
0x6e69616d, 0x00000000, 0x0000000e, 0x00000047, 0x0000005f, 0x00060010, 0x00000004, 0x00000011, 0x00000001,
0x00000001, 0x00000001, 0x00030003, 0x00000002, 0x000001cc, 0x00060004, 0x455f4c47, 0x725f5458, 0x715f7961,
0x79726575, 0x00000000, 0x00040005, 0x00000004, 0x6e69616d, 0x00000000, 0x00050005, 0x00000009, 0x65786970,
0x6e65436c, 0x00726574, 0x00050005, 0x0000000c, 0x736e6f43, 0x746e6174, 0x00000073, 0x00070006, 0x0000000c,
0x00000000, 0x65646f6d, 0x6569566c, 0x766e4977, 0x00000000, 0x00070006, 0x0000000c, 0x00000001, 0x73726570,
0x74636570, 0x49657669, 0x0000766e, 0x00050006, 0x0000000c, 0x00000002, 0x6b636970, 0x00000058, 0x00050006,
0x0000000c, 0x00000003, 0x6b636970, 0x00000059, 0x00030005, 0x0000000e, 0x00000000, 0x00030005, 0x00000018,
0x00000064, 0x00040005, 0x00000020, 0x6769726f, 0x00006e69, 0x00040005, 0x00000028, 0x67726174, 0x00007465,
0x00050005, 0x00000036, 0x65726964, 0x6f697463, 0x0000006e, 0x00050005, 0x00000044, 0x51796172, 0x79726575,
0x00000000, 0x00050005, 0x00000047, 0x4c706f74, 0x6c657665, 0x00005341, 0x00030005, 0x00000058, 0x00746968,
0x00050005, 0x0000005c, 0x6b636950, 0x75736552, 0x0000746c, 0x00070006, 0x0000005c, 0x00000000, 0x6c726f77,
0x79615264, 0x6769724f, 0x00006e69, 0x00080006, 0x0000005c, 0x00000001, 0x6c726f77, 0x79615264, 0x65726944,
0x6f697463, 0x0000006e, 0x00050006, 0x0000005c, 0x00000002, 0x54746968, 0x00000000, 0x00060006, 0x0000005c,
0x00000003, 0x6d697270, 0x76697469, 0x00444965, 0x00060006, 0x0000005c, 0x00000004, 0x74736e69, 0x65636e61,
0x00004449, 0x00080006, 0x0000005c, 0x00000005, 0x74736e69, 0x65636e61, 0x74737543, 0x6e496d6f, 0x00786564,
0x00060006, 0x0000005c, 0x00000006, 0x79726162, 0x726f6f43, 0x00000064, 0x00050005, 0x0000005d, 0x7365725f,
0x50746c75, 0x006b6369, 0x00060006, 0x0000005d, 0x00000000, 0x75736572, 0x6950746c, 0x00006b63, 0x00030005,
0x0000005f, 0x00000000, 0x00040005, 0x00000079, 0x79726162, 0x00000000, 0x00040048, 0x0000000c, 0x00000000,
0x00000005, 0x00050048, 0x0000000c, 0x00000000, 0x00000023, 0x00000000, 0x00050048, 0x0000000c, 0x00000000,
0x00000007, 0x00000010, 0x00040048, 0x0000000c, 0x00000001, 0x00000005, 0x00050048, 0x0000000c, 0x00000001,
0x00000023, 0x00000040, 0x00050048, 0x0000000c, 0x00000001, 0x00000007, 0x00000010, 0x00050048, 0x0000000c,
0x00000002, 0x00000023, 0x00000080, 0x00050048, 0x0000000c, 0x00000003, 0x00000023, 0x00000084, 0x00030047,
0x0000000c, 0x00000002, 0x00040047, 0x00000047, 0x00000022, 0x00000000, 0x00040047, 0x00000047, 0x00000021,
0x00000000, 0x00050048, 0x0000005c, 0x00000000, 0x00000023, 0x00000000, 0x00050048, 0x0000005c, 0x00000001,
0x00000023, 0x00000010, 0x00050048, 0x0000005c, 0x00000002, 0x00000023, 0x00000020, 0x00050048, 0x0000005c,
0x00000003, 0x00000023, 0x00000024, 0x00050048, 0x0000005c, 0x00000004, 0x00000023, 0x00000028, 0x00050048,
0x0000005c, 0x00000005, 0x00000023, 0x0000002c, 0x00050048, 0x0000005c, 0x00000006, 0x00000023, 0x00000030,
0x00050048, 0x0000005d, 0x00000000, 0x00000023, 0x00000000, 0x00030047, 0x0000005d, 0x00000002, 0x00040047,
0x0000005f, 0x00000022, 0x00000000, 0x00040047, 0x0000005f, 0x00000021, 0x00000001, 0x00020013, 0x00000002,
0x00030021, 0x00000003, 0x00000002, 0x00030016, 0x00000006, 0x00000020, 0x00040017, 0x00000007, 0x00000006,
0x00000002, 0x00040020, 0x00000008, 0x00000007, 0x00000007, 0x00040017, 0x0000000a, 0x00000006, 0x00000004,
0x00040018, 0x0000000b, 0x0000000a, 0x00000004, 0x0006001e, 0x0000000c, 0x0000000b, 0x0000000b, 0x00000006,
0x00000006, 0x00040020, 0x0000000d, 0x00000009, 0x0000000c, 0x0004003b, 0x0000000d, 0x0000000e, 0x00000009,
0x00040015, 0x0000000f, 0x00000020, 0x00000001, 0x0004002b, 0x0000000f, 0x00000010, 0x00000002, 0x00040020,
0x00000011, 0x00000009, 0x00000006, 0x0004002b, 0x0000000f, 0x00000014, 0x00000003, 0x0004002b, 0x00000006,
0x0000001a, 0x40000000, 0x0004002b, 0x00000006, 0x0000001c, 0x3f800000, 0x00040020, 0x0000001f, 0x00000007,
0x0000000a, 0x0004002b, 0x0000000f, 0x00000021, 0x00000000, 0x00040020, 0x00000022, 0x00000009, 0x0000000b,
0x0004002b, 0x00000006, 0x00000025, 0x00000000, 0x0007002c, 0x0000000a, 0x00000026, 0x00000025, 0x00000025,
0x00000025, 0x0000001c, 0x0004002b, 0x0000000f, 0x00000029, 0x00000001, 0x00040015, 0x0000002c, 0x00000020,
0x00000000, 0x0004002b, 0x0000002c, 0x0000002d, 0x00000000, 0x00040020, 0x0000002e, 0x00000007, 0x00000006,
0x0004002b, 0x0000002c, 0x00000031, 0x00000001, 0x00040017, 0x00000039, 0x00000006, 0x00000003, 0x00021178,
0x00000042, 0x00040020, 0x00000043, 0x00000007, 0x00000042, 0x000214dd, 0x00000045, 0x00040020, 0x00000046,
0x00000000, 0x00000045, 0x0004003b, 0x00000046, 0x00000047, 0x00000000, 0x0004002b, 0x0000002c, 0x00000049,
0x000000ff, 0x0004002b, 0x00000006, 0x0000004c, 0x3727c5ac, 0x0004002b, 0x00000006, 0x0000004f, 0x749dc5ae,
0x00020014, 0x00000055, 0x00040020, 0x00000057, 0x00000007, 0x00000055, 0x00030029, 0x00000055, 0x00000059,
0x0009001e, 0x0000005c, 0x0000000a, 0x0000000a, 0x00000006, 0x0000000f, 0x0000000f, 0x0000000f, 0x00000039,
0x0003001e, 0x0000005d, 0x0000005c, 0x00040020, 0x0000005e, 0x0000000c, 0x0000005d, 0x0004003b, 0x0000005e,
0x0000005f, 0x0000000c, 0x00040020, 0x00000061, 0x0000000c, 0x0000000a, 0x00040020, 0x00000066, 0x0000000c,
0x00000006, 0x00040020, 0x00000069, 0x0000000c, 0x0000000f, 0x0004002b, 0x0000000f, 0x0000006b, 0x00000004,
0x00040020, 0x0000006d, 0x00000007, 0x0000000f, 0x0004002b, 0x0000000f, 0x00000073, 0xffffffff, 0x0004002b,
0x0000000f, 0x00000076, 0x00000005, 0x0004002b, 0x0000000f, 0x0000007b, 0x00000006, 0x00040020, 0x00000087,
0x0000000c, 0x00000039, 0x00050036, 0x00000002, 0x00000004, 0x00000000, 0x00000003, 0x000200f8, 0x00000005,
0x0004003b, 0x00000008, 0x00000009, 0x00000007, 0x0004003b, 0x00000008, 0x00000018, 0x00000007, 0x0004003b,
0x0000001f, 0x00000020, 0x00000007, 0x0004003b, 0x0000001f, 0x00000028, 0x00000007, 0x0004003b, 0x0000001f,
0x00000036, 0x00000007, 0x0004003b, 0x00000043, 0x00000044, 0x00000007, 0x0004003b, 0x00000057, 0x00000058,
0x00000007, 0x0004003b, 0x0000006d, 0x0000006e, 0x00000007, 0x0004003b, 0x00000008, 0x00000079, 0x00000007,
0x00050041, 0x00000011, 0x00000012, 0x0000000e, 0x00000010, 0x0004003d, 0x00000006, 0x00000013, 0x00000012,
0x00050041, 0x00000011, 0x00000015, 0x0000000e, 0x00000014, 0x0004003d, 0x00000006, 0x00000016, 0x00000015,
0x00050050, 0x00000007, 0x00000017, 0x00000013, 0x00000016, 0x0003003e, 0x00000009, 0x00000017, 0x0004003d,
0x00000007, 0x00000019, 0x00000009, 0x0005008e, 0x00000007, 0x0000001b, 0x00000019, 0x0000001a, 0x00050050,
0x00000007, 0x0000001d, 0x0000001c, 0x0000001c, 0x00050083, 0x00000007, 0x0000001e, 0x0000001b, 0x0000001d,
0x0003003e, 0x00000018, 0x0000001e, 0x00050041, 0x00000022, 0x00000023, 0x0000000e, 0x00000021, 0x0004003d,
0x0000000b, 0x00000024, 0x00000023, 0x00050091, 0x0000000a, 0x00000027, 0x00000024, 0x00000026, 0x0003003e,
0x00000020, 0x00000027, 0x00050041, 0x00000022, 0x0000002a, 0x0000000e, 0x00000029, 0x0004003d, 0x0000000b,
0x0000002b, 0x0000002a, 0x00050041, 0x0000002e, 0x0000002f, 0x00000018, 0x0000002d, 0x0004003d, 0x00000006,
0x00000030, 0x0000002f, 0x00050041, 0x0000002e, 0x00000032, 0x00000018, 0x00000031, 0x0004003d, 0x00000006,
0x00000033, 0x00000032, 0x00070050, 0x0000000a, 0x00000034, 0x00000030, 0x00000033, 0x0000001c, 0x0000001c,
0x00050091, 0x0000000a, 0x00000035, 0x0000002b, 0x00000034, 0x0003003e, 0x00000028, 0x00000035, 0x00050041,
0x00000022, 0x00000037, 0x0000000e, 0x00000021, 0x0004003d, 0x0000000b, 0x00000038, 0x00000037, 0x0004003d,
0x0000000a, 0x0000003a, 0x00000028, 0x0008004f, 0x00000039, 0x0000003b, 0x0000003a, 0x0000003a, 0x00000000,
0x00000001, 0x00000002, 0x0006000c, 0x00000039, 0x0000003c, 0x00000001, 0x00000045, 0x0000003b, 0x00050051,
0x00000006, 0x0000003d, 0x0000003c, 0x00000000, 0x00050051, 0x00000006, 0x0000003e, 0x0000003c, 0x00000001,
0x00050051, 0x00000006, 0x0000003f, 0x0000003c, 0x00000002, 0x00070050, 0x0000000a, 0x00000040, 0x0000003d,
0x0000003e, 0x0000003f, 0x00000025, 0x00050091, 0x0000000a, 0x00000041, 0x00000038, 0x00000040, 0x0003003e,
0x00000036, 0x00000041, 0x0004003d, 0x00000045, 0x00000048, 0x00000047, 0x0004003d, 0x0000000a, 0x0000004a,
0x00000020, 0x0008004f, 0x00000039, 0x0000004b, 0x0000004a, 0x0000004a, 0x00000000, 0x00000001, 0x00000002,
0x0004003d, 0x0000000a, 0x0000004d, 0x00000036, 0x0008004f, 0x00000039, 0x0000004e, 0x0000004d, 0x0000004d,
0x00000000, 0x00000001, 0x00000002, 0x00091179, 0x00000044, 0x00000048, 0x0000002d, 0x00000049, 0x0000004b,
0x0000004c, 0x0000004e, 0x0000004f, 0x000200f9, 0x00000050, 0x000200f8, 0x00000050, 0x000400f6, 0x00000052,
0x00000053, 0x00000000, 0x000200f9, 0x00000054, 0x000200f8, 0x00000054, 0x0004117d, 0x00000055, 0x00000056,
0x00000044, 0x000400fa, 0x00000056, 0x00000051, 0x00000052, 0x000200f8, 0x00000051, 0x0002117c, 0x00000044,
0x000200f9, 0x00000053, 0x000200f8, 0x00000053, 0x000200f9, 0x00000050, 0x000200f8, 0x00000052, 0x0005117f,
0x0000002c, 0x0000005a, 0x00000044, 0x00000029, 0x000500ab, 0x00000055, 0x0000005b, 0x0000005a, 0x0000002d,
0x0003003e, 0x00000058, 0x0000005b, 0x0004003d, 0x0000000a, 0x00000060, 0x00000020, 0x00060041, 0x00000061,
0x00000062, 0x0000005f, 0x00000021, 0x00000021, 0x0003003e, 0x00000062, 0x00000060, 0x0004003d, 0x0000000a,
0x00000063, 0x00000036, 0x00060041, 0x00000061, 0x00000064, 0x0000005f, 0x00000021, 0x00000029, 0x0003003e,
0x00000064, 0x00000063, 0x00051782, 0x00000006, 0x00000065, 0x00000044, 0x00000029, 0x00060041, 0x00000066,
0x00000067, 0x0000005f, 0x00000021, 0x00000010, 0x0003003e, 0x00000067, 0x00000065, 0x00051787, 0x0000000f,
0x00000068, 0x00000044, 0x00000029, 0x00060041, 0x00000069, 0x0000006a, 0x0000005f, 0x00000021, 0x00000014,
0x0003003e, 0x0000006a, 0x00000068, 0x0004003d, 0x00000055, 0x0000006c, 0x00000058, 0x000300f7, 0x00000070,
0x00000000, 0x000400fa, 0x0000006c, 0x0000006f, 0x00000072, 0x000200f8, 0x0000006f, 0x00051784, 0x0000000f,
0x00000071, 0x00000044, 0x00000029, 0x0003003e, 0x0000006e, 0x00000071, 0x000200f9, 0x00000070, 0x000200f8,
0x00000072, 0x0003003e, 0x0000006e, 0x00000073, 0x000200f9, 0x00000070, 0x000200f8, 0x00000070, 0x0004003d,
0x0000000f, 0x00000074, 0x0000006e, 0x00060041, 0x00000069, 0x00000075, 0x0000005f, 0x00000021, 0x0000006b,
0x0003003e, 0x00000075, 0x00000074, 0x00051783, 0x0000000f, 0x00000077, 0x00000044, 0x00000029, 0x00060041,
0x00000069, 0x00000078, 0x0000005f, 0x00000021, 0x00000076, 0x0003003e, 0x00000078, 0x00000077, 0x00051788,
0x00000007, 0x0000007a, 0x00000044, 0x00000029, 0x0003003e, 0x00000079, 0x0000007a, 0x00050041, 0x0000002e,
0x0000007c, 0x00000079, 0x0000002d, 0x0004003d, 0x00000006, 0x0000007d, 0x0000007c, 0x00050083, 0x00000006,
0x0000007e, 0x0000001c, 0x0000007d, 0x00050041, 0x0000002e, 0x0000007f, 0x00000079, 0x00000031, 0x0004003d,
0x00000006, 0x00000080, 0x0000007f, 0x00050083, 0x00000006, 0x00000081, 0x0000007e, 0x00000080, 0x00050041,
0x0000002e, 0x00000082, 0x00000079, 0x0000002d, 0x0004003d, 0x00000006, 0x00000083, 0x00000082, 0x00050041,
0x0000002e, 0x00000084, 0x00000079, 0x00000031, 0x0004003d, 0x00000006, 0x00000085, 0x00000084, 0x00060050,
0x00000039, 0x00000086, 0x00000081, 0x00000083, 0x00000085, 0x00060041, 0x00000087, 0x00000088, 0x0000005f,
0x00000021, 0x0000007b, 0x0003003e, 0x00000088, 0x00000086, 0x000100fd, 0x00010038};
}
std::string getGlsl()
{
return R"(
#version 460
#extension GL_EXT_ray_query : require
// clang-format off
struct PickResult
{
vec4 worldRayOrigin;
vec4 worldRayDirection;
float hitT;
int primitiveID;
int instanceID;
int instanceCustomIndex;
vec3 baryCoord;
};
layout(set = 0, binding = 0) uniform accelerationStructureEXT topLevelAS;
layout(set = 0, binding = 1) buffer _resultPick { PickResult resultPick; };
layout(push_constant) uniform Constants
{
mat4 modelViewInv;
mat4 perspectiveInv;
float pickX; // normalized
float pickY;
};
void main()
{
const vec2 pixelCenter = vec2(pickX, pickY);
vec2 d = pixelCenter * 2.0 - 1.0;
vec4 origin = modelViewInv * vec4(0, 0, 0, 1);
vec4 target = perspectiveInv * vec4(d.x, d.y, 1, 1);
vec4 direction = modelViewInv * vec4(normalize(target.xyz), 0);
rayQueryEXT rayQuery;
rayQueryInitializeEXT(rayQuery, topLevelAS, 0, 0xff, origin.xyz, 0.00001, direction.xyz, 1e32);
while(rayQueryProceedEXT(rayQuery)) {rayQueryConfirmIntersectionEXT(rayQuery); }
bool hit = (rayQueryGetIntersectionTypeEXT(rayQuery, true) != gl_RayQueryCommittedIntersectionNoneEXT);
resultPick.worldRayOrigin = origin;
resultPick.worldRayDirection = direction;
resultPick.hitT = rayQueryGetIntersectionTEXT(rayQuery, true);
resultPick.primitiveID = rayQueryGetIntersectionPrimitiveIndexEXT(rayQuery, true);
resultPick.instanceID = hit ? rayQueryGetIntersectionInstanceIdEXT(rayQuery, true) : ~0;
resultPick.instanceCustomIndex = rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, true);
vec2 bary = rayQueryGetIntersectionBarycentricsEXT(rayQuery, true);
resultPick.baryCoord = vec3(1.0 - bary.x - bary.y, bary.x, bary.y);
}
// clang-format on
)";
}
};
} // namespace nvvk


@@ -0,0 +1,429 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "raytraceKHR_vk.hpp"
#include <cinttypes>
#include <numeric>
#include "nvh/timesampler.hpp"
//--------------------------------------------------------------------------------------------------
// Initializing the allocator and querying the raytracing properties
//
void nvvk::RaytracingBuilderKHR::setup(const VkDevice& device, nvvk::ResourceAllocator* allocator, uint32_t queueIndex)
{
m_device = device;
m_queueIndex = queueIndex;
m_debug.setup(device);
m_alloc = allocator;
}
//--------------------------------------------------------------------------------------------------
// Destroying all allocations
//
void nvvk::RaytracingBuilderKHR::destroy()
{
if(m_alloc)
{
for(auto& b : m_blas)
{
m_alloc->destroy(b);
}
m_alloc->destroy(m_tlas);
}
m_blas.clear();
}
//--------------------------------------------------------------------------------------------------
// Returning the constructed top-level acceleration structure
//
VkAccelerationStructureKHR nvvk::RaytracingBuilderKHR::getAccelerationStructure() const
{
return m_tlas.accel;
}
//--------------------------------------------------------------------------------------------------
// Return the device address of a Blas previously created.
//
VkDeviceAddress nvvk::RaytracingBuilderKHR::getBlasDeviceAddress(uint32_t blasId)
{
assert(size_t(blasId) < m_blas.size());
VkAccelerationStructureDeviceAddressInfoKHR addressInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR};
addressInfo.accelerationStructure = m_blas[blasId].accel;
return vkGetAccelerationStructureDeviceAddressKHR(m_device, &addressInfo);
}
//--------------------------------------------------------------------------------------------------
// Create all the BLAS from the vector of BlasInput
// - There will be one BLAS per entry of `input`, i.e. input.size() BLASs in total
// - The resulting BLASs (along with the inputs used to build them) are stored in m_blas
//   and can be referenced by index.
// - If `flags` contains the 'allow compaction' bit, the BLASs will be compacted
//
void nvvk::RaytracingBuilderKHR::buildBlas(const std::vector<BlasInput>& input, VkBuildAccelerationStructureFlagsKHR flags)
{
m_cmdPool.init(m_device, m_queueIndex);
auto nbBlas = static_cast<uint32_t>(input.size());
VkDeviceSize asTotalSize{0}; // Memory size of all allocated BLAS
uint32_t nbCompactions{0}; // Nb of BLAS requesting compaction
VkDeviceSize maxScratchSize{0}; // Largest scratch size
// Preparing the information for the acceleration build commands.
std::vector<BuildAccelerationStructure> buildAs(nbBlas);
for(uint32_t idx = 0; idx < nbBlas; idx++)
{
// Filling partially the VkAccelerationStructureBuildGeometryInfoKHR for querying the build sizes.
// Other information will be filled in the createBlas (see #2)
buildAs[idx].buildInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
buildAs[idx].buildInfo.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
buildAs[idx].buildInfo.flags = input[idx].flags | flags;
buildAs[idx].buildInfo.geometryCount = static_cast<uint32_t>(input[idx].asGeometry.size());
buildAs[idx].buildInfo.pGeometries = input[idx].asGeometry.data();
// Build range information
buildAs[idx].rangeInfo = input[idx].asBuildOffsetInfo.data();
// Finding sizes to create acceleration structures and scratch
std::vector<uint32_t> maxPrimCount(input[idx].asBuildOffsetInfo.size());
for(auto tt = 0; tt < input[idx].asBuildOffsetInfo.size(); tt++)
maxPrimCount[tt] = input[idx].asBuildOffsetInfo[tt].primitiveCount; // Number of primitives/triangles
vkGetAccelerationStructureBuildSizesKHR(m_device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,
&buildAs[idx].buildInfo, maxPrimCount.data(), &buildAs[idx].sizeInfo);
// Extra info
asTotalSize += buildAs[idx].sizeInfo.accelerationStructureSize;
maxScratchSize = std::max(maxScratchSize, buildAs[idx].sizeInfo.buildScratchSize);
nbCompactions += hasFlag(buildAs[idx].buildInfo.flags, VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR);
}
// Allocate the scratch buffers holding the temporary data of the acceleration structure builder
nvvk::Buffer scratchBuffer =
m_alloc->createBuffer(maxScratchSize, VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
VkBufferDeviceAddressInfo bufferInfo{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, nullptr, scratchBuffer.buffer};
VkDeviceAddress scratchAddress = vkGetBufferDeviceAddress(m_device, &bufferInfo);
NAME_VK(scratchBuffer.buffer);
// Allocate a query pool for storing the needed size for every BLAS compaction.
VkQueryPool queryPool{VK_NULL_HANDLE};
if(nbCompactions > 0) // Is compaction requested?
{
assert(nbCompactions == nbBlas); // Don't allow mix of on/off compaction
VkQueryPoolCreateInfo qpci{VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO};
qpci.queryCount = nbBlas;
qpci.queryType = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR;
vkCreateQueryPool(m_device, &qpci, nullptr, &queryPool);
}
// Batching creation/compaction of BLAS to stay within a restricted amount of memory
std::vector<uint32_t> indices; // Indices of the BLAS to create
VkDeviceSize batchSize{0};
VkDeviceSize batchLimit{256'000'000}; // 256 MB
for(uint32_t idx = 0; idx < nbBlas; idx++)
{
indices.push_back(idx);
batchSize += buildAs[idx].sizeInfo.accelerationStructureSize;
// Over the limit or last BLAS element
if(batchSize >= batchLimit || idx == nbBlas - 1)
{
VkCommandBuffer cmdBuf = m_cmdPool.createCommandBuffer();
cmdCreateBlas(cmdBuf, indices, buildAs, scratchAddress, queryPool);
m_cmdPool.submitAndWait(cmdBuf);
if(queryPool)
{
VkCommandBuffer cmdBuf = m_cmdPool.createCommandBuffer();
cmdCompactBlas(cmdBuf, indices, buildAs, queryPool);
m_cmdPool.submitAndWait(cmdBuf); // Submit command buffer and call vkQueueWaitIdle
// Destroy the non-compacted version
destroyNonCompacted(indices, buildAs);
}
// Reset
batchSize = 0;
indices.clear();
}
}
// Logging reduction
if(queryPool)
{
VkDeviceSize compactSize = std::accumulate(buildAs.begin(), buildAs.end(), 0ULL, [](const auto& a, const auto& b) {
return a + b.sizeInfo.accelerationStructureSize;
});
const float fractionSmaller = (asTotalSize == 0) ? 0 : (asTotalSize - compactSize) / float(asTotalSize);
LOGI("%sRT BLAS: reducing from: %" PRIu64 " to: %" PRIu64 " = %" PRIu64 " (%2.2f%s smaller) \n",
nvh::ScopedTimer::indent().c_str(), asTotalSize, compactSize, asTotalSize - compactSize, fractionSmaller * 100.f, "%");
}
// Keeping all the created acceleration structures
for(auto& b : buildAs)
{
m_blas.emplace_back(b.as);
}
// Clean up
vkDestroyQueryPool(m_device, queryPool, nullptr);
m_alloc->finalizeAndReleaseStaging();
m_alloc->destroy(scratchBuffer);
m_cmdPool.deinit();
}
//--------------------------------------------------------------------------------------------------
// Creating the bottom level acceleration structure for all indices of `buildAs` vector.
// The array of BuildAccelerationStructure was created in buildBlas and the vector of
// indices limits the number of BLAS to create at once. This limits the amount of
// memory needed when compacting the BLAS.
void nvvk::RaytracingBuilderKHR::cmdCreateBlas(VkCommandBuffer cmdBuf,
std::vector<uint32_t> indices,
std::vector<BuildAccelerationStructure>& buildAs,
VkDeviceAddress scratchAddress,
VkQueryPool queryPool)
{
if(queryPool) // For querying the compaction size
vkResetQueryPool(m_device, queryPool, 0, static_cast<uint32_t>(indices.size()));
uint32_t queryCnt{0};
for(const auto& idx : indices)
{
// Actual allocation of buffer and acceleration structure.
VkAccelerationStructureCreateInfoKHR createInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR};
createInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
createInfo.size = buildAs[idx].sizeInfo.accelerationStructureSize; // Will be used to allocate memory.
buildAs[idx].as = m_alloc->createAcceleration(createInfo);
NAME_IDX_VK(buildAs[idx].as.accel, idx);
NAME_IDX_VK(buildAs[idx].as.buffer.buffer, idx);
// BuildInfo #2 part
buildAs[idx].buildInfo.dstAccelerationStructure = buildAs[idx].as.accel; // Setting where the build lands
buildAs[idx].buildInfo.scratchData.deviceAddress = scratchAddress; // All build are using the same scratch buffer
// Building the bottom-level-acceleration-structure
vkCmdBuildAccelerationStructuresKHR(cmdBuf, 1, &buildAs[idx].buildInfo, &buildAs[idx].rangeInfo);
// Since the scratch buffer is reused across builds, we need a barrier to ensure one build
// is finished before starting the next one.
VkMemoryBarrier barrier{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
barrier.srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
barrier.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, 0, 1, &barrier, 0, nullptr, 0, nullptr);
if(queryPool)
{
// Add a query to find the 'real' amount of memory needed, use for compaction
vkCmdWriteAccelerationStructuresPropertiesKHR(cmdBuf, 1, &buildAs[idx].buildInfo.dstAccelerationStructure,
VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, queryPool, queryCnt++);
}
}
}
//--------------------------------------------------------------------------------------------------
// Create a new, compacted acceleration structure and buffer based on the sizes
// retrieved by the query, and replace the original acceleration structure with it.
void nvvk::RaytracingBuilderKHR::cmdCompactBlas(VkCommandBuffer cmdBuf,
std::vector<uint32_t> indices,
std::vector<BuildAccelerationStructure>& buildAs,
VkQueryPool queryPool)
{
uint32_t queryCtn{0};
// Get the compacted size result back
std::vector<VkDeviceSize> compactSizes(static_cast<uint32_t>(indices.size()));
vkGetQueryPoolResults(m_device, queryPool, 0, (uint32_t)compactSizes.size(), compactSizes.size() * sizeof(VkDeviceSize),
compactSizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_WAIT_BIT);
for(auto idx : indices)
{
buildAs[idx].cleanupAS = buildAs[idx].as; // previous AS to destroy
buildAs[idx].sizeInfo.accelerationStructureSize = compactSizes[queryCtn++]; // new reduced size
// Creating a compact version of the AS
VkAccelerationStructureCreateInfoKHR asCreateInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR};
asCreateInfo.size = buildAs[idx].sizeInfo.accelerationStructureSize;
asCreateInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
buildAs[idx].as = m_alloc->createAcceleration(asCreateInfo);
NAME_IDX_VK(buildAs[idx].as.accel, idx);
NAME_IDX_VK(buildAs[idx].as.buffer.buffer, idx);
// Copy the original BLAS to a compact version
VkCopyAccelerationStructureInfoKHR copyInfo{VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR};
copyInfo.src = buildAs[idx].buildInfo.dstAccelerationStructure;
copyInfo.dst = buildAs[idx].as.accel;
copyInfo.mode = VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR;
vkCmdCopyAccelerationStructureKHR(cmdBuf, &copyInfo);
}
}
//--------------------------------------------------------------------------------------------------
// Destroy all the non-compacted acceleration structures
//
void nvvk::RaytracingBuilderKHR::destroyNonCompacted(std::vector<uint32_t> indices, std::vector<BuildAccelerationStructure>& buildAs)
{
for(auto& i : indices)
{
m_alloc->destroy(buildAs[i].cleanupAS);
}
}
void nvvk::RaytracingBuilderKHR::buildTlas(const std::vector<VkAccelerationStructureInstanceKHR>& instances,
VkBuildAccelerationStructureFlagsKHR flags /*= VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR*/,
bool update /*= false*/)
{
buildTlas(instances, flags, update, false);
}
#ifdef VK_NV_ray_tracing_motion_blur
void nvvk::RaytracingBuilderKHR::buildTlas(const std::vector<VkAccelerationStructureMotionInstanceNV>& instances,
VkBuildAccelerationStructureFlagsKHR flags /*= VK_BUILD_ACCELERATION_STRUCTURE_MOTION_BIT_NV*/,
bool update /*= false*/)
{
buildTlas(instances, flags, update, true);
}
#endif
//--------------------------------------------------------------------------------------------------
// Low level of Tlas creation - see buildTlas
//
void nvvk::RaytracingBuilderKHR::cmdCreateTlas(VkCommandBuffer cmdBuf,
uint32_t countInstance,
VkDeviceAddress instBufferAddr,
nvvk::Buffer& scratchBuffer,
VkBuildAccelerationStructureFlagsKHR flags,
bool update,
bool motion)
{
// Wraps a device pointer to the above uploaded instances.
VkAccelerationStructureGeometryInstancesDataKHR instancesVk{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR};
instancesVk.data.deviceAddress = instBufferAddr;
// Put the above into a VkAccelerationStructureGeometryKHR. We need to put the instances struct in a union and label it as instance data.
VkAccelerationStructureGeometryKHR topASGeometry{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR};
topASGeometry.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR;
topASGeometry.geometry.instances = instancesVk;
// Find sizes
VkAccelerationStructureBuildGeometryInfoKHR buildInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR};
buildInfo.flags = flags;
buildInfo.geometryCount = 1;
buildInfo.pGeometries = &topASGeometry;
buildInfo.mode = update ? VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR : VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
buildInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
buildInfo.srcAccelerationStructure = VK_NULL_HANDLE;
VkAccelerationStructureBuildSizesInfoKHR sizeInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR};
vkGetAccelerationStructureBuildSizesKHR(m_device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &buildInfo,
&countInstance, &sizeInfo);
#ifdef VK_NV_ray_tracing_motion_blur
VkAccelerationStructureMotionInfoNV motionInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MOTION_INFO_NV};
motionInfo.maxInstances = countInstance;
#endif
// Create TLAS
if(update == false)
{
VkAccelerationStructureCreateInfoKHR createInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR};
createInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
createInfo.size = sizeInfo.accelerationStructureSize;
#ifdef VK_NV_ray_tracing_motion_blur
if(motion)
{
createInfo.createFlags = VK_ACCELERATION_STRUCTURE_CREATE_MOTION_BIT_NV;
createInfo.pNext = &motionInfo;
}
#endif
m_tlas = m_alloc->createAcceleration(createInfo);
NAME_VK(m_tlas.accel);
NAME_VK(m_tlas.buffer.buffer);
}
// Allocate the scratch memory
scratchBuffer = m_alloc->createBuffer(sizeInfo.buildScratchSize,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
VkBufferDeviceAddressInfo bufferInfo{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, nullptr, scratchBuffer.buffer};
VkDeviceAddress scratchAddress = vkGetBufferDeviceAddress(m_device, &bufferInfo);
NAME_VK(scratchBuffer.buffer);
// Update build information
buildInfo.srcAccelerationStructure = update ? m_tlas.accel : VK_NULL_HANDLE;
buildInfo.dstAccelerationStructure = m_tlas.accel;
buildInfo.scratchData.deviceAddress = scratchAddress;
// Build Offsets info: n instances
VkAccelerationStructureBuildRangeInfoKHR buildOffsetInfo{countInstance, 0, 0, 0};
const VkAccelerationStructureBuildRangeInfoKHR* pBuildOffsetInfo = &buildOffsetInfo;
// Build the TLAS
vkCmdBuildAccelerationStructuresKHR(cmdBuf, 1, &buildInfo, &pBuildOffsetInfo);
}
//--------------------------------------------------------------------------------------------------
// Refit BLAS number blasIdx from updated buffer contents.
//
void nvvk::RaytracingBuilderKHR::updateBlas(uint32_t blasIdx, BlasInput& blas, VkBuildAccelerationStructureFlagsKHR flags)
{
assert(size_t(blasIdx) < m_blas.size());
// Preparing all build information, acceleration is filled later
VkAccelerationStructureBuildGeometryInfoKHR buildInfos{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR};
buildInfos.flags = flags;
buildInfos.geometryCount = (uint32_t)blas.asGeometry.size();
buildInfos.pGeometries = blas.asGeometry.data();
buildInfos.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR; // UPDATE
buildInfos.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
buildInfos.srcAccelerationStructure = m_blas[blasIdx].accel; // UPDATE
buildInfos.dstAccelerationStructure = m_blas[blasIdx].accel;
// Find size to build on the device
std::vector<uint32_t> maxPrimCount(blas.asBuildOffsetInfo.size());
for(auto tt = 0; tt < blas.asBuildOffsetInfo.size(); tt++)
maxPrimCount[tt] = blas.asBuildOffsetInfo[tt].primitiveCount; // Number of primitives/triangles
VkAccelerationStructureBuildSizesInfoKHR sizeInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR};
vkGetAccelerationStructureBuildSizesKHR(m_device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &buildInfos,
maxPrimCount.data(), &sizeInfo);
// Allocate the scratch buffer and set the scratch info
nvvk::Buffer scratchBuffer =
m_alloc->createBuffer(sizeInfo.buildScratchSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
VkBufferDeviceAddressInfo bufferInfo{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO};
bufferInfo.buffer = scratchBuffer.buffer;
buildInfos.scratchData.deviceAddress = vkGetBufferDeviceAddress(m_device, &bufferInfo);
NAME_VK(scratchBuffer.buffer);
std::vector<const VkAccelerationStructureBuildRangeInfoKHR*> pBuildOffset(blas.asBuildOffsetInfo.size());
for(size_t i = 0; i < blas.asBuildOffsetInfo.size(); i++)
pBuildOffset[i] = &blas.asBuildOffsetInfo[i];
// Create a one-time command buffer to rebuild (refit) the BLAS on the device
nvvk::CommandPool genCmdBuf(m_device, m_queueIndex);
VkCommandBuffer cmdBuf = genCmdBuf.createCommandBuffer();
// Update the acceleration structure. The UPDATE mode together with matching
// src/dst handles makes the existing BLAS be refitted in place.
vkCmdBuildAccelerationStructuresKHR(cmdBuf, 1, &buildInfos, pBuildOffset.data());
genCmdBuf.submitAndWait(cmdBuf);
m_alloc->destroy(scratchBuffer);
}


@@ -0,0 +1,230 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
/** @DOC_START
# class nvvk::RaytracingBuilderKHR
> nvvk::RaytracingBuilderKHR provides base functionality for building ray tracing acceleration structures
This class acts as an owning container for a single top-level acceleration
structure referencing any number of bottom-level acceleration structures.
We provide functions for building (on the device) an array of BLASs and a
single TLAS from vectors of BlasInput and Instance, respectively, and
a destroy function for cleaning up the created acceleration structures.
Generally, we reference BLASs by their index in the stored BLAS array,
rather than using raw device pointers as the pure Vulkan acceleration
structure API uses.
This class does not support replacing acceleration structures once
built, but you can update the acceleration structures. For educational
purposes, this class prioritizes (relative) understandability over
performance, so vkQueueWaitIdle is implicitly used everywhere.
# Setup and Usage
```cpp
// Borrow a VkDevice and memory allocator pointer (must remain
// valid throughout our use of the ray trace builder), and
// instantiate an unspecified queue of the given family for use.
m_rtBuilder.setup(device, memoryAllocator, queueIndex);
// You create a vector of RaytracingBuilderKHR::BlasInput and then
// pass it to buildBlas.
std::vector<RaytracingBuilderKHR::BlasInput> inputs = // ...
m_rtBuilder.buildBlas(inputs);
// You create a vector of VkAccelerationStructureInstanceKHR and pass it to
// buildTlas. Each instance references one of the BLASs built above through
// its acceleration structure device address (see getBlasDeviceAddress).
std::vector<VkAccelerationStructureInstanceKHR> instances = // ...
m_rtBuilder.buildTlas(instances);
// Retrieve the handle to the acceleration structure.
const VkAccelerationStructureKHR tlas = m_rtBuilder.getAccelerationStructure();
```
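For reference, here is a minimal sketch of filling one BlasInput for a single
triangle-mesh geometry. The buffer device addresses, vertex/index counts and the
stride below are assumptions of this example, not members of this class:
```cpp
VkAccelerationStructureGeometryTrianglesDataKHR triangles{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR};
triangles.vertexFormat             = VK_FORMAT_R32G32B32_SFLOAT;
triangles.vertexData.deviceAddress = vertexBufferAddress;  // assumed VkDeviceAddress of the vertex buffer
triangles.vertexStride             = 3 * sizeof(float);
triangles.maxVertex                = vertexCount - 1;      // assumed number of vertices
triangles.indexType                = VK_INDEX_TYPE_UINT32;
triangles.indexData.deviceAddress  = indexBufferAddress;   // assumed VkDeviceAddress of the index buffer

VkAccelerationStructureGeometryKHR geometry{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR};
geometry.geometryType       = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
geometry.flags              = VK_GEOMETRY_OPAQUE_BIT_KHR;
geometry.geometry.triangles = triangles;

VkAccelerationStructureBuildRangeInfoKHR offset{};
offset.primitiveCount = indexCount / 3;                    // assumed number of triangles

RaytracingBuilderKHR::BlasInput blasInput;
blasInput.asGeometry.emplace_back(geometry);
blasInput.asBuildOffsetInfo.emplace_back(offset);
```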
@DOC_END */
#include <mutex>
#include <vulkan/vulkan_core.h>
#if VK_KHR_acceleration_structure
#include "resourceallocator_vk.hpp"
#include "commands_vk.hpp" // this is only needed here to satisfy some samples that rely on it
#include "debug_util_vk.hpp"
#include "nvh/nvprint.hpp" // this is only needed here to satisfy some samples that rely on it
#include <glm/glm.hpp>
#include <type_traits>
namespace nvvk {
// Convert a Mat4x4 to the matrix required by acceleration structures
inline VkTransformMatrixKHR toTransformMatrixKHR(glm::mat4 matrix)
{
// VkTransformMatrixKHR uses a row-major memory layout, while glm::mat4
// uses a column-major memory layout. We transpose the matrix so we can
// memcpy the matrix's data directly.
glm::mat4 temp = glm::transpose(matrix);
VkTransformMatrixKHR out_matrix;
memcpy(&out_matrix, &temp, sizeof(VkTransformMatrixKHR));
return out_matrix;
}
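// As an illustration only (not part of this header), a TLAS instance entry is
// typically filled like this; `modelMatrix` and `blasAddress` are assumed
// application-side values:
//
//   VkAccelerationStructureInstanceKHR inst{};
//   inst.transform                      = toTransformMatrixKHR(modelMatrix);
//   inst.instanceCustomIndex            = 0;
//   inst.mask                           = 0xFF;
//   inst.flags                          = VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR;
//   inst.accelerationStructureReference = blasAddress;  // e.g. from getBlasDeviceAddress()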
// Ray tracing BLAS and TLAS builder
class RaytracingBuilderKHR
{
public:
// Inputs used to build Bottom-level acceleration structure.
// You manage the lifetime of the buffer(s) referenced by the VkAccelerationStructureGeometryKHRs within.
// In particular, you must make sure they are still valid and not being modified when the BLAS is built or updated.
struct BlasInput
{
// Data used to build acceleration structure geometry
std::vector<VkAccelerationStructureGeometryKHR> asGeometry;
std::vector<VkAccelerationStructureBuildRangeInfoKHR> asBuildOffsetInfo;
VkBuildAccelerationStructureFlagsKHR flags{0};
};
// Initializing the allocator and querying the raytracing properties
void setup(const VkDevice& device, nvvk::ResourceAllocator* allocator, uint32_t queueIndex);
// Destroying all allocations
void destroy();
// Returning the constructed top-level acceleration structure
VkAccelerationStructureKHR getAccelerationStructure() const;
// Return the acceleration structure device address of the BLAS with the given index
VkDeviceAddress getBlasDeviceAddress(uint32_t blasId);
// Create all the BLAS from the vector of BlasInput
void buildBlas(const std::vector<BlasInput>& input,
VkBuildAccelerationStructureFlagsKHR flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR);
// Refit BLAS number blasIdx from updated buffer contents.
void updateBlas(uint32_t blasIdx, BlasInput& blas, VkBuildAccelerationStructureFlagsKHR flags);
// Build TLAS for static acceleration structures
void buildTlas(const std::vector<VkAccelerationStructureInstanceKHR>& instances,
VkBuildAccelerationStructureFlagsKHR flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR,
bool update = false);
#ifdef VK_NV_ray_tracing_motion_blur
// Build TLAS for mix of motion and static acceleration structures
void buildTlas(const std::vector<VkAccelerationStructureMotionInstanceNV>& instances,
VkBuildAccelerationStructureFlagsKHR flags = VK_BUILD_ACCELERATION_STRUCTURE_MOTION_BIT_NV,
bool update = false);
#endif
// Build TLAS from an array of VkAccelerationStructureInstanceKHR
// - Use motion=true with VkAccelerationStructureMotionInstanceNV
// - The resulting TLAS will be stored in m_tlas
// - Set update=true to rebuild the TLAS with updated matrices; the build flags must then include the 'allow update' bit
template <class T>
void buildTlas(const std::vector<T>& instances,
VkBuildAccelerationStructureFlagsKHR flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR,
bool update = false,
bool motion = false)
{
// Cannot call buildTlas twice except to update.
assert(m_tlas.accel == VK_NULL_HANDLE || update);
uint32_t countInstance = static_cast<uint32_t>(instances.size());
// Command buffer to create the TLAS
nvvk::CommandPool genCmdBuf(m_device, m_queueIndex);
VkCommandBuffer cmdBuf = genCmdBuf.createCommandBuffer();
// Create a buffer holding the actual instance data (matrices++) for use by the AS builder
nvvk::Buffer instancesBuffer; // Buffer of instances containing the matrices and BLAS ids
instancesBuffer = m_alloc->createBuffer(cmdBuf, instances,
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT
| VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR);
NAME_VK(instancesBuffer.buffer);
VkBufferDeviceAddressInfo bufferInfo{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, nullptr, instancesBuffer.buffer};
VkDeviceAddress instBufferAddr = vkGetBufferDeviceAddress(m_device, &bufferInfo);
// Make sure the instance buffer copy has completed before triggering the acceleration structure build
VkMemoryBarrier barrier{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
0, 1, &barrier, 0, nullptr, 0, nullptr);
// Creating the TLAS
nvvk::Buffer scratchBuffer;
cmdCreateTlas(cmdBuf, countInstance, instBufferAddr, scratchBuffer, flags, update, motion);
// Finalizing and destroying temporary data
genCmdBuf.submitAndWait(cmdBuf); // queueWaitIdle inside.
m_alloc->finalizeAndReleaseStaging();
m_alloc->destroy(scratchBuffer);
m_alloc->destroy(instancesBuffer);
}
// Creating the TLAS, called by buildTlas
void cmdCreateTlas(VkCommandBuffer cmdBuf, // Command buffer
uint32_t countInstance, // number of instances
VkDeviceAddress instBufferAddr, // Buffer address of instances
nvvk::Buffer& scratchBuffer, // Scratch buffer for construction
VkBuildAccelerationStructureFlagsKHR flags, // Build creation flag
bool update, // Update == animation
bool motion // Motion Blur
);
protected:
std::vector<nvvk::AccelKHR> m_blas; // Bottom-level acceleration structure
nvvk::AccelKHR m_tlas; // Top-level acceleration structure
// Setup
VkDevice m_device{VK_NULL_HANDLE};
uint32_t m_queueIndex{0};
nvvk::ResourceAllocator* m_alloc{nullptr};
nvvk::DebugUtil m_debug;
nvvk::CommandPool m_cmdPool;
struct BuildAccelerationStructure
{
VkAccelerationStructureBuildGeometryInfoKHR buildInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR};
VkAccelerationStructureBuildSizesInfoKHR sizeInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR};
const VkAccelerationStructureBuildRangeInfoKHR* rangeInfo;
nvvk::AccelKHR as; // result acceleration structure
nvvk::AccelKHR cleanupAS;
};
void cmdCreateBlas(VkCommandBuffer cmdBuf,
std::vector<uint32_t> indices,
std::vector<BuildAccelerationStructure>& buildAs,
VkDeviceAddress scratchAddress,
VkQueryPool queryPool);
void cmdCompactBlas(VkCommandBuffer cmdBuf, std::vector<uint32_t> indices, std::vector<BuildAccelerationStructure>& buildAs, VkQueryPool queryPool);
void destroyNonCompacted(std::vector<uint32_t> indices, std::vector<BuildAccelerationStructure>& buildAs);
bool hasFlag(VkFlags item, VkFlags flag) { return (item & flag) == flag; }
};
} // namespace nvvk
#else
#error This include requires VK_KHR_acceleration_structure support in the Vulkan SDK.
#endif


@@ -0,0 +1,347 @@
#include "raytraceNV_vk.hpp"
#include <cinttypes>
void nvvk::RaytracingBuilderNV::setup(VkDevice device, nvvk::ResourceAllocator* allocator, uint32_t queueIndex)
{
m_device = device;
m_queueIndex = queueIndex;
m_debug.setup(device);
m_alloc = allocator;
}
void nvvk::RaytracingBuilderNV::destroy()
{
for(auto& b : m_blas)
{
m_alloc->destroy(b.as);
}
m_alloc->destroy(m_tlas.as);
m_alloc->destroy(m_instBuffer);
}
VkAccelerationStructureNV nvvk::RaytracingBuilderNV::getAccelerationStructure() const
{
return m_tlas.as.accel;
}
void nvvk::RaytracingBuilderNV::buildBlas(const std::vector<std::vector<VkGeometryNV>>& geoms, VkBuildAccelerationStructureFlagsNV flags)
{
m_blas.resize(geoms.size());
VkDeviceSize maxScratch{0};
// Is compaction requested?
bool doCompaction = (flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_NV)
== VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_NV;
std::vector<VkDeviceSize> originalSizes;
originalSizes.resize(m_blas.size());
// Iterate over the groups of geometries, creating one BLAS for each group
for(size_t i = 0; i < geoms.size(); i++)
{
Blas& blas{m_blas[i]};
// Set the geometries that will be part of the BLAS
blas.asInfo.geometryCount = static_cast<uint32_t>(geoms[i].size());
blas.asInfo.pGeometries = geoms[i].data();
blas.asInfo.flags = flags;
VkAccelerationStructureCreateInfoNV createinfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_NV};
createinfo.info = blas.asInfo;
// Create an acceleration structure identifier and allocate memory to store the
// resulting structure data
blas.as = m_alloc->createAcceleration(createinfo);
m_debug.setObjectName(blas.as.accel, (std::string("Blas" + std::to_string(i)).c_str()));
// Estimate the amount of scratch memory required to build the BLAS, and update the
// size of the scratch buffer that will be allocated to sequentially build all BLASes
VkAccelerationStructureMemoryRequirementsInfoNV memoryRequirementsInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV};
memoryRequirementsInfo.type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_BUILD_SCRATCH_NV;
memoryRequirementsInfo.accelerationStructure = blas.as.accel;
VkMemoryRequirements2 reqMem{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
vkGetAccelerationStructureMemoryRequirementsNV(m_device, &memoryRequirementsInfo, &reqMem);
VkDeviceSize scratchSize = reqMem.memoryRequirements.size;
// Original size
memoryRequirementsInfo.type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_NV;
vkGetAccelerationStructureMemoryRequirementsNV(m_device, &memoryRequirementsInfo, &reqMem);
originalSizes[i] = reqMem.memoryRequirements.size;
maxScratch = std::max(maxScratch, scratchSize);
}
// Allocate the scratch buffers holding the temporary data of the acceleration structure builder
nvvk::Buffer scratchBuffer = m_alloc->createBuffer(maxScratch, VK_BUFFER_USAGE_RAY_TRACING_BIT_NV);
// Query size of compact BLAS
VkQueryPoolCreateInfo qpci{VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO};
qpci.queryCount = (uint32_t)m_blas.size();
qpci.queryType = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_NV;
VkQueryPool queryPool;
vkCreateQueryPool(m_device, &qpci, nullptr, &queryPool);
// Create a command buffer containing all the BLAS builds
nvvk::CommandPool genCmdBuf(m_device, m_queueIndex);
int ctr{0};
std::vector<VkCommandBuffer> allCmdBufs;
allCmdBufs.reserve(m_blas.size());
for(auto& blas : m_blas)
{
VkCommandBuffer cmdBuf = genCmdBuf.createCommandBuffer();
allCmdBufs.push_back(cmdBuf);
vkCmdBuildAccelerationStructureNV(cmdBuf, &blas.asInfo, nullptr, 0, VK_FALSE, blas.as.accel, nullptr, scratchBuffer.buffer, 0);
// Since the scratch buffer is reused across builds, we need a barrier to ensure one build
// is finished before starting the next one
VkMemoryBarrier barrier{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
barrier.srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_NV;
barrier.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_NV;
vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_NV,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_NV, 0, 1, &barrier, 0, nullptr, 0, nullptr);
// Query the compact size
if(doCompaction)
{
vkCmdWriteAccelerationStructuresPropertiesNV(cmdBuf, 1, &blas.as.accel,
VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_NV, queryPool, ctr++);
}
}
genCmdBuf.submitAndWait(allCmdBufs);
allCmdBufs.clear();
// Compacting all BLAS
if(doCompaction)
{
VkCommandBuffer cmdBuf = genCmdBuf.createCommandBuffer();
// Get the size result back
std::vector<VkDeviceSize> compactSizes(m_blas.size());
vkGetQueryPoolResults(m_device, queryPool, 0, (uint32_t)compactSizes.size(), compactSizes.size() * sizeof(VkDeviceSize),
compactSizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_WAIT_BIT);
// Compacting
std::vector<nvvk::AccelNV> cleanupAS(m_blas.size());
uint32_t totOriginalSize{0}, totCompactSize{0};
for(int i = 0; i < m_blas.size(); i++)
{
LOGI("Reducing %i, from %" PRIu64 " to %" PRIu64 " \n", i, originalSizes[i], compactSizes[i]);
totOriginalSize += (uint32_t)originalSizes[i];
totCompactSize += (uint32_t)compactSizes[i];
// Creating a compact version of the AS
VkAccelerationStructureInfoNV asInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV};
asInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV;
asInfo.flags = flags;
VkAccelerationStructureCreateInfoNV asCreateInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_NV};
asCreateInfo.compactedSize = compactSizes[i];
asCreateInfo.info = asInfo;
auto as = m_alloc->createAcceleration(asCreateInfo);
// Copy the original BLAS to a compact version
vkCmdCopyAccelerationStructureNV(cmdBuf, as.accel, m_blas[i].as.accel, VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_NV);
cleanupAS[i] = m_blas[i].as;
m_blas[i].as = as;
}
genCmdBuf.submitAndWait(cmdBuf);
// Destroying the previous version
for(auto as : cleanupAS)
m_alloc->destroy(as);
LOGI("------------------\n");
const float fractionSmaller = (totOriginalSize == 0) ? 0 : (totOriginalSize - totCompactSize) / float(totOriginalSize);
LOGI("Total: %d -> %d = %d (%2.2f%s smaller) \n", totOriginalSize, totCompactSize, totOriginalSize - totCompactSize,
fractionSmaller * 100.f, "%");
}
vkDestroyQueryPool(m_device, queryPool, nullptr);
m_alloc->destroy(scratchBuffer);
m_alloc->finalizeAndReleaseStaging();
}
VkGeometryInstanceNV nvvk::RaytracingBuilderNV::instanceToVkGeometryInstanceNV(const nvvk::RaytracingBuilderNV::Instance& instance)
{
Blas& blas{m_blas[instance.blasId]};
// For each BLAS, fetch the acceleration structure handle that will allow the builder to
// directly access it from the device
uint64_t asHandle = 0;
vkGetAccelerationStructureHandleNV(m_device, blas.as.accel, sizeof(uint64_t), &asHandle);
VkGeometryInstanceNV gInst{};
// The matrices for the instance transforms are row-major, instead of column-major in the
// rest of the application
glm::mat4 transp = glm::transpose(instance.transform);
// The gInst.transform value only contains 12 values, corresponding to a 4x3 matrix, hence
// saving the last row that is anyway always (0,0,0,1). Since the matrix is row-major,
// we simply copy the first 12 values of the original 4x4 matrix
memcpy(gInst.transform, &transp, sizeof(gInst.transform));
gInst.instanceId = instance.instanceId;
gInst.mask = instance.mask;
gInst.hitGroupId = instance.hitGroupId;
gInst.flags = static_cast<uint32_t>(instance.flags);
gInst.accelerationStructureHandle = asHandle;
return gInst;
}
void nvvk::RaytracingBuilderNV::buildTlas(const std::vector<nvvk::RaytracingBuilderNV::Instance>& instances,
VkBuildAccelerationStructureFlagsNV flags)
{
// Set the instance count required to determine how much memory the TLAS will use
m_tlas.asInfo.instanceCount = static_cast<uint32_t>(instances.size());
m_tlas.asInfo.flags = flags;
VkAccelerationStructureCreateInfoNV accelerationStructureInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_NV};
accelerationStructureInfo.info = m_tlas.asInfo;
// Create the acceleration structure object and allocate the memory required to hold the TLAS data
m_tlas.as = m_alloc->createAcceleration(accelerationStructureInfo);
m_debug.setObjectName(m_tlas.as.accel, "Tlas");
// Compute the amount of scratch memory required by the acceleration structure builder
VkAccelerationStructureMemoryRequirementsInfoNV memoryRequirementsInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV};
memoryRequirementsInfo.type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_BUILD_SCRATCH_NV;
memoryRequirementsInfo.accelerationStructure = m_tlas.as.accel;
VkMemoryRequirements2 reqMem{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
vkGetAccelerationStructureMemoryRequirementsNV(m_device, &memoryRequirementsInfo, &reqMem);
VkDeviceSize scratchSize = reqMem.memoryRequirements.size;
// Allocate the scratch memory
nvvk::Buffer scratchBuffer = m_alloc->createBuffer(scratchSize, VK_BUFFER_USAGE_RAY_TRACING_BIT_NV);
// For each instance, build the corresponding instance descriptor
std::vector<VkGeometryInstanceNV> geometryInstances;
geometryInstances.reserve(instances.size());
for(const auto& inst : instances)
{
geometryInstances.push_back(instanceToVkGeometryInstanceNV(inst));
}
// Building the TLAS
nvvk::CommandPool genCmdBuf(m_device, m_queueIndex);
VkCommandBuffer cmdBuf = genCmdBuf.createCommandBuffer();
// Allocate the instance buffer and copy its contents from host to device memory
m_instBuffer = m_alloc->createBuffer(cmdBuf, geometryInstances, VK_BUFFER_USAGE_RAY_TRACING_BIT_NV);
m_debug.setObjectName(m_instBuffer.buffer, "TLASInstances");
// Make sure the instance buffer copy has completed before triggering the
// acceleration structure build
VkMemoryBarrier barrier{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_NV, 0,
1, &barrier, 0, nullptr, 0, nullptr);
// Build the TLAS
vkCmdBuildAccelerationStructureNV(cmdBuf, &m_tlas.asInfo, m_instBuffer.buffer, 0, VK_FALSE, m_tlas.as.accel, nullptr,
scratchBuffer.buffer, 0);
genCmdBuf.submitAndWait(cmdBuf);
m_alloc->finalizeAndReleaseStaging();
m_alloc->destroy(scratchBuffer);
}
void nvvk::RaytracingBuilderNV::updateTlasMatrices(const std::vector<nvvk::RaytracingBuilderNV::Instance>& instances)
{
VkDeviceSize bufferSize = instances.size() * sizeof(VkGeometryInstanceNV);
// Create a staging buffer on the host to upload the new instance data
nvvk::Buffer stagingBuffer = m_alloc->createBuffer(bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
#if defined(NVVK_ALLOC_VMA)
VmaMemoryUsage::VMA_MEMORY_USAGE_CPU_TO_GPU
#else
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
#endif
);
// Copy the instance data into the staging buffer
auto* gInst = reinterpret_cast<VkGeometryInstanceNV*>(m_alloc->map(stagingBuffer));
for(int i = 0; i < instances.size(); i++)
{
gInst[i] = instanceToVkGeometryInstanceNV(instances[i]);
}
m_alloc->unmap(stagingBuffer);
// Compute the amount of scratch memory required by the AS builder to update the TLAS
VkAccelerationStructureMemoryRequirementsInfoNV memoryRequirementsInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV};
memoryRequirementsInfo.type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_UPDATE_SCRATCH_NV;
memoryRequirementsInfo.accelerationStructure = m_tlas.as.accel;
VkMemoryRequirements2 reqMem{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
vkGetAccelerationStructureMemoryRequirementsNV(m_device, &memoryRequirementsInfo, &reqMem);
VkDeviceSize scratchSize = reqMem.memoryRequirements.size;
// Allocate the scratch buffer
nvvk::Buffer scratchBuffer = m_alloc->createBuffer(scratchSize, VK_BUFFER_USAGE_RAY_TRACING_BIT_NV);
// Update the instance buffer on the device side and build the TLAS
nvvk::CommandPool genCmdBuf(m_device, m_queueIndex);
VkCommandBuffer cmdBuf = genCmdBuf.createCommandBuffer();
VkBufferCopy region{0, 0, bufferSize};
vkCmdCopyBuffer(cmdBuf, stagingBuffer.buffer, m_instBuffer.buffer, 1, &region);
// Make sure the instance buffer copy has completed before triggering the
// acceleration structure build
VkMemoryBarrier barrier{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_NV, 0,
1, &barrier, 0, nullptr, 0, nullptr);
// Update the acceleration structure. Note the VK_TRUE parameter to trigger the update,
// and the existing TLAS being passed and updated in place
vkCmdBuildAccelerationStructureNV(cmdBuf, &m_tlas.asInfo, m_instBuffer.buffer, 0, VK_TRUE, m_tlas.as.accel,
m_tlas.as.accel, scratchBuffer.buffer, 0);
genCmdBuf.submitAndWait(cmdBuf);
m_alloc->destroy(scratchBuffer);
m_alloc->destroy(stagingBuffer);
}
void nvvk::RaytracingBuilderNV::updateBlas(uint32_t blasIdx)
{
Blas& blas = m_blas[blasIdx];
// Compute the amount of scratch memory required by the AS builder to update the BLAS
VkAccelerationStructureMemoryRequirementsInfoNV memoryRequirementsInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV};
memoryRequirementsInfo.type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_UPDATE_SCRATCH_NV;
memoryRequirementsInfo.accelerationStructure = blas.as.accel;
VkMemoryRequirements2 reqMem{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
vkGetAccelerationStructureMemoryRequirementsNV(m_device, &memoryRequirementsInfo, &reqMem);
VkDeviceSize scratchSize = reqMem.memoryRequirements.size;
// Allocate the scratch buffer
nvvk::Buffer scratchBuffer = m_alloc->createBuffer(scratchSize, VK_BUFFER_USAGE_RAY_TRACING_BIT_NV);
// Create a one-time command buffer to rebuild (refit) the BLAS on the device
nvvk::CommandPool genCmdBuf(m_device, m_queueIndex);
VkCommandBuffer cmdBuf = genCmdBuf.createCommandBuffer();
// Update the acceleration structure. Note the VK_TRUE parameter to trigger the update,
// and the existing BLAS being passed and updated in place
vkCmdBuildAccelerationStructureNV(cmdBuf, &blas.asInfo, nullptr, 0, VK_TRUE, blas.as.accel, blas.as.accel,
scratchBuffer.buffer, 0);
genCmdBuf.submitAndWait(cmdBuf);
m_alloc->destroy(scratchBuffer);
}


@@ -0,0 +1,176 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
/* @DOC_START
# class nvvk::RaytracingBuilderNV
> nvvk::RaytracingBuilderNV provides base functionality for building ray tracing acceleration structures
This class does not implement everything needed for ray tracing, but it
helps creating the BLAS and TLAS, which can then be used by different
ray tracing pipelines.
# Setup and Usage
```cpp
m_rtBuilder.setup(device, memoryAllocator, queueIndex);
// Create array of VkGeometryNV
m_rtBuilder.buildBlas(allBlas);
// Create array of RaytracingBuilderNV::Instance
m_rtBuilder.buildTlas(instances);
// Retrieve the acceleration structure
const VkAccelerationStructureNV tlas = m_rtBuilder.getAccelerationStructure();
```
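For reference, a minimal sketch of filling one VkGeometryNV for an indexed
triangle mesh; the buffers, counts and the stride are assumptions of this example:
```cpp
VkGeometryNV geometry{VK_STRUCTURE_TYPE_GEOMETRY_NV};
geometry.geometryType                    = VK_GEOMETRY_TYPE_TRIANGLES_NV;
geometry.geometry.triangles.sType        = VK_STRUCTURE_TYPE_GEOMETRY_TRIANGLES_NV;
geometry.geometry.triangles.vertexData   = vertexBuffer;       // assumed VkBuffer
geometry.geometry.triangles.vertexCount  = vertexCount;        // assumed vertex count
geometry.geometry.triangles.vertexStride = 3 * sizeof(float);
geometry.geometry.triangles.vertexFormat = VK_FORMAT_R32G32B32_SFLOAT;
geometry.geometry.triangles.indexData    = indexBuffer;        // assumed VkBuffer
geometry.geometry.triangles.indexCount   = indexCount;         // assumed index count
geometry.geometry.triangles.indexType    = VK_INDEX_TYPE_UINT32;
geometry.geometry.aabbs.sType            = VK_STRUCTURE_TYPE_GEOMETRY_AABB_NV;
geometry.flags                           = VK_GEOMETRY_OPAQUE_BIT_NV;

std::vector<std::vector<VkGeometryNV>> allBlas{{geometry}};    // one BLAS with one geometry
```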
@DOC_END */
#include <mutex>
#include <vulkan/vulkan_core.h>
#if VK_NV_ray_tracing
#include "resourceallocator_vk.hpp"
#include "commands_vk.hpp" // this is only needed here to satisfy some samples that rely on it
#include "debug_util_vk.hpp"
#include "nvh/nvprint.hpp" // this is only needed here to satisfy some samples that rely on it
#include <glm/glm.hpp>
// See https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/chap33.html#acceleration-structure
struct VkGeometryInstanceNV
{
/// Transform matrix, containing only the top 3 rows
float transform[12];
/// Instance index
uint32_t instanceId : 24;
/// Visibility mask
uint32_t mask : 8;
/// Index of the hit group which will be invoked when a ray hits the instance
uint32_t hitGroupId : 24;
/// Instance flags, such as culling
uint32_t flags : 8;
/// Opaque handle of the bottom-level acceleration structure
uint64_t accelerationStructureHandle;
};
namespace nvvk {
class RaytracingBuilderNV
{
public:
RaytracingBuilderNV(RaytracingBuilderNV const&) = delete;
RaytracingBuilderNV& operator=(RaytracingBuilderNV const&) = delete;
RaytracingBuilderNV() = default;
//--------------------------------------------------------------------------------------------------
// Initializing the allocator and querying the raytracing properties
//
void setup(VkDevice device, nvvk::ResourceAllocator* allocator, uint32_t queueIndex);
// This is an instance of a BLAS
struct Instance
{
uint32_t blasId{0}; // Index of the BLAS in m_blas
uint32_t instanceId{0}; // Instance Index (gl_InstanceID)
uint32_t hitGroupId{0}; // Hit group index in the SBT
uint32_t mask{0xFF}; // Visibility mask, will be AND-ed with ray mask
VkGeometryInstanceFlagsNV flags = VK_GEOMETRY_INSTANCE_TRIANGLE_CULL_DISABLE_BIT_NV;
glm::mat4 transform{glm::mat4(1)}; // Identity
};
//--------------------------------------------------------------------------------------------------
// Destroying all allocations
//
void destroy();
// Returning the constructed top-level acceleration structure
VkAccelerationStructureNV getAccelerationStructure() const;
//--------------------------------------------------------------------------------------------------
// Create all the BLAS from the vector of vectors of VkGeometryNV
// - There will be one BLAS per vector of VkGeometryNV, i.e. as many BLASs as there are items in the geoms vector
// - The resulting BLAS are stored in m_blas
//
void buildBlas(const std::vector<std::vector<VkGeometryNV>>& geoms,
VkBuildAccelerationStructureFlagsNV flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_NV);
//--------------------------------------------------------------------------------------------------
// Convert an Instance object into a VkGeometryInstanceNV
VkGeometryInstanceNV instanceToVkGeometryInstanceNV(const Instance& instance);
//--------------------------------------------------------------------------------------------------
// Creating the top-level acceleration structure from the vector of Instance
// - See struct of Instance
// - The resulting TLAS will be stored in m_tlas
//
void buildTlas(const std::vector<Instance>& instances,
VkBuildAccelerationStructureFlagsNV flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_NV);
//--------------------------------------------------------------------------------------------------
// Refit the TLAS using new instance matrices
//
void updateTlasMatrices(const std::vector<Instance>& instances);
//--------------------------------------------------------------------------------------------------
// Refit the BLAS from updated buffers
//
void updateBlas(uint32_t blasIdx);
private:
// Bottom-level acceleration structure
struct Blas
{
nvvk::AccelNV as;
VkAccelerationStructureInfoNV asInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV, nullptr,
VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV};
VkGeometryNV geometry;
};
// Top-level acceleration structure
struct Tlas
{
nvvk::AccelNV as;
VkAccelerationStructureInfoNV asInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV, nullptr,
VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV};
};
//--------------------------------------------------------------------------------------------------
// Vector containing all the BLASes built and referenced by the TLAS
std::vector<Blas> m_blas;
// Top-level acceleration structure
Tlas m_tlas;
// Instance buffer containing the matrices and BLAS ids
nvvk::Buffer m_instBuffer;
VkDevice m_device;
uint32_t m_queueIndex{0};
nvvk::ResourceAllocator* m_alloc = nullptr;
nvvk::DebugUtil m_debug;
};
} // namespace nvvk
#else
#error This include requires VK_NV_ray_tracing support in the Vulkan SDK.
#endif


@@ -0,0 +1,158 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "renderpasses_vk.hpp"
#include "error_vk.hpp"
#include <assert.h>
namespace nvvk {
VkFormat findSupportedFormat(VkPhysicalDevice physicalDevice, const std::vector<VkFormat>& candidates, VkImageTiling tiling, VkFormatFeatureFlags features)
{
for(VkFormat format : candidates)
{
VkFormatProperties props;
vkGetPhysicalDeviceFormatProperties(physicalDevice, format, &props);
if(tiling == VK_IMAGE_TILING_LINEAR && (props.linearTilingFeatures & features) == features)
{
return format;
}
if(tiling == VK_IMAGE_TILING_OPTIMAL && (props.optimalTilingFeatures & features) == features)
{
return format;
}
}
assert(0 && "failed to find supported format!");
return VK_FORMAT_UNDEFINED;
}
VkFormat findDepthFormat(VkPhysicalDevice physicalDevice)
{
return findSupportedFormat(physicalDevice,
{VK_FORMAT_X8_D24_UNORM_PACK32, VK_FORMAT_D24_UNORM_S8_UINT, VK_FORMAT_D32_SFLOAT,
VK_FORMAT_D32_SFLOAT_S8_UINT, VK_FORMAT_D16_UNORM, VK_FORMAT_D16_UNORM_S8_UINT},
VK_IMAGE_TILING_OPTIMAL, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT);
}
VkFormat findDepthStencilFormat(VkPhysicalDevice physicalDevice)
{
return findSupportedFormat(physicalDevice, {VK_FORMAT_D24_UNORM_S8_UINT, VK_FORMAT_D32_SFLOAT_S8_UINT, VK_FORMAT_D16_UNORM_S8_UINT},
VK_IMAGE_TILING_OPTIMAL, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT);
}
//////////////////////////////////////////////////////////////////////////
VkRenderPass createRenderPass(VkDevice device,
const std::vector<VkFormat>& colorAttachmentFormats,
VkFormat depthAttachmentFormat,
uint32_t subpassCount /*= 1*/,
bool clearColor /*= true*/,
bool clearDepth /*= true*/,
VkImageLayout initialLayout /*= VK_IMAGE_LAYOUT_UNDEFINED*/,
VkImageLayout finalLayout /*= VK_IMAGE_LAYOUT_PRESENT_SRC_KHR*/)
{
std::vector<VkAttachmentDescription> allAttachments;
std::vector<VkAttachmentReference> colorAttachmentRefs;
bool hasDepth = (depthAttachmentFormat != VK_FORMAT_UNDEFINED);
for(const auto& format : colorAttachmentFormats)
{
VkAttachmentDescription colorAttachment = {};
colorAttachment.format = format;
colorAttachment.samples = VK_SAMPLE_COUNT_1_BIT;
colorAttachment.loadOp = clearColor ? VK_ATTACHMENT_LOAD_OP_CLEAR :
((initialLayout == VK_IMAGE_LAYOUT_UNDEFINED) ? VK_ATTACHMENT_LOAD_OP_DONT_CARE :
VK_ATTACHMENT_LOAD_OP_LOAD);
colorAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
colorAttachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
colorAttachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
colorAttachment.initialLayout = initialLayout;
colorAttachment.finalLayout = finalLayout;
VkAttachmentReference colorAttachmentRef = {};
colorAttachmentRef.attachment = static_cast<uint32_t>(allAttachments.size());
colorAttachmentRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
allAttachments.push_back(colorAttachment);
colorAttachmentRefs.push_back(colorAttachmentRef);
}
VkAttachmentReference depthAttachmentRef = {};
if(hasDepth)
{
VkAttachmentDescription depthAttachment = {};
depthAttachment.format = depthAttachmentFormat;
depthAttachment.samples = VK_SAMPLE_COUNT_1_BIT;
depthAttachment.loadOp = clearDepth ? VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_LOAD;
depthAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
depthAttachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
depthAttachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
depthAttachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
depthAttachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
depthAttachmentRef.attachment = static_cast<uint32_t>(allAttachments.size());
depthAttachmentRef.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
allAttachments.push_back(depthAttachment);
}
std::vector<VkSubpassDescription> subpasses;
std::vector<VkSubpassDependency> subpassDependencies;
for(uint32_t i = 0; i < subpassCount; i++)
{
VkSubpassDescription subpass = {};
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.colorAttachmentCount = static_cast<uint32_t>(colorAttachmentRefs.size());
subpass.pColorAttachments = colorAttachmentRefs.data();
subpass.pDepthStencilAttachment = hasDepth ? &depthAttachmentRef : VK_NULL_HANDLE;
VkSubpassDependency dependency = {};
dependency.srcSubpass = i == 0 ? (VK_SUBPASS_EXTERNAL) : (i - 1);
dependency.dstSubpass = i;
dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dependency.srcAccessMask = 0;
dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
subpasses.push_back(subpass);
subpassDependencies.push_back(dependency);
}
VkRenderPassCreateInfo renderPassInfo{VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO};
renderPassInfo.attachmentCount = static_cast<uint32_t>(allAttachments.size());
renderPassInfo.pAttachments = allAttachments.data();
renderPassInfo.subpassCount = static_cast<uint32_t>(subpasses.size());
renderPassInfo.pSubpasses = subpasses.data();
renderPassInfo.dependencyCount = static_cast<uint32_t>(subpassDependencies.size());
renderPassInfo.pDependencies = subpassDependencies.data();
VkRenderPass renderPass;
NVVK_CHECK(vkCreateRenderPass(device, &renderPassInfo, nullptr, &renderPass));
return renderPass;
}
} // namespace nvvk

View file

@ -0,0 +1,52 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <vector>
#include <vulkan/vulkan_core.h>
namespace nvvk {
/** @DOC_START
# functions in nvvk
- findSupportedFormat : returns supported VkFormat from a list of candidates (returns first match)
- findDepthFormat : returns supported depth format (24, 32, 16-bit)
- findDepthStencilFormat : returns supported depth-stencil format (24/8, 32/8, 16/8-bit)
- createRenderPass : wrapper for vkCreateRenderPass
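A minimal usage sketch (the `device`, `physicalDevice` and swapchain format names are placeholders, not part of this header):
```cpp
// Pick a depth format the device supports, then build a single-subpass render pass
// with one color attachment and one depth attachment.
VkFormat     depthFormat = nvvk::findDepthFormat(physicalDevice);
VkRenderPass renderPass  = nvvk::createRenderPass(device,
                                                  {VK_FORMAT_B8G8R8A8_UNORM},  // color attachment formats
                                                  depthFormat,                 // depth attachment format
                                                  1,                           // subpassCount
                                                  true,                        // clearColor
                                                  true);                       // clearDepth
// ... record and submit rendering work ...
vkDestroyRenderPass(device, renderPass, nullptr);
```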
@DOC_END */
VkFormat findSupportedFormat(VkPhysicalDevice physicalDevice, const std::vector<VkFormat>& candidates, VkImageTiling tiling, VkFormatFeatureFlags features);
VkFormat findDepthFormat(VkPhysicalDevice physicalDevice);
VkFormat findDepthStencilFormat(VkPhysicalDevice physicalDevice);
//////////////////////////////////////////////////////////////////////////
VkRenderPass createRenderPass(VkDevice device,
const std::vector<VkFormat>& colorAttachmentFormats,
VkFormat depthAttachmentFormat,
uint32_t subpassCount = 1,
bool clearColor = true,
bool clearDepth = true,
VkImageLayout initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
VkImageLayout finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR);
} // namespace nvvk

View file

@ -0,0 +1,739 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "resourceallocator_vk.hpp"
#include "memallocator_dma_vk.hpp"
#include "memallocator_dedicated_vk.hpp"
#include "error_vk.hpp"
#include "images_vk.hpp"
namespace nvvk {
ResourceAllocator::ResourceAllocator(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, VkDeviceSize stagingBlockSize)
{
init(device, physicalDevice, memAlloc, stagingBlockSize);
}
ResourceAllocator::~ResourceAllocator()
{
deinit();
}
void ResourceAllocator::init(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, VkDeviceSize stagingBlockSize)
{
m_device = device;
m_physicalDevice = physicalDevice;
m_memAlloc = memAlloc;
vkGetPhysicalDeviceMemoryProperties(physicalDevice, &m_memoryProperties);
m_samplerPool.init(device);
m_staging = std::make_unique<StagingMemoryManager>(memAlloc, stagingBlockSize);
}
void ResourceAllocator::deinit()
{
m_samplerPool.deinit();
m_staging.reset();
}
Buffer ResourceAllocator::createBuffer(const VkBufferCreateInfo& info_, const VkMemoryPropertyFlags memProperties_)
{
Buffer resultBuffer;
// Create Buffer (can be overloaded)
CreateBufferEx(info_, &resultBuffer.buffer);
// Find memory requirements
VkMemoryRequirements2 memReqs{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS};
VkBufferMemoryRequirementsInfo2 bufferReqs{VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2};
memReqs.pNext = &dedicatedRegs;
bufferReqs.buffer = resultBuffer.buffer;
vkGetBufferMemoryRequirements2(m_device, &bufferReqs, &memReqs);
// Build up allocation info
MemAllocateInfo allocInfo(memReqs.memoryRequirements, memProperties_, false);
if(info_.usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT)
{
allocInfo.setAllocationFlags(VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT);
}
if(dedicatedRegs.requiresDedicatedAllocation)
{
allocInfo.setDedicatedBuffer(resultBuffer.buffer);
}
// Allocate memory
resultBuffer.memHandle = AllocateMemory(allocInfo);
if(resultBuffer.memHandle)
{
const auto memInfo = m_memAlloc->getMemoryInfo(resultBuffer.memHandle);
// Bind memory to buffer
NVVK_CHECK(vkBindBufferMemory(m_device, resultBuffer.buffer, memInfo.memory, memInfo.offset));
}
else
{
destroy(resultBuffer);
}
return resultBuffer;
}
Buffer ResourceAllocator::createBuffer(VkDeviceSize size_, VkBufferUsageFlags usage_, const VkMemoryPropertyFlags memUsage_)
{
VkBufferCreateInfo info{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
info.size = size_;
info.usage = usage_ | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
return createBuffer(info, memUsage_);
}
Buffer ResourceAllocator::createBuffer(const VkCommandBuffer& cmdBuf,
const VkDeviceSize& size_,
const void* data_,
VkBufferUsageFlags usage_,
VkMemoryPropertyFlags memProps)
{
VkBufferCreateInfo createInfoR{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
createInfoR.size = size_;
createInfoR.usage = usage_ | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
Buffer resultBuffer = createBuffer(createInfoR, memProps);
if(data_)
{
m_staging->cmdToBuffer(cmdBuf, resultBuffer.buffer, 0, size_, data_);
}
return resultBuffer;
}
Image ResourceAllocator::createImage(const VkImageCreateInfo& info_, const VkMemoryPropertyFlags memUsage_)
{
Image resultImage;
// Create image
CreateImageEx(info_, &resultImage.image);
// Find memory requirements
VkMemoryRequirements2 memReqs{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS};
VkImageMemoryRequirementsInfo2 imageReqs{VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2};
imageReqs.image = resultImage.image;
memReqs.pNext = &dedicatedRegs;
vkGetImageMemoryRequirements2(m_device, &imageReqs, &memReqs);
// Build up allocation info
MemAllocateInfo allocInfo(memReqs.memoryRequirements, memUsage_, true);
if(dedicatedRegs.requiresDedicatedAllocation)
{
allocInfo.setDedicatedImage(resultImage.image);
}
// Allocate memory
resultImage.memHandle = AllocateMemory(allocInfo);
if(resultImage.memHandle)
{
const auto memInfo = m_memAlloc->getMemoryInfo(resultImage.memHandle);
// Bind memory to image
NVVK_CHECK(vkBindImageMemory(m_device, resultImage.image, memInfo.memory, memInfo.offset));
}
else
{
destroy(resultImage);
}
return resultImage;
}
Image ResourceAllocator::createImage(const VkCommandBuffer& cmdBuf,
size_t size_,
const void* data_,
const VkImageCreateInfo& info_,
const VkImageLayout& layout_)
{
Image resultImage = createImage(info_, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
  // Copy the data to the staging buffer, then to the image
if(data_ != nullptr)
{
// Copy buffer to image
VkImageSubresourceRange subresourceRange{};
subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
subresourceRange.baseArrayLayer = 0;
subresourceRange.baseMipLevel = 0;
subresourceRange.layerCount = 1;
subresourceRange.levelCount = info_.mipLevels;
// doing these transitions per copy is not efficient, should do in bulk for many images
nvvk::cmdBarrierImageLayout(cmdBuf, resultImage.image, VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresourceRange);
VkOffset3D offset = {0};
VkImageSubresourceLayers subresource = {0};
subresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
subresource.layerCount = 1;
m_staging->cmdToImage(cmdBuf, resultImage.image, offset, info_.extent, subresource, size_, data_);
// Setting final image layout
nvvk::cmdBarrierImageLayout(cmdBuf, resultImage.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, layout_);
}
else
{
// Setting final image layout
nvvk::cmdBarrierImageLayout(cmdBuf, resultImage.image, VK_IMAGE_LAYOUT_UNDEFINED, layout_);
}
return resultImage;
}
nvvk::Texture ResourceAllocator::createTexture(const Image& image,
const VkImageViewCreateInfo& imageViewCreateInfo,
const VkSamplerCreateInfo& samplerCreateInfo)
{
Texture resultTexture = createTexture(image, imageViewCreateInfo);
resultTexture.descriptor.sampler = m_samplerPool.acquireSampler(samplerCreateInfo);
return resultTexture;
}
Texture ResourceAllocator::createTexture(const Image& image, const VkImageViewCreateInfo& imageViewCreateInfo)
{
Texture resultTexture;
resultTexture.image = image.image;
resultTexture.memHandle = image.memHandle;
resultTexture.descriptor.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
assert(imageViewCreateInfo.image == image.image);
NVVK_CHECK(vkCreateImageView(m_device, &imageViewCreateInfo, nullptr, &resultTexture.descriptor.imageView));
return resultTexture;
}
Texture ResourceAllocator::createTexture(const VkCommandBuffer& cmdBuf,
size_t size_,
const void* data_,
const VkImageCreateInfo& info_,
const VkSamplerCreateInfo& samplerCreateInfo,
const VkImageLayout& layout_,
bool isCube)
{
Image image = createImage(cmdBuf, size_, data_, info_, layout_);
VkImageViewCreateInfo viewInfo{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
viewInfo.pNext = nullptr;
viewInfo.image = image.image;
viewInfo.format = info_.format;
viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
viewInfo.subresourceRange.baseMipLevel = 0;
viewInfo.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS;
viewInfo.subresourceRange.baseArrayLayer = 0;
viewInfo.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS;
switch(info_.imageType)
{
case VK_IMAGE_TYPE_1D:
viewInfo.viewType = (info_.arrayLayers > 1 ? VK_IMAGE_VIEW_TYPE_1D_ARRAY : VK_IMAGE_VIEW_TYPE_1D);
break;
case VK_IMAGE_TYPE_2D:
viewInfo.viewType = isCube ? VK_IMAGE_VIEW_TYPE_CUBE :
(info_.arrayLayers > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D);
break;
case VK_IMAGE_TYPE_3D:
viewInfo.viewType = VK_IMAGE_VIEW_TYPE_3D;
break;
default:
assert(0);
}
Texture resultTexture = createTexture(image, viewInfo, samplerCreateInfo);
resultTexture.descriptor.imageLayout = layout_;
return resultTexture;
}
SparseImage ResourceAllocator::createSparseImage(VkImageCreateInfo info_, const VkMemoryPropertyFlags memUsage_ /*= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT*/)
{
SparseImage resultImage;
std::array<VkImage, SparseImage::s_sparseImageCount> images;
for(size_t i = 0; i < images.size(); i++)
{
if(NVVK_CHECK(vkCreateImage(m_device, &info_, nullptr, &images[i])))
{
LOGE("Could not create requested image\n");
return {};
}
}
std::vector<VkMemoryRequirements> mipTailMemRequirements =
resultImage.create(m_device, images, info_.mipLevels, info_.arrayLayers, info_.extent);
std::vector<std::pair<VkDeviceMemory, VkDeviceSize>> mipTailMemAndOffsets;
for(const auto& memReq : mipTailMemRequirements)
{
nvvk::MemAllocateInfo allocInfo(m_device, images[0], memUsage_);
allocInfo.setMemoryRequirements(memReq);
nvvk::MemHandle mipTailAllocationID = AllocateMemory(allocInfo);
nvvk::MemAllocator::MemInfo memInfo = m_memAlloc->getMemoryInfo(mipTailAllocationID);
resultImage.mipTailAllocations.push_back(mipTailAllocationID);
mipTailMemAndOffsets.push_back({memInfo.memory, memInfo.offset});
}
resultImage.bindMipTailMemory(mipTailMemAndOffsets);
resultImage.memoryProperties = memUsage_;
return resultImage;
}
void ResourceAllocator::flushSparseImage(SparseImage& sparseImage)
{
sparseImage.sparseImageMemoryBinds.clear();
sparseImage.sparseImageMemoryBinds.reserve(sparseImage.allocatedPages.size());
for(auto it : sparseImage.allocatedPages)
{
auto& page = it.second;
if(!page.hasBoundMemory())
continue;
m_memAlloc->freeMemory(page.allocation);
page.allocation = {};
page.imageMemoryBind.memory = {};
page.imageMemoryBind.memoryOffset = {};
sparseImage.sparseImageMemoryBinds.push_back(page.imageMemoryBind);
}
sparseImage.allocatedPages.clear();
sparseImage.updateSparseBindInfo();
}
// Returns true if the allocation was performed, false if it was already allocated
bool ResourceAllocator::createSparseImagePage(SparseImage& sparseImage, uint32_t pageIndex, uint32_t layer /*= 0u*/)
{
SparseImage::PageId id{layer, pageIndex};
auto it = sparseImage.allocatedPages.find(id);
// If already allocated, nothing to do
if(it != sparseImage.allocatedPages.end())
{
return false;
}
SparseImagePage page = sparseImage.createPageInfo(pageIndex, layer);
VkMemoryRequirements memReqs = sparseImage.memoryReqs;
memReqs.size = page.size;
nvvk::MemAllocateInfo allocInfo(m_device, sparseImage.images[0], sparseImage.memoryProperties);
allocInfo.setMemoryRequirements(memReqs);
  nvvk::MemHandle              allocationID = AllocateMemory(allocInfo);
nvvk::MemAllocator::MemInfo memInfo = m_memAlloc->getMemoryInfo(allocationID);
page.allocation = allocationID;
page.bindDeviceMemory(memInfo.memory, memInfo.offset);
sparseImage.allocatedPages[id] = page;
return true;
}
void ResourceAllocator::finalizeStaging(VkFence fence /*= VK_NULL_HANDLE*/)
{
m_staging->finalizeResources(fence);
}
void ResourceAllocator::releaseStaging()
{
m_staging->releaseResources();
}
void ResourceAllocator::finalizeAndReleaseStaging(VkFence fence /*= VK_NULL_HANDLE*/)
{
m_staging->finalizeResources(fence);
m_staging->releaseResources();
}
nvvk::StagingMemoryManager* ResourceAllocator::getStaging()
{
return m_staging.get();
}
const nvvk::StagingMemoryManager* ResourceAllocator::getStaging() const
{
return m_staging.get();
}
void ResourceAllocator::destroy(Buffer& b_)
{
vkDestroyBuffer(m_device, b_.buffer, nullptr);
m_memAlloc->freeMemory(b_.memHandle);
b_ = Buffer();
}
void ResourceAllocator::destroy(Image& i_)
{
vkDestroyImage(m_device, i_.image, nullptr);
m_memAlloc->freeMemory(i_.memHandle);
i_ = Image();
}
void ResourceAllocator::destroy(Texture& t_)
{
vkDestroyImageView(m_device, t_.descriptor.imageView, nullptr);
vkDestroyImage(m_device, t_.image, nullptr);
m_memAlloc->freeMemory(t_.memHandle);
if(t_.descriptor.sampler)
{
m_samplerPool.releaseSampler(t_.descriptor.sampler);
}
t_ = Texture();
}
void ResourceAllocator::destroy(nvvk::SparseImage& i_)
{
flushSparseImage(i_);
for(auto& mipTailAlloc : i_.mipTailAllocations)
{
m_memAlloc->freeMemory(mipTailAlloc);
}
i_.mipTailAllocations.clear();
i_.unbindMipTailMemory();
for(size_t i = 0; i < nvvk::SparseImage::s_sparseImageCount; i++)
vkDestroyImage(m_device, i_.images[i], nullptr);
}
bool ResourceAllocator::destroy(nvvk::SparseImage& i_, uint32_t pageIndex, uint32_t layer)
{
auto it = i_.allocatedPages.find({layer, pageIndex});
if(it == i_.allocatedPages.end())
{
return false;
}
SparseImagePage& page = it->second;
if(!page.hasBoundMemory())
return false;
m_memAlloc->freeMemory(page.allocation);
i_.allocatedPages.erase(it);
return true;
}
void* ResourceAllocator::map(const Buffer& buffer)
{
void* pData = m_memAlloc->map(buffer.memHandle);
return pData;
}
void ResourceAllocator::unmap(const Buffer& buffer)
{
m_memAlloc->unmap(buffer.memHandle);
}
void* ResourceAllocator::map(const Image& buffer)
{
void* pData = m_memAlloc->map(buffer.memHandle);
return pData;
}
void ResourceAllocator::unmap(const Image& image)
{
m_memAlloc->unmap(image.memHandle);
}
MemHandle ResourceAllocator::AllocateMemory(const MemAllocateInfo& allocateInfo)
{
return m_memAlloc->allocMemory(allocateInfo);
}
void ResourceAllocator::CreateBufferEx(const VkBufferCreateInfo& info_, VkBuffer* buffer)
{
NVVK_CHECK(vkCreateBuffer(m_device, &info_, nullptr, buffer));
}
void ResourceAllocator::CreateImageEx(const VkImageCreateInfo& info_, VkImage* image)
{
NVVK_CHECK(vkCreateImage(m_device, &info_, nullptr, image));
}
uint32_t ResourceAllocator::getMemoryType(uint32_t typeBits, const VkMemoryPropertyFlags& properties)
{
for(uint32_t i = 0; i < m_memoryProperties.memoryTypeCount; i++)
{
if(((typeBits & (1 << i)) > 0) && (m_memoryProperties.memoryTypes[i].propertyFlags & properties) == properties)
{
return i;
}
}
assert(0);
return ~0u;
}
AccelNV ResourceAllocator::createAcceleration(VkAccelerationStructureCreateInfoNV& accel_)
{
AccelNV resultAccel;
// Create the acceleration structure
NVVK_CHECK(vkCreateAccelerationStructureNV(m_device, &accel_, nullptr, &resultAccel.accel));
// Find memory requirements
VkAccelerationStructureMemoryRequirementsInfoNV accelMemInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV};
accelMemInfo.accelerationStructure = resultAccel.accel;
VkMemoryRequirements2 memReqs{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
vkGetAccelerationStructureMemoryRequirementsNV(m_device, &accelMemInfo, &memReqs);
// Allocate memory
MemAllocateInfo info(memReqs.memoryRequirements, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, false);
resultAccel.memHandle = AllocateMemory(info);
if(resultAccel.memHandle)
{
const auto memInfo = m_memAlloc->getMemoryInfo(resultAccel.memHandle);
// Bind memory with acceleration structure
VkBindAccelerationStructureMemoryInfoNV bind{VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV};
bind.accelerationStructure = resultAccel.accel;
bind.memory = memInfo.memory;
bind.memoryOffset = memInfo.offset;
NVVK_CHECK(vkBindAccelerationStructureMemoryNV(m_device, 1, &bind));
}
else
{
destroy(resultAccel);
}
return resultAccel;
}
void ResourceAllocator::destroy(AccelNV& a_)
{
vkDestroyAccelerationStructureNV(m_device, a_.accel, nullptr);
m_memAlloc->freeMemory(a_.memHandle);
a_ = AccelNV();
}
AccelKHR ResourceAllocator::createAcceleration(VkAccelerationStructureCreateInfoKHR& accel_)
{
AccelKHR resultAccel;
// Allocating the buffer to hold the acceleration structure
resultAccel.buffer = createBuffer(accel_.size, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR
| VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
// Setting the buffer
accel_.buffer = resultAccel.buffer.buffer;
// Create the acceleration structure
vkCreateAccelerationStructureKHR(m_device, &accel_, nullptr, &resultAccel.accel);
return resultAccel;
}
void ResourceAllocator::destroy(AccelKHR& a_)
{
vkDestroyAccelerationStructureKHR(m_device, a_.accel, nullptr);
destroy(a_.buffer);
a_ = AccelKHR();
}
VkSampler ResourceAllocator::acquireSampler(const VkSamplerCreateInfo& info)
{
return m_samplerPool.acquireSampler(info);
}
void ResourceAllocator::releaseSampler(VkSampler sampler)
{
m_samplerPool.releaseSampler(sampler);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ExportResourceAllocator::ExportResourceAllocator(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAllocator, VkDeviceSize stagingBlockSize)
: ResourceAllocator(device, physicalDevice, memAllocator, stagingBlockSize)
{
}
void ExportResourceAllocator::CreateBufferEx(const VkBufferCreateInfo& info_, VkBuffer* buffer)
{
VkBufferCreateInfo info = info_;
VkExternalMemoryBufferCreateInfo infoEx{VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO};
#ifdef WIN32
infoEx.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
#else
infoEx.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
#endif
info.pNext = &infoEx;
NVVK_CHECK(vkCreateBuffer(m_device, &info, nullptr, buffer));
}
void ExportResourceAllocator::CreateImageEx(const VkImageCreateInfo& info_, VkImage* image)
{
auto info = info_;
VkExternalMemoryImageCreateInfo infoEx{VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO};
#ifdef WIN32
infoEx.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
#else
infoEx.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
#endif
info.pNext = &infoEx;
NVVK_CHECK(vkCreateImage(m_device, &info, nullptr, image));
}
MemHandle ExportResourceAllocator::AllocateMemory(const MemAllocateInfo& allocateInfo)
{
MemAllocateInfo exportAllocateInfo(allocateInfo);
exportAllocateInfo.setExportable(true);
return ResourceAllocator::AllocateMemory(exportAllocateInfo);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ExportResourceAllocatorDedicated::ExportResourceAllocatorDedicated(VkDevice device,
VkPhysicalDevice physicalDevice,
VkDeviceSize stagingBlockSize /*= NVVK_DEFAULT_STAGING_BLOCKSIZE*/)
{
init(device, physicalDevice, stagingBlockSize);
}
ExportResourceAllocatorDedicated::~ExportResourceAllocatorDedicated()
{
deinit();
}
void ExportResourceAllocatorDedicated::init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize /*= NVVK_DEFAULT_STAGING_BLOCKSIZE*/)
{
m_memAlloc = std::make_unique<DedicatedMemoryAllocator>(device, physicalDevice);
ExportResourceAllocator::init(device, physicalDevice, m_memAlloc.get(), stagingBlockSize);
}
void ExportResourceAllocatorDedicated::deinit()
{
ExportResourceAllocator::deinit();
m_memAlloc.reset();
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ExplicitDeviceMaskResourceAllocator::ExplicitDeviceMaskResourceAllocator(VkDevice device,
VkPhysicalDevice physicalDevice,
MemAllocator* memAlloc,
uint32_t deviceMask)
{
init(device, physicalDevice, memAlloc, deviceMask);
}
void ExplicitDeviceMaskResourceAllocator::init(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, uint32_t deviceMask)
{
ResourceAllocator::init(device, physicalDevice, memAlloc);
m_deviceMask = deviceMask;
}
MemHandle ExplicitDeviceMaskResourceAllocator::AllocateMemory(const MemAllocateInfo& allocateInfo)
{
MemAllocateInfo deviceMaskAllocateInfo(allocateInfo);
deviceMaskAllocateInfo.setDeviceMask(m_deviceMask);
return ResourceAllocator::AllocateMemory(deviceMaskAllocateInfo);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ResourceAllocatorDma::ResourceAllocatorDma(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize, VkDeviceSize memBlockSize)
{
init(device, physicalDevice, stagingBlockSize, memBlockSize);
}
ResourceAllocatorDma::~ResourceAllocatorDma()
{
deinit();
}
void ResourceAllocatorDma::init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize, VkDeviceSize memBlockSize)
{
m_dma = std::make_unique<DeviceMemoryAllocator>(device, physicalDevice, memBlockSize);
ResourceAllocator::init(device, physicalDevice, m_dma.get(), stagingBlockSize);
}
void ResourceAllocatorDma::init(VkInstance, VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize, VkDeviceSize memBlockSize)
{
init(device, physicalDevice, stagingBlockSize, memBlockSize);
}
void ResourceAllocatorDma::deinit()
{
ResourceAllocator::deinit();
m_dma.reset();
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ResourceAllocatorDedicated::ResourceAllocatorDedicated(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize)
{
init(device, physicalDevice, stagingBlockSize);
}
ResourceAllocatorDedicated::~ResourceAllocatorDedicated()
{
deinit();
}
void ResourceAllocatorDedicated::init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize)
{
m_memAlloc = std::make_unique<DedicatedMemoryAllocator>(device, physicalDevice);
ResourceAllocator::init(device, physicalDevice, m_memAlloc.get(), stagingBlockSize);
}
void ResourceAllocatorDedicated::init(VkInstance, // unused
VkDevice device,
VkPhysicalDevice physicalDevice,
VkDeviceSize stagingBlockSize /*= NVVK_DEFAULT_STAGING_BLOCKSIZE*/)
{
init(device, physicalDevice, stagingBlockSize);
}
void ResourceAllocatorDedicated::deinit()
{
ResourceAllocator::deinit();
m_memAlloc.reset();
}
} // namespace nvvk

View file

@ -0,0 +1,467 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <vulkan/vulkan_core.h>
#include <memory>
#include <vector>
#include "memallocator_vk.hpp"
#include "samplers_vk.hpp"
#include "stagingmemorymanager_vk.hpp"
#include "sparse_image_vk.hpp"
/** @DOC_START
# class nvvk::ResourceAllocator
The goal of nvvk::ResourceAllocator is to aid creation of typical Vulkan
resources (VkBuffer, VkImage and VkAccelerationStructure).
All memory is allocated using the provided [nvvk::MemAllocator](#class-nvvkmemallocator)
and bound to the appropriate resources. The allocator contains a
[nvvk::StagingMemoryManager](#class-nvvkstagingmemorymanager) and
[nvvk::SamplerPool](#class-nvvksamplerpool) to aid this process.
ResourceAllocator separates object creation and memory allocation by delegating allocation
of memory to an object of interface type 'nvvk::MemAllocator'.
This way the ResourceAllocator can be used with different memory allocation strategies, depending on needs.
nvvk provides three implementations of MemAllocator:
* nvvk::DedicatedMemoryAllocator uses a very simple allocation scheme: one VkDeviceMemory object per allocation.
This strategy is only useful for very simple applications due to the overhead of vkAllocateMemory and
the implementation-dependent limit on the number of VkDeviceMemory allocations.
* nvvk::DMAMemoryAllocator delegates memory requests to a 'nvvk::DeviceMemoryAllocator',
as an example implementation of a suballocator
* nvvk::VMAMemoryAllocator delegates memory requests to a [Vulkan Memory Allocator](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator)
Utility wrapper structs contain the appropriate Vulkan resource and the
appropriate nvvk::MemHandle :
- nvvk::Buffer
- nvvk::Image
- nvvk::Texture contains VkImage and VkImageView as well as an
optional VkSampler stored within VkDescriptorImageInfo
- nvvk::AccelNV
- nvvk::AccelKHR
nvvk::Buffer, nvvk::Image, nvvk::Texture, nvvk::AccelKHR and nvvk::AccelNV objects can be copied
by value. They do not track lifetime of the underlying Vulkan objects and memory allocations.
The corresponding destroy() functions of nvvk::ResourceAllocator destroy created objects and
free up their memory. ResourceAllocator does not track usage of objects either. Thus, one has to
make sure that objects are no longer in use by the GPU when they get destroyed.
> Note: These classes primarily showcase the principal components that
> a Vulkan engine would most likely have.
> They are geared towards ease of use in this sample framework, and
> not optimized nor meant for production code.
```cpp
nvvk::DeviceMemoryAllocator memAllocator;
nvvk::ResourceAllocator resAllocator;
memAllocator.init(device, physicalDevice);
resAllocator.init(device, physicalDevice, &memAllocator);
...
VkCommandBuffer cmd = ... transfer queue command buffer
// creates new resources and
// implicitly triggers staging transfer copy operations into cmd
nvvk::Buffer vbo = resAllocator.createBuffer(cmd, vboSize, vboData, vboUsage);
nvvk::Buffer ibo = resAllocator.createBuffer(cmd, iboSize, iboData, iboUsage);
// use functions from staging memory manager
// here we associate the temporary staging resources with a fence
resAllocator.finalizeStaging( fence );
// submit cmd buffer with staging copy operations
vkQueueSubmit(... cmd ... fence ...)
...
// if you do async uploads you would
// trigger garbage collection somewhere per frame
resAllocator.releaseStaging();
```
Separation of memory allocation and resource creation is very flexible, but it
can be tedious to set up for simple use cases. nvvk offers three helper ResourceAllocator
derived classes which internally contain the MemAllocator object and manage its lifetime:
* [ResourceAllocatorDedicated](#class nvvk::ResourceAllocatorDedicated)
* [ResourceAllocatorDma](#class nvvk::ResourceAllocatorDma)
* [ResourceAllocatorVma](#class nvvk::ResourceAllocatorVma)
In these cases, only one object needs to be created and initialized.
ResourceAllocator can also be subclassed to specialize some of its functionality.
Examples are [ExportResourceAllocator](#class ExportResourceAllocator) and [ExplicitDeviceMaskResourceAllocator](#class ExplicitDeviceMaskResourceAllocator).
ExportResourceAllocator injects itself into the object allocation process such that
the resulting allocations can be exported or created objects may be bound to exported
memory.
ExplicitDeviceMaskResourceAllocator overrides the device mask of allocations such that
objects can be created on a specific device in a device group.
@DOC_END */
namespace nvvk {
// Objects
struct Buffer
{
VkBuffer buffer = VK_NULL_HANDLE;
MemHandle memHandle{nullptr};
};
struct Image
{
VkImage image = VK_NULL_HANDLE;
MemHandle memHandle{nullptr};
};
struct Texture
{
VkImage image = VK_NULL_HANDLE;
MemHandle memHandle{nullptr};
VkDescriptorImageInfo descriptor{};
};
struct AccelNV
{
VkAccelerationStructureNV accel = VK_NULL_HANDLE;
MemHandle memHandle{nullptr};
};
struct AccelKHR
{
VkAccelerationStructureKHR accel = VK_NULL_HANDLE;
nvvk::Buffer buffer;
};
//--------------------------------------------------------------------------------------------------
// Allocator for buffers, images and acceleration structures
//
class StagingMemoryManager;
class ResourceAllocator
{
public:
ResourceAllocator(ResourceAllocator const&) = delete;
ResourceAllocator& operator=(ResourceAllocator const&) = delete;
ResourceAllocator() = default;
ResourceAllocator(VkDevice device,
VkPhysicalDevice physicalDevice,
MemAllocator* memAllocator,
VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
  // All staging buffers must be cleared before the allocator is destroyed
virtual ~ResourceAllocator();
//--------------------------------------------------------------------------------------------------
// Initialization of the allocator
void init(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
void deinit();
MemAllocator* getMemoryAllocator() { return m_memAlloc; }
//--------------------------------------------------------------------------------------------------
// Basic buffer creation
nvvk::Buffer createBuffer(const VkBufferCreateInfo& info_, const VkMemoryPropertyFlags memUsage_ = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
//--------------------------------------------------------------------------------------------------
// Simple buffer creation
// implicitly sets VK_BUFFER_USAGE_TRANSFER_DST_BIT
nvvk::Buffer createBuffer(VkDeviceSize size_ = 0,
VkBufferUsageFlags usage_ = VkBufferUsageFlags(),
const VkMemoryPropertyFlags memUsage_ = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
//--------------------------------------------------------------------------------------------------
// Simple buffer creation with data uploaded through staging manager
// implicitly sets VK_BUFFER_USAGE_TRANSFER_DST_BIT
nvvk::Buffer createBuffer(const VkCommandBuffer& cmdBuf,
const VkDeviceSize& size_,
const void* data_,
VkBufferUsageFlags usage_,
VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
//--------------------------------------------------------------------------------------------------
// Simple buffer creation with data uploaded through staging manager
// implicitly sets VK_BUFFER_USAGE_TRANSFER_DST_BIT
template <typename T>
nvvk::Buffer createBuffer(const VkCommandBuffer& cmdBuf,
const std::vector<T>& data_,
VkBufferUsageFlags usage_,
VkMemoryPropertyFlags memProps_ = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
{
return createBuffer(cmdBuf, sizeof(T) * data_.size(), data_.data(), usage_, memProps_);
}
//--------------------------------------------------------------------------------------------------
// Basic image creation
nvvk::Image createImage(const VkImageCreateInfo& info_, const VkMemoryPropertyFlags memUsage_ = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
//--------------------------------------------------------------------------------------------------
// Create an image with data uploaded through staging manager
nvvk::Image createImage(const VkCommandBuffer& cmdBuf,
size_t size_,
const void* data_,
const VkImageCreateInfo& info_,
const VkImageLayout& layout_ = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
//--------------------------------------------------------------------------------------------------
  // Other variants with more defaults could exist, but we already have nvvk::makeImage2DViewCreateInfo();
  // the caller can always override viewCreateInfo.image.
nvvk::Texture createTexture(const Image& image, const VkImageViewCreateInfo& imageViewCreateInfo);
nvvk::Texture createTexture(const Image& image, const VkImageViewCreateInfo& imageViewCreateInfo, const VkSamplerCreateInfo& samplerCreateInfo);
//--------------------------------------------------------------------------------------------------
// shortcut that creates the image for the texture
// - creates the image
// - creates the texture part by associating image and sampler
//
nvvk::Texture createTexture(const VkCommandBuffer& cmdBuf,
size_t size_,
const void* data_,
const VkImageCreateInfo& info_,
const VkSamplerCreateInfo& samplerCreateInfo,
const VkImageLayout& layout_ = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
bool isCube = false);
nvvk::SparseImage createSparseImage(VkImageCreateInfo info_,
const VkMemoryPropertyFlags memUsage_ = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
void flushSparseImage(SparseImage& sparseImage);
bool createSparseImagePage(SparseImage& sparseImage, uint32_t pageIndex, uint32_t layer = 0u);
//--------------------------------------------------------------------------------------------------
// Create the acceleration structure
//
nvvk::AccelNV createAcceleration(VkAccelerationStructureCreateInfoNV& accel_);
//--------------------------------------------------------------------------------------------------
// Create the acceleration structure
//
nvvk::AccelKHR createAcceleration(VkAccelerationStructureCreateInfoKHR& accel_);
//--------------------------------------------------------------------------------------------------
// Acquire a sampler with the provided information (see nvvk::SamplerPool for details).
// Every acquire must have an appropriate release for appropriate internal reference counting
VkSampler acquireSampler(const VkSamplerCreateInfo& info);
void releaseSampler(VkSampler sampler);
//--------------------------------------------------------------------------------------------------
// implicit staging operations triggered by create are managed here
void finalizeStaging(VkFence fence = VK_NULL_HANDLE);
void finalizeAndReleaseStaging(VkFence fence = VK_NULL_HANDLE);
void releaseStaging();
StagingMemoryManager* getStaging();
const StagingMemoryManager* getStaging() const;
//--------------------------------------------------------------------------------------------------
// Destroy
//
void destroy(nvvk::Buffer& b_);
void destroy(nvvk::Image& i_);
void destroy(nvvk::AccelNV& a_);
void destroy(nvvk::AccelKHR& a_);
void destroy(nvvk::Texture& t_);
void destroy(nvvk::SparseImage& i_);
// Destroy a sparse image page. Returns true if that page actually was present in memory
bool destroy(nvvk::SparseImage& i_, uint32_t pageIndex, uint32_t layer = 0);
//--------------------------------------------------------------------------------------------------
// Other
//
void* map(const nvvk::Buffer& buffer);
void unmap(const nvvk::Buffer& buffer);
void* map(const nvvk::Image& image);
void unmap(const nvvk::Image& image);
VkDevice getDevice() const { return m_device; }
VkPhysicalDevice getPhysicalDevice() const { return m_physicalDevice; }
protected:
  // If necessary, these can be overridden to specialize the allocation, for instance to
  // enforce allocation of exportable memory.
virtual MemHandle AllocateMemory(const MemAllocateInfo& allocateInfo);
virtual void CreateBufferEx(const VkBufferCreateInfo& info_, VkBuffer* buffer);
virtual void CreateImageEx(const VkImageCreateInfo& info_, VkImage* image);
//--------------------------------------------------------------------------------------------------
// Finding the memory type for memory allocation
//
uint32_t getMemoryType(uint32_t typeBits, const VkMemoryPropertyFlags& properties);
VkDevice m_device{VK_NULL_HANDLE};
VkPhysicalDevice m_physicalDevice{VK_NULL_HANDLE};
VkPhysicalDeviceMemoryProperties m_memoryProperties{};
MemAllocator* m_memAlloc{nullptr};
std::unique_ptr<StagingMemoryManager> m_staging;
SamplerPool m_samplerPool;
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class DeviceMemoryAllocator;
/** @DOC_START
# class nvvk::ResourceAllocatorDma
nvvk::ResourceAllocatorDma is a convenience class owning an nvvk::DMAMemoryAllocator and nvvk::DeviceMemoryAllocator object
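A minimal sketch, assuming a valid `device` and `physicalDevice`; the allocator creates and owns its memory allocator internally:
```cpp
nvvk::ResourceAllocatorDma alloc;
alloc.init(device, physicalDevice);  // optional: stagingBlockSize, memBlockSize
nvvk::Buffer buf = alloc.createBuffer(1024, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
// ... use buf.buffer ...
alloc.destroy(buf);
alloc.deinit();
```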
@DOC_END */
class ResourceAllocatorDma : public ResourceAllocator
{
public:
ResourceAllocatorDma() = default;
ResourceAllocatorDma(VkDevice device,
VkPhysicalDevice physicalDevice,
VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE,
VkDeviceSize memBlockSize = 0);
virtual ~ResourceAllocatorDma();
void init(VkDevice device,
VkPhysicalDevice physicalDevice,
VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE,
VkDeviceSize memBlockSize = 0);
// Provided such that ResourceAllocatorDedicated, ResourceAllocatorDma and ResourceAllocatorVma all have the same interface
void init(VkInstance,
VkDevice device,
VkPhysicalDevice physicalDevice,
VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE,
VkDeviceSize memBlockSize = 0);
void deinit();
nvvk::DeviceMemoryAllocator* getDMA() { return m_dma.get(); }
const nvvk::DeviceMemoryAllocator* getDMA() const { return m_dma.get(); }
protected:
std::unique_ptr<nvvk::DeviceMemoryAllocator> m_dma;
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::ResourceAllocatorDedicated
> nvvk::ResourceAllocatorDedicated is a convenience class automatically creating and owning a DedicatedMemoryAllocator object
@DOC_END */
class ResourceAllocatorDedicated : public ResourceAllocator
{
public:
ResourceAllocatorDedicated() = default;
ResourceAllocatorDedicated(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
virtual ~ResourceAllocatorDedicated();
void init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
// Provided such that ResourceAllocatorDedicated, ResourceAllocatorDma and ResourceAllocatorVma all have the same interface
void init(VkInstance, VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
void deinit();
protected:
std::unique_ptr<MemAllocator> m_memAlloc;
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/** @DOC_START
#class nvvk::ExportResourceAllocator
ExportResourceAllocator specializes the object allocation process such that resulting memory allocations are
exportable and buffers and images can be bound to external memory.
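A short sketch; `memAllocator` stands for any already-initialized nvvk::MemAllocator (e.g. an nvvk::DeviceMemoryAllocator) and is not defined here:
```cpp
nvvk::ExportResourceAllocator exportAlloc(device, physicalDevice, &memAllocator);
// Memory backing this buffer is allocated as exportable (opaque win32 handle / fd).
nvvk::Buffer shared = exportAlloc.createBuffer(4096, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
// ... export the underlying memory through the chosen MemAllocator, then ...
exportAlloc.destroy(shared);
```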
@DOC_END */
class ExportResourceAllocator : public ResourceAllocator
{
public:
ExportResourceAllocator() = default;
ExportResourceAllocator(VkDevice device,
VkPhysicalDevice physicalDevice,
MemAllocator* memAlloc,
VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
protected:
virtual MemHandle AllocateMemory(const MemAllocateInfo& allocateInfo) override;
virtual void CreateBufferEx(const VkBufferCreateInfo& info_, VkBuffer* buffer) override;
virtual void CreateImageEx(const VkImageCreateInfo& info_, VkImage* image) override;
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::ExportResourceAllocatorDedicated
nvvk::ExportResourceAllocatorDedicated is a resource allocator that uses a DedicatedMemoryAllocator to allocate memory
and at the same time makes all allocations exportable.
@DOC_END */
class ExportResourceAllocatorDedicated : public ExportResourceAllocator
{
public:
ExportResourceAllocatorDedicated() = default;
ExportResourceAllocatorDedicated(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
virtual ~ExportResourceAllocatorDedicated() override;
void init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
void deinit();
protected:
std::unique_ptr<MemAllocator> m_memAlloc;
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::ExplicitDeviceMaskResourceAllocator
nvvk::ExplicitDeviceMaskResourceAllocator is a resource allocator that injects a specific device mask into each
allocation, making the created allocations and objects available only to the devices in the mask.
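A short sketch, assuming a device group and an existing `memAllocator`; a deviceMask of 1 restricts allocations to the first physical device:
```cpp
nvvk::ExplicitDeviceMaskResourceAllocator maskAlloc(device, physicalDevice, &memAllocator, 1u /*deviceMask*/);
nvvk::Buffer buf = maskAlloc.createBuffer(1024, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
// buf's memory is only valid on the device(s) selected by the mask
maskAlloc.destroy(buf);
```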
@DOC_END */
class ExplicitDeviceMaskResourceAllocator : public ResourceAllocator
{
public:
ExplicitDeviceMaskResourceAllocator() = default;
ExplicitDeviceMaskResourceAllocator(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, uint32_t deviceMask);
void init(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, uint32_t deviceMask);
protected:
virtual MemHandle AllocateMemory(const MemAllocateInfo& allocateInfo) override;
uint32_t m_deviceMask;
};
} // namespace nvvk

View file

@ -0,0 +1,166 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2020-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "samplers_vk.hpp"
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
void SamplerPool::deinit()
{
if(!m_device)
return;
for(auto it : m_entries)
{
if(it.sampler)
{
vkDestroySampler(m_device, it.sampler, nullptr);
}
}
m_freeIndex = ~0;
m_entries.clear();
m_samplerMap.clear();
m_stateMap.clear();
m_device = nullptr;
}
VkSampler SamplerPool::acquireSampler(const VkSamplerCreateInfo& createInfo)
{
SamplerState state;
state.createInfo = createInfo;
const Chain* ext = (const Chain*)createInfo.pNext;
while(ext)
{
switch(ext->sType)
{
case VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO:
state.reduction = *(const VkSamplerReductionModeCreateInfo*)ext;
break;
case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO:
state.ycbr = *(const VkSamplerYcbcrConversionCreateInfo*)ext;
break;
default:
assert(0 && "unsupported sampler create");
}
ext = ext->pNext;
}
// always remove pointers for comparison lookup
state.createInfo.pNext = nullptr;
state.reduction.pNext = nullptr;
state.ycbr.pNext = nullptr;
auto it = m_stateMap.find(state);
if(it == m_stateMap.end())
{
uint32_t index = 0;
if(m_freeIndex != ~0)
{
index = m_freeIndex;
m_freeIndex = m_entries[index].nextFreeIndex;
}
else
{
index = (uint32_t)m_entries.size();
m_entries.resize(m_entries.size() + 1);
}
VkSampler sampler;
VkResult result = vkCreateSampler(m_device, &createInfo, nullptr, &sampler);
assert(result == VK_SUCCESS);
m_entries[index].refCount = 1;
m_entries[index].sampler = sampler;
m_entries[index].state = state;
m_stateMap.insert({state, index});
m_samplerMap.insert({sampler, index});
return sampler;
}
else
{
m_entries[it->second].refCount++;
return m_entries[it->second].sampler;
}
}
void SamplerPool::releaseSampler(VkSampler sampler)
{
auto it = m_samplerMap.find(sampler);
assert(it != m_samplerMap.end());
uint32_t index = it->second;
Entry& entry = m_entries[index];
assert(entry.sampler == sampler);
assert(entry.refCount);
entry.refCount--;
if(!entry.refCount)
{
vkDestroySampler(m_device, sampler, nullptr);
entry.sampler = nullptr;
entry.nextFreeIndex = m_freeIndex;
m_freeIndex = index;
m_stateMap.erase(entry.state);
m_samplerMap.erase(sampler);
}
}
VkSamplerCreateInfo makeSamplerCreateInfo(VkFilter magFilter,
VkFilter minFilter,
VkSamplerAddressMode addressModeU,
VkSamplerAddressMode addressModeV,
VkSamplerAddressMode addressModeW,
VkBool32 anisotropyEnable,
float maxAnisotropy,
VkSamplerMipmapMode mipmapMode,
float minLod,
float maxLod,
float mipLodBias,
VkBool32 compareEnable,
VkCompareOp compareOp,
VkBorderColor borderColor,
VkBool32 unnormalizedCoordinates)
{
VkSamplerCreateInfo samplerInfo = {VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO};
samplerInfo.flags = 0;
samplerInfo.pNext = nullptr;
samplerInfo.magFilter = magFilter;
samplerInfo.minFilter = minFilter;
samplerInfo.mipmapMode = mipmapMode;
samplerInfo.addressModeU = addressModeU;
samplerInfo.addressModeV = addressModeV;
samplerInfo.addressModeW = addressModeW;
samplerInfo.anisotropyEnable = anisotropyEnable;
samplerInfo.maxAnisotropy = maxAnisotropy;
samplerInfo.borderColor = borderColor;
samplerInfo.unnormalizedCoordinates = unnormalizedCoordinates;
samplerInfo.compareEnable = compareEnable;
samplerInfo.compareOp = compareOp;
return samplerInfo;
}
} // namespace nvvk

View file

@ -0,0 +1,135 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2020-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <vulkan/vulkan_core.h>
#include <assert.h>
#include <float.h>
#include <functional>
#include <string.h> //memcmp
#include <unordered_map>
#include <vector>
#include "nvh/container_utils.hpp"
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::SamplerPool
This nvvk::SamplerPool class manages unique VkSampler objects. To minimize the total
number of sampler objects, this class ensures that identical configurations
return the same sampler.
Example:
```cpp
nvvk::SamplerPool pool(device);
for (auto it : textures) {
VkSamplerCreateInfo info = {...};
// acquire ensures we create the minimal subset of samplers
it.sampler = pool.acquireSampler(info);
}
// you can manage releases individually, or just use deinit/destructor of pool
for (auto it : textures) {
pool.releaseSampler(it.sampler);
}
```
- makeSamplerCreateInfo : aids for sampler creation
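A short sketch of the helper, reusing the `pool` from the example above; all parameters have defaults, only the ones shown differ from them:
```cpp
// Nearest-neighbor sampler with repeat addressing; the remaining parameters keep their defaults.
VkSamplerCreateInfo info = nvvk::makeSamplerCreateInfo(VK_FILTER_NEAREST, VK_FILTER_NEAREST,
                                                       VK_SAMPLER_ADDRESS_MODE_REPEAT,
                                                       VK_SAMPLER_ADDRESS_MODE_REPEAT,
                                                       VK_SAMPLER_ADDRESS_MODE_REPEAT);
VkSampler sampler = pool.acquireSampler(info);
// ...
pool.releaseSampler(sampler);
```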
@DOC_END */
class SamplerPool
{
public:
SamplerPool(SamplerPool const&) = delete;
SamplerPool& operator=(SamplerPool const&) = delete;
SamplerPool() {}
SamplerPool(VkDevice device) { init(device); }
~SamplerPool() { deinit(); }
void init(VkDevice device) { m_device = device; }
void deinit();
// creates a new sampler or re-uses an existing one with ref-count
// createInfo may contain VkSamplerReductionModeCreateInfo and VkSamplerYcbcrConversionCreateInfo
VkSampler acquireSampler(const VkSamplerCreateInfo& createInfo);
// decrements ref-count and destroys sampler if possible
void releaseSampler(VkSampler sampler);
private:
struct SamplerState
{
VkSamplerCreateInfo createInfo;
VkSamplerReductionModeCreateInfo reduction;
VkSamplerYcbcrConversionCreateInfo ycbr;
SamplerState() { memset(this, 0, sizeof(SamplerState)); }
bool operator==(const SamplerState& other) const { return memcmp(this, &other, sizeof(SamplerState)) == 0; }
};
struct Chain
{
VkStructureType sType;
const Chain* pNext;
};
struct Entry
{
VkSampler sampler = nullptr;
uint32_t nextFreeIndex = ~0;
uint32_t refCount = 0;
SamplerState state;
};
VkDevice m_device = nullptr;
uint32_t m_freeIndex = ~0;
std::vector<Entry> m_entries;
std::unordered_map<SamplerState, uint32_t, nvh::HashAligned32<SamplerState>> m_stateMap;
std::unordered_map<VkSampler, uint32_t> m_samplerMap;
};
VkSamplerCreateInfo makeSamplerCreateInfo(VkFilter magFilter = VK_FILTER_LINEAR,
VkFilter minFilter = VK_FILTER_LINEAR,
VkSamplerAddressMode addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
VkSamplerAddressMode addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
VkSamplerAddressMode addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
VkBool32 anisotropyEnable = VK_FALSE,
float maxAnisotropy = 16,
VkSamplerMipmapMode mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
float minLod = 0.0f,
float maxLod = FLT_MAX,
float mipLodBias = 0.0f,
VkBool32 compareEnable = VK_FALSE,
VkCompareOp compareOp = VK_COMPARE_OP_ALWAYS,
VkBorderColor borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK,
VkBool32 unnormalizedCoordinates = VK_FALSE);
} // namespace nvvk

View file

@ -0,0 +1,246 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "sbtwrapper_vk.hpp"
#include "nvvk/commands_vk.hpp"
#include "nvvk/debug_util_vk.hpp"
#include "nvvk/error_vk.hpp"
#include "nvh/nvprint.hpp"
#include "nvh/alignment.hpp"
using namespace nvvk;
//--------------------------------------------------------------------------------------------------
// Default setup
//
void nvvk::SBTWrapper::setup(VkDevice device,
uint32_t familyIndex,
nvvk::ResourceAllocator* allocator,
const VkPhysicalDeviceRayTracingPipelinePropertiesKHR& rtProperties)
{
m_device = device;
m_queueIndex = familyIndex;
m_pAlloc = allocator;
m_debug.setup(device);
m_handleSize = rtProperties.shaderGroupHandleSize; // Size of a program identifier
m_handleAlignment = rtProperties.shaderGroupHandleAlignment; // Alignment in bytes for each SBT entry
m_shaderGroupBaseAlignment = rtProperties.shaderGroupBaseAlignment;
}
//--------------------------------------------------------------------------------------------------
// Destroying the allocated buffers and clearing all vectors
//
void SBTWrapper::destroy()
{
if(m_pAlloc)
{
for(auto& b : m_buffer)
m_pAlloc->destroy(b);
}
for(auto& i : m_index)
i = {};
}
//--------------------------------------------------------------------------------------------------
// Finding the handle index position of each group type in the pipeline creation info.
// If the pipeline was created like: raygen, miss, hit, miss, hit, hit
// The result will be: raygen[0], miss[1, 3], hit[2, 4, 5], callable[]
//
void SBTWrapper::addIndices(VkRayTracingPipelineCreateInfoKHR rayPipelineInfo,
const std::vector<VkRayTracingPipelineCreateInfoKHR>& libraries)
{
for(auto& i : m_index)
i = {};
// Libraries contain stages referencing their internal groups. When those groups
// are used in the final pipeline we need to offset them to ensure each group has
// a unique index
uint32_t groupOffset = 0;
for(size_t i = 0; i < libraries.size() + 1; i++)
{
// When using libraries, their groups and stages are appended after the groups and
// stages defined in the main VkRayTracingPipelineCreateInfoKHR
const auto& info = (i == 0) ? rayPipelineInfo : libraries[i - 1];
// Finding the handle position of each group, splitting by raygen, miss and hit group
for(uint32_t g = 0; g < info.groupCount; g++)
{
if(info.pGroups[g].type == VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR)
{
uint32_t genShader = info.pGroups[g].generalShader;
assert(genShader < info.stageCount);
if(info.pStages[genShader].stage == VK_SHADER_STAGE_RAYGEN_BIT_KHR)
{
m_index[eRaygen].push_back(g + groupOffset);
}
else if(info.pStages[genShader].stage == VK_SHADER_STAGE_MISS_BIT_KHR)
{
m_index[eMiss].push_back(g + groupOffset);
}
else if(info.pStages[genShader].stage == VK_SHADER_STAGE_CALLABLE_BIT_KHR)
{
m_index[eCallable].push_back(g + groupOffset);
}
}
else
{
m_index[eHit].push_back(g + groupOffset);
}
}
groupOffset += info.groupCount;
}
}
//--------------------------------------------------------------------------------------------------
// This function creates 4 buffers, one each for the raygen, miss, hit and callable shaders.
// Each buffer holds the handle + 'data (if any)' for every entry of that type in the pipeline.
//
void SBTWrapper::create(VkPipeline rtPipeline,
VkRayTracingPipelineCreateInfoKHR rayPipelineInfo /*= {}*/,
const std::vector<VkRayTracingPipelineCreateInfoKHR>& librariesInfo /*= {}*/)
{
for(auto& b : m_buffer)
m_pAlloc->destroy(b);
// Get the total number of groups and handle index position
uint32_t totalGroupCount{0};
std::vector<uint32_t> groupCountPerInput;
// A pipeline is defined by at least its main VkRayTracingPipelineCreateInfoKHR, plus a number of external libraries
groupCountPerInput.reserve(1 + librariesInfo.size());
if(rayPipelineInfo.sType == VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR)
{
addIndices(rayPipelineInfo, librariesInfo);
groupCountPerInput.push_back(rayPipelineInfo.groupCount);
totalGroupCount += rayPipelineInfo.groupCount;
for(const auto& lib : librariesInfo)
{
groupCountPerInput.push_back(lib.groupCount);
totalGroupCount += lib.groupCount;
}
}
else
{
    // Find how many groups there are when entries were added manually, by finding the largest index and adding 1
// See also addIndex for manual entries
for(auto& i : m_index)
{
if(!i.empty())
totalGroupCount = std::max(totalGroupCount, *std::max_element(std::begin(i), std::end(i)));
}
totalGroupCount++;
groupCountPerInput.push_back(totalGroupCount);
}
// Fetch all the shader handles used in the pipeline, so that they can be written in the SBT
uint32_t sbtSize = totalGroupCount * m_handleSize;
std::vector<uint8_t> shaderHandleStorage(sbtSize);
NVVK_CHECK(vkGetRayTracingShaderGroupHandlesKHR(m_device, rtPipeline, 0, totalGroupCount, sbtSize, shaderHandleStorage.data()));
  // Find the max stride; the minimum is the handle size + size of 'data (if any)', aligned to the handle alignment
auto findStride = [&](auto entry, auto& stride) {
stride = nvh::align_up(m_handleSize, m_handleAlignment); // minimum stride
for(auto& e : entry)
{
// Find the largest data + handle size, all aligned
uint32_t dataHandleSize =
nvh::align_up(static_cast<uint32_t>(m_handleSize + e.second.size() * sizeof(uint8_t)), m_handleAlignment);
stride = std::max(stride, dataHandleSize);
}
};
findStride(m_data[eRaygen], m_stride[eRaygen]);
findStride(m_data[eMiss], m_stride[eMiss]);
findStride(m_data[eHit], m_stride[eHit]);
findStride(m_data[eCallable], m_stride[eCallable]);
// Special case, all Raygen must start aligned on GroupBase
m_stride[eRaygen] = nvh::align_up(m_stride[eRaygen], m_shaderGroupBaseAlignment);
// Buffer holding the staging information
std::array<std::vector<uint8_t>, 4> stage;
stage[eRaygen] = std::vector<uint8_t>(m_stride[eRaygen] * indexCount(eRaygen));
stage[eMiss] = std::vector<uint8_t>(m_stride[eMiss] * indexCount(eMiss));
stage[eHit] = std::vector<uint8_t>(m_stride[eHit] * indexCount(eHit));
stage[eCallable] = std::vector<uint8_t>(m_stride[eCallable] * indexCount(eCallable));
// Write the handles in the SBT buffer + data info (if any)
auto copyHandles = [&](std::vector<uint8_t>& buffer, std::vector<uint32_t>& indices, uint32_t stride, auto& data) {
auto* pBuffer = buffer.data();
for(uint32_t index = 0; index < static_cast<uint32_t>(indices.size()); index++)
{
auto* pStart = pBuffer;
// Copy the handle
memcpy(pBuffer, shaderHandleStorage.data() + (indices[index] * m_handleSize), m_handleSize);
// If there is data for this group index, copy it too
auto it = data.find(index);
if(it != std::end(data))
{
pBuffer += m_handleSize;
memcpy(pBuffer, it->second.data(), it->second.size() * sizeof(uint8_t));
}
pBuffer = pStart + stride; // Jumping to next group
}
};
// Copy the handles/data to each staging buffer
copyHandles(stage[eRaygen], m_index[eRaygen], m_stride[eRaygen], m_data[eRaygen]);
copyHandles(stage[eMiss], m_index[eMiss], m_stride[eMiss], m_data[eMiss]);
copyHandles(stage[eHit], m_index[eHit], m_stride[eHit], m_data[eHit]);
copyHandles(stage[eCallable], m_index[eCallable], m_stride[eCallable], m_data[eCallable]);
// Creating device local buffers where handles will be stored
auto usage_flags = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR;
auto mem_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
nvvk::CommandPool genCmdBuf(m_device, m_queueIndex);
VkCommandBuffer cmdBuf = genCmdBuf.createCommandBuffer();
for(uint32_t i = 0; i < 4; i++)
{
if(!stage[i].empty())
{
m_buffer[i] = m_pAlloc->createBuffer(cmdBuf, stage[i], usage_flags, mem_flags);
NAME_IDX_VK(m_buffer[i].buffer, i);
}
}
genCmdBuf.submitAndWait(cmdBuf);
m_pAlloc->finalizeAndReleaseStaging();
}
VkDeviceAddress SBTWrapper::getAddress(GroupType t)
{
if(m_buffer[t].buffer == VK_NULL_HANDLE)
return 0;
VkBufferDeviceAddressInfo i{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, nullptr, m_buffer[t].buffer};
return vkGetBufferDeviceAddress(m_device, &i); // Aligned on VkMemoryRequirements::alignment which includes shaderGroupBaseAlignment
}
const VkStridedDeviceAddressRegionKHR SBTWrapper::getRegion(GroupType t, uint32_t indexOffset)
{
return VkStridedDeviceAddressRegionKHR{getAddress(t) + indexOffset * getStride(t), getStride(t), getSize(t)};
}
const std::array<VkStridedDeviceAddressRegionKHR, 4> SBTWrapper::getRegions(uint32_t rayGenIndexOffset)
{
std::array<VkStridedDeviceAddressRegionKHR, 4> regions{getRegion(eRaygen, rayGenIndexOffset), getRegion(eMiss),
getRegion(eHit), getRegion(eCallable)};
return regions;
}


@ -0,0 +1,179 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
/** @DOC_START
# class nvvk::SBTWrapper
nvvk::SBTWrapper is a generic shader binding table (SBT) builder for a ray tracing pipeline.
The builder iterates through the pipeline create info `VkRayTracingPipelineCreateInfoKHR`
to find how many raygen, miss, hit and callable shader groups were created.
The handles for those groups are retrieved from the pipeline and written in the right order into
separate buffers.
Convenience functions exist to retrieve all the information needed by vkCmdTraceRaysKHR.
## Usage
- Setup the builder (`setup()`)
- After the pipeline creation, call `create()` with the same info used for the creation of the pipeline.
- Use `getRegions()` to get all the `VkStridedDeviceAddressRegionKHR` needed by `vkCmdTraceRaysKHR()`
### Example
```cpp
m_sbtWrapper.setup(m_device, m_graphicsQueueIndex, &m_alloc, m_rtProperties);
// ...
m_sbtWrapper.create(m_rtPipeline, rayPipelineInfo);
// ...
auto regions = m_sbtWrapper.getRegions();
vkCmdTraceRaysKHR(cmdBuf, &regions[0], &regions[1], &regions[2], &regions[3], size.width, size.height, 1);
```
## Extra
If data is attached to a shader group (see shader record), it needs to be provided independently.
In this case, the user must know the group index for the group type.
In the example below, hit groups 1 and 2 have data, but group 0 does not.
These calls must be made before create().
```cpp
m_sbtWrapper.addData(SBTWrapper::eHit, 1, m_hitShaderRecord[0]);
m_sbtWrapper.addData(SBTWrapper::eHit, 2, m_hitShaderRecord[1]);
```
## Special case
It is also possible to create a pipeline with only a few groups while the SBT represents many more groups.
The following example shows a more complex setup.
There are: 1 x raygen, 2 x miss, 2 x hit.
BUT the SBT will have 3 hit groups, by duplicating the second hit in its table.
This way the same hit shader defined in the pipeline can be called with different data.
In this case, the user must provide the information to the SBT manually.
All extra groups must be explicitly added.
The following shows how to pull the handle indices from the pipeline, then adds another hit group that re-uses the 4th pipeline entry.
Note: the pipeline create info is not passed to create(), because the indices are defined manually.
```cpp
// Manually defining group indices
m_sbtWrapper.addIndices(rayPipelineInfo); // Add raygen(0), miss(1), miss(2), hit(3), hit(4) from the pipeline info
m_sbtWrapper.addIndex(SBTWrapper::eHit, 4);                       // Adding a 3rd hit, a duplicate of hit:1, which makes hit:2 available.
m_sbtWrapper.addData(SBTWrapper::eHit, 2, m_hitShaderRecord[1]);  // Adding data to this hit shader
m_sbtWrapper.create(m_rtPipeline);
```
@DOC_END */
#include <array>
#include "nvvk/resourceallocator_vk.hpp"
#include "nvvk/debug_util_vk.hpp"
namespace nvvk {
class SBTWrapper
{
public:
enum GroupType
{
eRaygen,
eMiss,
eHit,
eCallable
};
void setup(VkDevice device,
uint32_t familyIndex,
nvvk::ResourceAllocator* allocator,
const VkPhysicalDeviceRayTracingPipelinePropertiesKHR& rtProperties);
void destroy();
// To call after the ray tracer pipeline creation
// The rayPipelineInfo parameter is the structure used to define the pipeline,
  // while librariesInfo describes the potential input pipeline libraries
void create(VkPipeline rtPipeline,
VkRayTracingPipelineCreateInfoKHR rayPipelineInfo = {},
const std::vector<VkRayTracingPipelineCreateInfoKHR>& librariesInfo = {});
  // Optional, to be used in combination with addIndex. Leave create() `rayPipelineInfo`
  // and `librariesInfo` empty. The rayPipelineInfo parameter is the structure used to
  // define the pipeline, while librariesInfo describes the potential input pipeline libraries
void addIndices(VkRayTracingPipelineCreateInfoKHR rayPipelineInfo,
const std::vector<VkRayTracingPipelineCreateInfoKHR>& libraries = {});
// Pushing back a GroupType and the handle pipeline index to use
  // e.g. addIndex(eHit, 3) pushes a hit shader group using the 3rd entry in the pipeline
void addIndex(GroupType t, uint32_t index) { m_index[t].push_back(index); }
// Adding 'Shader Record' data to the group index.
// i.e. addData(eHit, 0, myValue) is adding 'myValue' to the HIT group 0.
template <typename T>
void addData(GroupType t, uint32_t groupIndex, T& data)
{
addData(t, groupIndex, (uint8_t*)&data, sizeof(T));
}
void addData(GroupType t, uint32_t groupIndex, uint8_t* data, size_t dataSize)
{
std::vector<uint8_t> dst(data, data + dataSize);
m_data[t][groupIndex] = dst;
}
// Getters
uint32_t indexCount(GroupType t) { return static_cast<uint32_t>(m_index[t].size()); }
uint32_t getStride(GroupType t) { return m_stride[t]; }
VkDeviceAddress getAddress(GroupType t);
// returns the entire size of a group. Raygen Stride and Size must be equal, even if the buffer contains many of them.
uint32_t getSize(GroupType t) { return t == eRaygen ? getStride(eRaygen) : getStride(t) * indexCount(t); }
  // Return the address region of a group. indexOffset allows offsetting the starting shader of the group.
const VkStridedDeviceAddressRegionKHR getRegion(GroupType t, uint32_t indexOffset = 0);
  // Return the address regions of all groups. The offset allows selecting which raygen to use.
const std::array<VkStridedDeviceAddressRegionKHR, 4> getRegions(uint32_t rayGenIndexOffset = 0);
private:
using entry = std::unordered_map<uint32_t, std::vector<uint8_t>>;
std::array<std::vector<uint32_t>, 4> m_index; // Offset index in pipeline
std::array<nvvk::Buffer, 4> m_buffer; // Buffer of handles + data
std::array<uint32_t, 4> m_stride{0, 0, 0, 0}; // Stride of each group
std::array<entry, 4> m_data; // Local data to groups (Shader Record)
uint32_t m_handleSize{0};
uint32_t m_handleAlignment{0};
uint32_t m_shaderGroupBaseAlignment{0};
VkDevice m_device{VK_NULL_HANDLE};
nvvk::ResourceAllocator* m_pAlloc{nullptr}; // Allocator for buffer, images, acceleration structures
nvvk::DebugUtil m_debug; // Utility to name objects
uint32_t m_queueIndex{0};
};
} // namespace nvvk


@ -0,0 +1,582 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "shadermodulemanager_vk.hpp"
#include <algorithm>
#include <assert.h>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdarg.h>
#include <stdio.h>
#include <nvh/fileoperations.hpp>
#include <nvh/nvprint.hpp>
#if NVP_SUPPORTS_SHADERC
#include <shaderc/shaderc.hpp>
#endif
#define NV_LINE_MARKERS 1
namespace nvvk {
const VkShaderModule ShaderModuleManager::PREPROCESS_ONLY_MODULE = (VkShaderModule)~0;
#if NVP_SUPPORTS_SHADERC
// Shared shaderc compiler, and reference count + mutex protecting it.
shaderc_compiler_t ShaderModuleManager::s_shadercCompiler = nullptr;
uint32_t ShaderModuleManager::s_shadercCompilerUsers{0};
std::mutex ShaderModuleManager::s_shadercCompilerMutex;
// Adapts the include file loader of nvh::ShaderFileManager to what shaderc expects.
class ShadercIncludeBridge : public shaderc::CompileOptions::IncluderInterface
{
// Borrowed pointer to our include file loader.
nvvk::ShaderModuleManager* m_pShaderFileManager;
// Inputs/outputs reused for manualInclude.
std::string m_filenameFound;
const std::string m_emptyString;
// Subtype of shaderc_include_result that holds the include data
// we found; MUST be static_cast to this type before delete-ing as
// shaderc_include_result lacks virtual destructor.
class Result : public shaderc_include_result
{
// Containers for actual data; shaderc_include_result pointers
// point to data held within.
const std::string m_content;
const std::string m_filenameFound;
public:
Result(std::string content, std::string filenameFound)
: m_content(std::move(content))
, m_filenameFound(std::move(filenameFound))
{
this->source_name = m_filenameFound.data();
this->source_name_length = m_filenameFound.size();
this->content = m_content.data();
this->content_length = m_content.size();
this->user_data = nullptr;
}
};
public:
ShadercIncludeBridge(nvvk::ShaderModuleManager* pShaderFileManager) { m_pShaderFileManager = pShaderFileManager; }
// Handles shaderc_include_resolver_fn callbacks.
virtual shaderc_include_result* GetInclude(const char* requested_source,
shaderc_include_type type,
const char* requesting_source,
size_t /*include_depth*/) override
{
std::string filename = requested_source;
std::string includeFileText;
bool versionFound = false; // Trying to match glslc behavior: it doesn't allow #version directives in include files.
if(type == shaderc_include_type_relative) // "header.h"
{
includeFileText = m_pShaderFileManager->getContentWithRequestingSourceDirectory(filename, m_filenameFound, requesting_source);
}
else // shaderc_include_type_standard <header.h>
{
includeFileText = m_pShaderFileManager->getContent(filename, m_filenameFound);
}
std::string content = m_pShaderFileManager->manualIncludeText(includeFileText, m_filenameFound, m_emptyString, versionFound);
return new Result(std::move(content), std::move(m_filenameFound));
}
// Handles shaderc_include_result_release_fn callbacks.
virtual void ReleaseInclude(shaderc_include_result* data) override { delete static_cast<Result*>(data); }
// Set as the includer for the given shaderc_compile_options_t.
// This ShadercIncludeBridge MUST not be destroyed while in-use by a
// shaderc compiler using these options.
void setAsIncluder(shaderc_compile_options_t options)
{
shaderc_compile_options_set_include_callbacks(
options,
[](void* pvShadercIncludeBridge, const char* requestedSource, int type, const char* requestingSource, size_t includeDepth) {
return static_cast<ShadercIncludeBridge*>(pvShadercIncludeBridge)
->GetInclude(requestedSource, (shaderc_include_type)type, requestingSource, includeDepth);
},
[](void* pvShadercIncludeBridge, shaderc_include_result* includeResult) {
return static_cast<ShadercIncludeBridge*>(pvShadercIncludeBridge)->ReleaseInclude(includeResult);
},
this);
}
};
#endif /* NVP_SUPPORTS_SHADERC */
std::string ShaderModuleManager::DefaultInterface::getTypeDefine(uint32_t type) const
{
switch(type)
{
case VK_SHADER_STAGE_VERTEX_BIT:
return "#define _VERTEX_SHADER_ 1\n";
case VK_SHADER_STAGE_FRAGMENT_BIT:
return "#define _FRAGMENT_SHADER_ 1\n";
case VK_SHADER_STAGE_COMPUTE_BIT:
return "#define _COMPUTE_SHADER_ 1\n";
case VK_SHADER_STAGE_GEOMETRY_BIT:
return "#define _GEOMETRY_SHADER_ 1\n";
case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
return "#define _TESS_CONTROL_SHADER_ 1\n";
case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
return "#define _TESS_EVALUATION_SHADER_ 1\n";
#if VK_NV_mesh_shader
case VK_SHADER_STAGE_MESH_BIT_NV:
return "#define _MESH_SHADER_ 1\n";
case VK_SHADER_STAGE_TASK_BIT_NV:
return "#define _TASK_SHADER_ 1\n";
#endif
#if VK_NV_ray_tracing
case VK_SHADER_STAGE_RAYGEN_BIT_NV:
return "#define _RAY_GENERATION_SHADER_ 1\n";
case VK_SHADER_STAGE_ANY_HIT_BIT_NV:
return "#define _RAY_ANY_HIT_SHADER_ 1\n";
case VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV:
return "#define _RAY_CLOSEST_HIT_SHADER_ 1\n";
case VK_SHADER_STAGE_MISS_BIT_NV:
return "#define _RAY_MISS_SHADER_ 1\n";
case VK_SHADER_STAGE_INTERSECTION_BIT_NV:
return "#define _RAY_INTERSECTION_SHADER_ 1\n";
case VK_SHADER_STAGE_CALLABLE_BIT_NV:
return "#define _RAY_CALLABLE_BIT_SHADER_ 1\n";
#endif
}
return std::string();
}
uint32_t ShaderModuleManager::DefaultInterface::getTypeShadercKind(uint32_t type) const
{
#if NVP_SUPPORTS_SHADERC
switch(type)
{
case VK_SHADER_STAGE_VERTEX_BIT:
return shaderc_glsl_vertex_shader;
case VK_SHADER_STAGE_FRAGMENT_BIT:
return shaderc_glsl_fragment_shader;
case VK_SHADER_STAGE_COMPUTE_BIT:
return shaderc_glsl_compute_shader;
case VK_SHADER_STAGE_GEOMETRY_BIT:
return shaderc_glsl_geometry_shader;
case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
return shaderc_glsl_tess_control_shader;
case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
return shaderc_glsl_tess_evaluation_shader;
#if VK_NV_mesh_shader
case VK_SHADER_STAGE_MESH_BIT_NV:
return shaderc_glsl_mesh_shader;
case VK_SHADER_STAGE_TASK_BIT_NV:
return shaderc_glsl_task_shader;
#endif
#if VK_NV_ray_tracing
case VK_SHADER_STAGE_RAYGEN_BIT_NV:
return shaderc_glsl_raygen_shader;
case VK_SHADER_STAGE_ANY_HIT_BIT_NV:
return shaderc_glsl_anyhit_shader;
case VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV:
return shaderc_glsl_closesthit_shader;
case VK_SHADER_STAGE_MISS_BIT_NV:
return shaderc_glsl_miss_shader;
case VK_SHADER_STAGE_INTERSECTION_BIT_NV:
return shaderc_glsl_intersection_shader;
case VK_SHADER_STAGE_CALLABLE_BIT_NV:
return shaderc_glsl_callable_shader;
#endif
}
return shaderc_glsl_infer_from_source;
#else
return 0;
#endif
}
bool ShaderModuleManager::setupShaderModule(ShaderModule& module)
{
Definition& definition = module.definition;
module.module = VK_NULL_HANDLE;
if(definition.filetype == FILETYPE_DEFAULT)
{
definition.filetype = m_filetype;
}
std::string combinedPrepend = m_prepend;
std::string combinedFilenames;
combinedPrepend += definition.prepend;
combinedFilenames += definition.filename;
if(definition.filetype == FILETYPE_SPIRV)
{
std::string filenameFound;
definition.content = nvh::loadFile(definition.filename, true, m_directories, filenameFound);
}
else
{
std::string prepend = m_usedSetupIF->getTypeDefine(definition.type);
definition.content =
manualInclude(definition.filename, definition.filenameFound, prepend + m_prepend + definition.prepend, false);
}
if(definition.content.empty())
{
return false;
}
if(m_preprocessOnly)
{
module.module = PREPROCESS_ONLY_MODULE;
return true;
}
else
{
VkResult vkresult = VK_ERROR_INVALID_SHADER_NV;
VkShaderModuleCreateInfo shaderModuleInfo = {VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO};
#if NVP_SUPPORTS_SHADERC
shaderc_compilation_result_t result = nullptr;
if(definition.filetype == FILETYPE_GLSL)
{
std::lock_guard<std::mutex> guard(s_shadercCompilerMutex);
shaderc_shader_kind shaderkind = (shaderc_shader_kind)m_usedSetupIF->getTypeShadercKind(definition.type);
shaderc_compile_options_t options = (shaderc_compile_options_t)m_usedSetupIF->getShadercCompileOption(s_shadercCompiler);
if(!options)
{
if(m_apiMajor == 1 && m_apiMinor == 0)
{
shaderc_compile_options_set_target_env(m_shadercOptions, shaderc_target_env_vulkan, shaderc_env_version_vulkan_1_0);
}
else if(m_apiMajor == 1 && m_apiMinor == 1)
{
shaderc_compile_options_set_target_env(m_shadercOptions, shaderc_target_env_vulkan, shaderc_env_version_vulkan_1_1);
}
else if(m_apiMajor == 1 && m_apiMinor == 2)
{
shaderc_compile_options_set_target_env(m_shadercOptions, shaderc_target_env_vulkan, shaderc_env_version_vulkan_1_2);
}
else if(m_apiMajor == 1 && m_apiMinor == 3)
{
shaderc_compile_options_set_target_env(m_shadercOptions, shaderc_target_env_vulkan, shaderc_env_version_vulkan_1_3);
}
else
{
LOGE("nvvk::ShaderModuleManager: Unsupported Vulkan version: %i.%i\n", int(m_apiMajor), int(m_apiMinor));
assert(0);
}
shaderc_compile_options_set_optimization_level(m_shadercOptions, m_shadercOptimizationLevel);
        // Keep debug info: it doesn't cost shader execution performance, only compile time and memory size.
        // It improves usage of debugging tools; not recommended for a shipping application,
        // but good for development builds.
shaderc_compile_options_set_generate_debug_info(m_shadercOptions);
options = m_shadercOptions;
}
// Tell shaderc to use this class (really our base class, nvh::ShaderFileManager) to load include files.
ShadercIncludeBridge shadercIncludeBridge(this);
shadercIncludeBridge.setAsIncluder(options);
// Note: need filenameFound, not filename, so that relative includes work.
result = shaderc_compile_into_spv(s_shadercCompiler, definition.content.c_str(), definition.content.size(),
shaderkind, definition.filenameFound.c_str(), "main", options);
if(!result)
{
return false;
}
if(shaderc_result_get_compilation_status(result) != shaderc_compilation_status_success)
{
bool failedToOptimize = strstr(shaderc_result_get_error_message(result), "failed to optimize");
int level = failedToOptimize ? LOGLEVEL_WARNING : LOGLEVEL_ERROR;
nvprintfLevel(level, "%s: optimization_level_performance\n", definition.filename.c_str());
nvprintfLevel(level, " %s\n", definition.prepend.c_str());
nvprintfLevel(level, " %s\n", shaderc_result_get_error_message(result));
shaderc_result_release(result);
if(!failedToOptimize || options != m_shadercOptions)
{
return false;
}
// try again without optimization
shaderc_compile_options_set_optimization_level(m_shadercOptions, shaderc_optimization_level_zero);
result = shaderc_compile_into_spv(s_shadercCompiler, definition.content.c_str(), definition.content.size(),
shaderkind, definition.filename.c_str(), "main", options);
}
if(shaderc_result_get_compilation_status(result) != shaderc_compilation_status_success)
{
LOGE("%s: optimization_level_zero\n", definition.filename.c_str());
LOGE(" %s\n", definition.prepend.c_str());
LOGE(" %s\n", shaderc_result_get_error_message(result));
shaderc_result_release(result);
return false;
}
shaderModuleInfo.codeSize = shaderc_result_get_length(result);
shaderModuleInfo.pCode = (const uint32_t*)shaderc_result_get_bytes(result);
}
else
#else
if(definition.filetype == FILETYPE_GLSL)
{
LOGW("No direct GLSL support\n");
return false;
}
else
#endif
{
shaderModuleInfo.codeSize = definition.content.size();
shaderModuleInfo.pCode = (const uint32_t*)definition.content.c_str();
}
vkresult = ::vkCreateShaderModule(m_device, &shaderModuleInfo, nullptr, &module.module);
if(vkresult == VK_SUCCESS && m_keepModuleSPIRV)
{
module.moduleSPIRV = std::string((const char*)shaderModuleInfo.pCode, shaderModuleInfo.codeSize);
}
#if NVP_SUPPORTS_SHADERC
if(result)
{
shaderc_result_release(result);
}
#endif
return vkresult == VK_SUCCESS;
}
}
void ShaderModuleManager::init(VkDevice device, int apiMajor, int apiMinor)
{
assert(!m_device);
m_device = device;
m_apiMajor = apiMajor;
m_apiMinor = apiMinor;
#if NVP_SUPPORTS_SHADERC
// First user initializes compiler.
std::lock_guard<std::mutex> lock(s_shadercCompilerMutex);
s_shadercCompilerUsers++;
if(!s_shadercCompiler)
{
s_shadercCompiler = shaderc_compiler_initialize();
}
m_shadercOptions = shaderc_compile_options_initialize();
#endif
}
void ShaderModuleManager::deinit()
{
if(m_device)
{
#if NVP_SUPPORTS_SHADERC
// Last user de-inits compiler.
std::lock_guard<std::mutex> lock(s_shadercCompilerMutex);
s_shadercCompilerUsers--;
if(s_shadercCompiler && s_shadercCompilerUsers == 0)
{
shaderc_compiler_release(s_shadercCompiler);
s_shadercCompiler = nullptr;
}
if(m_shadercOptions)
{
shaderc_compile_options_release(m_shadercOptions);
}
#endif
}
deleteShaderModules();
m_device = nullptr;
}
ShaderModuleID ShaderModuleManager::createShaderModule(const Definition& definition)
{
ShaderModule module;
module.definition = definition;
setupShaderModule(module);
// find unused
for(size_t i = 0; i < m_shadermodules.size(); i++)
{
if(m_shadermodules[i].definition.type == 0)
{
m_shadermodules[i] = module;
return i;
}
}
m_shadermodules.push_back(module);
return m_shadermodules.size() - 1;
}
ShaderModuleID ShaderModuleManager::createShaderModule(uint32_t type,
std::string const& filename,
std::string const& prepend,
FileType fileType /*= FILETYPE_DEFAULT*/,
std::string const& entryname /*= "main"*/)
{
Definition def;
def.type = type;
def.filename = filename;
def.prepend = prepend;
def.filetype = fileType;
def.entry = entryname;
return createShaderModule(def);
}
bool ShaderModuleManager::areShaderModulesValid()
{
bool valid = true;
for(size_t i = 0; i < m_shadermodules.size(); i++)
{
valid = valid && isValid(i);
}
return valid;
}
void ShaderModuleManager::deleteShaderModules()
{
for(size_t i = 0; i < m_shadermodules.size(); i++)
{
destroyShaderModule((ShaderModuleID)i);
}
m_shadermodules.clear();
}
void ShaderModuleManager::reloadModule(ShaderModuleID idx)
{
if(!isValid(idx))
return;
ShaderModule& module = getShaderModule(idx);
bool old = m_preprocessOnly;
m_preprocessOnly = module.module == PREPROCESS_ONLY_MODULE;
if(module.module && module.module != PREPROCESS_ONLY_MODULE)
{
vkDestroyShaderModule(m_device, module.module, nullptr);
module.module = nullptr;
}
if(module.definition.type != 0)
{
setupShaderModule(module);
}
m_preprocessOnly = old;
}
void ShaderModuleManager::reloadShaderModules()
{
LOGI("Reloading programs...\n");
for(size_t i = 0; i < m_shadermodules.size(); i++)
{
reloadModule((ShaderModuleID)i);
}
LOGI("done\n");
}
bool ShaderModuleManager::isValid(ShaderModuleID idx) const
{
return idx.isValid()
&& ((m_shadermodules[idx].definition.type && m_shadermodules[idx].module != 0)
|| !m_shadermodules[idx].definition.type);
}
VkShaderModule ShaderModuleManager::get(ShaderModuleID idx) const
{
return m_shadermodules[idx].module;
}
ShaderModuleManager::ShaderModule& ShaderModuleManager::getShaderModule(ShaderModuleID idx)
{
return m_shadermodules[idx];
}
const ShaderModuleManager::ShaderModule& ShaderModuleManager::getShaderModule(ShaderModuleID idx) const
{
return m_shadermodules[idx];
}
void ShaderModuleManager::destroyShaderModule(ShaderModuleID idx)
{
if(!isValid(idx))
return;
ShaderModule& module = getShaderModule(idx);
if(module.module && module.module != PREPROCESS_ONLY_MODULE)
{
vkDestroyShaderModule(m_device, module.module, nullptr);
module.module = 0;
}
module.definition = Definition();
}
const char* ShaderModuleManager::getCode(ShaderModuleID idx, size_t* len) const
{
return m_shadermodules[idx].definition.content.c_str();
}
const size_t ShaderModuleManager::getCodeLen(ShaderModuleID idx) const
{
return m_shadermodules[idx].definition.content.size();
}
bool ShaderModuleManager::dumpSPIRV(ShaderModuleID idx, const char* filename) const
{
if(m_shadermodules[idx].moduleSPIRV.empty())
return false;
FILE* f = fopen(filename, "wb");
if(f)
{
fwrite(m_shadermodules[idx].moduleSPIRV.data(), m_shadermodules[idx].moduleSPIRV.size(), 1, f);
fclose(f);
return true;
}
return false;
}
bool ShaderModuleManager::getSPIRV(ShaderModuleID idx, size_t* pLen, const uint32_t** pCode) const
{
if(m_shadermodules[idx].moduleSPIRV.empty())
return false;
*pLen = m_shadermodules[idx].moduleSPIRV.size();
*pCode = reinterpret_cast<const uint32_t*>(m_shadermodules[idx].moduleSPIRV.data());
return true;
}
} // namespace nvvk


@ -0,0 +1,243 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef NV_SHADERMODULEMANAGER_INCLUDED
#define NV_SHADERMODULEMANAGER_INCLUDED
#include <mutex>
#include <stdio.h>
#include <string>
#include <vector>
#include <vulkan/vulkan_core.h>
#if NVP_SUPPORTS_SHADERC
#define NV_EXTENSIONS
#include <shaderc/shaderc.h>
#undef NV_EXTENSIONS
#endif
#include <nvh/shaderfilemanager.hpp>
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::ShaderModuleManager
The nvvk::ShaderModuleManager manages VkShaderModules stored in files (SPIR-V or GLSL).
Using ShaderFileManager it will find the files and resolve #include for GLSL.
You must add include directories to the base class for this.
It also comes with some convenience functions to reload shaders etc.
That is why it hands out a ShaderModuleID rather than a VkShaderModule directly.
To change the compilation behavior, manipulate the public member variables
prior to calling createShaderModule.
m_filetype is crucial for this. You can pass raw SPIR-V files or GLSL.
If GLSL is used, shaderc must be used as well (which must be added via
_add_package_ShaderC() in the project's CMake).
Example:
```cpp
ShaderModuleManager mgr(myDevice);
// derived from ShaderFileManager
mgr.addDirectory("spv/");
// all shaders get this injected after #version statement
mgr.m_prepend = "#define USE_NOISE 1\n";
vid = mgr.createShaderModule(VK_SHADER_STAGE_VERTEX_BIT, "object.vert.glsl");
fid = mgr.createShaderModule(VK_SHADER_STAGE_FRAGMENT_BIT, "object.frag.glsl");
// ... later use module
info.module = mgr.get(vid);
```
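A small sketch of the reload path; `mgr`, `vid` and `fid` are from the example above, and the pipelines using the modules are application-side:
```cpp
// e.g. triggered by a hotkey after editing the GLSL files on disk
mgr.reloadShaderModules();
if(mgr.areShaderModulesValid())
{
  // re-create the pipelines that referenced mgr.get(vid) / mgr.get(fid)
}
```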
@DOC_END */
class ShaderModuleID
{
public:
size_t m_value;
ShaderModuleID()
: m_value(size_t(~0))
{
}
ShaderModuleID(size_t b)
: m_value(b)
{
}
ShaderModuleID& operator=(size_t b)
{
m_value = b;
return *this;
}
bool isValid() const { return m_value != size_t(~0); }
operator bool() const { return isValid(); }
operator size_t() const { return m_value; }
friend bool operator==(const ShaderModuleID& lhs, const ShaderModuleID& rhs) { return rhs.m_value == lhs.m_value; }
};
class ShaderModuleManager : public nvh::ShaderFileManager
{
public:
struct ShaderModule
{
ShaderModule()
: module(0)
{
}
VkShaderModule module;
std::string moduleSPIRV;
Definition definition;
};
void init(VkDevice device, int apiMajor = 1, int apiMinor = 1);
// also calls deleteShaderModules
void deinit();
ShaderModuleID createShaderModule(uint32_t type,
std::string const& filename,
std::string const& prepend = "",
FileType fileType = FILETYPE_DEFAULT,
std::string const& entryname = "main");
void destroyShaderModule(ShaderModuleID idx);
void reloadModule(ShaderModuleID idx);
void reloadShaderModules();
void deleteShaderModules();
bool areShaderModulesValid();
#if NVP_SUPPORTS_SHADERC
void setOptimizationLevel(shaderc_optimization_level level) { m_shadercOptimizationLevel = level; }
#endif
bool isValid(ShaderModuleID idx) const;
VkShaderModule get(ShaderModuleID idx) const;
ShaderModule& getShaderModule(ShaderModuleID idx);
const ShaderModule& getShaderModule(ShaderModuleID idx) const;
const char* getCode(ShaderModuleID idx, size_t* len = NULL) const;
const size_t getCodeLen(ShaderModuleID idx) const;
bool dumpSPIRV(ShaderModuleID idx, const char* filename) const;
bool getSPIRV(ShaderModuleID idx, size_t* pLen, const uint32_t** pCode) const;
// state will affect the next created shader module
// also keep m_filetype in mind!
bool m_preprocessOnly = false;
bool m_keepModuleSPIRV = false;
//////////////////////////////////////////////////////////////////////////
//
// for internal development, useful when we have new shader types that
// are not covered by public VulkanSDK
struct SetupInterface
{
// This class is to aid using a shaderc library version that is not
// provided by the Vulkan SDK, but custom. Therefore it allows custom settings etc.
// Useful for driver development of new shader stages, otherwise can be pretty much ignored.
virtual std::string getTypeDefine(uint32_t type) const = 0;
virtual uint32_t getTypeShadercKind(uint32_t type) const = 0;
virtual void* getShadercCompileOption(void* shadercCompiler) { return nullptr; }
};
void setSetupIF(SetupInterface* setupIF);
ShaderModuleManager(ShaderModuleManager const&) = delete;
ShaderModuleManager& operator=(ShaderModuleManager const&) = delete;
// Constructors reference-count the shared shaderc compiler, and
// disable ShaderFileManager's homemade #include mechanism iff we're
// using shaderc.
#if NVP_SUPPORTS_SHADERC
static constexpr bool s_handleIncludePasting = false;
#else
static constexpr bool s_handleIncludePasting = true;
#endif
ShaderModuleManager(VkDevice device = nullptr)
: ShaderFileManager(s_handleIncludePasting)
{
m_usedSetupIF = &m_defaultSetupIF;
m_supportsExtendedInclude = true;
if(device)
init(device);
}
~ShaderModuleManager() { deinit(); }
// Shaderc has its own interface for handling include files that I
// have to subclass; this needs access to protected
// ShaderFileManager functions.
friend class ShadercIncludeBridge;
private:
ShaderModuleID createShaderModule(const Definition& def);
bool setupShaderModule(ShaderModule& prog);
struct DefaultInterface : public SetupInterface
{
std::string getTypeDefine(uint32_t type) const override;
uint32_t getTypeShadercKind(uint32_t type) const override;
};
static const VkShaderModule PREPROCESS_ONLY_MODULE;
VkDevice m_device = nullptr;
DefaultInterface m_defaultSetupIF;
SetupInterface* m_usedSetupIF = nullptr;
int m_apiMajor = 1;
int m_apiMinor = 1;
#if NVP_SUPPORTS_SHADERC
static uint32_t s_shadercCompilerUsers;
static shaderc_compiler_t s_shadercCompiler; // Lock mutex below while using.
static std::mutex s_shadercCompilerMutex;
shaderc_compile_options_t m_shadercOptions = nullptr;
shaderc_optimization_level m_shadercOptimizationLevel = shaderc_optimization_level_performance;
#endif
std::vector<ShaderModule> m_shadermodules;
};
} // namespace nvvk
#endif  //NV_SHADERMODULEMANAGER_INCLUDED


@ -0,0 +1,101 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <assert.h>
#include <string>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace nvvk {
/** @DOC_START
# functions in nvvk
- createShaderModule : create the shader module from various binary code inputs
- createShaderStageInfo: create the shader module and set up the stage from the incoming binary code (see the sketch below)
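A minimal usage sketch, assuming `device` is a valid VkDevice and `loadSpirvWords` is a hypothetical application-side loader returning the SPIR-V words of a compute shader:
```cpp
std::vector<uint32_t> spirv = loadSpirvWords("comp.spv");  // hypothetical loader, not part of this header
VkPipelineShaderStageCreateInfo stage =
    nvvk::createShaderStageInfo(device, spirv, VK_SHADER_STAGE_COMPUTE_BIT);
// ... create the pipeline using 'stage' ...
vkDestroyShaderModule(device, stage.module, nullptr);  // the caller owns the created module
```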
@DOC_END */
inline VkShaderModule createShaderModule(VkDevice device, const uint32_t* binarycode, size_t sizeInBytes)
{
VkShaderModuleCreateInfo createInfo = {};
createInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
createInfo.codeSize = sizeInBytes;
createInfo.pCode = binarycode;
VkShaderModule shaderModule = VK_NULL_HANDLE;
if(vkCreateShaderModule(device, &createInfo, nullptr, &shaderModule) != VK_SUCCESS)
{
assert(0 && "failed to create shader module!");
}
return shaderModule;
}
inline VkShaderModule createShaderModule(VkDevice device, const char* binarycode, size_t numInt32)
{
return createShaderModule(device, (const uint32_t*)binarycode, numInt32 * 4);
}
inline VkShaderModule createShaderModule(VkDevice device, const std::vector<char>& code)
{
return createShaderModule(device, (const uint32_t*)code.data(), code.size());
}
inline VkShaderModule createShaderModule(VkDevice device, const std::vector<uint8_t>& code)
{
return createShaderModule(device, (const uint32_t*)code.data(), code.size());
}
inline VkShaderModule createShaderModule(VkDevice device, const std::vector<uint32_t>& code)
{
return createShaderModule(device, code.data(), 4 * code.size());
}
inline VkShaderModule createShaderModule(VkDevice device, const std::string& code)
{
return createShaderModule(device, (const uint32_t*)code.data(), code.size());
}
template <typename T>
inline VkPipelineShaderStageCreateInfo createShaderStageInfo(VkDevice device,
const std::vector<T>& code,
VkShaderStageFlagBits stage,
const char* entryPoint = "main")
{
VkPipelineShaderStageCreateInfo shaderStage{VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO};
shaderStage.stage = stage;
shaderStage.module = createShaderModule(device, code);
shaderStage.pName = entryPoint;
return shaderStage;
}
inline VkPipelineShaderStageCreateInfo createShaderStageInfo(VkDevice device,
const std::string& code,
VkShaderStageFlagBits stage,
const char* entryPoint = "main")
{
VkPipelineShaderStageCreateInfo shaderStage{VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO};
shaderStage.stage = stage;
shaderStage.module = createShaderModule(device, code);
shaderStage.pName = entryPoint;
return shaderStage;
}
} // namespace nvvk


@ -0,0 +1,455 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "sparse_image_vk.hpp"
// Compute the number of pages of size `granularity` that would be required to represent a texture of size `extent`
static inline glm::uvec3 alignedDivision(const VkExtent3D& extent, const VkExtent3D& granularity)
{
glm::uvec3 res;
if(granularity.width == 0 || granularity.height == 0 || granularity.depth == 0)
{
LOGE("alignedDivision: invalid granularity\n");
assert(false);
return glm::uvec3(0u);
}
res.x = (extent.width + granularity.width - 1) / granularity.width;
res.y = (extent.height + granularity.height - 1) / granularity.height;
res.z = (extent.depth + granularity.depth - 1) / granularity.depth;
return res;
}
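// Worked example for alignedDivision (illustrative values, not from any caller): a 1000x600x1
// extent with a 128x128x1 granularity yields ceil(1000/128) x ceil(600/128) x ceil(1/1)
// = 8 x 5 x 1 pages.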
// Set the pointers for the VkBindSparseInfo stored in `image` prior to
// calling vkQueueBindSparse
void applySparseMemoryBinds(nvvk::SparseImage& image)
{
image.bindSparseInfo = VkBindSparseInfo{VK_STRUCTURE_TYPE_BIND_SPARSE_INFO};
// Sparse Image memory binds
image.imageMemoryBindInfo.image = image.getWorkImage();
image.imageMemoryBindInfo.bindCount = static_cast<uint32_t>(image.sparseImageMemoryBinds.size());
image.imageMemoryBindInfo.pBinds = image.sparseImageMemoryBinds.data();
image.bindSparseInfo.imageBindCount = ((image.imageMemoryBindInfo.bindCount > 0) ? 1 : 0);
image.bindSparseInfo.pImageBinds = &image.imageMemoryBindInfo;
// Opaque image memory binds (mip tail)
image.opaqueMemoryBindInfo.image = image.getWorkImage();
image.opaqueMemoryBindInfo.bindCount = static_cast<uint32_t>(image.opaqueMemoryBinds.size());
image.opaqueMemoryBindInfo.pBinds = image.opaqueMemoryBinds.data();
image.bindSparseInfo.imageOpaqueBindCount = ((image.opaqueMemoryBindInfo.bindCount > 0) ? 1 : 0);
image.bindSparseInfo.pImageOpaqueBinds = &image.opaqueMemoryBindInfo;
}
// Add mip tail information to the image, return the requested memory requirements for the mip tail
VkMemoryRequirements nvvk::SparseImage::addMipTail(VkMemoryRequirements generalMemoryReqs,
VkSparseImageMemoryRequirements& sparseMemoryReq,
uint32_t layer /*= 0*/)
{
// Compute the size of the required mip tail allocation
VkMemoryRequirements memReqs = generalMemoryReqs;
memReqs.size = sparseMemoryReq.imageMipTailSize;
// Add an `opaque` memory bind representing the mip tail
VkSparseMemoryBind sparseMemoryBind{sparseMemoryReq.imageMipTailOffset + layer * sparseMemoryReq.imageMipTailStride,
sparseMemoryReq.imageMipTailSize, VK_NULL_HANDLE};
opaqueMemoryBinds.push_back(sparseMemoryBind);
// Return the memory requirements for that mip tail
return memReqs;
}
// Compute and store the number of pages contained in each sparse mip level
void nvvk::SparseImage::computeMipPageCounts()
{
uint32_t dimensionCount = 1;
if(size.height > 1)
{
dimensionCount++;
}
if(size.depth > 1)
{
dimensionCount++;
}
// Since the finest mip level has index 0, the number
// of sparse levels is equal to the index of the beginning of the
// mip tail
uint32_t sparseMipLevels = mipTailStart;
sparseMipStartIndices.resize(sparseMipLevels);
sparseMipPageCounts.resize(sparseMipLevels);
// Compute the page count at the coarsest sparse level (just above the mip tail)
// For each dimension we compare the resolution of the mip level with the page granularity and
  // keep the highest ratio. This is particularly necessary for 3D textures, where the depth
// granularity is typically lower than the width and height granularities
uint32_t pageCountAtCoarsestLevel = (size.width >> (sparseMipLevels - 1)) / imageGranularity.width;
pageCountAtCoarsestLevel =
std::max(pageCountAtCoarsestLevel,
pageCountAtCoarsestLevel * ((size.height >> (sparseMipLevels - 1)) / imageGranularity.height));
pageCountAtCoarsestLevel =
std::max(pageCountAtCoarsestLevel,
pageCountAtCoarsestLevel * (size.depth >> (sparseMipLevels - 1)) / imageGranularity.depth);
// When going from level n+1 to level n each dimension will
// be divided by 2, hence each page at level n+1 will be represented
// by 2^dimensionCount children at level n
uint32_t childCount = 1 << dimensionCount;
// The indices of the pages start from the coarsest level, so the
// first page of that level will have index 0, and the pages of the
// finest level will have the highest indices
uint32_t finalIndex = 0;
uint32_t currentPagesInLevel = pageCountAtCoarsestLevel;
uint32_t currentMipLevel = sparseMipLevels - 1;
sparseMipTotalPageCount = 0u;
// Iterate from coarsest to finest level, accumulating the
// page counts for each level
for(uint32_t i = 0; i < sparseMipLevels; i++, currentMipLevel--)
{
sparseMipStartIndices[currentMipLevel] = finalIndex;
sparseMipPageCounts[currentMipLevel] = currentPagesInLevel;
finalIndex += currentPagesInLevel;
currentPagesInLevel *= childCount;
}
sparseMipTotalPageCount = finalIndex;
}
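// Worked example for computeMipPageCounts (illustrative, assuming a 512x512 texture with a
// 128x128 granularity and the mip tail starting at level 3): the sparse levels are 0..2,
// the coarsest sparse level (2) has 1 page, level 1 has 4 pages and level 0 has 16, giving
// sparseMipStartIndices = {5, 1, 0} for levels 0, 1, 2 and a total of 21 pages.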
// Create the sparse image, return the memory requirements for the mip tail(s)
std::vector<VkMemoryRequirements> nvvk::SparseImage::create(VkDevice device,
const std::array<VkImage, s_sparseImageCount> imageDesc,
uint32_t mipLevels,
uint32_t arrayLayers,
const VkExtent3D& extent)
{
if(mipLevels > NVVK_SPARSE_IMAGE_MAX_MIP_LEVELS)
{
LOGE("SparseImage::create: invalid mip level count\n");
assert(false);
return {};
}
std::vector<VkMemoryRequirements> mipTailRequirements;
// Create the image descriptor
size.width = extent.width;
size.height = extent.height;
size.depth = extent.depth;
images = imageDesc;
mipLevelCount = mipLevels;
layerCount = arrayLayers;
// Get memory requirements for later allocations
vkGetImageMemoryRequirements(device, images[0], &memoryReqs);
// Get sparse memory requirements
std::vector<VkSparseImageMemoryRequirements> sparseMemoryReqs;
uint32_t reqCount = 0u;
vkGetImageSparseMemoryRequirements(device, images[0], &reqCount, nullptr);
if(reqCount == 0u)
{
LOGE("No sparse image memory requirements available\n");
return {};
}
sparseMemoryReqs.resize(reqCount);
vkGetImageSparseMemoryRequirements(device, images[0], &reqCount, sparseMemoryReqs.data());
// Select the memory requirements with the smallest granularity to avoid wasting memory
uint32_t minGranularity = NVVK_SPARSE_IMAGE_INVALID_INDEX;
VkSparseImageMemoryRequirements sparseReqs = {};
for(const auto& reqs : sparseMemoryReqs)
{
uint32_t granularity = reqs.formatProperties.imageGranularity.width * reqs.formatProperties.imageGranularity.height
* reqs.formatProperties.imageGranularity.depth;
if(granularity < minGranularity)
{
minGranularity = granularity;
sparseReqs = reqs;
}
}
// sparseMemoryReq.imageMipTailFirstLod is the first mip level stored inside the mip tail
mipTailStart = sparseReqs.imageMipTailFirstLod;
// Get sparse image memory requirements for the color aspect
VkSparseImageMemoryRequirements sparseMemoryReq;
bool colorAspectFound = false;
for(const auto& reqs : sparseMemoryReqs)
{
if((reqs.formatProperties.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0)
{
sparseMemoryReq = reqs;
colorAspectFound = true;
break;
}
}
if(!colorAspectFound)
{
LOGE("Could not find sparse image memory requirements with color aspect bit");
return {};
}
// Check whether a mip tail is necessary
bool hasMipTail = (sparseMemoryReq.imageMipTailFirstLod < mipLevels);
// Check if the format has a single mip tail for all layers or one mip tail for each layer
// The mip tail contains all mip levels >= sparseMemoryReq.imageMipTailFirstLod
bool singleMipTail = ((sparseMemoryReq.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) != 0);
imageGranularity = sparseMemoryReq.formatProperties.imageGranularity;
// Prepare the data structure holding all the virtual pages for the sparse texture
  // Sparse bindings for each mip level of each layer, except the mip levels of the mip tail
for(uint32_t layer = 0; layer < arrayLayers; layer++)
{
// If the format has one mip tail per layer, allocate each of them on the device
if((!singleMipTail) && hasMipTail)
{
mipTailRequirements.push_back(addMipTail(memoryReqs, sparseMemoryReq, layer));
}
}
// If the format has a single mip tail for all layers, allocate it on the device
if(singleMipTail && hasMipTail)
{
mipTailRequirements.push_back(addMipTail(memoryReqs, sparseMemoryReq));
}
// Compute the page indices for each mip level
computeMipPageCounts();
return mipTailRequirements;
}
// Bind device memory to the mip tail(s)
void nvvk::SparseImage::bindMipTailMemory(std::vector<std::pair<VkDeviceMemory, VkDeviceSize>> mipTailMemory)
{
if(mipTailMemory.size() != opaqueMemoryBinds.size())
{
LOGE("Mip tail allocations count must match the number of mip tails in the sparse texture\n");
return;
}
for(size_t i = 0; i < mipTailMemory.size(); i++)
{
opaqueMemoryBinds[i].memory = mipTailMemory[i].first;
opaqueMemoryBinds[i].memoryOffset = mipTailMemory[i].second;
}
}
// Unbind device memory from the mip tail(s)
void nvvk::SparseImage::unbindMipTailMemory()
{
for(size_t i = 0; i < opaqueMemoryBinds.size(); i++)
{
opaqueMemoryBinds[i].resourceOffset = {};
opaqueMemoryBinds[i].memory = {};
opaqueMemoryBinds[i].memoryOffset = {};
}
}
// Update the contents of sparseImageMemoryBinds based on the vector of updated page indices and
// set the pointers in the VkBindSparseInfo
// Call before sparse binding to update memory bind list etc.
// No synchronization is added to the VkBindSparseInfo object, the application
// is responsible for adding the proper semaphore before calling vkQueueBindSparse
void nvvk::SparseImage::updateSparseBindInfo(const std::vector<uint32_t>& updatedPageIndices, uint32_t layer /*= 0*/)
{
// Build the list of added/removed sparse image memory binds
sparseImageMemoryBinds.resize(updatedPageIndices.size());
uint32_t index = 0;
for(auto pageIndex : updatedPageIndices)
{
PageId id = {layer, pageIndex};
auto it = allocatedPages.find(id);
// If the page actually exists in the image and is not flagged for deletion,
// add it to the bindings
if(it != allocatedPages.end() && (it->second.allocationFlags & SparseImagePage::eMarkedForDeletion) == 0)
{
const auto& page = it->second;
sparseImageMemoryBinds[index] = page.imageMemoryBind;
index++;
}
else
{
// Otherwise the page has been deleted, and the sparse texture bindings
// are updated by binding VK_NULL_HANDLE memory to the page
SparseImagePage page = createPageInfo(pageIndex, layer);
sparseImageMemoryBinds[index] = page.imageMemoryBind;
index++;
}
}
// Set the pointers before calling vkQueueBindSparse
applySparseMemoryBinds(*this);
}
// Set the pointers in the VkBindSparseInfo using the contents of sparseImageMemoryBinds
// No synchronization is added to the VkBindSparseInfo object, the application
// is responsible for adding the proper semaphore before calling vkQueueBindSparse
void nvvk::SparseImage::updateSparseBindInfo()
{
// Set the pointers before calling vkQueueBindSparse
applySparseMemoryBinds(*this);
}
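// Usage sketch for the updateSparseBindInfo overloads (the queue, semaphore and page list below
// are application-side assumptions, not part of this file): after updating the page list, attach
// whatever synchronization is required and submit the binds on a queue that supports sparse binding:
//
//   image.updateSparseBindInfo(changedPageIndices, layer);
//   image.bindSparseInfo.signalSemaphoreCount = 1;
//   image.bindSparseInfo.pSignalSemaphores    = &bindCompletedSemaphore;
//   vkQueueBindSparse(sparseBindingQueue, 1, &image.bindSparseInfo, VK_NULL_HANDLE);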
// Get the index of the beginning of a mip level in the page list
uint32_t nvvk::SparseImage::mipStartIndex(uint32_t mipLevel)
{
return sparseMipStartIndices[mipLevel];
}
// Compute the indices of the children of a page, representing the same area of the image at a finer mip level
std::vector<uint32_t> nvvk::SparseImage::pageChildIndices(const SparseImagePage& p)
{
std::vector<uint32_t> res(p.extent.depth <= 1 ? 4 : 8, NVVK_SPARSE_IMAGE_INVALID_INDEX);
if(p.mipLevel == 0)
{
return res;
}
if(p.extent.width == 0u || p.extent.height == 0u || p.extent.depth == 0u)
{
LOGE("pageChildIndices: Invalid page extent");
assert(false);
return res;
}
// Get the index from which the pages of the next mip level
// are defined, and sanity check the result
uint32_t mipStart = mipStartIndex(p.mipLevel - 1);
if(mipStart == NVVK_SPARSE_IMAGE_INVALID_INDEX)
{
LOGE("pageChildIndices: Invalid mip start index");
assert(false);
return res;
}
// Compute the size of the child mip level in texels, defined by originalSize/(2^level)
glm::uvec3 mipSize(std::max(size.width >> (p.mipLevel - 1), 1u), std::max(size.height >> (p.mipLevel - 1), 1u),
std::max(size.depth >> (p.mipLevel - 1), 1u));
// Compute the location of the beginning of the child list in the next mip level, where each dimension contains
// twice as many pages as the parent level
glm::uvec3 location(2 * p.offset.x / p.extent.width, 2 * p.offset.y / p.extent.height, 2 * p.offset.z / p.extent.depth);
uint32_t pageWidth = p.extent.width;
uint32_t pageHeight = p.extent.height;
uint32_t pageDepth = std::max(1u, p.extent.depth);
// Number of pages along one row (X) of the texture, and within one slice (X*Y) of the texture
uint32_t pagesPerRow = (mipSize.x / pageWidth);
uint32_t pagesPerSlice = (mipSize.x * mipSize.y) / (pageWidth * pageHeight);
// Build and return the child list
for(uint32_t z = 0; z < (pageDepth > 1 ? 2u : 1u); z++)
{
for(uint32_t y = 0; y < (pageHeight > 1 ? 2u : 1u); y++)
{
for(uint32_t x = 0; x < 2; x++)
{
res[x + 2 * (y + 2 * z)] = (location.z + z) * pagesPerSlice + mipStart + location.x + x + (location.y + y) * pagesPerRow;
}
}
}
return res;
}
// Create the page information from its page index and layer
nvvk::SparseImagePage nvvk::SparseImage::createPageInfo(uint32_t pageIndex, uint32_t layer)
{
uint32_t dimensionCount = 1;
if(size.height != 0)
dimensionCount++;
if(size.depth != 0)
dimensionCount++;
std::vector<uint32_t>& startIndices = sparseMipStartIndices;
// Find the mip level from the global page index by comparing the
// start indices of the mip levels with the page index
// There are at most 32 levels (including the mip tail)
// so a linear search is fast enough
uint32_t mipLevel = 0;
for(size_t i = 0; i < startIndices.size(); i++)
{
size_t currentMipLevel = startIndices.size() - i - 1;
if(pageIndex >= startIndices[currentMipLevel] && (currentMipLevel == 0 || pageIndex < startIndices[currentMipLevel - 1]))
{
mipLevel = uint32_t(currentMipLevel);
break;
}
}
// Get the local index of the page within its mip level
uint32_t indexInMip = pageIndex - startIndices[mipLevel];
// Resolution of the mip level, defined by the sparse image total size / 2^level
VkExtent3D mipResolution{std::max(size.width >> mipLevel, 1u), std::max(size.height >> mipLevel, 1u),
std::max(size.depth >> mipLevel, 1u)};
// Compute the number of pages required in each dimension for the mip level
glm::uvec3 sparseBindCounts = alignedDivision(mipResolution, imageGranularity);
// Compute the page index in each dimension and deduce the offset of the page
// in texels based on the page granularity
uint32_t x = indexInMip % sparseBindCounts.x;
uint32_t y = (indexInMip / sparseBindCounts.x) % sparseBindCounts.y;
uint32_t z = indexInMip / (sparseBindCounts.x * sparseBindCounts.y);
VkOffset3D offset{int32_t(x * imageGranularity.width), int32_t(y * imageGranularity.height),
int32_t(z * imageGranularity.depth)};
  // Compute the size of the last page in each dimension, in case the image has non-power-of-two dimensions
glm::uvec3 lastBlockExtent;
lastBlockExtent.x = (mipResolution.width % imageGranularity.width) ? mipResolution.width % imageGranularity.width :
imageGranularity.width;
lastBlockExtent.y = (mipResolution.height % imageGranularity.height) ? mipResolution.height % imageGranularity.height :
imageGranularity.height;
lastBlockExtent.z = (mipResolution.depth % imageGranularity.depth) ? mipResolution.depth % imageGranularity.depth :
imageGranularity.depth;
// Size of the page, including the nonuniform size on the edges of the image
VkExtent3D pageSize{(x == sparseBindCounts.x - 1) ? lastBlockExtent.x : imageGranularity.width,
(y == sparseBindCounts.y - 1) ? lastBlockExtent.y : imageGranularity.height,
(z == sparseBindCounts.z - 1) ? lastBlockExtent.z : imageGranularity.depth};
// Set and return the page information, with empty memory allocation
VkImageSubresource subresource{VK_IMAGE_ASPECT_COLOR_BIT, mipLevel, layer};
SparseImagePage newPage{};
newPage.offset = offset;
newPage.extent = pageSize;
newPage.size = memoryReqs.alignment;
newPage.mipLevel = mipLevel;
newPage.layer = layer;
newPage.imageMemoryBind.offset = offset;
newPage.imageMemoryBind.extent = pageSize;
newPage.imageMemoryBind.subresource = subresource;
newPage.index = pageIndex;
newPage.allocationFlags = SparseImagePage::eNone;
newPage.timeStamp = ~0u;
return newPage;
}

View file

@ -0,0 +1,287 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <array>
#include <algorithm>
#include <vector>
#include <unordered_map>
#include <vulkan/vulkan_core.h>
#include <glm/glm.hpp>
#include "nvvk/memorymanagement_vk.hpp"
#include "nvh/nvprint.hpp"
#include "nvh/container_utils.hpp"
// Mip level indexing relies on 32-bit unsigned integers
#define NVVK_SPARSE_IMAGE_MAX_MIP_LEVELS 32u
// Special error value used to catch indexing issues
#define NVVK_SPARSE_IMAGE_INVALID_INDEX (~0u)
namespace nvvk {
/** @DOC_START
# struct nvvk::SparseImagePage
> Virtual texture page as a part of the partially resident texture. Contains memory bindings, offsets and status information.
The virtual texture page is a part of the sparse texture; it stores the page's memory binding, its location within the texture, and its status.
@DOC_END */
struct SparseImagePage
{
// Allocation flags to keep track of the next action
// to take on the page memory
enum AllocationFlagBits
{
// No action, keep the page in memory
eNone = 0,
// The page will have to be discarded once
// no image references it
eMarkedForDeletion = 1
};
typedef uint32_t AllocationFlags;
// Offset in the mip level of the sparse texture, in texels
VkOffset3D offset{};
// Page extent, in texels
VkExtent3D extent{};
// Sparse image memory bind for this page
VkSparseImageMemoryBind imageMemoryBind{};
// Size in bytes of the page
VkDeviceSize size{};
// Mip level of the page
uint32_t mipLevel{NVVK_SPARSE_IMAGE_INVALID_INDEX};
// Layer the page belongs to
uint32_t layer{NVVK_SPARSE_IMAGE_INVALID_INDEX};
nvvk::MemHandle allocation;
// Index of the page based on its location in the sparse texture
// index = mipStartIndex + location.x + pageCount.x*(location.y + pageCount.y*location.z)
// where mipStartIndex is the index of the first page of the mip level,
// location is the 3D index of the page in the mip, and pageCount is the number of pages
// of the mip in each dimension
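// For example (hypothetical values), with mipStartIndex = 10, pageCount = (4, 4, 1)
// and location = (2, 3, 0): index = 10 + 2 + 4 * (3 + 4 * 0) = 24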
uint32_t index{0};
// Application-managed timestamp, typically used for cache management
uint32_t timeStamp{~0u};
// Allocation flags for the page, either eNone for a page that is currently in use,
// or eMarkedForDeletion, for pages that will be destroyed as soon as the sparse image
// binding stops referencing them
AllocationFlags allocationFlags{eNone};
// Bind the page to a block of device memory at the given offset
inline void bindDeviceMemory(VkDeviceMemory mem, VkDeviceSize memOffset)
{
imageMemoryBind.memoryOffset = memOffset;
imageMemoryBind.memory = mem;
}
inline bool hasBoundMemory() const { return imageMemoryBind.memory != VkDeviceMemory(); }
};
/** @DOC_START
# struct nvvk::SparseImage
> Sparse image object, containing the virtual texture pages and memory bindings.
The sparse image object contains the virtual texture pages and memory bindings, as well as the memory requirements for the mip tail and the image granularity.
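Example (minimal sketch; `device`, `imageA`, `imageB`, `tailMemory`, `mipLevels`, `arrayLayers` and `extent` are hypothetical handles/values assumed to be created elsewhere, and allocation and synchronization details are omitted):
```cpp
nvvk::SparseImage sparse;
std::array<VkImage, nvvk::SparseImage::s_sparseImageCount> images = {imageA, imageB};
// Set up the page bookkeeping and query the mip tail memory requirements
std::vector<VkMemoryRequirements> tailReqs = sparse.create(device, images, mipLevels, arrayLayers, extent);
// Allocate tail memory based on tailReqs (not shown), then bind it:
// one (memory, offset) pair per mip tail requirement
sparse.bindMipTailMemory({{tailMemory, 0}});
// As pages become resident, addPage() them and refresh the bind info,
// then submit sparse.bindSparseInfo with vkQueueBindSparse
sparse.updateSparseBindInfo();
```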
@DOC_END */
struct SparseImage
{
// Number of VkImages referencing the sparse memory bindings
// This allows updating the bindings of one image while
// rendering with the other in another thread
static const size_t s_sparseImageCount{2};
// Texture image handles (see above)
std::array<VkImage, s_sparseImageCount> images;
// Index of the image that can be used for rendering
uint32_t currentImage{0u};
// Opaque memory bindings for the mip tail
std::vector<VkSparseMemoryBind> opaqueMemoryBinds;
// Memory allocation for the mip tail. This memory is allocated
// upon creating the sparse image, and will remain allocated
// even after a flush call
std::vector<nvvk::MemHandle> mipTailAllocations;
// Memory properties for the sparse texture allocations
VkMemoryPropertyFlags memoryProperties{};
// Sparse queue binding information
VkBindSparseInfo bindSparseInfo{};
// Memory bindings for virtual addressing
std::vector<VkSparseImageMemoryBind> sparseImageMemoryBinds;
// Page identifier, defined by its layer and its page index, which
// is defined as mipStartIndex + location.x + pageCount.x*(location.y + pageCount.y*location.z)
// where mipStartIndex is the index of the first page of the mip level,
// location is the 3D index of the page in the mip, and pageCount is the number of pages
// of the mip in each dimension
struct PageId
{
uint32_t layer{};
uint32_t page{};
};
// Storage for the currently allocated pages
std::unordered_map<PageId, SparseImagePage, nvh::HashAligned32<PageId>, nvh::EqualMem<PageId>> allocatedPages;
// Binding information for sparse texture pages
VkSparseImageMemoryBindInfo imageMemoryBindInfo{};
// Binding information for the mip tail
VkSparseImageOpaqueMemoryBindInfo opaqueMemoryBindInfo{};
// First mip level in mip tail
uint32_t mipTailStart{NVVK_SPARSE_IMAGE_INVALID_INDEX};
// Total sparse texture resolution
VkExtent3D size{};
// Number of possible mip levels of the image
uint32_t mipLevelCount{NVVK_SPARSE_IMAGE_INVALID_INDEX};
// Number of layers
uint32_t layerCount{NVVK_SPARSE_IMAGE_INVALID_INDEX};
// Memory requirements for page and mip tail allocations
VkMemoryRequirements memoryReqs{};
// Granularity of the image, representing the extent of the pages
VkExtent3D imageGranularity{0u, 0u, 0u};
// Get the number of pages currently allocated on the device
size_t getAllocatedPageCount() const { return allocatedPages.size(); }
// Get the image handle for rendering
VkImage getCurrentImage() { return images[currentImage]; }
// Get the image handle for update work
VkImage getWorkImage() { return images[(currentImage + 1) % s_sparseImageCount]; }
// Swap the current and work images
void nextImage()
{
currentImage = (currentImage + 1) % s_sparseImageCount;
imageMemoryBindInfo.image = getWorkImage();
}
// Add mip tail information to the image, return the requested memory requirements for the mip tail
VkMemoryRequirements addMipTail(VkMemoryRequirements generalMemoryReqs,
VkSparseImageMemoryRequirements& sparseMemoryReq,
uint32_t layer = 0u);
// Compute and store the number of pages contained in each mip level
void computeMipPageCounts();
// Create the sparse image, return the memory requirements for the mip tail(s)
std::vector<VkMemoryRequirements> create(VkDevice device,
const std::array<VkImage, s_sparseImageCount> imageDesc,
uint32_t mipLevels,
uint32_t arrayLayers,
const VkExtent3D& extent);
// Bind device memory to the mip tail(s)
void bindMipTailMemory(std::vector<std::pair<VkDeviceMemory, VkDeviceSize>> mipTailMemory);
// Unbind device memory from the mip tail(s)
void unbindMipTailMemory();
// Add a page to the sparse image
void addPage(VkImageSubresource subresource, VkOffset3D offset, VkExtent3D extent, const VkDeviceSize size, const uint32_t mipLevel, uint32_t layer);
// Update the contents of sparseImageMemoryBinds based on the vector of updated page indices and
// set the pointers in the VkBindSparseInfo
// Call before sparse binding to update memory bind list etc.
// No synchronization is added to the VkBindSparseInfo object, the application
// is responsible for adding the proper semaphore before calling vkQueueBindSparse
void updateSparseBindInfo(const std::vector<uint32_t>& updatedPageIndices, uint32_t layer = 0);
// Set the pointers in the VkBindSparseInfo using the contents of sparseImageMemoryBinds
// No synchronization is added to the VkBindSparseInfo object, the application
// is responsible for adding the proper semaphore before calling vkQueueBindSparse
void updateSparseBindInfo();
// Get the index of the beginning of a mip level in the page list
uint32_t mipStartIndex(uint32_t mipLevel);
// Compute the index of a page within a mip level in the page list
inline uint32_t indexInMip(const SparseImagePage& p)
{
glm::uvec3 mipSize(std::max(size.width >> p.mipLevel, 1u), std::max(size.height >> p.mipLevel, 1u),
std::max(size.depth >> p.mipLevel, 1u));
uint32_t pageWidth  = p.extent.width;
uint32_t pageHeight = std::max(1u, p.extent.height);
uint32_t pageDepth  = std::max(1u, p.extent.depth);
if(pageWidth == 0 || pageHeight == 0)
{
LOGE("indexInMip: Invalid page dimensions");
assert(false);
return NVVK_SPARSE_IMAGE_INVALID_INDEX;
}
// Location of the page within the mip level, in page units (texel offset divided by page extent)
glm::uvec3 location(p.offset.x / pageWidth, p.offset.y / pageHeight, p.offset.z / pageDepth);
uint32_t index = location.x + (mipSize.x / pageWidth) * (location.y + location.z * (mipSize.y / pageHeight));
return index;
}
// Compute the index of a page in the page list
inline uint32_t pageIndex(const SparseImagePage& p)
{
uint32_t index = indexInMip(p);
return pageIndex(p.mipLevel, index);
}
// Compute the index of a page in the page list based on its mip level and index within
// that mip level
inline uint32_t pageIndex(uint32_t mipLevel, uint32_t indexInMip)
{
uint32_t mipStart = mipStartIndex(mipLevel);
if(mipStart == NVVK_SPARSE_IMAGE_INVALID_INDEX)
{
LOGE("pageIndex: invalid mip level");
assert(false);
return NVVK_SPARSE_IMAGE_INVALID_INDEX;
}
if(indexInMip == NVVK_SPARSE_IMAGE_INVALID_INDEX)
{
LOGE("pageIndex: cannot find page index in mip level");
assert(false);
return NVVK_SPARSE_IMAGE_INVALID_INDEX;
}
return mipStart + indexInMip;
}
// Compute the indices of the children of a page, representing the same area of the image at a finer mip level
std::vector<uint32_t> pageChildIndices(const SparseImagePage& p);
// Create the page information from its page index and layer
SparseImagePage createPageInfo(uint32_t pageIndex, uint32_t layer);
private:
// Start index of each mip level
std::vector<uint32_t> sparseMipStartIndices;
// Number of pages in each mip level
std::vector<uint32_t> sparseMipPageCounts;
// Total page count for the sparse image
uint32_t sparseMipTotalPageCount{};
};
} // namespace nvvk

View file

@ -0,0 +1,87 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <vector>
#include "vulkan/vulkan_core.h"
namespace nvvk {
/** @DOC_START
# class nvvk::Specialization
> Helper to generate specialization info
Examples:
```cpp
nvvk::Specialization specialization;
specialization.add(0, 5); // Adding value 5 to constant_id=0
VkPipelineShaderStageCreateInfo info;
...
info.pSpecializationInfo = specialization.getSpecialization();
createPipeline();
```
Note: the values are stored in a vector, therefore add all values
before calling getSpecialization(). Construct the pipeline before
the specialization object goes out of scope, and before the returned
pointer is invalidated by adding new values or clearing the data.
@DOC_END */
class Specialization
{
public:
void add(uint32_t constantID, int32_t value)
{
m_specValues.push_back(value);
VkSpecializationMapEntry entry;
entry.constantID = constantID;
entry.size = sizeof(int32_t);
entry.offset = static_cast<uint32_t>(m_specEntries.size() * sizeof(int32_t));
m_specEntries.emplace_back(entry);
}
void add(const std::vector<std::pair<uint32_t, int32_t>>& const_values)
{
for(const auto& v : const_values)
{
add(v.first, v.second);
}
}
VkSpecializationInfo* getSpecialization()
{
m_specInfo.dataSize = static_cast<uint32_t>(m_specValues.size() * sizeof(int32_t));
m_specInfo.pData = m_specValues.data();
m_specInfo.mapEntryCount = static_cast<uint32_t>(m_specEntries.size());
m_specInfo.pMapEntries = m_specEntries.data();
return &m_specInfo;
}
void clear()
{
m_specValues.clear();
m_specEntries.clear();
m_specInfo = {};
}
private:
std::vector<int32_t> m_specValues;
std::vector<VkSpecializationMapEntry> m_specEntries;
VkSpecializationInfo m_specInfo{};
};
} // namespace nvvk

View file

@ -0,0 +1,304 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include <nvvk/stagingmemorymanager_vk.hpp>
#include <nvh/nvprint.hpp>
#include <nvvk/debug_util_vk.hpp>
#include <nvvk/error_vk.hpp>
namespace nvvk {
void StagingMemoryManager::init(MemAllocator* memAllocator, VkDeviceSize stagingBlockSize /*= 64 * 1024 * 1024*/)
{
assert(!m_device);
m_device = memAllocator->getDevice();
m_subToDevice.init(memAllocator, stagingBlockSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, true);
m_subFromDevice.init(memAllocator, stagingBlockSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
true);
m_freeStagingIndex = INVALID_ID_INDEX;
m_stagingIndex = newStagingIndex();
setFreeUnusedOnRelease(true);
}
void StagingMemoryManager::deinit()
{
if(!m_device)
return;
free(false);
m_subFromDevice.deinit();
m_subToDevice.deinit();
m_sets.clear();
m_device = VK_NULL_HANDLE;
}
bool StagingMemoryManager::fitsInAllocated(VkDeviceSize size, bool toDevice /*= true*/) const
{
return toDevice ? m_subToDevice.fitsInAllocated(size) : m_subFromDevice.fitsInAllocated(size);
}
void* StagingMemoryManager::cmdToImage(VkCommandBuffer cmd,
VkImage image,
const VkOffset3D& offset,
const VkExtent3D& extent,
const VkImageSubresourceLayers& subresource,
VkDeviceSize size,
const void* data,
VkImageLayout layout)
{
if(!image)
return nullptr;
VkBuffer srcBuffer;
VkDeviceSize srcOffset;
void* mapping = getStagingSpace(size, srcBuffer, srcOffset, true);
assert(mapping);
if(data)
{
memcpy(mapping, data, size);
}
VkBufferImageCopy cpy;
cpy.bufferOffset = srcOffset;
cpy.bufferRowLength = 0;
cpy.bufferImageHeight = 0;
cpy.imageSubresource = subresource;
cpy.imageOffset = offset;
cpy.imageExtent = extent;
vkCmdCopyBufferToImage(cmd, srcBuffer, image, layout, 1, &cpy);
return data ? nullptr : mapping;
}
void* StagingMemoryManager::cmdToBuffer(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, const void* data)
{
if(!size || !buffer)
{
return nullptr;
}
VkBuffer srcBuffer;
VkDeviceSize srcOffset;
void* mapping = getStagingSpace(size, srcBuffer, srcOffset, true);
assert(mapping);
if(data)
{
memcpy(mapping, data, size);
}
VkBufferCopy cpy;
cpy.size = size;
cpy.srcOffset = srcOffset;
cpy.dstOffset = offset;
vkCmdCopyBuffer(cmd, srcBuffer, buffer, 1, &cpy);
return data ? nullptr : (void*)mapping;
}
const void* StagingMemoryManager::cmdFromBuffer(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size)
{
VkBuffer dstBuffer;
VkDeviceSize dstOffset;
void* mapping = getStagingSpace(size, dstBuffer, dstOffset, false);
VkBufferCopy cpy;
cpy.size = size;
cpy.srcOffset = offset;
cpy.dstOffset = dstOffset;
vkCmdCopyBuffer(cmd, buffer, dstBuffer, 1, &cpy);
return mapping;
}
const void* StagingMemoryManager::cmdFromImage(VkCommandBuffer cmd,
VkImage image,
const VkOffset3D& offset,
const VkExtent3D& extent,
const VkImageSubresourceLayers& subresource,
VkDeviceSize size,
VkImageLayout layout)
{
VkBuffer dstBuffer;
VkDeviceSize dstOffset;
void* mapping = getStagingSpace(size, dstBuffer, dstOffset, false);
VkBufferImageCopy cpy;
cpy.bufferOffset = dstOffset;
cpy.bufferRowLength = 0;
cpy.bufferImageHeight = 0;
cpy.imageSubresource = subresource;
cpy.imageOffset = offset;
cpy.imageExtent = extent;
vkCmdCopyImageToBuffer(cmd, image, layout, dstBuffer, 1, &cpy);
return mapping;
}
void StagingMemoryManager::finalizeResources(VkFence fence)
{
if(m_sets[m_stagingIndex].entries.empty())
return;
m_sets[m_stagingIndex].fence = fence;
m_sets[m_stagingIndex].manualSet = false;
m_stagingIndex = newStagingIndex();
}
StagingMemoryManager::SetID StagingMemoryManager::finalizeResourceSet()
{
SetID setID;
if(m_sets[m_stagingIndex].entries.empty())
return setID;
setID.index = m_stagingIndex;
m_sets[m_stagingIndex].fence = nullptr;
m_sets[m_stagingIndex].manualSet = true;
m_stagingIndex = newStagingIndex();
return setID;
}
void* StagingMemoryManager::getStagingSpace(VkDeviceSize size, VkBuffer& buffer, VkDeviceSize& offset, bool toDevice)
{
assert(m_sets[m_stagingIndex].index == m_stagingIndex && "illegal index, did you forget finalizeResources");
BufferSubAllocator::Handle handle = toDevice ? m_subToDevice.subAllocate(size) : m_subFromDevice.subAllocate(size);
assert(handle);
BufferSubAllocator::Binding info = toDevice ? m_subToDevice.getSubBinding(handle) : m_subFromDevice.getSubBinding(handle);
buffer = info.buffer;
offset = info.offset;
// append used space to current staging set list
m_sets[m_stagingIndex].entries.push_back({handle, toDevice});
return toDevice ? m_subToDevice.getSubMapping(handle) : m_subFromDevice.getSubMapping(handle);
}
void StagingMemoryManager::releaseResources(uint32_t stagingID)
{
if(stagingID == INVALID_ID_INDEX)
return;
StagingSet& set = m_sets[stagingID];
assert(set.index == stagingID);
// free used allocation ranges
for(auto& itentry : set.entries)
{
if(itentry.toDevice)
{
m_subToDevice.subFree(itentry.handle);
}
else
{
m_subFromDevice.subFree(itentry.handle);
}
}
set.entries.clear();
// update the set.index with the current head of the free list
// pop its old value
m_freeStagingIndex = setIndexValue(set.index, m_freeStagingIndex);
}
void StagingMemoryManager::releaseResources()
{
for(auto& itset : m_sets)
{
if(!itset.entries.empty() && !itset.manualSet && (!itset.fence || vkGetFenceStatus(m_device, itset.fence) == VK_SUCCESS))
{
releaseResources(itset.index);
itset.fence = NULL;
itset.manualSet = false;
}
}
// special case for ease of use if there is only one
if(m_stagingIndex == 0 && m_freeStagingIndex == 0)
{
m_freeStagingIndex = setIndexValue(m_sets[0].index, 0);
}
}
float StagingMemoryManager::getUtilization(VkDeviceSize& allocatedSize, VkDeviceSize& usedSize) const
{
VkDeviceSize aSize = 0;
VkDeviceSize uSize = 0;
m_subFromDevice.getUtilization(aSize, uSize);
allocatedSize = aSize;
usedSize = uSize;
m_subToDevice.getUtilization(aSize, uSize);
allocatedSize += aSize;
usedSize += uSize;
return float(double(usedSize) / double(allocatedSize));
}
void StagingMemoryManager::free(bool unusedOnly)
{
m_subToDevice.free(unusedOnly);
m_subFromDevice.free(unusedOnly);
}
uint32_t StagingMemoryManager::newStagingIndex()
{
// find free slot
if(m_freeStagingIndex != INVALID_ID_INDEX)
{
uint32_t newIndex = m_freeStagingIndex;
// this updates the free link-list
m_freeStagingIndex = setIndexValue(m_sets[newIndex].index, newIndex);
assert(m_sets[newIndex].index == newIndex);
return m_sets[newIndex].index;
}
// otherwise push to end
uint32_t newIndex = (uint32_t)m_sets.size();
StagingSet info;
info.index = newIndex;
m_sets.push_back(info);
assert(m_sets[newIndex].index == newIndex);
return newIndex;
}
} // namespace nvvk

View file

@ -0,0 +1,289 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <string>
#include <vector>
#include <vulkan/vulkan_core.h>
#include "buffersuballocator_vk.hpp"
namespace nvvk {
#define NVVK_DEFAULT_STAGING_BLOCKSIZE (VkDeviceSize(64) * 1024 * 1024)
//////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::StagingMemoryManager
nvvk::StagingMemoryManager class is a utility that manages host visible
buffers and their allocations in an opaque fashion to assist
asynchronous transfers between device and host.
The memory for this is allocated using the provided
[nvvk::MemAllocator](#class-nvvkmemallocator).
The collection of the transfer resources is represented by nvvk::StagingID.
The necessary buffer space is sub-allocated and recycled by using one
[nvvk::BufferSubAllocator](#class-nvvkbuffersuballocator) per transfer direction (to or from device).
> **WARNING:**
> - cannot manage a copy > 4 GB
Usage:
- Enqueue transfers into your VkCommandBuffer and then finalize the copy operations.
- Associate the copy operations with a VkFence or retrieve a SetID
- Releasing the resources allows the buffer space to be safely recycled for future transfers.
> We use fences as a way to garbage collect here, however a more robust solution
> may be implementing some sort of ticketing/timeline system.
> If a fence is recycled, then this class may not be aware that the fence represents a different
> submission, likewise if the fence is deleted elsewhere problems can occur.
> You may want to use the manual "SetID" system in that case.
Example :
```cpp
StagingMemoryManager staging;
staging.init(memAllocator);
// Enqueue copy operations of data to target buffer.
// This internally manages the required staging resources
staging.cmdToBuffer(cmd, targetBuffer, 0, targetSize, targetData);
// you can also get access to a temporary mapped pointer and fill
// the staging buffer directly
vertices = staging.cmdToBufferT<Vertex>(cmd, targetBuffer, 0, targetSize);
// OPTION A:
// associate all previous copy operations with a fence (or not)
staging.finalizeResources( fence );
..
// every once in a while call
staging.releaseResources();
// this will release all those without fence, or those
// who had a fence that completed (but never manual SetIDs, see next).
// OPTION B
// alternatively manage the resource release yourself.
// The SetID represents the staging resources
// since any last finalize.
sid = staging.finalizeResourceSet();
...
// You need to ensure these transfers and their staging
// data access completed yourself prior releasing the set.
//
// This is particularly useful for managing downloads from
// device. The "from" functions return a pointer where the
// data will be copied to. You want to use this pointer
// after the device-side transfer completed, and then
// release its resources once you are done using it.
staging.releaseResourceSet(sid);
```
@DOC_END */
class StagingMemoryManager
{
public:
static const uint32_t INVALID_ID_INDEX = ~0;
//////////////////////////////////////////////////////////////////////////
class SetID
{
friend StagingMemoryManager;
private:
uint32_t index = INVALID_ID_INDEX;
};
StagingMemoryManager(StagingMemoryManager const&) = delete;
StagingMemoryManager& operator=(StagingMemoryManager const&) = delete;
StagingMemoryManager() { m_debugName = "nvvk::StagingMemManager:" + std::to_string((uint64_t)this); }
StagingMemoryManager(MemAllocator* memAllocator, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE)
{
init(memAllocator, stagingBlockSize);
}
virtual ~StagingMemoryManager() { deinit(); }
void init(MemAllocator* memAllocator, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
void deinit();
void setDebugName(const std::string& name) { m_debugName = name; }
// if true (default) we free the memory completely when released
// otherwise we keep blocks around for re-use until freeUnused() is called
void setFreeUnusedOnRelease(bool state)
{
m_subToDevice.setKeepLastBlockOnFree(!state);
m_subFromDevice.setKeepLastBlockOnFree(!state);
}
// test if there is enough space in current allocations
bool fitsInAllocated(VkDeviceSize size, bool toDevice = true) const;
// if data != nullptr memcpies to mapping and returns nullptr
// otherwise returns temporary mapping (valid until "complete" functions)
void* cmdToImage(VkCommandBuffer cmd,
VkImage image,
const VkOffset3D& offset,
const VkExtent3D& extent,
const VkImageSubresourceLayers& subresource,
VkDeviceSize size,
const void* data,
VkImageLayout layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
template <class T>
T* cmdToImageT(VkCommandBuffer cmd,
VkImage image,
const VkOffset3D& offset,
const VkExtent3D& extent,
const VkImageSubresourceLayers& subresource,
VkDeviceSize size,
const void* data,
VkImageLayout layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
{
return (T*)cmdToImage(cmd, image, offset, extent, subresource, size, data, layout);
}
// pointer can be used after cmd execution, but is only valid until the associated resources have been released
const void* cmdFromImage(VkCommandBuffer cmd,
VkImage image,
const VkOffset3D& offset,
const VkExtent3D& extent,
const VkImageSubresourceLayers& subresource,
VkDeviceSize size,
VkImageLayout layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
template <class T>
const T* cmdFromImageT(VkCommandBuffer cmd,
VkImage image,
const VkOffset3D& offset,
const VkExtent3D& extent,
const VkImageSubresourceLayers& subresource,
VkDeviceSize size,
VkImageLayout layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
{
return (const T*)cmdFromImage(cmd, image, offset, extent, subresource, size, layout);
}
// if data != nullptr memcpies to mapping and returns nullptr
// otherwise returns temporary mapping (valid until appropriate release)
void* cmdToBuffer(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, const void* data);
template <class T>
T* cmdToBufferT(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size)
{
return (T*)cmdToBuffer(cmd, buffer, offset, size, nullptr);
}
// pointer can be used after cmd execution, but is only valid until the associated resources have been released
const void* cmdFromBuffer(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size);
template <class T>
const T* cmdFromBufferT(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size)
{
return (const T*)cmdFromBuffer(cmd, buffer, offset, size);
}
// closes the batch of staging resources since last finalize call
// and associates it with a fence for later release.
void finalizeResources(VkFence fence = VK_NULL_HANDLE);
// releases the staging resources whose fences have completed
// and those who had no fence at all, skips resourceSets.
void releaseResources();
// closes the batch of staging resources since last finalize call
// and returns a resource set handle that can be used to release them
SetID finalizeResourceSet();
// releases the staging resources from this particular
// resource set.
void releaseResourceSet(SetID setid) { releaseResources(setid.index); }
// frees staging memory no longer in use
void freeUnused() { free(true); }
float getUtilization(VkDeviceSize& allocatedSize, VkDeviceSize& usedSize) const;
protected:
// The implementation uses two major arrays:
// - Block stores VkBuffers that we sub-allocate the staging space from
// - StagingSet stores all such sub-allocations that were used
// in one batch of operations. Each batch is closed with
// finalizeResources, and typically associated with a fence.
// As such the resources are given back for recycling once the fence has completed.
// To recycle StagingSet structures within the arrays
// we use a linked list of array indices. The "index" element
// in the struct refers to the next free list item, or itself
// when in use.
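// For example (hypothetical state): with three sets {0, 1, 2} and m_freeStagingIndex == 2,
// releasing set 1 stores the old free-list head in m_sets[1].index (now 2) and sets
// m_freeStagingIndex = 1, making set 1 the new head of the free list.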
struct Entry
{
BufferSubAllocator::Handle handle;
bool toDevice;
};
struct StagingSet
{
uint32_t index = INVALID_ID_INDEX;
VkFence fence = VK_NULL_HANDLE;
bool manualSet = false;
std::vector<Entry> entries;
};
VkDevice m_device = VK_NULL_HANDLE;
BufferSubAllocator m_subToDevice;
BufferSubAllocator m_subFromDevice;
std::vector<StagingSet> m_sets;
// active staging index, must be valid at all times
uint32_t m_stagingIndex;
// linked-list to next free staging set
uint32_t m_freeStagingIndex;
std::string m_debugName;
uint32_t setIndexValue(uint32_t& index, uint32_t newValue)
{
uint32_t oldValue = index;
index = newValue;
return oldValue;
}
void free(bool unusedOnly);
uint32_t newStagingIndex();
void* getStagingSpace(VkDeviceSize size, VkBuffer& buffer, VkDeviceSize& offset, bool toDevice);
void releaseResources(uint32_t stagingID);
};
} // namespace nvvk

View file

@ -0,0 +1,2 @@
..\..\nvpro_internal\luajit\win_x64\luajit.exe structs_vk.lua
pause

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,350 @@
local header =
[[
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
//////////////////////////////////////////////////////////////////////////
/**
# function nvvk::make, nvvk::clear
Contains templated `nvvk::make<T>` and `nvvk::clear<T>` functions that are
auto-generated by `structs_vk.lua`. The functions provide default
structs for the Vulkan C api by initializing the `VkStructureType sType`
field (also for nested structs) and clearing the rest to zero.
``` c++
auto compCreateInfo = nvvk::make<VkComputePipelineCreateInfo>();
```
*/
#pragma once
]]
-- HOW TO USE
--
-- 1. Setup environment variable NVVK_VULKAN_XML pointing to vk.xml
-- or use VULKAN_SDK >= 1.2.135.0
--
-- 2. Modify the extension subset (allowlist)
--
-- 3. Check out this and the other structs_vk files for write access
--
-- 4. Run with a lua5.1 compatible lua runtime and the xml2lua project
-- https://github.com/manoelcampos/xml2lua
-- (shared_internal has all the files).
--
-- lua structs_vk.lua
--
-- within this directory.
local VULKAN_XML = os.getenv("NVVK_VULKAN_XML") or os.getenv("VULKAN_SDK").."/share/vulkan/registry/vk.xml"
local extensionSubset = [[
VK_KHR_acceleration_structure
VK_KHR_ray_query
VK_KHR_ray_tracing_pipeline
VK_KHR_push_descriptor
VK_KHR_8bit_storage
VK_KHR_create_renderpass2
VK_KHR_depth_stencil_resolve
VK_KHR_draw_indirect_count
VK_KHR_driver_properties
VK_KHR_pipeline_executable_properties
VK_NV_compute_shader_derivatives
VK_NV_cooperative_matrix
VK_NV_corner_sampled_image
VK_NV_coverage_reduction_mode
VK_NV_dedicated_allocation_image_aliasing
VK_NV_mesh_shader
VK_NV_ray_tracing
VK_NV_representative_fragment_test
VK_NV_shading_rate_image
VK_NV_viewport_array2
VK_NV_viewport_swizzle
VK_NV_scissor_exclusive
VK_NV_device_generated_commands
VK_EXT_buffer_device_address
VK_EXT_debug_marker
VK_EXT_calibrated_timestamps
VK_EXT_conservative_rasterization
VK_EXT_descriptor_indexing
VK_EXT_depth_clip_enable
VK_EXT_memory_budget
VK_EXT_memory_priority
VK_EXT_pci_bus_info
VK_EXT_sample_locations
VK_EXT_sampler_filter_minmax
VK_EXT_texel_buffer_alignment
VK_EXT_debug_utils
VK_EXT_host_query_reset
VK_KHR_external_memory_win32
VK_KHR_external_semaphore_win32
VK_KHR_external_fence_win32
VK_KHR_external_memory_fd
VK_KHR_external_semaphore_fd
VK_EXT_validation_features
VK_KHR_swapchain
]]
local function generate(outfilename, header, whitelist)
local override = {
VkRayTracingShaderGroupCreateInfoNV =
[[
template<> inline VkRayTracingShaderGroupCreateInfoNV make<VkRayTracingShaderGroupCreateInfoNV>(){
VkRayTracingShaderGroupCreateInfoNV ret = {VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV};
ret.generalShader = VK_SHADER_UNUSED_NV;
ret.closestHitShader = VK_SHADER_UNUSED_NV;
ret.anyHitShader = VK_SHADER_UNUSED_NV;
ret.intersectionShader = VK_SHADER_UNUSED_NV;
return ret;
}
]],
VkRayTracingShaderGroupCreateInfoKHR =
[[
template<> inline VkRayTracingShaderGroupCreateInfoKHR make<VkRayTracingShaderGroupCreateInfoKHR>(){
VkRayTracingShaderGroupCreateInfoKHR ret = {VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR};
ret.generalShader = VK_SHADER_UNUSED_KHR;
ret.closestHitShader = VK_SHADER_UNUSED_KHR;
ret.anyHitShader = VK_SHADER_UNUSED_KHR;
ret.intersectionShader = VK_SHADER_UNUSED_KHR;
return ret;
}
]],
}
local function toTab(str)
local tab = {}
for name in str:gmatch("[%w_]+") do
tab[name] = true
end
return tab
end
local whitelist = whitelist and toTab(whitelist)
local xml2lua = require("xml2lua")
local handler = require("xmlhandler.tree")
local filename = VULKAN_XML
local f = io.open(filename,"rt")
assert(f, filename.." not found")
local xml = f:read("*a")
f:close()
-- Bug workaround https://github.com/manoelcampos/xml2lua/issues/35
xml = xml:gsub("(<member>)(<type>[%w_]+</type>)%* ", function(p,typ)
-- add _ dummy symbol
return "<member>_"..typ.."* "
end)
local parser = xml2lua.parser(handler)
parser:parse(xml)
local version = xml:match("VK_HEADER_VERSION</name> (%d+)")
assert(version)
local structenums = {}
local structextensions = {}
local function enumID(name)
name = name:lower()
name = name:gsub("_","")
return name
end
for name in xml:gmatch('"VK_STRUCTURE_TYPE_([%w_]-)"') do
structenums[enumID(name)] = "VK_STRUCTURE_TYPE_"..name
end
xml = nil
local types = handler.root.registry.types
local commands = handler.root.registry.commands
local extensions = handler.root.registry.extensions.extension
-- debugging
if (false) then
local serpent = require "serpent"
local f = io.open(filename..".types.lua", "wt")
f:write(serpent.block(types))
local f = io.open(filename..".exts.lua", "wt")
f:write(serpent.block(extensions))
end
-- build list struct types with structure type init
local lktypes = {}
local lkall = {}
local lkcore = {}
for _,v in ipairs(types.type) do
if (v._attr.category == "struct") then
local alias = v._attr.alias
local name = v._attr.name
if (alias) then
lktypes[name] = lktypes[alias]
else
local members = type(v.member[1]) == "table" and v.member or {v.member}
local tab = {name=name, members=members}
if (members[1].type == "VkStructureType") then
lktypes[name] = tab
lkcore[name] = true
end
lkall[name] = tab
end
end
end
local platforms = {
ggp = "VK_USE_PLATFORM_GGP",
win32 = "VK_USE_PLATFORM_WIN32_KHR",
vi = "VK_USE_PLATFORM_VI_NN",
ios = "VK_USE_PLATFORM_IOS_MVK",
macos = "VK_USE_PLATFORM_MACOS_MVK",
android = "VK_USE_PLATFORM_ANDROID_KHR",
fuchsia = "VK_USE_PLATFORM_FUCHSIA",
metal = "VK_USE_PLATFORM_METAL_EXT",
xlib = "VK_USE_PLATFORM_XLIB_KHR",
xcb = "VK_USE_PLATFORM_XCB_KHR",
wayland = "VK_USE_PLATFORM_WAYLAND_KHR",
xlib_xrandr = "VK_USE_PLATFORM_XLIB_XRANDR_EXT",
}
-- fill extension list
local extLists = {}
for _,v in ipairs(extensions) do
if (v.require) then
local reqs = v.require[1] and v.require or {v.require}
local list = {}
local valid = false
for _,r in ipairs(reqs) do
if (r.type) then
local types = r.type[1] and r.type or {r.type}
for _,t in ipairs(types) do
local tname = t._attr.name
if (lktypes[tname]) then
lkcore[tname] = false
table.insert(list, tname)
valid = true
end
end
end
end
if (valid and ((whitelist and whitelist[v._attr.name]) or not whitelist)) then
table.insert(extLists, {list=list, ext=v._attr.name, platform=platforms[v._attr.platform or "_"] })
end
end
end
-- fill core list
local coreList = {}
for _,v in ipairs(types.type) do
if (v._attr.category == "struct" and lkcore[v._attr.name]) then
table.insert(coreList, v._attr.name)
end
end
local out = ""
out = out.." template <class T> T make(){ return T(); }\n"
out = out.." template <class T> void clear(T& ref){ ref = make<T>(); }\n"
local function process(t)
local ext = nil
for _,sname in ipairs(t.list) do
local enum = structenums[enumID(sname:match("Vk(.*)"))]
local struct = lktypes[sname]
if (enum and struct and not struct.exported) then
if ((not ext) and t.ext) then
out = out.."#if "..t.ext.."\n"
ext = t.ext
end
local complex = ""
local function addComplex(prefix, members)
for _,m in ipairs(members) do
local mvar = m.name
local mtype = m.type
local mstruct = lkall[mtype]
-- skip pointers
if (mstruct and not m[1]) then
local mexp = mstruct.exported
local mmembers = mstruct.members
if (mexp == true) then
complex = complex..prefix..mvar.." = make<"..mtype..">();\n"
elseif (mexp) then
complex = complex..prefix..mvar.." = {"..mexp.."};\n"
elseif (mmembers) then
addComplex(prefix..mvar..".", mmembers)
end
end
end
end
addComplex(" ret.", struct.members)
if (override[sname]) then
out = out..override[sname]
print("override", sname)
struct.exported = true
elseif (complex ~= "") then
out = out.." template<> inline "..sname.." make<"..sname..">(){\n "..sname.." ret = {"..enum.."};\n"..complex.." return ret;\n }\n"
print("complex", sname)
struct.exported = true
else
out = out.." template<> inline "..sname.." make<"..sname..">(){\n return "..sname.."{"..enum.."};\n }\n"
struct.exported = enum
end
end
end
if (ext) then
out = out.."#endif\n"
end
end
-- process core
process({list=coreList})
-- process whitelisted extensions
for _,ext in ipairs(extLists) do
process(ext)
end
local outfile = io.open(outfilename, "wt")
assert(outfile, "could not open "..outfilename.." for writing")
outfile:write("/* based on VK_HEADER_VERSION "..version.." */\n")
outfile:write(header)
outfile:write("namespace nvvk {\n")
outfile:write(out)
outfile:write("}\n")
outfile:flush()
outfile:close()
end
generate("structs_vk.hpp", header, extensionSubset)

View file

@ -0,0 +1,468 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "swapchain_vk.hpp"
#include "error_vk.hpp"
#include <assert.h>
#include <nvvk/debug_util_vk.hpp>
namespace nvvk {
bool SwapChain::init(VkDevice device,
VkPhysicalDevice physicalDevice,
VkQueue queue,
uint32_t queueFamilyIndex,
VkSurfaceKHR surface,
VkFormat format,
VkImageUsageFlags imageUsage)
{
assert(!m_device);
m_device = device;
m_physicalDevice = physicalDevice;
m_swapchain = VK_NULL_HANDLE;
m_queue = queue;
m_queueFamilyIndex = queueFamilyIndex;
m_changeID = 0;
m_currentSemaphore = 0;
m_surface = surface;
m_imageUsage = imageUsage;
VkResult result;
// Get the list of VkFormat's that are supported:
uint32_t formatCount;
result = vkGetPhysicalDeviceSurfaceFormatsKHR(m_physicalDevice, m_surface, &formatCount, nullptr);
assert(!result);
std::vector<VkSurfaceFormatKHR> surfFormats(formatCount);
result = vkGetPhysicalDeviceSurfaceFormatsKHR(m_physicalDevice, m_surface, &formatCount, surfFormats.data());
assert(!result);
// If the format list includes just one entry of VK_FORMAT_UNDEFINED,
// the surface has no preferred format. Otherwise, at least one
// supported format will be returned.
m_surfaceFormat = VK_FORMAT_B8G8R8A8_UNORM;
m_surfaceColor = surfFormats[0].colorSpace;
for(uint32_t i = 0; i < formatCount; i++)
{
if(surfFormats[i].format == format)
{
m_surfaceFormat = format;
m_surfaceColor = surfFormats[i].colorSpace;
return true;
}
}
return false;
}
VkExtent2D SwapChain::update(int width, int height, bool vsync)
{
m_changeID++;
VkResult err;
VkSwapchainKHR oldSwapchain = m_swapchain;
err = waitIdle();
if(nvvk::checkResult(err, __FILE__, __LINE__))
{
exit(-1);
}
// Check the surface capabilities and formats
VkSurfaceCapabilitiesKHR surfCapabilities;
err = vkGetPhysicalDeviceSurfaceCapabilitiesKHR(m_physicalDevice, m_surface, &surfCapabilities);
assert(!err);
uint32_t presentModeCount;
err = vkGetPhysicalDeviceSurfacePresentModesKHR(m_physicalDevice, m_surface, &presentModeCount, nullptr);
assert(!err);
std::vector<VkPresentModeKHR> presentModes(presentModeCount);
err = vkGetPhysicalDeviceSurfacePresentModesKHR(m_physicalDevice, m_surface, &presentModeCount, presentModes.data());
assert(!err);
VkExtent2D swapchainExtent;
// width and height are either both -1, or both not -1.
if(surfCapabilities.currentExtent.width == (uint32_t)-1)
{
// If the surface size is undefined, the size is set to
// the size of the images requested.
swapchainExtent.width = width;
swapchainExtent.height = height;
}
else
{
// If the surface size is defined, the swap chain size must match
swapchainExtent = surfCapabilities.currentExtent;
}
// test against valid size, typically hit when windows are minimized, the app must
// prevent triggering this code accordingly
assert(swapchainExtent.width && swapchainExtent.height);
// everyone must support FIFO mode
VkPresentModeKHR swapchainPresentMode = VK_PRESENT_MODE_FIFO_KHR;
// vsync is off: try to find a faster alternative to FIFO
if(!vsync)
{
for(uint32_t i = 0; i < presentModeCount; i++)
{
if(presentModes[i] == VK_PRESENT_MODE_MAILBOX_KHR)
{
swapchainPresentMode = VK_PRESENT_MODE_MAILBOX_KHR;
}
if(presentModes[i] == VK_PRESENT_MODE_IMMEDIATE_KHR)
{
swapchainPresentMode = VK_PRESENT_MODE_IMMEDIATE_KHR;
}
if(swapchainPresentMode == m_preferredVsyncOffMode)
{
break;
}
}
}
// Determine the number of VkImage's to use in the swap chain (we desire to
// own only 1 image at a time, besides the images being displayed and
// queued for display):
uint32_t desiredNumberOfSwapchainImages = surfCapabilities.minImageCount + 1;
if((surfCapabilities.maxImageCount > 0) && (desiredNumberOfSwapchainImages > surfCapabilities.maxImageCount))
{
// Application must settle for fewer images than desired:
desiredNumberOfSwapchainImages = surfCapabilities.maxImageCount;
}
VkSurfaceTransformFlagBitsKHR preTransform;
if(surfCapabilities.supportedTransforms & VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR)
{
preTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
}
else
{
preTransform = surfCapabilities.currentTransform;
}
VkSwapchainCreateInfoKHR swapchain = {VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR};
swapchain.surface = m_surface;
swapchain.minImageCount = desiredNumberOfSwapchainImages;
swapchain.imageFormat = m_surfaceFormat;
swapchain.imageColorSpace = m_surfaceColor;
swapchain.imageExtent = swapchainExtent;
swapchain.imageUsage = m_imageUsage;
swapchain.preTransform = preTransform;
swapchain.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
swapchain.imageArrayLayers = 1;
swapchain.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
swapchain.queueFamilyIndexCount = 1;
swapchain.pQueueFamilyIndices = &m_queueFamilyIndex;
swapchain.presentMode = swapchainPresentMode;
swapchain.oldSwapchain = oldSwapchain;
swapchain.clipped = true;
err = vkCreateSwapchainKHR(m_device, &swapchain, nullptr, &m_swapchain);
assert(!err);
nvvk::DebugUtil debugUtil(m_device);
debugUtil.setObjectName(m_swapchain, "SwapChain::m_swapchain");
// If we just re-created an existing swapchain, we should destroy the old
// swapchain at this point.
// Note: destroying the swapchain also cleans up all its associated
// presentable images once the platform is done with them.
if(oldSwapchain != VK_NULL_HANDLE)
{
for(auto it : m_entries)
{
vkDestroyImageView(m_device, it.imageView, nullptr);
}
for(auto it : m_semaphores)
{
vkDestroySemaphore(m_device, it.readSemaphore, nullptr);
vkDestroySemaphore(m_device, it.writtenSemaphore, nullptr);
}
vkDestroySwapchainKHR(m_device, oldSwapchain, nullptr);
}
err = vkGetSwapchainImagesKHR(m_device, m_swapchain, &m_imageCount, nullptr);
assert(!err);
m_entries.resize(m_imageCount);
m_barriers.resize(m_imageCount);
std::vector<VkImage> images(m_imageCount);
err = vkGetSwapchainImagesKHR(m_device, m_swapchain, &m_imageCount, images.data());
assert(!err);
//
// Image views
//
for(uint32_t i = 0; i < m_imageCount; i++)
{
Entry& entry = m_entries[i];
// image
entry.image = images[i];
// imageview
VkImageViewCreateInfo viewCreateInfo = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
nullptr,
0,
entry.image,
VK_IMAGE_VIEW_TYPE_2D,
m_surfaceFormat,
{VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A},
{VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}};
err = vkCreateImageView(m_device, &viewCreateInfo, nullptr, &entry.imageView);
assert(!err);
// initial barriers
VkImageSubresourceRange range = {0};
range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
range.baseMipLevel = 0;
range.levelCount = VK_REMAINING_MIP_LEVELS;
range.baseArrayLayer = 0;
range.layerCount = VK_REMAINING_ARRAY_LAYERS;
VkImageMemoryBarrier memBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
memBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
memBarrier.dstAccessMask = 0;
memBarrier.srcAccessMask = 0;
memBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
memBarrier.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
memBarrier.image = entry.image;
memBarrier.subresourceRange = range;
m_barriers[i] = memBarrier;
debugUtil.setObjectName(entry.image, "swapchainImage:" + std::to_string(i));
debugUtil.setObjectName(entry.imageView, "swapchainImageView:" + std::to_string(i));
}
m_semaphores.resize(getSemaphoreCycleCount());
for(uint32_t i = 0; i < getSemaphoreCycleCount(); i++)
{
SemaphoreEntry& entry = m_semaphores[i];
// semaphore
VkSemaphoreCreateInfo semCreateInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO};
err = vkCreateSemaphore(m_device, &semCreateInfo, nullptr, &entry.readSemaphore);
assert(!err);
err = vkCreateSemaphore(m_device, &semCreateInfo, nullptr, &entry.writtenSemaphore);
assert(!err);
debugUtil.setObjectName(entry.readSemaphore, "swapchainReadSemaphore:" + std::to_string(i));
debugUtil.setObjectName(entry.writtenSemaphore, "swapchainWrittenSemaphore:" + std::to_string(i));
}
m_updateWidth = width;
m_updateHeight = height;
m_vsync = vsync;
m_extent = swapchainExtent;
m_currentSemaphore = 0;
m_currentImage = 0;
return swapchainExtent;
}
void SwapChain::deinitResources()
{
if(!m_device)
return;
VkResult result = waitIdle();
if(nvvk::checkResult(result, __FILE__, __LINE__))
{
exit(-1);
}
for(auto it : m_entries)
{
vkDestroyImageView(m_device, it.imageView, nullptr);
}
for(auto it : m_semaphores)
{
vkDestroySemaphore(m_device, it.readSemaphore, nullptr);
vkDestroySemaphore(m_device, it.writtenSemaphore, nullptr);
}
if(m_swapchain)
{
vkDestroySwapchainKHR(m_device, m_swapchain, nullptr);
m_swapchain = VK_NULL_HANDLE;
}
m_entries.clear();
m_barriers.clear();
}
void SwapChain::deinit()
{
deinitResources();
m_physicalDevice = VK_NULL_HANDLE;
m_device = VK_NULL_HANDLE;
m_surface = VK_NULL_HANDLE;
m_changeID = 0;
}
bool SwapChain::acquire(bool* pRecreated, SwapChainAcquireState* pOut)
{
return acquireCustom(VK_NULL_HANDLE, m_updateWidth, m_updateHeight, pRecreated, pOut);
}
bool SwapChain::acquireAutoResize(int width, int height, bool* pRecreated, SwapChainAcquireState* pOut)
{
return acquireCustom(VK_NULL_HANDLE, width, height, pRecreated, pOut);
}
bool SwapChain::acquireCustom(VkSemaphore argSemaphore, bool* pRecreated, SwapChainAcquireState* pOut)
{
return acquireCustom(argSemaphore, m_updateWidth, m_updateHeight, pRecreated, pOut);
}
bool SwapChain::acquireCustom(VkSemaphore argSemaphore, int width, int height, bool* pRecreated, SwapChainAcquireState* pOut)
{
bool didRecreate = false;
if(width != m_updateWidth || height != m_updateHeight)
{
deinitResources();
update(width, height);
m_updateWidth = width;
m_updateHeight = height;
didRecreate = true;
}
if(pRecreated != nullptr)
{
*pRecreated = didRecreate;
}
// try recreation a few times
for(int i = 0; i < 2; i++)
{
VkSemaphore semaphore = argSemaphore ? argSemaphore : getActiveReadSemaphore();
VkResult result;
result = vkAcquireNextImageKHR(m_device, m_swapchain, UINT64_MAX, semaphore, (VkFence)VK_NULL_HANDLE, &m_currentImage);
if(result == VK_SUCCESS)
{
if(pOut != nullptr)
{
pOut->image = getActiveImage();
pOut->view = getActiveImageView();
pOut->index = getActiveImageIndex();
pOut->waitSem = getActiveReadSemaphore();
pOut->signalSem = getActiveWrittenSemaphore();
}
return true;
}
else if(result == VK_ERROR_OUT_OF_DATE_KHR || result == VK_SUBOPTIMAL_KHR)
{
deinitResources();
update(width, height, m_vsync);
}
else
{
return false;
}
}
return false;
}
VkSemaphore SwapChain::getActiveWrittenSemaphore() const
{
return m_semaphores[(m_currentSemaphore % getSemaphoreCycleCount())].writtenSemaphore;
}
VkSemaphore SwapChain::getActiveReadSemaphore() const
{
return m_semaphores[(m_currentSemaphore % getSemaphoreCycleCount())].readSemaphore;
}
VkImage SwapChain::getActiveImage() const
{
return m_entries[m_currentImage].image;
}
VkImageView SwapChain::getActiveImageView() const
{
return m_entries[m_currentImage].imageView;
}
VkImage SwapChain::getImage(uint32_t i) const
{
if(i >= m_imageCount)
return nullptr;
return m_entries[i].image;
}
void SwapChain::present(VkQueue queue)
{
VkResult result;
VkPresentInfoKHR presentInfo;
presentCustom(presentInfo);
result = vkQueuePresentKHR(queue, &presentInfo);
//assert(result == VK_SUCCESS); // can fail on application exit
}
void SwapChain::presentCustom(VkPresentInfoKHR& presentInfo)
{
VkSemaphore& written = m_semaphores[(m_currentSemaphore % getSemaphoreCycleCount())].writtenSemaphore;
presentInfo = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR};
presentInfo.swapchainCount = 1;
presentInfo.waitSemaphoreCount = 1;
presentInfo.pWaitSemaphores = &written;
presentInfo.pSwapchains = &m_swapchain;
presentInfo.pImageIndices = &m_currentImage;
m_currentSemaphore++;
}
void SwapChain::cmdUpdateBarriers(VkCommandBuffer cmd) const
{
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
nullptr, m_imageCount, m_barriers.data());
}
uint32_t SwapChain::getChangeID() const
{
return m_changeID;
}
VkImageView SwapChain::getImageView(uint32_t i) const
{
if(i >= m_imageCount)
return nullptr;
return m_entries[i].imageView;
}
} // namespace nvvk

View file

@ -0,0 +1,385 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef NV_VK_SWAPCHAIN_INCLUDED
#define NV_VK_SWAPCHAIN_INCLUDED
#include <stdio.h>
#include <string>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace nvvk {
/** @DOC_START
# class nvvk::SwapChain
> nvvk::SwapChain is a helper to handle swapchain setup and use
In Vulkan, we have to use `VkSwapchainKHR` to request a swap chain
(front and back buffers) from the operating system and manually
synchronize our and OS's access to the images within the swap chain.
This helper abstracts that process.
For each swap chain image there is an ImageView, and one read and write
semaphore synchronizing it (see `SwapChainAcquireState`).
To start, you need to call `init`, then `update` with the window's
initial framebuffer size (for example, use `glfwGetFramebufferSize`).
Then, in your render loop, you need to call `acquire()` to get the
swap chain image to draw to, draw your frame (waiting and signalling
the appropriate semaphores), and call `present()`.
Sometimes, the swap chain needs to be re-created (usually due to
window resizes). `nvvk::SwapChain` detects this automatically and
re-creates the swap chain for you. Every new swap chain is assigned a
unique ID (`getChangeID()`), allowing you to detect swap chain
re-creations. This usually triggers a `VkDeviceWaitIdle`; however, if
this is not appropriate, see `setWaitQueue()`.
Finally, there is a utility function to setup the image transitions
from VK_IMAGE_LAYOUT_UNDEFINED to VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
which is the format an image must be in before it is presented.
Example in combination with nvvk::Context :
* get the window handle
* create its related surface
* make sure the Queue is the one we need to render in this surface
```cpp
// could be arguments of a function/method:
nvvk::Context ctx;
NVPWindow win;
...
// get the surface of the window in which to render
VkWin32SurfaceCreateInfoKHR createInfo = {};
... populate the fields of createInfo ...
createInfo.hwnd = glfwGetWin32Window(win.m_internal);
result = vkCreateWin32SurfaceKHR(ctx.m_instance, &createInfo, nullptr, &m_surface);
...
// make sure we assign the proper Queue to m_queueGCT, from what the surface tells us
ctx.setGCTQueueWithPresent(m_surface);
```
The initialization can happen now :
```cpp
m_swapChain.init(ctx.m_device, ctx.m_physicalDevice, ctx.m_queueGCT, ctx.m_queueGCT.familyIndex,
m_surface, VK_FORMAT_B8G8R8A8_UNORM);
...
// after init or update you also have to setup the image layouts at some point
VkCommandBuffer cmd = ...
m_swapChain.cmdUpdateBarriers(cmd);
```
During a resizing of a window, you can update the swapchain as well :
```cpp
bool WindowSurface::resize(int w, int h)
{
...
m_swapChain.update(w, h);
// be cautious to also transition the image layouts
...
}
```
A typical renderloop would look as follows:
```cpp
// handles vkAcquireNextImageKHR and setting the active image
// w,h only needed if update(w,h) not called reliably.
int w, h;
bool recreated;
glfwGetFramebufferSize(window, &w, &h);
if(!m_swapChain.acquireAutoResize(w, h, &recreated /*, optional SwapChainAcquireState ptr */))
{
... handle acquire error (shouldn't happen)
}
VkCommandBuffer cmd = ...
// acquire might have recreated the swap chain: respond if needed here.
// NOTE: you can also check the recreated variable above, but this
// only works if the swap chain was recreated this frame.
if (m_swapChain.getChangeID() != lastChangeID){
// after init or resize you have to setup the image layouts
m_swapChain.cmdUpdateBarriers(cmd);
lastChangeID = m_swapChain.getChangeID();
}
// do render operations either directly using the imageview
VkImageView swapImageView = m_swapChain.getActiveImageView();
// or you may always render offline into your own framebuffer
// and then simply blit into the backbuffer. NOTE: use
// m_swapChain.getWidth() / getHeight() to get blit dimensions,
// actual swap chain image size may differ from requested width/height.
VkImage swapImage = m_swapChain.getActiveImage();
vkCmdBlitImage(cmd, ... swapImage ...);
// setup submit
VkSubmitInfo submitInfo = {VK_STRUCTURE_TYPE_SUBMIT_INFO};
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &cmd;
// we need to wait for the swapchain image to have been read already
// so we can safely blit into it
VkSemaphore swapchainReadSemaphore = m_swapChain.getActiveReadSemaphore();
VkPipelineStageFlags swapchainReadFlags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
submitInfo.waitSemaphoreCount = 1;
submitInfo.pWaitSemaphores = &swapchainReadSemaphore;
submitInfo.pWaitDstStageMask = &swapchainReadFlags;
// once this submit has completed, we have written the swapchain image
VkSemaphore swapchainWrittenSemaphore = m_swapChain.getActiveWrittenSemaphore();
submitInfo.signalSemaphoreCount = 1;
submitInfo.pSignalSemaphores = &swapchainWrittenSemaphore;
// submit it
vkQueueSubmit(m_queue, 1, &submitInfo, fence);
// present via a queue that supports it
// this will also setup the dependency for the appropriate written semaphore
// and bump the semaphore cycle
m_swapChain.present(m_queue);
```
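If you prefer explicit handles over the getActive* calls, `acquire` can optionally
fill a `SwapChainAcquireState`. A minimal sketch (assuming `update(w, h)` is called
reliably on window resize; variable names are illustrative):
```cpp
SwapChainAcquireState acq;
bool recreated = false;
if(!m_swapChain.acquire(&recreated, &acq))
{
  ... handle acquire error
}
// wait on acq.waitSem before writing to acq.image / acq.view,
// signal acq.signalSem when the frame is written, then call present()
```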
@DOC_END */
// What SwapChain::acquire produces: a swap chain image plus
// semaphores protecting it.
struct SwapChainAcquireState
{
// The image and its view and index in the swap chain.
VkImage image;
VkImageView view;
uint32_t index;
// MUST wait on this semaphore before writing to the image. (The
// system signals this semaphore when it is done presenting the
// image and the image can safely be reused.)
VkSemaphore waitSem;
// MUST signal this semaphore when done writing to the image, and
// before presenting it. (The system waits for this before presenting).
VkSemaphore signalSem;
};
class SwapChain
{
private:
struct Entry
{
VkImage image{};
VkImageView imageView{};
};
struct SemaphoreEntry
{
// be aware semaphore index may not match active image index
VkSemaphore readSemaphore{};
VkSemaphore writtenSemaphore{};
};
VkDevice m_device = VK_NULL_HANDLE;
VkPhysicalDevice m_physicalDevice = VK_NULL_HANDLE;
VkQueue m_queue{};
VkQueue m_waitQueue{}; // See waitIdle and setWaitQueue.
uint32_t m_queueFamilyIndex{0};
VkSurfaceKHR m_surface{};
VkFormat m_surfaceFormat{};
VkColorSpaceKHR m_surfaceColor{};
uint32_t m_imageCount{0};
VkSwapchainKHR m_swapchain{};
std::vector<Entry> m_entries;
std::vector<SemaphoreEntry> m_semaphores;
std::vector<VkImageMemoryBarrier> m_barriers;
// index for current image, returned by vkAcquireNextImageKHR
// vk spec: The order in which images are acquired is implementation-dependent,
// and may be different than the order the images were presented
uint32_t m_currentImage{0};
// index for current semaphore, incremented by `SwapChain::present`
uint32_t m_currentSemaphore{0};
// incremented by `SwapChain::update`, use to update other resources or track changes
uint32_t m_changeID{0};
// surface
VkExtent2D m_extent{0, 0};
// requested on update
uint32_t m_updateWidth{0};
uint32_t m_updateHeight{0};
// if the swap operation is sync'ed with monitor
bool m_vsync = false;
// if vsync is off which mode to prefer
VkPresentModeKHR m_preferredVsyncOffMode = VK_PRESENT_MODE_MAILBOX_KHR;
// usage flags for swapchain images
VkImageUsageFlags m_imageUsage{};
VkResult waitIdle()
{
if(m_waitQueue)
return vkQueueWaitIdle(m_waitQueue);
else
return vkDeviceWaitIdle(m_device);
}
// triggers device/queue wait idle
void deinitResources();
public:
SwapChain(SwapChain const&) = delete;
SwapChain& operator=(SwapChain const&) = delete;
SwapChain() {}
static constexpr VkFormat s_defaultImageFormat = VK_FORMAT_B8G8R8A8_UNORM;
static constexpr VkImageUsageFlags s_defaultImageUsage =
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
SwapChain(VkDevice device,
VkPhysicalDevice physicalDevice,
VkQueue queue,
uint32_t queueFamilyIndex,
VkSurfaceKHR surface,
VkFormat format = s_defaultImageFormat,
VkImageUsageFlags imageUsage = s_defaultImageUsage)
{
init(device, physicalDevice, queue, queueFamilyIndex, surface, format, imageUsage);
}
~SwapChain() { deinit(); }
bool init(VkDevice device,
VkPhysicalDevice physicalDevice,
VkQueue queue,
uint32_t queueFamilyIndex,
VkSurfaceKHR surface,
VkFormat format = s_defaultImageFormat,
VkImageUsageFlags imageUsage = s_defaultImageUsage);
// triggers queue/device wait idle
void deinit();
// update the swapchain configuration
// (must be called at least once after init)
// triggers queue/device wait idle
// returns actual swapchain dimensions, which may differ from requested
VkExtent2D update(int width, int height, bool vsync);
VkExtent2D update(int width, int height) { return update(width, height, m_vsync); }
// Returns true on success.
//
// Sets active index to the next swap chain image to draw to.
// The handles and semaphores for this image are optionally written to *pOut.
//
// `acquire` and `acquireAutoResize` use getActiveReadSemaphore();
// `acquireCustom` allows you to provide your own semaphore.
//
// If the swap chain was invalidated (window resized, etc.), the
// swap chain will be recreated, which triggers queue/device wait
// idle. If you are not calling `update` manually on window resize,
// you must pass the new swap image size explicitly.
//
// WARNING: The actual swap image size might not match what is
// requested; use getWidth/getHeight to check actual swap image
// size.
//
// If the swap chain was recreated, *pRecreated is set to true (if
// pRecreated != nullptr); otherwise, set to false.
//
// WARNING: the swap chain could be spontaneously recreated, even if
// you are calling `update` whenever the window is resized.
bool acquire(bool* pRecreated = nullptr, SwapChainAcquireState* pOut = nullptr);
bool acquireAutoResize(int width, int height, bool* pRecreated, SwapChainAcquireState* pOut = nullptr);
// Can be made public if this functionality is needed again.
private:
bool acquireCustom(VkSemaphore semaphore, bool* pRecreated = nullptr, SwapChainAcquireState* pOut = nullptr);
bool acquireCustom(VkSemaphore semaphore, int width, int height, bool* pRecreated, SwapChainAcquireState* pOut = nullptr);
// add one to avoid accidentally missing a proper fence wait prior to acquire
uint32_t getSemaphoreCycleCount() const { return m_imageCount + 1; }
public:
// all present functions bump semaphore cycle
// present on provided queue
void present(VkQueue queue);
// present using a default queue from init time
void present() { present(m_queue); }
// present via a custom function
// (e.g. when extending via VkDeviceGroupPresentInfoKHR)
// fills in defaults for provided presentInfo
// with getActiveImageIndex()
// and getActiveWrittenSemaphore()
void presentCustom(VkPresentInfoKHR& outPresentInfo);
VkSemaphore getActiveReadSemaphore() const;
VkSemaphore getActiveWrittenSemaphore() const;
VkImage getActiveImage() const;
VkImageView getActiveImageView() const;
uint32_t getActiveImageIndex() const { return m_currentImage; }
uint32_t getImageCount() const { return m_imageCount; }
VkImage getImage(uint32_t i) const;
VkImageView getImageView(uint32_t i) const;
VkFormat getFormat() const { return m_surfaceFormat; }
// Get the actual size of the swap chain images.
uint32_t getWidth() const { return m_extent.width; }
uint32_t getHeight() const { return m_extent.height; }
VkExtent2D getExtent() const { return m_extent; }
// Get the requested size of the swap chain images. THIS IS RARELY USEFUL.
uint32_t getUpdateWidth() const { return m_updateWidth; }
uint32_t getUpdateHeight() const { return m_updateHeight; }
bool getVsync() const { return m_vsync; }
VkSwapchainKHR getSwapchain() const { return m_swapchain; }
// does a vkCmdPipelineBarrier for VK_IMAGE_LAYOUT_UNDEFINED to VK_IMAGE_LAYOUT_PRESENT_SRC_KHR
// must be called after update calls to apply the resource transitions
void cmdUpdateBarriers(VkCommandBuffer cmd) const;
uint32_t getChangeID() const;
// Ordinarily, `SwapChain` calls vkDeviceWaitIdle before recreating
// the swap chain. However, if setWaitQueue is called with a
// non-null queue, we only wait for that queue instead of the whole
// device. This may be needed if you are using queues in other CPU
// threads that are not synchronized to the render loop.
void setWaitQueue(VkQueue waitQueue = VK_NULL_HANDLE) { m_waitQueue = waitQueue; }
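// Example (illustrative): m_swapChain.setWaitQueue(queueGCT); where queueGCT is the
// queue the swap chain images are used on, so recreation waits only on that queue.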
// typically either VK_PRESENT_MODE_MAILBOX_KHR or VK_PRESENT_MODE_IMMEDIATE_KHR
void setPreferredVsyncOffMode(VkPresentModeKHR mode) { m_preferredVsyncOffMode = mode; }
};
} // namespace nvvk
#endif


@ -0,0 +1,201 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "vulkanhppsupport.hpp"
#include "memallocator_dedicated_vk.hpp"
#include "memorymanagement_vk.hpp"
namespace nvvk {
bool checkResult(vk::Result result, const char* message)
{
return nvvk::checkResult(VkResult(result), message);
}
bool checkResult(vk::Result result, const char* file, int32_t line)
{
return nvvk::checkResult((VkResult)result, file, line);
}
} // namespace nvvk
namespace nvvkpp {
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ExportResourceAllocator::ExportResourceAllocator(VkDevice device, VkPhysicalDevice physicalDevice, nvvk::MemAllocator* memAllocator, VkDeviceSize stagingBlockSize)
: ResourceAllocator(device, physicalDevice, memAllocator, stagingBlockSize)
{
}
void ExportResourceAllocator::CreateBufferEx(const VkBufferCreateInfo& info_, VkBuffer* buffer)
{
VkBufferCreateInfo info = info_;
VkExternalMemoryBufferCreateInfo infoEx{VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO};
#ifdef WIN32
infoEx.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
#else
infoEx.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
#endif
info.pNext = &infoEx;
NVVK_CHECK(vkCreateBuffer(m_device, &info, nullptr, buffer));
}
void ExportResourceAllocator::CreateImageEx(const VkImageCreateInfo& info_, VkImage* image)
{
auto info = info_;
VkExternalMemoryImageCreateInfo infoEx{VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO};
#ifdef WIN32
infoEx.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
#else
infoEx.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
#endif
info.pNext = &infoEx;
NVVK_CHECK(vkCreateImage(m_device, &info, nullptr, image));
}
nvvk::MemHandle ExportResourceAllocator::AllocateMemory(const nvvk::MemAllocateInfo& allocateInfo)
{
nvvk::MemAllocateInfo exportAllocateInfo(allocateInfo);
exportAllocateInfo.setExportable(true);
return ResourceAllocator::AllocateMemory(exportAllocateInfo);
}
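// Note (illustrative sketch, not used in this file): because allocations from this
// allocator are marked exportable, the underlying VkDeviceMemory can later be
// exported to an OS handle for interop, e.g. on Linux with VK_KHR_external_memory_fd:
//
//   VkMemoryGetFdInfoKHR getFd{VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR};
//   getFd.memory     = memInfo.memory;  // e.g. from MemAllocator::getMemoryInfo(memHandle)
//   getFd.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
//   int fd = -1;
//   vkGetMemoryFdKHR(m_device, &getFd, &fd);
// (On Windows, vkGetMemoryWin32HandleKHR with the OPAQUE_WIN32 handle type is the analogue.)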
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ExportResourceAllocatorDedicated::ExportResourceAllocatorDedicated(VkDevice device,
VkPhysicalDevice physicalDevice,
VkDeviceSize stagingBlockSize /*= NVVK_DEFAULT_STAGING_BLOCKSIZE*/)
{
init(device, physicalDevice, stagingBlockSize);
}
ExportResourceAllocatorDedicated::~ExportResourceAllocatorDedicated()
{
deinit();
}
void ExportResourceAllocatorDedicated::init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize /*= NVVK_DEFAULT_STAGING_BLOCKSIZE*/)
{
m_memAlloc = std::make_unique<nvvk::DedicatedMemoryAllocator>(device, physicalDevice);
ExportResourceAllocator::init(device, physicalDevice, m_memAlloc.get(), stagingBlockSize);
}
void ExportResourceAllocatorDedicated::deinit()
{
ExportResourceAllocator::deinit();
m_memAlloc.reset();
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ExplicitDeviceMaskResourceAllocator::ExplicitDeviceMaskResourceAllocator(VkDevice device,
VkPhysicalDevice physicalDevice,
nvvk::MemAllocator* memAlloc,
uint32_t deviceMask)
{
init(device, physicalDevice, memAlloc, deviceMask);
}
void ExplicitDeviceMaskResourceAllocator::init(VkDevice device, VkPhysicalDevice physicalDevice, nvvk::MemAllocator* memAlloc, uint32_t deviceMask)
{
ResourceAllocator::init(device, physicalDevice, memAlloc);
m_deviceMask = deviceMask;
}
nvvk::MemHandle ExplicitDeviceMaskResourceAllocator::AllocateMemory(const nvvk::MemAllocateInfo& allocateInfo)
{
nvvk::MemAllocateInfo deviceMaskAllocateInfo(allocateInfo);
deviceMaskAllocateInfo.setDeviceMask(m_deviceMask);
return ResourceAllocator::AllocateMemory(deviceMaskAllocateInfo);
}
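// Note (illustrative): within a Vulkan device group, bit i of the device mask selects
// physical device i, so e.g. a deviceMask of 0x1 allocates only on the first GPU.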
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ResourceAllocatorDma::ResourceAllocatorDma(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize, VkDeviceSize memBlockSize)
{
init(device, physicalDevice, stagingBlockSize, memBlockSize);
}
ResourceAllocatorDma::~ResourceAllocatorDma()
{
deinit();
}
void ResourceAllocatorDma::init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize, VkDeviceSize memBlockSize)
{
m_dma = std::make_unique<nvvk::DeviceMemoryAllocator>(device, physicalDevice, memBlockSize);
ResourceAllocator::init(device, physicalDevice, m_dma.get(), stagingBlockSize);
}
void ResourceAllocatorDma::init(VkInstance, VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize, VkDeviceSize memBlockSize)
{
init(device, physicalDevice, stagingBlockSize, memBlockSize);
}
void ResourceAllocatorDma::deinit()
{
ResourceAllocator::deinit();
m_dma.reset();
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ResourceAllocatorDedicated::ResourceAllocatorDedicated(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize)
{
init(device, physicalDevice, stagingBlockSize);
}
ResourceAllocatorDedicated::~ResourceAllocatorDedicated()
{
deinit();
}
void ResourceAllocatorDedicated::init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize)
{
m_memAlloc = std::make_unique<nvvk::DedicatedMemoryAllocator>(device, physicalDevice);
ResourceAllocator::init(device, physicalDevice, m_memAlloc.get(), stagingBlockSize);
}
void ResourceAllocatorDedicated::init(VkInstance, // unused
VkDevice device,
VkPhysicalDevice physicalDevice,
VkDeviceSize stagingBlockSize /*= NVVK_DEFAULT_STAGING_BLOCKSIZE*/)
{
init(device, physicalDevice, stagingBlockSize);
}
void ResourceAllocatorDedicated::deinit()
{
ResourceAllocator::deinit();
m_memAlloc.reset();
}
} // namespace nvvkpp

File diff suppressed because it is too large


@ -0,0 +1,58 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#if NVP_SUPPORTS_OPENGL
#include "vulkanhppsupport_vkgl.hpp"
namespace nvvkpp {
ResourceAllocatorGLInterop::ResourceAllocatorGLInterop(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize)
{
init(device, physicalDevice, stagingBlockSize);
}
ResourceAllocatorGLInterop::~ResourceAllocatorGLInterop()
{
deinit();
}
void ResourceAllocatorGLInterop::init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize)
{
m_dmaGL = std::make_unique<nvvk::DeviceMemoryAllocatorGL>(device, physicalDevice);
nvvkpp::ExportResourceAllocator::init(device, physicalDevice, m_dmaGL.get(), stagingBlockSize);
// The staging will only use DMA, without export functionality.
m_dma = std::make_unique<nvvk::DeviceMemoryAllocator>(device, physicalDevice);
m_staging = std::make_unique<nvvk::StagingMemoryManager>(dynamic_cast<nvvk::MemAllocator*>(m_dma.get()), stagingBlockSize);
}
void ResourceAllocatorGLInterop::deinit()
{
nvvkpp::ExportResourceAllocator::deinit();
m_dmaGL.reset();
m_dma.reset();
}
nvvk::AllocationGL ResourceAllocatorGLInterop::getAllocationGL(nvvk::MemHandle memHandle) const
{
return m_dmaGL->getAllocationGL(m_dmaGL->getAllocationID(memHandle));
}
} // namespace nvvkpp
#endif


@ -0,0 +1,56 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#if NVP_SUPPORTS_OPENGL
#pragma once
#include "memorymanagement_vkgl.hpp" // This needs to be first to not break the build
#include "nvvk/vulkanhppsupport.hpp"
namespace nvvkpp {
/** @DOC_START
# class nvvkpp::ResourceAllocatorGLInterop
> ResourceAllocatorGLInterop is a helper class to manage Vulkan and OpenGL memory allocation and interop.
This class is a wrapper around the `nvvk::DeviceMemoryAllocatorGL` and `nvvk::DeviceMemoryAllocator` classes, which are used to allocate memory for Vulkan and OpenGL resources.
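A minimal usage sketch (assuming the usual `ResourceAllocator` image-creation API and
`nvvk::Image::memHandle`; names are illustrative):
```cpp
nvvkpp::ResourceAllocatorGLInterop allocGL(device, physicalDevice);
// memory for resources created through this allocator carries export handles
// suitable for OpenGL interop
nvvk::Image image = allocGL.createImage(imageCreateInfo);
// query the OpenGL-side allocation (GL memory object, offset, size) for this memory
nvvk::AllocationGL glAlloc = allocGL.getAllocationGL(image.memHandle);
```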
@DOC_END */
class ResourceAllocatorGLInterop : public ExportResourceAllocator
{
public:
ResourceAllocatorGLInterop() = default;
ResourceAllocatorGLInterop(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
~ResourceAllocatorGLInterop();
void init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
void deinit();
nvvk::DeviceMemoryAllocatorGL& getDmaGL() const { return *m_dmaGL; }
nvvk::AllocationGL getAllocationGL(nvvk::MemHandle memHandle) const;
protected:
std::unique_ptr<nvvk::DeviceMemoryAllocatorGL> m_dmaGL;
std::unique_ptr<nvvk::DeviceMemoryAllocator> m_dma;
};
} // namespace nvvkpp
#endif