cleanup and refactoring

This commit is contained in:
CDaut 2024-05-25 11:53:25 +02:00
parent 2302158928
commit 76f6bf62a4
Signed by: clara
GPG key ID: 223391B52FAD4463
1285 changed files with 757994 additions and 8 deletions

File diff suppressed because it is too large.

@@ -0,0 +1,155 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "appwindowprofiler_vk.hpp"
#include "context_vk.hpp"
#include "error_vk.hpp"
#include <nvh/misc.hpp>
#include <nvh/nvprint.hpp>
#define GLFW_INCLUDE_NONE
#include <GLFW/glfw3.h>
#ifdef _WIN32
#define GLFW_EXPOSE_NATIVE_WIN32
#include <GLFW/glfw3native.h>
#include <vulkan/vulkan_win32.h>
#else
#define GLFW_EXPOSE_NATIVE_X11
#include <GLFW/glfw3native.h>
#include <xcb/xcb.h>
// Prevent clang format from "organizing" the includes.
#include <vulkan/vulkan_xcb.h>
#endif
namespace nvvk {
void AppWindowProfilerVK::contextInit()
{
//m_contextWindow.init(&m_deviceInfo, this);
ContextCreateInfo contextInfo = m_contextInfo;
m_swapVsync = false;
contextInfo.addInstanceExtension(VK_KHR_SURFACE_EXTENSION_NAME, false);
#ifdef _WIN32
contextInfo.addInstanceExtension(VK_KHR_WIN32_SURFACE_EXTENSION_NAME, false);
#else
contextInfo.addInstanceExtension(VK_KHR_XCB_SURFACE_EXTENSION_NAME, false);
#endif
contextInfo.addDeviceExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, false);
if(!m_context.init(contextInfo))
{
LOGE("FATAL ERROR: failed to create Vulkan context\n");
exit(-1);
return;
}
// Construct the surface description:
VkResult result;
#ifdef _WIN32
HWND hWnd = glfwGetWin32Window(m_internal);
HINSTANCE hInstance = GetModuleHandle(NULL);
VkWin32SurfaceCreateInfoKHR createInfo = {};
createInfo.sType = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR;
createInfo.pNext = NULL;
createInfo.hinstance = hInstance;
createInfo.hwnd = hWnd;
result = vkCreateWin32SurfaceKHR(m_context.m_instance, &createInfo, nullptr, &m_surface);
#else // _WIN32
result = glfwCreateWindowSurface(m_context.m_instance, m_internal, NULL, &m_surface);
#endif // _WIN32
assert(result == VK_SUCCESS);
m_context.setGCTQueueWithPresent(m_surface);
m_swapChain.init(m_context.m_device, m_context.m_physicalDevice, m_context.m_queueGCT, m_context.m_queueGCT.familyIndex, m_surface);
m_swapChain.update(getWidth(), getHeight(), m_swapVsync);
m_windowState.m_swapSize[0] = m_swapChain.getWidth();
m_windowState.m_swapSize[1] = m_swapChain.getHeight();
m_profilerVK.init(m_context.m_device, m_context.m_physicalDevice);
m_profilerVK.setLabelUsage(m_context.hasInstanceExtension(VK_EXT_DEBUG_UTILS_EXTENSION_NAME));
}
void AppWindowProfilerVK::contextDeinit()
{
VkResult result = vkDeviceWaitIdle(m_context.m_device);
if(nvvk::checkResult(result, __FILE__, __LINE__))
{
exit(-1);
}
m_profilerVK.deinit();
m_swapChain.deinit();
vkDestroySurfaceKHR(m_context.m_instance, m_surface, nullptr);
m_context.deinit();
}
void AppWindowProfilerVK::contextSync()
{
VkResult result = vkDeviceWaitIdle(m_context.m_device);
if(nvvk::checkResult(result, __FILE__, __LINE__))
{
exit(-1);
}
}
void AppWindowProfilerVK::swapResize(int width, int height)
{
if((m_swapChain.getUpdateWidth() != width) || (m_swapChain.getUpdateHeight() != height))
{
m_swapChain.update(width, height, m_swapVsync);
m_windowState.m_swapSize[0] = m_swapChain.getWidth();
m_windowState.m_swapSize[1] = m_swapChain.getHeight();
}
}
void AppWindowProfilerVK::swapPrepare()
{
if(!m_swapChain.acquire())
{
LOGE("error: vulkan swapchain acqiure failed, try -vsync 1\n");
exit(-1);
}
}
void AppWindowProfilerVK::swapBuffers()
{
m_swapChain.present(m_context.m_queueGCT);
}
void AppWindowProfilerVK::swapVsync(bool swapVsync)
{
if(m_swapVsync != swapVsync)
{
m_swapChain.update(getWidth(), getHeight(), swapVsync);
m_swapVsync = swapVsync;
}
}
const char* AppWindowProfilerVK::contextGetDeviceName()
{
return m_context.m_physicalInfo.properties10.deviceName;
}
} // namespace nvvk

@@ -0,0 +1,87 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef NV_WINDOWPROFILER_GL_INCLUDED
#define NV_WINDOWPROFILER_GL_INCLUDED
#include <nvh/appwindowprofiler.hpp>
#include <nvvk/context_vk.hpp>
#include <nvvk/profiler_vk.hpp>
#include <nvvk/swapchain_vk.hpp>
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::AppWindowProfilerVK
nvvk::AppWindowProfilerVK derives from nvh::AppWindowProfiler
and overrides the context and swapbuffer functions.
The nvh class itself provides several utilities and
command line options to run automated benchmarks etc.
To influence the Vulkan instance/device creation, modify
`m_contextInfo` prior to running AppWindowProfiler::run,
which triggers instance, device, window, and swapchain creation.
The class comes with a nvvk::ProfilerVK instance that references the
AppWindowProfiler::m_profiler's data.
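A minimal usage sketch (the derived class name `Sample`, the overridden hooks and the extra extension are illustrative, not part of this header):
```cpp
class Sample : public nvvk::AppWindowProfilerVK
{
  // override the nvh::AppWindowProfiler virtual hooks (rendering, resize handling, ...) here
};

int main(int argc, const char** argv)
{
  Sample sample;
  // request additional, optional device extensions before run() creates instance/device/swapchain
  sample.m_contextInfo.addDeviceExtension(VK_KHR_RAY_QUERY_EXTENSION_NAME, true);
  return sample.run("Sample", argc, argv, 1280, 720);
}
```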
@DOC_END */
#define NV_PROFILE_VK_SECTION(name, cmd) const nvvk::ProfilerVK::Section _tempTimer(m_profilerVK, name, cmd)
#define NV_PROFILE_VK_SPLIT() m_profilerVK.accumulationSplit()
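// Example usage of the section macro inside a derived class's rendering code (a sketch;
// "cmd" stands for the VkCommandBuffer currently being recorded, the label is arbitrary):
//
//   {
//     NV_PROFILE_VK_SECTION("scene", cmd);
//     // ... record the Vulkan commands to be timed ...
//   }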
class AppWindowProfilerVK : public nvh::AppWindowProfiler
{
public:
AppWindowProfilerVK(bool singleThreaded = true)
: nvh::AppWindowProfiler(singleThreaded)
, m_profilerVK(&m_profiler)
{
}
bool m_swapVsync = false;
ContextCreateInfo m_contextInfo{};
Context m_context{};
SwapChain m_swapChain{};
VkSurfaceKHR m_surface{};
ProfilerVK m_profilerVK{};
int run(const std::string& name, int argc, const char** argv, int width, int height)
{
return AppWindowProfiler::run(name, argc, argv, width, height, false);
}
virtual void contextInit() override;
virtual void contextDeinit() override;
virtual void contextSync() override;
virtual const char* contextGetDeviceName() override;
virtual void swapResize(int width, int height) override;
virtual void swapPrepare() override;
virtual void swapBuffers() override;
virtual void swapVsync(bool state) override;
};
} // namespace nvvk
#endif

@@ -0,0 +1,132 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <platform.h>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
The utilities in this file provide a more direct approach; we encourage using the
higher-level mechanisms also provided in the allocator / memory management classes.
# functions in nvvk
- makeBufferCreateInfo : wraps setup of VkBufferCreateInfo (implicitly sets VK_BUFFER_USAGE_TRANSFER_DST_BIT)
- makeBufferViewCreateInfo : wraps setup of VkBufferViewCreateInfo
- createBuffer : wraps vkCreateBuffer
- createBufferView : wraps vkCreateBufferView
- getBufferDeviceAddressKHR : wraps vkGetBufferDeviceAddressKHR
- getBufferDeviceAddress : wraps vkGetBufferDeviceAddress
```cpp
VkBufferCreateInfo bufferCreate = makeBufferCreateInfo (size, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT);
VkBuffer buffer = createBuffer(device, bufferCreate);
VkBufferView bufferView = createBufferView(device, makeBufferViewCreateInfo(buffer, VK_FORMAT_R8G8B8A8_UNORM, size));
```
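A buffer created with the device-address usage bit can additionally be queried for its VkDeviceAddress (a sketch; it assumes the `bufferDeviceAddress` feature is enabled and that the memory bound to the buffer was allocated with VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT):
```cpp
VkBufferCreateInfo daCreate = makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
VkBuffer           daBuffer = createBuffer(device, daCreate);
// ... allocate and bind suitable device memory here ...
VkDeviceAddress    address  = getBufferDeviceAddress(device, daBuffer);
```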
@DOC_END */
// implicitly sets VK_BUFFER_USAGE_TRANSFER_DST_BIT
inline VkBufferCreateInfo makeBufferCreateInfo(VkDeviceSize size, VkBufferUsageFlags usage, VkBufferCreateFlags flags = 0)
{
VkBufferCreateInfo createInfo = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
createInfo.size = size;
createInfo.usage = usage | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
createInfo.flags = flags;
return createInfo;
}
inline VkBufferViewCreateInfo makeBufferViewCreateInfo(VkBuffer buffer,
VkFormat format,
VkDeviceSize range,
VkDeviceSize offset = 0,
VkBufferViewCreateFlags flags = 0)
{
VkBufferViewCreateInfo createInfo = {VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO};
createInfo.buffer = buffer;
createInfo.offset = offset;
createInfo.range = range;
createInfo.flags = flags;
createInfo.format = format;
return createInfo;
}
inline VkBufferViewCreateInfo makeBufferViewCreateInfo(const VkDescriptorBufferInfo& descrInfo,
VkFormat fmt,
VkBufferViewCreateFlags flags = 0)
{
VkBufferViewCreateInfo createInfo = {VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO};
createInfo.buffer = descrInfo.buffer;
createInfo.offset = descrInfo.offset;
createInfo.range = descrInfo.range;
createInfo.flags = flags;
createInfo.format = fmt;
return createInfo;
}
inline VkDeviceAddress getBufferDeviceAddressKHR(VkDevice device, VkBuffer buffer)
{
if(buffer == VK_NULL_HANDLE)
return 0ULL;
VkBufferDeviceAddressInfo info = {VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_KHR};
info.buffer = buffer;
return vkGetBufferDeviceAddressKHR(device, &info);
}
inline VkDeviceAddress getBufferDeviceAddress(VkDevice device, VkBuffer buffer)
{
if(buffer == VK_NULL_HANDLE)
return 0ULL;
VkBufferDeviceAddressInfo info = {VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO};
info.buffer = buffer;
return vkGetBufferDeviceAddress(device, &info);
}
//////////////////////////////////////////////////////////////////////////
// these use pass by value so one can easily chain createBuffer(device, makeBufferCreateInfo(...));
inline VkBuffer createBuffer(VkDevice device, VkBufferCreateInfo info)
{
VkBuffer buffer;
VkResult result = vkCreateBuffer(device, &info, nullptr, &buffer);
assert(result == VK_SUCCESS);
return buffer;
}
inline VkBufferView createBufferView(VkDevice device, VkBufferViewCreateInfo info)
{
VkBufferView bufferView;
VkResult result = vkCreateBufferView(device, &info, nullptr, &bufferView);
assert(result == VK_SUCCESS);
return bufferView;
}
} // namespace nvvk

@@ -0,0 +1,357 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include <assert.h>
#include "buffersuballocator_vk.hpp"
#include "debug_util_vk.hpp"
#include "error_vk.hpp"
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
void BufferSubAllocator::init(MemAllocator* memAllocator,
VkDeviceSize blockSize,
VkBufferUsageFlags bufferUsageFlags,
VkMemoryPropertyFlags memPropFlags,
bool mapped,
const std::vector<uint32_t>& sharingQueueFamilyIndices)
{
assert(!m_device);
m_memAllocator = memAllocator;
m_device = memAllocator->getDevice();
m_blockSize = std::min(blockSize, ((uint64_t(1) << Handle::BLOCKBITS) - 1) * uint64_t(BASE_ALIGNMENT));
m_bufferUsageFlags = bufferUsageFlags;
m_memoryPropFlags = memPropFlags;
m_memoryTypeIndex = ~0;
m_keepLastBlock = true;
m_mapped = mapped;
m_sharingQueueFamilyIndices = sharingQueueFamilyIndices;
m_freeBlockIndex = INVALID_ID_INDEX;
m_usedSize = 0;
m_allocatedSize = 0;
}
void BufferSubAllocator::deinit()
{
if(!m_memAllocator)
return;
free(false);
m_blocks.clear();
m_memAllocator = nullptr;
}
BufferSubAllocator::Handle BufferSubAllocator::subAllocate(VkDeviceSize size, uint32_t align)
{
uint32_t usedOffset;
uint32_t usedSize;
uint32_t usedAligned;
uint32_t blockIndex = INVALID_ID_INDEX;
// if size either doesn't fit in the bits within the handle
// or we are bigger than the default block size, we use a full dedicated block
// for this allocation
bool isDedicated = Handle::needsDedicated(size, align) || size > m_blockSize;
if(!isDedicated)
{
// Find the first non-dedicated block that can fit the allocation
for(uint32_t i = 0; i < (uint32_t)m_blocks.size(); i++)
{
Block& block = m_blocks[i];
if(!block.isDedicated && block.buffer && block.range.subAllocate((uint32_t)size, align, usedOffset, usedAligned, usedSize))
{
blockIndex = block.index;
break;
}
}
}
if(blockIndex == INVALID_ID_INDEX)
{
if(m_freeBlockIndex != INVALID_ID_INDEX)
{
Block& block = m_blocks[m_freeBlockIndex];
m_freeBlockIndex = setIndexValue(block.index, m_freeBlockIndex);
blockIndex = block.index;
}
else
{
uint32_t newIndex = (uint32_t)m_blocks.size();
m_blocks.resize(m_blocks.size() + 1);
Block& block = m_blocks[newIndex];
block.index = newIndex;
blockIndex = newIndex;
}
Block& block = m_blocks[blockIndex];
block.size = std::max(m_blockSize, size);
if(!isDedicated)
{
// only adjust size if not dedicated.
// warning: this narrows the size from 64 bit to 32 bit, which should be fine given
// that such big allocations will trigger the dedicated path
block.size = block.range.alignedSize((uint32_t)block.size);
}
VkResult result = allocBlock(block, blockIndex, block.size);
NVVK_CHECK(result);
if(result != VK_SUCCESS)
{
freeBlock(block);
return Handle();
}
block.isDedicated = isDedicated;
if(!isDedicated)
{
// Dedicated blocks don't allow for subranges, so don't initialize the range allocator
block.range.init((uint32_t)block.size);
block.range.subAllocate((uint32_t)size, align, usedOffset, usedAligned, usedSize);
m_regularBlocks++;
}
}
Handle sub;
if(!sub.setup(blockIndex, isDedicated ? 0 : usedOffset, isDedicated ? size : uint64_t(usedSize), isDedicated))
{
return Handle();
}
// append used space for stats
m_usedSize += sub.getSize();
return sub;
}
void BufferSubAllocator::subFree(Handle sub)
{
if(!sub)
return;
Block& block = getBlock(sub.blockIndex);
bool isDedicated = sub.isDedicated();
if(!isDedicated)
{
block.range.subFree(uint32_t(sub.getOffset()), uint32_t(sub.getSize()));
}
m_usedSize -= sub.getSize();
if(isDedicated || (block.range.isEmpty() && (!m_keepLastBlock || m_regularBlocks > 1)))
{
if(!isDedicated)
{
m_regularBlocks--;
}
freeBlock(block);
}
}
float BufferSubAllocator::getUtilization(VkDeviceSize& allocatedSize, VkDeviceSize& usedSize) const
{
allocatedSize = m_allocatedSize;
usedSize = m_usedSize;
return float(double(usedSize) / double(allocatedSize));
}
bool BufferSubAllocator::fitsInAllocated(VkDeviceSize size, uint32_t alignment) const
{
if(Handle::needsDedicated(size, alignment))
{
return false;
}
for(const auto& block : m_blocks)
{
if(block.buffer && !block.isDedicated)
{
if(block.range.isAvailable((uint32_t)size, (uint32_t)alignment))
{
return true;
}
}
}
return false;
}
void BufferSubAllocator::free(bool onlyEmpty)
{
for(uint32_t i = 0; i < (uint32_t)m_blocks.size(); i++)
{
Block& block = m_blocks[i];
if(block.buffer && (!onlyEmpty || (!block.isDedicated && block.range.isEmpty())))
{
freeBlock(block);
}
}
if(!onlyEmpty)
{
m_blocks.clear();
m_freeBlockIndex = INVALID_ID_INDEX;
}
}
void BufferSubAllocator::freeBlock(Block& block)
{
m_allocatedSize -= block.size;
vkDestroyBuffer(m_device, block.buffer, nullptr);
if(block.mapping)
{
m_memAllocator->unmap(block.memory);
}
m_memAllocator->freeMemory(block.memory);
if(!block.isDedicated)
{
block.range.deinit();
}
block.memory = NullMemHandle;
block.buffer = VK_NULL_HANDLE;
block.mapping = nullptr;
block.isDedicated = false;
// update the block.index with the current head of the free list
// pop its old value
m_freeBlockIndex = setIndexValue(block.index, m_freeBlockIndex);
}
VkResult BufferSubAllocator::allocBlock(Block& block, uint32_t index, VkDeviceSize size)
{
std::string debugName = m_debugName + ":block:" + std::to_string(index);
VkResult result;
VkBufferCreateInfo createInfo = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
createInfo.size = size;
createInfo.usage = m_bufferUsageFlags;
createInfo.sharingMode = m_sharingQueueFamilyIndices.size() > 1 ? VK_SHARING_MODE_CONCURRENT : VK_SHARING_MODE_EXCLUSIVE;
createInfo.pQueueFamilyIndices = m_sharingQueueFamilyIndices.data();
createInfo.queueFamilyIndexCount = static_cast<uint32_t>(m_sharingQueueFamilyIndices.size());
VkBuffer buffer = VK_NULL_HANDLE;
result = vkCreateBuffer(m_device, &createInfo, nullptr, &buffer);
if(result != VK_SUCCESS)
{
NVVK_CHECK(result);
return result;
}
nvvk::DebugUtil(m_device).setObjectName(buffer, debugName);
VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
VkBufferMemoryRequirementsInfo2 bufferReqs = {VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2};
bufferReqs.buffer = buffer;
vkGetBufferMemoryRequirements2(m_device, &bufferReqs, &memReqs);
if(m_memoryTypeIndex == ~0)
{
VkPhysicalDeviceMemoryProperties memoryProperties;
vkGetPhysicalDeviceMemoryProperties(m_memAllocator->getPhysicalDevice(), &memoryProperties);
VkMemoryPropertyFlags memProps = m_memoryPropFlags;
// Find an available memory type that satisfies the requested properties.
for(uint32_t memoryTypeIndex = 0; memoryTypeIndex < memoryProperties.memoryTypeCount; ++memoryTypeIndex)
{
if((memReqs.memoryRequirements.memoryTypeBits & (1 << memoryTypeIndex))
&& (memoryProperties.memoryTypes[memoryTypeIndex].propertyFlags & memProps) == memProps)
{
m_memoryTypeIndex = memoryTypeIndex;
break;
}
}
}
if(m_memoryTypeIndex == ~0)
{
assert(0 && "could not find memoryTypeIndex\n");
vkDestroyBuffer(m_device, buffer, nullptr);
return VK_ERROR_INCOMPATIBLE_DRIVER;
}
MemAllocateInfo memAllocateInfo(memReqs.memoryRequirements, m_memoryPropFlags, false);
memAllocateInfo.setDebugName(debugName);
MemHandle memory = m_memAllocator->allocMemory(memAllocateInfo, &result);
if(result != VK_SUCCESS)
{
assert(0 && "could not allocate buffer\n");
vkDestroyBuffer(m_device, buffer, nullptr);
return result;
}
MemAllocator::MemInfo memInfo = m_memAllocator->getMemoryInfo(memory);
VkBindBufferMemoryInfo bindInfos = {VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO};
bindInfos.buffer = buffer;
bindInfos.memory = memInfo.memory;
bindInfos.memoryOffset = memInfo.offset;
result = vkBindBufferMemory2(m_device, 1, &bindInfos);
if(result == VK_SUCCESS)
{
if(m_mapped)
{
block.mapping = m_memAllocator->mapT<uint8_t>(memory);
}
else
{
block.mapping = nullptr;
}
if(!m_mapped || block.mapping)
{
if(m_bufferUsageFlags & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT)
{
VkBufferDeviceAddressInfo info = {VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO};
info.buffer = buffer;
block.address = vkGetBufferDeviceAddress(m_device, &info);
}
block.memory = memory;
block.buffer = buffer;
m_allocatedSize += block.size;
return result;
}
}
// error case
NVVK_CHECK(result);
vkDestroyBuffer(m_device, buffer, nullptr);
m_memAllocator->freeMemory(memory);
return result;
}
} // namespace nvvk

@@ -0,0 +1,281 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <platform.h>
#include <vector>
#include <string>
#include <vulkan/vulkan_core.h>
#include <nvh/trangeallocator.hpp>
#include "memallocator_vk.hpp"
namespace nvvk {
//////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::BufferSubAllocator
nvvk::BufferSubAllocator provides buffer sub allocation using larger buffer blocks.
The blocks are one VkBuffer each and are allocated via the
provided [nvvk::MemAllocator](#class-nvvkmemallocator).
The requested buffer space is sub-allocated and recycled in blocks internally.
This way we avoid creating lots of small VkBuffers and can avoid calling the Vulkan
API at all when there are blocks with sufficient empty space.
While Vulkan is more efficient than previous APIs, creating lots
of objects is still not good for overall performance: it results
in more cache misses and uses more system memory overall.
Be aware that each sub-allocation is always BASE_ALIGNMENT aligned.
A custom alignment can be requested at allocation time; it ensures
that the returned sub-allocation range of offset & size can fit the
originally requested size while respecting the requested alignment.
This, however, means the returned offset may not match the requested
alignment, and the returned size can be bigger to account for the shift
caused by manual alignment.
It is therefore necessary to pass the alignment that was used at allocation time
to the query functions as well.
```cpp
// alignment <= BASE_ALIGNMENT
handle = subAllocator.subAllocate(size);
binding = subAllocator.getSubBinding(handle);
// alignment > BASE_ALIGNMENT
handle = subAllocator.subAllocate(size, alignment);
binding = subAllocator.getSubBinding(handle, alignment);
```
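A rough end-to-end sketch (the block size, buffer usage flags, `memAllocator` instance and `sizeInBytes` are assumptions for illustration):
```cpp
nvvk::BufferSubAllocator subAllocator;
subAllocator.init(&memAllocator, 64 * 1024 * 1024,
                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);

nvvk::BufferSubAllocator::Handle  handle  = subAllocator.subAllocate(sizeInBytes);
nvvk::BufferSubAllocator::Binding binding = subAllocator.getSubBinding(handle);
// binding.buffer / binding.offset / binding.size describe the sub-range within the block's VkBuffer

subAllocator.subFree(handle);
subAllocator.deinit();
```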
@DOC_END */
class BufferSubAllocator
{
private:
static const uint32_t INVALID_ID_INDEX = ~0;
static const uint32_t BASE_ALIGNMENT = 16; // could compromise between max block size and typical requests
public:
class Handle
{
friend class BufferSubAllocator;
private:
static const uint32_t BLOCKBITS = 26;
// if we cannot pack size and offset each into 26 bits (after adjusting for base alignment)
// we need a dedicated block just for this
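// (with BLOCKBITS = 26 and BASE_ALIGNMENT = 16 this threshold amounts to 2^26 * 16 bytes = 1 GiB)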
static bool needsDedicated(uint64_t size, uint64_t alignment)
{
return ((size + (alignment > 16 ? alignment : 0)) >= (uint64_t((1 << BLOCKBITS)) * uint64_t(BASE_ALIGNMENT)));
}
union
{
struct
{
uint64_t blockIndex : 11; // 2047 blocks, typical blockSize 64 MB or more, should be enough
uint64_t offset : BLOCKBITS;
uint64_t size : BLOCKBITS;
uint64_t dedicated : 1; // 0 dedicated or not
};
uint64_t raw;
};
uint64_t getOffset() const { return dedicated == 1 ? 0 : offset * uint64_t(BASE_ALIGNMENT); }
uint64_t getSize() const { return dedicated == 1 ? offset + (size << BLOCKBITS) : size * uint64_t(BASE_ALIGNMENT); }
uint32_t getBlockIndex() const { return uint32_t(blockIndex); }
bool isDedicated() const { return dedicated == 1; }
bool setup(uint32_t blockIndex_, uint64_t offset_, uint64_t size_, bool dedicated_)
{
const uint64_t blockBitsMask = ((1ULL << BLOCKBITS) - 1);
assert((blockIndex_ & ~((1ULL << 11) - 1)) == 0);
blockIndex = blockIndex_ & ((1ULL << 11) - 1);
if(dedicated_)
{
dedicated = 1;
offset = size_ & blockBitsMask;
size = (size_ >> BLOCKBITS) & blockBitsMask;
}
else
{
dedicated = 0;
offset = (offset_ / uint64_t(BASE_ALIGNMENT)) & blockBitsMask;
size = (size_ / uint64_t(BASE_ALIGNMENT)) & blockBitsMask;
}
return (getBlockIndex() == blockIndex_ && getOffset() == offset_ && getSize() == size_);
}
public:
Handle() { raw = ~uint64_t(0); }
bool isValid() const { return raw != ~uint64_t(0); }
bool isEqual(const Handle& other) const
{
return blockIndex == other.blockIndex && offset == other.offset && dedicated == other.dedicated && size == other.size;
}
explicit operator bool() const { return isValid(); }
friend bool operator==(const Handle& lhs, const Handle& rhs) { return rhs.isEqual(lhs); }
};
//////////////////////////////////////////////////////////////////////////
BufferSubAllocator(BufferSubAllocator const&) = delete;
BufferSubAllocator& operator=(BufferSubAllocator const&) = delete;
BufferSubAllocator() { m_debugName = "nvvk::BufferSubAllocator:" + std::to_string((uint64_t)this); }
BufferSubAllocator(MemAllocator* memAllocator,
VkDeviceSize blockSize,
VkBufferUsageFlags bufferUsageFlags,
VkMemoryPropertyFlags memPropFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
bool mapped = false,
const std::vector<uint32_t>& sharingQueueFamilyIndices = std::vector<uint32_t>())
{
init(memAllocator, blockSize, bufferUsageFlags, memPropFlags, mapped, sharingQueueFamilyIndices);
}
~BufferSubAllocator() { deinit(); }
void init(MemAllocator* memallocator,
VkDeviceSize blockSize,
VkBufferUsageFlags bufferUsageFlags,
VkMemoryPropertyFlags memPropFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
bool mapped = false,
const std::vector<uint32_t>& sharingQueues = std::vector<uint32_t>());
void deinit();
void setDebugName(const std::string& name) { m_debugName = name; }
void setKeepLastBlockOnFree(bool state) { m_keepLastBlock = state; }
// alignment will be BASE_ALIGNMENT byte at least
// alignment must be power of 2
Handle subAllocate(VkDeviceSize size, uint32_t alignment = BASE_ALIGNMENT);
void subFree(Handle sub);
struct Binding
{
VkBuffer buffer;
uint64_t offset;
uint64_t size;
VkDeviceAddress address;
};
// sub allocation was aligned to BASE_ALIGNMENT
Binding getSubBinding(Handle handle)
{
Binding binding;
binding.offset = handle.getOffset();
binding.size = handle.getSize();
binding.buffer = m_blocks[handle.getBlockIndex()].buffer;
binding.address = m_blocks[handle.getBlockIndex()].address + binding.offset;
return binding;
}
// sub allocation alignment was custom
Binding getSubBinding(Handle handle, uint32_t alignment)
{
Binding binding;
binding.offset = (handle.getOffset() + (uint64_t(alignment) - 1)) & ~(uint64_t(alignment) - 1);
binding.size = handle.getSize() - (binding.offset - handle.getOffset());
binding.buffer = m_blocks[handle.getBlockIndex()].buffer;
binding.address = m_blocks[handle.getBlockIndex()].address + binding.offset;
return binding;
}
void* getSubMapping(Handle handle, uint32_t alignment = BASE_ALIGNMENT) const
{
return m_blocks[handle.getBlockIndex()].mapping
+ ((handle.getOffset() + (uint64_t(alignment) - 1)) & ~(uint64_t(alignment) - 1));
}
uint32_t getSubBlockIndex(Handle handle) const { return handle.getBlockIndex(); }
VkBuffer getBlockBuffer(uint32_t blockIndex) const { return m_blocks[blockIndex].buffer; }
float getUtilization(VkDeviceSize& allocatedSize, VkDeviceSize& usedSize) const;
bool fitsInAllocated(VkDeviceSize size, uint32_t alignment = BASE_ALIGNMENT) const;
void free(bool onlyEmpty);
protected:
// - Block stores the VkBuffers that we sub-allocate the requested space from
// To recycle Block structures within the arrays
// we use a linked list of array indices. The "index" element
// in the struct refers to the next free list item, or itself
// when in use.
// A block is "dedicated" if it only holds a single allocation.
// This can happen if we cannot encode the offset/size into the
// bits that the Handle provides for this, or when the size
// of the allocation is bigger than our preferred block size.
struct Block
{
uint32_t index = INVALID_ID_INDEX;
VkDeviceSize size = 0;
VkBuffer buffer = VK_NULL_HANDLE;
nvh::TRangeAllocator<BASE_ALIGNMENT> range;
MemHandle memory = NullMemHandle;
uint8_t* mapping = nullptr;
VkDeviceAddress address = 0;
bool isDedicated = false;
};
MemAllocator* m_memAllocator = nullptr;
VkDevice m_device = VK_NULL_HANDLE;
uint32_t m_memoryTypeIndex;
VkDeviceSize m_blockSize;
VkBufferUsageFlags m_bufferUsageFlags;
VkMemoryPropertyFlags m_memoryPropFlags;
std::vector<uint32_t> m_sharingQueueFamilyIndices;
bool m_mapped;
bool m_keepLastBlock = false;
std::vector<Block> m_blocks;
uint32_t m_regularBlocks = 0;
uint32_t m_freeBlockIndex; // linked list to next free block
VkDeviceSize m_allocatedSize;
VkDeviceSize m_usedSize;
std::string m_debugName;
uint32_t setIndexValue(uint32_t& index, uint32_t newValue)
{
uint32_t oldValue = index;
index = newValue;
return oldValue;
}
Block& getBlock(uint32_t index)
{
Block& block = m_blocks[index];
assert(block.index == index);
return block;
}
void freeBlock(Block& block);
VkResult allocBlock(Block& block, uint32_t id, VkDeviceSize size);
};
} // namespace nvvk

@@ -0,0 +1,456 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include <algorithm>
#include <platform.h>
#include "commands_vk.hpp"
#include "error_vk.hpp"
namespace nvvk {
uint32_t makeAccessMaskPipelineStageFlags(uint32_t accessMask, VkPipelineStageFlags supportedShaderBits)
{
static const uint32_t accessPipes[] = {
VK_ACCESS_INDIRECT_COMMAND_READ_BIT,
VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT,
VK_ACCESS_INDEX_READ_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
VK_ACCESS_UNIFORM_READ_BIT,
supportedShaderBits,
VK_ACCESS_INPUT_ATTACHMENT_READ_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
VK_ACCESS_SHADER_READ_BIT,
supportedShaderBits,
VK_ACCESS_SHADER_WRITE_BIT,
supportedShaderBits,
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_ACCESS_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT,
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
VK_ACCESS_TRANSFER_READ_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_ACCESS_HOST_READ_BIT,
VK_PIPELINE_STAGE_HOST_BIT,
VK_ACCESS_HOST_WRITE_BIT,
VK_PIPELINE_STAGE_HOST_BIT,
VK_ACCESS_MEMORY_READ_BIT,
0,
VK_ACCESS_MEMORY_WRITE_BIT,
0,
#if VK_NV_device_generated_commands
VK_ACCESS_COMMAND_PREPROCESS_READ_BIT_NV,
VK_PIPELINE_STAGE_COMMAND_PREPROCESS_BIT_NV,
VK_ACCESS_COMMAND_PREPROCESS_WRITE_BIT_NV,
VK_PIPELINE_STAGE_COMMAND_PREPROCESS_BIT_NV,
#endif
#if VK_NV_ray_tracing
VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_NV,
VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_NV | supportedShaderBits | VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_NV,
VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_NV,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_NV,
#endif
};
if(!accessMask)
{
return VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
}
uint32_t pipes = 0;
for(uint32_t i = 0; i < NV_ARRAY_SIZE(accessPipes); i += 2)
{
if(accessPipes[i] & accessMask)
{
pipes |= accessPipes[i + 1];
}
}
assert(pipes != 0);
return pipes;
}
void cmdBegin(VkCommandBuffer cmd, VkCommandBufferUsageFlags flags)
{
VkCommandBufferBeginInfo beginInfo{VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO};
beginInfo.flags = flags;
VkResult res = vkBeginCommandBuffer(cmd, &beginInfo);
assert(res == VK_SUCCESS);
}
//////////////////////////////////////////////////////////////////////////
void CommandPool::init(VkDevice device, uint32_t familyIndex, VkCommandPoolCreateFlags flags, VkQueue defaultQueue)
{
assert(!m_device);
m_device = device;
VkCommandPoolCreateInfo info = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO};
info.flags = flags;
info.queueFamilyIndex = familyIndex;
vkCreateCommandPool(m_device, &info, nullptr, &m_commandPool);
if(defaultQueue)
{
m_queue = defaultQueue;
}
else
{
vkGetDeviceQueue(device, familyIndex, 0, &m_queue);
}
}
void CommandPool::deinit()
{
if(m_commandPool)
{
vkDestroyCommandPool(m_device, m_commandPool, nullptr);
m_commandPool = VK_NULL_HANDLE;
}
m_device = VK_NULL_HANDLE;
}
VkCommandBuffer CommandPool::createCommandBuffer(VkCommandBufferLevel level /*= VK_COMMAND_BUFFER_LEVEL_PRIMARY*/,
bool begin,
VkCommandBufferUsageFlags flags /*= VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT*/,
const VkCommandBufferInheritanceInfo* pInheritanceInfo /*= nullptr*/)
{
VkCommandBufferAllocateInfo allocInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO};
allocInfo.level = level;
allocInfo.commandPool = m_commandPool;
allocInfo.commandBufferCount = 1;
VkCommandBuffer cmd;
vkAllocateCommandBuffers(m_device, &allocInfo, &cmd);
if(begin)
{
VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = flags;
beginInfo.pInheritanceInfo = pInheritanceInfo;
vkBeginCommandBuffer(cmd, &beginInfo);
}
return cmd;
}
void CommandPool::destroy(size_t count, const VkCommandBuffer* cmds)
{
vkFreeCommandBuffers(m_device, m_commandPool, (uint32_t)count, cmds);
}
void CommandPool::submitAndWait(size_t count, const VkCommandBuffer* cmds, VkQueue queue)
{
submit(count, cmds, queue);
VkResult result = vkQueueWaitIdle(queue);
if(nvvk::checkResult(result, __FILE__, __LINE__))
{
exit(-1);
}
vkFreeCommandBuffers(m_device, m_commandPool, (uint32_t)count, cmds);
}
void CommandPool::submit(size_t count, const VkCommandBuffer* cmds, VkQueue queue, VkFence fence)
{
for(size_t i = 0; i < count; i++)
{
vkEndCommandBuffer(cmds[i]);
}
VkSubmitInfo submit = {VK_STRUCTURE_TYPE_SUBMIT_INFO};
submit.pCommandBuffers = cmds;
submit.commandBufferCount = (uint32_t)count;
vkQueueSubmit(queue, 1, &submit, fence);
}
void CommandPool::submit(size_t count, const VkCommandBuffer* cmds, VkFence fence)
{
submit(count, cmds, m_queue, fence);
}
void CommandPool::submit(const std::vector<VkCommandBuffer>& cmds, VkFence fence)
{
submit(cmds.size(), cmds.data(), m_queue, fence);
}
//////////////////////////////////////////////////////////////////////////
void RingFences::init(VkDevice device, uint32_t ringSize)
{
assert(!m_device);
m_device = device;
m_cycleIndex = 0;
m_cycleSize = ringSize;
m_fences.resize(ringSize);
for(uint32_t i = 0; i < m_cycleSize; i++)
{
VkFenceCreateInfo info = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO};
info.flags = 0;
NVVK_CHECK(vkCreateFence(device, &info, nullptr, &m_fences[i].fence));
m_fences[i].active = false;
}
}
void RingFences::deinit()
{
if(!m_device)
return;
for(uint32_t i = 0; i < m_cycleSize; i++)
{
vkDestroyFence(m_device, m_fences[i].fence, nullptr);
}
m_fences.clear();
m_device = VK_NULL_HANDLE;
}
VkFence RingFences::getFence()
{
m_fences[m_cycleIndex].active = true;
return m_fences[m_cycleIndex].fence;
}
void RingFences::setCycleAndWait(uint32_t cycle)
{
// set cycle
m_cycleIndex = cycle % m_cycleSize;
Entry& entry = m_fences[m_cycleIndex];
if(entry.active)
{
// ensure the cycle we will use now has completed
VkResult result = vkWaitForFences(m_device, 1, &entry.fence, VK_TRUE, ~0ULL);
if(nvvk::checkResult(result, __FILE__, __LINE__))
{
exit(-1);
}
entry.active = false;
}
vkResetFences(m_device, 1, &entry.fence);
}
//////////////////////////////////////////////////////////////////////////
void RingCommandPool::init(VkDevice device, uint32_t queueFamilyIndex, VkCommandPoolCreateFlags flags, uint32_t ringSize)
{
assert(!m_device);
m_device = device;
m_cycleIndex = 0;
m_cycleSize = ringSize;
m_flags = flags;
m_familyIndex = queueFamilyIndex;
m_pools.resize(ringSize);
for(uint32_t i = 0; i < m_cycleSize; i++)
{
VkCommandPoolCreateInfo info = {VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO};
info.queueFamilyIndex = queueFamilyIndex;
info.flags = flags;
NVVK_CHECK(vkCreateCommandPool(m_device, &info, nullptr, &m_pools[i].pool));
}
}
void RingCommandPool::deinit()
{
if(!m_device)
return;
for(uint32_t i = 0; i < m_cycleSize; i++)
{
Entry& entry = m_pools[i];
if(!entry.cmds.empty())
{
vkFreeCommandBuffers(m_device, entry.pool, uint32_t(entry.cmds.size()), entry.cmds.data());
vkResetCommandPool(m_device, entry.pool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT);
entry.cmds.clear();
}
vkDestroyCommandPool(m_device, entry.pool, nullptr);
}
m_device = VK_NULL_HANDLE;
}
void RingCommandPool::setCycle(uint32_t cycle)
{
m_cycleIndex = cycle % m_cycleSize;
Entry& entry = m_pools[m_cycleIndex];
if(!entry.cmds.empty())
{
vkFreeCommandBuffers(m_device, entry.pool, uint32_t(entry.cmds.size()), entry.cmds.data());
vkResetCommandPool(m_device, entry.pool, 0);
entry.cmds.clear();
}
}
VkCommandBuffer RingCommandPool::createCommandBuffer(VkCommandBufferLevel level /*= VK_COMMAND_BUFFER_LEVEL_PRIMARY*/,
bool begin,
VkCommandBufferUsageFlags flags /*= VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT*/,
const VkCommandBufferInheritanceInfo* pInheritanceInfo /*= nullptr*/)
{
Entry& cycle = m_pools[m_cycleIndex];
VkCommandBufferAllocateInfo info = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO};
info.commandBufferCount = 1;
info.commandPool = cycle.pool;
info.level = level;
VkCommandBuffer cmd;
vkAllocateCommandBuffers(m_device, &info, &cmd);
cycle.cmds.push_back(cmd);
if(begin)
{
VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = flags;
beginInfo.pInheritanceInfo = pInheritanceInfo;
vkBeginCommandBuffer(cmd, &beginInfo);
}
return cmd;
}
const VkCommandBuffer* RingCommandPool::createCommandBuffers(VkCommandBufferLevel level, uint32_t count)
{
Entry& cycle = m_pools[m_cycleIndex];
VkCommandBufferAllocateInfo info = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO};
info.commandBufferCount = count;
info.commandPool = cycle.pool;
info.level = level;
size_t begin = cycle.cmds.size();
cycle.cmds.resize(begin + count);
VkCommandBuffer* cmds = cycle.cmds.data() + begin;
vkAllocateCommandBuffers(m_device, &info, cmds);
return cmds;
}
//////////////////////////////////////////////////////////////////////////
void BatchSubmission::init(VkQueue queue)
{
assert(m_waits.empty() && m_waitFlags.empty() && m_signals.empty() && m_commands.empty());
m_queue = queue;
}
void BatchSubmission::enqueue(uint32_t num, const VkCommandBuffer* cmdbuffers)
{
for(uint32_t i = 0; i < num; i++)
{
m_commands.push_back(cmdbuffers[i]);
}
}
void BatchSubmission::enqueue(VkCommandBuffer cmdbuffer)
{
m_commands.push_back(cmdbuffer);
}
void BatchSubmission::enqueueSignal(VkSemaphore sem)
{
m_signals.push_back(sem);
}
void BatchSubmission::enqueueWait(VkSemaphore sem, VkPipelineStageFlags flag)
{
m_waits.push_back(sem);
m_waitFlags.push_back(flag);
}
VkResult BatchSubmission::execute(VkFence fence /*= nullptr*/, uint32_t deviceMask)
{
VkResult res = VK_SUCCESS;
if(m_queue && (fence || !m_commands.empty() || !m_signals.empty() || !m_waits.empty()))
{
VkSubmitInfo submitInfo = {VK_STRUCTURE_TYPE_SUBMIT_INFO};
submitInfo.commandBufferCount = uint32_t(m_commands.size());
submitInfo.signalSemaphoreCount = uint32_t(m_signals.size());
submitInfo.waitSemaphoreCount = uint32_t(m_waits.size());
submitInfo.pCommandBuffers = m_commands.data();
submitInfo.pSignalSemaphores = m_signals.data();
submitInfo.pWaitSemaphores = m_waits.data();
submitInfo.pWaitDstStageMask = m_waitFlags.data();
std::vector<uint32_t> deviceMasks;
std::vector<uint32_t> deviceIndices;
VkDeviceGroupSubmitInfo deviceGroupInfo = {VK_STRUCTURE_TYPE_DEVICE_GROUP_SUBMIT_INFO};
if(deviceMask != 0)
{
// Allocate an array big enough to hold the mask for all three parameters
deviceMasks.resize(m_commands.size(), deviceMask);
deviceIndices.resize(std::max(m_signals.size(), m_waits.size()), 0); // Only perform semaphore actions on device zero
submitInfo.pNext = &deviceGroupInfo;
deviceGroupInfo.commandBufferCount = submitInfo.commandBufferCount;
deviceGroupInfo.pCommandBufferDeviceMasks = deviceMasks.data();
deviceGroupInfo.signalSemaphoreCount = submitInfo.signalSemaphoreCount;
deviceGroupInfo.pSignalSemaphoreDeviceIndices = deviceIndices.data();
deviceGroupInfo.waitSemaphoreCount = submitInfo.waitSemaphoreCount;
deviceGroupInfo.pWaitSemaphoreDeviceIndices = deviceIndices.data();
}
res = vkQueueSubmit(m_queue, 1, &submitInfo, fence);
m_commands.clear();
m_waits.clear();
m_waitFlags.clear();
m_signals.clear();
}
return res;
}
void BatchSubmission::waitIdle() const
{
VkResult result = vkQueueWaitIdle(m_queue);
if(nvvk::checkResult(result, __FILE__, __LINE__))
{
exit(-1);
}
}
} // namespace nvvk

@@ -0,0 +1,568 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <platform.h>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace nvvk {
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# functions in nvvk
- makeAccessMaskPipelineStageFlags : depending on accessMask returns appropriate VkPipelineStageFlagBits
- cmdBegin : wraps vkBeginCommandBuffer with VkCommandBufferUsageFlags and implicitly handles VkCommandBufferBeginInfo setup
- makeSubmitInfo : VkSubmitInfo struct setup using provided arrays of signals and commandbuffers, leaving rest zeroed
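For example, a generic memory barrier can derive both stage masks from its access masks (a sketch; the chosen access masks and `cmd` are illustrative):
```cpp
VkMemoryBarrier barrier{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
vkCmdPipelineBarrier(cmd,
                     nvvk::makeAccessMaskPipelineStageFlags(barrier.srcAccessMask),
                     nvvk::makeAccessMaskPipelineStageFlags(barrier.dstAccessMask),
                     0, 1, &barrier, 0, nullptr, 0, nullptr);
```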
@DOC_END */
// useful for barriers, derive all compatible stage flags from an access mask
uint32_t makeAccessMaskPipelineStageFlags(uint32_t accessMask,
VkPipelineStageFlags supportedShaderBits = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT
| VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT
| VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT
| VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
void cmdBegin(VkCommandBuffer cmd, VkCommandBufferUsageFlags flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT);
inline VkSubmitInfo makeSubmitInfo(uint32_t numCmds, VkCommandBuffer* cmds, uint32_t numSignals, VkSemaphore* signals)
{
VkSubmitInfo submitInfo = {VK_STRUCTURE_TYPE_SUBMIT_INFO};
submitInfo.pCommandBuffers = cmds;
submitInfo.commandBufferCount = numCmds;
submitInfo.pSignalSemaphores = signals;
submitInfo.signalSemaphoreCount = numSignals;
return submitInfo;
}
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# class nvvk::CommandPool
nvvk::CommandPool stores a single VkCommandPool and provides utility functions
to create VkCommandBuffers from it.
Example:
```cpp
{
nvvk::CommandPool cmdPool;
cmdPool.init(...);
// some setup/one shot work
{
VkCommandBuffer cmd = cmdPool.createCommandBuffer();
... record commands ...
// trigger execution with a blocking operation
// not recommended for performance
// but useful for sample setup
cmdPool.submitAndWait(cmd, queue);
}
// other cmds you may batch, or recycle
std::vector<VkCommandBuffer> cmds;
{
VkCommandBuffer cmd = cmdPool.createCommandBuffer();
... record commands ...
cmds.push_back(cmd);
}
{
VkCommandBuffer cmd = cmdPool.createCommandBuffer();
... record commands ...
cmds.push_back(cmd);
}
// do some form of batched submission of cmds
// after completion destroy cmd
cmdPool.destroy(cmds.size(), cmds.data());
cmdPool.deinit();
}
```
@DOC_END */
class CommandPool
{
public:
CommandPool(CommandPool const&) = delete;
CommandPool& operator=(CommandPool const&) = delete;
CommandPool() {}
~CommandPool() { deinit(); }
// if defaultQueue is null, uses first queue from familyIndex as default
CommandPool(VkDevice device,
uint32_t familyIndex,
VkCommandPoolCreateFlags flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
VkQueue defaultQueue = VK_NULL_HANDLE)
{
init(device, familyIndex, flags, defaultQueue);
}
// if defaultQueue is null, uses first queue from familyIndex as default
void init(VkDevice device,
uint32_t familyIndex,
VkCommandPoolCreateFlags flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
VkQueue defaultQueue = VK_NULL_HANDLE);
void deinit();
VkCommandBuffer createCommandBuffer(VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
bool begin = true,
VkCommandBufferUsageFlags flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
const VkCommandBufferInheritanceInfo* pInheritanceInfo = nullptr);
// free cmdbuffers from this pool
void destroy(size_t count, const VkCommandBuffer* cmds);
void destroy(const std::vector<VkCommandBuffer>& cmds) { destroy(cmds.size(), cmds.data()); }
void destroy(VkCommandBuffer cmd) { destroy(1, &cmd); }
VkCommandPool getCommandPool() const { return m_commandPool; }
// Ends command buffer recording and submits to the queue. If 'fence' is not
// VK_NULL_HANDLE, it will be used to signal the completion of the command
// buffer execution. Does NOT destroy the command buffers! This is not an
// optimal pattern for queue submission, as it may lead to a large number of
// vkQueueSubmit() calls per frame. Consider batching submissions via the
// FencedCommandPools and BatchSubmission classes below.
void submit(size_t count, const VkCommandBuffer* cmds, VkQueue queue, VkFence fence = VK_NULL_HANDLE);
void submit(size_t count, const VkCommandBuffer* cmds, VkFence fence = VK_NULL_HANDLE);
void submit(const std::vector<VkCommandBuffer>& cmds, VkFence fence = VK_NULL_HANDLE);
// Non-optimal usage pattern that waits for queue idle; avoid in production use.
// Consider batching submissions via the FencedCommandPools and
// BatchSubmission classes below. Ends command buffer recording,
// submits to the queue, waits for queue idle, and destroys the cmds.
void submitAndWait(size_t count, const VkCommandBuffer* cmds, VkQueue queue);
void submitAndWait(const std::vector<VkCommandBuffer>& cmds, VkQueue queue)
{
submitAndWait(cmds.size(), cmds.data(), queue);
}
void submitAndWait(VkCommandBuffer cmd, VkQueue queue) { submitAndWait(1, &cmd, queue); }
// ends and submits to default queue, waits for queue idle and destroys cmds
void submitAndWait(size_t count, const VkCommandBuffer* cmds) { submitAndWait(count, cmds, m_queue); }
void submitAndWait(const std::vector<VkCommandBuffer>& cmds) { submitAndWait(cmds.size(), cmds.data(), m_queue); }
void submitAndWait(VkCommandBuffer cmd) { submitAndWait(1, &cmd, m_queue); }
protected:
VkDevice m_device = VK_NULL_HANDLE;
VkQueue m_queue = VK_NULL_HANDLE;
VkCommandPool m_commandPool = VK_NULL_HANDLE;
};
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# class nvvk::ScopeCommandBuffer
nvvk::ScopeCommandBuffer provides a single VkCommandBuffer that lives within the scope
and is directly submitted and deleted when the scope is left.
Not recommended for efficiency, since it results in a blocking
operation, but aids sample writing.
Example:
```cpp
{
ScopeCommandBuffer cmd(device, queueFamilyIndex, queue);
... do stuff
vkCmdCopyBuffer(cmd, ...);
}
```
@DOC_END */
class ScopeCommandBuffer : public CommandPool
{
public:
// if queue is null, uses first queue from familyIndex
ScopeCommandBuffer(VkDevice device, uint32_t familyIndex, VkQueue queue = VK_NULL_HANDLE)
{
CommandPool::init(device, familyIndex, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queue);
m_cmd = createCommandBuffer();
}
~ScopeCommandBuffer() { submitAndWait(m_cmd); }
operator VkCommandBuffer() const { return m_cmd; };
private:
VkCommandBuffer m_cmd;
};
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# class **nvvk::Ring...**
In real-time processing, the CPU typically generates commands
ahead of the GPU and sends them in batches for execution.
To avoid making the CPU wait for the GPU's completion and to let it "race ahead",
we make use of double- or triple-buffering techniques, where we cycle through
a pool of resources every frame. We know that those resources are currently
not in use by the GPU and can therefore manipulate them directly.
Especially in Vulkan, it is the developer's responsibility to avoid such
access to resources that are in flight.
The "Ring" classes cycle through a pool of resources. The default value
is set to allow two frames in-flight, assuming one fence is used per-frame.
@DOC_END */
// typically the driver will not let the CPU race ahead of the GPU
// by more than two frames during swapchain operations.
static const uint32_t DEFAULT_RING_SIZE = 3;
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# class nvvk::RingFences
nvvk::RingFences recycles a fixed number of fences, provides information in which cycle
we are currently at, and prevents accidental access to a cycle in-flight.
A typical frame would start by "setCycleAndWait", which waits for the
requested cycle to be available.
@DOC_END */
class RingFences
{
public:
RingFences(RingFences const&) = delete;
RingFences& operator=(RingFences const&) = delete;
RingFences() {}
RingFences(VkDevice device, uint32_t ringSize = DEFAULT_RING_SIZE) { init(device, ringSize); }
~RingFences() { deinit(); }
void init(VkDevice device, uint32_t ringSize = DEFAULT_RING_SIZE);
void deinit();
void reset()
{
VkDevice device = m_device;
uint32_t ringSize = m_cycleSize;
deinit();
init(device, ringSize);
}
// ensures the availability of the passed cycle
void setCycleAndWait(uint32_t cycle);
// get current cycle fence
VkFence getFence();
// query current cycle index
uint32_t getCycleIndex() const { return m_cycleIndex; }
uint32_t getCycleSize() const { return m_cycleSize; }
private:
struct Entry
{
VkFence fence;
bool active;
};
uint32_t m_cycleIndex{0};
uint32_t m_cycleSize{0};
std::vector<Entry> m_fences;
VkDevice m_device = VK_NULL_HANDLE;
};
//--------------------------------------------------------------------------------------------------
/** @DOC_START
## class nvvk::RingCommandPool
nvvk::RingCommandPool manages a fixed cycle set of VkCommandBufferPools and
one-shot command buffers allocated from them.
The usage of multiple command buffer pools also means we get nice allocation
behavior (linear allocation from frame start to frame end) without fragmentation.
If we were using a single command pool over multiple frames, it could fragment easily.
You must ensure the cycle is available manually, typically by keeping it in sync
with ring fences.
Example:
```cpp
{
frame++;
// wait until we can use the new cycle
// (very rare if we use the fence at the end once per frame)
ringFences.setCycleAndWait( frame );
// update cycle state, allows recycling of old resources
ringPool.setCycle( frame );
VkCommandBuffer cmd = ringPool.createCommandBuffer(...);
... do stuff / submit etc...
VkFence fence = ringFences.getFence();
// use this fence in the submit
vkQueueSubmit(...fence..);
}
```
@DOC_END */
class RingCommandPool
{
public:
RingCommandPool(RingCommandPool const&) = delete;
RingCommandPool& operator=(RingCommandPool const&) = delete;
RingCommandPool(VkDevice device,
uint32_t queueFamilyIndex,
VkCommandPoolCreateFlags flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
uint32_t ringSize = DEFAULT_RING_SIZE)
{
init(device, queueFamilyIndex, flags, ringSize);
}
RingCommandPool() {}
~RingCommandPool() { deinit(); }
void init(VkDevice device,
uint32_t queueFamilyIndex,
VkCommandPoolCreateFlags flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
uint32_t ringSize = DEFAULT_RING_SIZE);
void deinit();
void reset()
{
VkDevice device = m_device;
VkCommandPoolCreateFlags flags = m_flags;
uint32_t queueFamilyIndex = m_familyIndex;
uint32_t ringSize = m_cycleSize;
deinit();
init(device, queueFamilyIndex, flags, ringSize);
}
// call when cycle has changed, prior creating command buffers
// resets old pools etc.
void setCycle(uint32_t cycle);
// ensure proper cycle or frame is set prior these
VkCommandBuffer createCommandBuffer(VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
bool begin = true,
VkCommandBufferUsageFlags flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
const VkCommandBufferInheritanceInfo* pInheritanceInfo = nullptr);
// pointer is only valid until next create
const VkCommandBuffer* createCommandBuffers(VkCommandBufferLevel level, uint32_t count);
protected:
struct Entry
{
VkCommandPool pool{};
std::vector<VkCommandBuffer> cmds;
};
uint32_t m_cycleIndex{0};
uint32_t m_cycleSize{0};
std::vector<Entry> m_pools;
VkDevice m_device = VK_NULL_HANDLE;
VkCommandPoolCreateFlags m_flags{0};
uint32_t m_familyIndex{0};
};
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# class nvvk::BatchSubmission
nvvk::BatchSubmission batches the submission arguments of VkSubmitInfo for VkQueueSubmit.
vkQueueSubmit is a rather costly operation (depending on the OS)
and should not be issued too often (e.g. < 10 per frame). Therefore
this utility class allows adding command buffers, semaphores, etc. and
submitting them later in a batch.
When using manual locks, it can also be useful to feed command buffers
from different threads and then kick off the submission later.
Example
```cpp
// within upload logic
{
semTransfer = handleUpload(...);
// for example trigger async upload on transfer queue here
vkQueueSubmit(... semTransfer ...);
// tell next frame's batch submission
// that its commandbuffers should wait for transfer
// to be completed
graphicsSubmission.enqueueWait(semTransfer);
}
// within present logic
{
// for example ensure the next frame waits until proper present semaphore was triggered
graphicsSubmission.enqueueWait(presentSemaphore);
}
// within drawing logic
{
// enqueue some graphics work for submission
graphicsSubmission.enqueue(getSceneCmdBuffer());
graphicsSubmission.enqueue(getUiCmdBuffer());
graphicsSubmission.execute(frameFence);
}
```
@DOC_END */
class BatchSubmission
{
private:
VkQueue m_queue = nullptr;
std::vector<VkSemaphore> m_waits;
std::vector<VkPipelineStageFlags> m_waitFlags;
std::vector<VkSemaphore> m_signals;
std::vector<VkCommandBuffer> m_commands;
public:
BatchSubmission(BatchSubmission const&) = delete;
BatchSubmission& operator=(BatchSubmission const&) = delete;
BatchSubmission() {}
BatchSubmission(VkQueue queue) { init(queue); }
uint32_t getCommandBufferCount() const { return uint32_t(m_commands.size()); }
VkQueue getQueue() const { return m_queue; }
// can change queue if nothing is pending
void init(VkQueue queue);
void enqueue(uint32_t num, const VkCommandBuffer* cmdbuffers);
void enqueue(VkCommandBuffer cmdbuffer);
void enqueueSignal(VkSemaphore sem);
void enqueueWait(VkSemaphore sem, VkPipelineStageFlags flag);
// submits the work and resets internal state
VkResult execute(VkFence fence = nullptr, uint32_t deviceMask = 0);
void waitIdle() const;
};
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::FencedCommandPools
The nvvk::FencedCommandPools container class bundles the typical utilities for handling
command submission. It combines RingFences, RingCommandPool and BatchSubmission
behind a convenient interface.
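A per-frame usage sketch (the `device`, `queue`, `queueFamilyIndex` and `frame` variables are assumed to exist):
```cpp
nvvk::FencedCommandPools pools(device, queue, queueFamilyIndex);

frame++;
pools.setCycleAndWait(frame);   // waits on this cycle's fence and recycles its command pool
VkCommandBuffer cmd = pools.createCommandBuffer();
// ... record commands ...
vkEndCommandBuffer(cmd);
pools.enqueue(cmd);
pools.execute();                // submits the batch, signaling this cycle's fence
```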
@DOC_END */
class FencedCommandPools : protected RingFences, protected RingCommandPool, protected BatchSubmission
{
public:
FencedCommandPools(FencedCommandPools const&) = delete;
FencedCommandPools& operator=(FencedCommandPools const&) = delete;
FencedCommandPools() {}
~FencedCommandPools() { deinit(); }
FencedCommandPools(VkDevice device,
VkQueue queue,
uint32_t queueFamilyIndex,
VkCommandPoolCreateFlags flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
uint32_t ringSize = DEFAULT_RING_SIZE)
{
init(device, queue, queueFamilyIndex, flags, ringSize);
}
void init(VkDevice device,
VkQueue queue,
uint32_t queueFamilyIndex,
VkCommandPoolCreateFlags flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
uint32_t ringSize = DEFAULT_RING_SIZE)
{
RingFences::init(device, ringSize);
RingCommandPool::init(device, queueFamilyIndex, flags, ringSize);
BatchSubmission::init(queue);
}
void deinit()
{
RingFences::deinit();
RingCommandPool::deinit();
//BatchSubmission::deinit();
}
void reset()
{
waitIdle();
RingFences::reset();
RingCommandPool::reset();
}
void enqueue(uint32_t num, const VkCommandBuffer* cmdbuffers) { BatchSubmission::enqueue(num, cmdbuffers); }
void enqueue(VkCommandBuffer cmdbuffer) { BatchSubmission::enqueue(cmdbuffer); }
void enqueueSignal(VkSemaphore sem) { BatchSubmission::enqueueSignal(sem); }
void enqueueWait(VkSemaphore sem, VkPipelineStageFlags flag) { BatchSubmission::enqueueWait(sem, flag); }
VkResult execute(uint32_t deviceMask = 0) { return BatchSubmission::execute(getFence(), deviceMask); }
void waitIdle() const { BatchSubmission::waitIdle(); }
void setCycleAndWait(uint32_t cycle)
{
RingFences::setCycleAndWait(cycle);
RingCommandPool::setCycle(cycle);
}
// ensure the proper cycle is set prior to this
VkCommandBuffer createCommandBuffer(VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
bool begin = true,
VkCommandBufferUsageFlags flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
const VkCommandBufferInheritanceInfo* pInheritanceInfo = nullptr)
{
return RingCommandPool::createCommandBuffer(level, begin, flags, pInheritanceInfo);
}
// pointer is only valid until next create
const VkCommandBuffer* createCommandBuffers(VkCommandBufferLevel level, uint32_t count)
{
return RingCommandPool::createCommandBuffers(level, count);
}
struct ScopedCmd
{
FencedCommandPools* pCmdPools;
VkCommandBuffer cmd;
ScopedCmd(FencedCommandPools& cp)
{
pCmdPools = &cp;
cmd = cp.createCommandBuffer();
}
~ScopedCmd()
{
vkEndCommandBuffer(cmd);
pCmdPools->enqueue(cmd);
pCmdPools->execute();
pCmdPools->waitIdle();
}
operator VkCommandBuffer() { return cmd; }
};
};
} // namespace nvvk

View file

@ -0,0 +1,400 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <unordered_map>
#include <memory>
#include "vulkan/vulkan_core.h"
#include "descriptorsets_vk.hpp"
#define NVVK_COMPUTE_DEFAULT_BLOCK_SIZE 256
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::PushComputeDispatcher
nvvk::PushComputeDispatcher is a convenience structure for easily creating
compute-only pipelines by defining the bindings and providing SPV code.
The descriptor set updates are carried out using the KHR_push_descriptor
extension.
Example:
```cpp
enum BindingLocation
{
eMyBindingLocation = 0
};
struct PushConstant{
...
}
pushConstant;
nvvk::PushComputeDispatcher<PushConstant, BindingLocation> myCompute;
VkBuffer myFirstBuffer = createMyFirstBuffer(...);
VkBuffer mySecondBuffer = createMySecondBuffer(...);
VkDevice device = getMyVkDevice(...);
uint8_t* spvCode = getMyComputeShaderCode(...);
size_t spvCodeSize = getMyComputeShaderCodeSize(...);
myCompute.addBufferBinding(BindingLocation::eMyBindingLocation);
myCompute.updateBufferBinding(BindingLocation::eMyBindingLocation, myFirstBuffer);
myCompute.setCode(device, spvCode, spvCodeSize);
myCompute.finalizePipeline(device);
...
VkCommandBuffer cmd = getMyCommandBuffer(...);
myCompute.dispatchThreads(cmd, targetThreadCount, &pushConstant);
...
myCompute.updateBufferBinding(BindingLocation::eMyBindingLocation, mySecondBuffer);
myCompute.dispatchThreads(cmd, targetThreadCount, &pushConstant);
...
```
@DOC_END */
/// Barrier types usable before and after the shader dispatch.
/// These barriers cover SHADER_READ and SHADER_WRITE access, plus TRANSFER access when requested.
enum DispatcherBarrier
{
eNone = 0,
eCompute = 1,
eTransfer = 2,
eGraphics = 4,
eRaytracing = 8
};
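// Example (sketch, reusing `myCompute`, `cmd` and `pushConstant` from the class documentation above):
// make the dispatch wait on prior transfer work and make its results visible to later compute work
//   myCompute.dispatchThreads(cmd, targetThreadCount, &pushConstant, DispatcherBarrier::eCompute, DispatcherBarrier::eTransfer);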
template <typename TPushConstants, typename TBindingEnum, uint32_t pipelineCount = 1u>
struct PushComputeDispatcher
{
VkPipelineLayout layout{};
std::array<VkPipeline, pipelineCount> pipelines{};
VkDescriptorSetLayout dsetLayout{};
nvvk::DescriptorSetBindings bindings;
std::unordered_map<TBindingEnum, std::unique_ptr<VkDescriptorBufferInfo>> bufferInfos;
std::unordered_map<TBindingEnum, std::unique_ptr<VkWriteDescriptorSetAccelerationStructureKHR>> accelInfos;
std::unordered_map<TBindingEnum, std::unique_ptr<VkAccelerationStructureKHR>> accel;
std::unordered_map<TBindingEnum, std::unique_ptr<VkDescriptorImageInfo>> sampledImageInfos;
TPushConstants pushConstants{};
struct ShaderModule
{
VkShaderModule module{VK_NULL_HANDLE};
bool isLocal{false};
};
std::vector<VkWriteDescriptorSet> writes;
std::array<ShaderModule, pipelineCount> shaderModules;
bool addBufferBinding(TBindingEnum index)
{
if(bufferInfos.find(index) == bufferInfos.end())
{
bindings.addBinding(VkDescriptorSetLayoutBinding{uint32_t(index), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT});
bufferInfos[index] = std::make_unique<VkDescriptorBufferInfo>();
auto* info = bufferInfos[index].get();
*(info) = {VK_NULL_HANDLE, 0, VK_WHOLE_SIZE};
writes.emplace_back(bindings.makeWrite(0, index, info));
return true;
}
return false;
}
bool addAccelerationStructureBinding(TBindingEnum index)
{
if(accelInfos.find(index) == accelInfos.end())
{
bindings.addBinding(VkDescriptorSetLayoutBinding{uint32_t(index), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
1, VK_SHADER_STAGE_COMPUTE_BIT});
accelInfos[index] = std::make_unique<VkWriteDescriptorSetAccelerationStructureKHR>();
auto* info = accelInfos[index].get();
accel[index] = std::make_unique<VkAccelerationStructureKHR>();
auto* acc = accel[index].get();
info->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR;
info->pNext = nullptr;
info->accelerationStructureCount = 1;
info->pAccelerationStructures = acc;
writes.emplace_back(bindings.makeWrite(0, index, info));
return true;
}
return false;
}
bool addSampledImageBinding(TBindingEnum index)
{
if(sampledImageInfos.find(index) == sampledImageInfos.end())
{
bindings.addBinding(VkDescriptorSetLayoutBinding{uint32_t(index), VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1,
VK_SHADER_STAGE_COMPUTE_BIT});
sampledImageInfos[index] = std::make_unique<VkDescriptorImageInfo>();
auto* info = sampledImageInfos[index].get();
writes.emplace_back(bindings.makeWrite(0, index, info));
return true;
}
return false;
}
bool updateBufferBinding(TBindingEnum index, VkBuffer buffer)
{
auto it = bufferInfos.find(index);
if(it != bufferInfos.end())
{
it->second->buffer = buffer;
return true;
}
return false;
}
bool updateAccelerationStructureBinding(TBindingEnum index, VkAccelerationStructureKHR acc)
{
auto it = accel.find(index);
if(it != accel.end())
{
*(it->second.get()) = acc;
return true;
}
return false;
}
bool updateSampledImageBinding(TBindingEnum index,
VkSampler sampler = VK_NULL_HANDLE,
VkImageView view = VK_NULL_HANDLE,
VkImageLayout layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)
{
auto it = sampledImageInfos.find(index);
if(it != sampledImageInfos.end())
{
it->second->sampler = sampler;
it->second->imageView = view;
it->second->imageLayout = layout;
return true;
}
return false;
}
bool setCode(VkDevice device, void* shaderCode, size_t codeSize, uint32_t pipelineIndex = 0u)
{
VkShaderModuleCreateInfo moduleCreateInfo{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO};
moduleCreateInfo.codeSize = codeSize;
moduleCreateInfo.pCode = reinterpret_cast<uint32_t*>(shaderCode);
VkResult r = vkCreateShaderModule(device, &moduleCreateInfo, nullptr, &(shaderModules[pipelineIndex].module));
if(r != VK_SUCCESS || shaderModules[pipelineIndex].module == VK_NULL_HANDLE)
{
return false;
}
shaderModules[pipelineIndex].isLocal = true;
return true;
}
bool setCode(VkShaderModule m, uint32_t pipelineIndex = 0u)
{
shaderModules[pipelineIndex].module = m;
shaderModules[pipelineIndex].isLocal = false;
return m != VK_NULL_HANDLE;
}
bool finalizePipeline(VkDevice device)
{
dsetLayout = bindings.createLayout(device, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
pipelineLayoutCreateInfo.pSetLayouts = &dsetLayout;
pipelineLayoutCreateInfo.setLayoutCount = 1;
VkPushConstantRange pushConstantRange{VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(TPushConstants)};
pipelineLayoutCreateInfo.pushConstantRangeCount = 1;
pipelineLayoutCreateInfo.pPushConstantRanges = &pushConstantRange;
VkResult r = vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, nullptr, &layout);
if(r != VK_SUCCESS || layout == VK_NULL_HANDLE)
{
return false;
}
VkPipelineShaderStageCreateInfo stageCreateInfo = {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO};
stageCreateInfo.stage = VK_SHADER_STAGE_COMPUTE_BIT;
stageCreateInfo.pName = "main";
for(uint32_t i = 0; i < pipelineCount; i++)
{
stageCreateInfo.module = shaderModules[i].module;
VkComputePipelineCreateInfo createInfo{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
createInfo.stage = stageCreateInfo;
createInfo.layout = layout;
r = vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &createInfo, nullptr, &pipelines[i]);
if(r != VK_SUCCESS || pipelines[i] == VK_NULL_HANDLE)
{
return false;
}
if(shaderModules[i].isLocal)
{
vkDestroyShaderModule(device, shaderModules[i].module, nullptr);
}
}
return true;
}
uint32_t getBlockCount(uint32_t targetThreadCount, uint32_t blockSize)
{
return (targetThreadCount + blockSize - 1) / blockSize;
}
// Bind the pipeline, push constants and push descriptors. Used internally, or if the app issues a direct
// call to vkCmdDispatch instead of the dispatchThreads()/dispatchBlocks() methods, as sketched below
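// A minimal manual-dispatch sketch (`myCompute`, `cmd` and `pushConstant` as in the class documentation):
//   myCompute.bind(cmd, &pushConstant);
//   vkCmdDispatch(cmd, groupCountX, 1, 1);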
void bind(VkCommandBuffer cmd, const TPushConstants* constants = nullptr, uint32_t pipelineIndex = 0u)
{
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipelines[pipelineIndex]);
if(constants != nullptr)
{
vkCmdPushConstants(cmd, layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(TPushConstants), constants);
}
if(writes.size() > 0)
{
vkCmdPushDescriptorSetKHR(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, static_cast<uint32_t>(writes.size()),
writes.data());
}
}
void dispatchThreads(VkCommandBuffer cmd,
uint32_t threadCount,
const TPushConstants* constants = nullptr,
uint32_t postBarrier = DispatcherBarrier::eCompute,
uint32_t preBarrier = DispatcherBarrier::eNone,
uint32_t blockSize = NVVK_COMPUTE_DEFAULT_BLOCK_SIZE,
// If pipelineIndex == ~0u, all pipelines will be executed sequentially. Otherwise, only dispatch the requested pipeline
uint32_t pipelineIndex = ~0u)
{
uint32_t blockCount = getBlockCount(threadCount, blockSize);
dispatchBlocks(cmd, blockCount, constants, postBarrier, preBarrier, pipelineIndex);
}
void dispatchBlocks(VkCommandBuffer cmd,
uint32_t blockCount,
const TPushConstants* constants = nullptr,
uint32_t postBarrier = DispatcherBarrier::eCompute,
uint32_t preBarrier = DispatcherBarrier::eNone,
// If pipelineIndex == ~0u, all pipelines will be executed sequentially. Otherwise, only dispatch the requested pipeline
uint32_t pipelineIndex = ~0u)
{
dispatchBlocks(cmd, {blockCount, 1, 1}, constants, postBarrier, preBarrier, pipelineIndex);
}
void dispatchBlocks(VkCommandBuffer cmd,
glm::uvec3 blockCount,
const TPushConstants* constants = nullptr,
uint32_t postBarrier = DispatcherBarrier::eCompute,
uint32_t preBarrier = DispatcherBarrier::eNone,
// If pipelineIndex == ~0u, all pipelines will be executed sequentially. Otherwise, only dispatch the requested pipeline
uint32_t pipelineIndex = ~0u)
{
if(preBarrier != eNone)
{
VkMemoryBarrier mb{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
mb.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
VkPipelineStageFlags srcStage{};
if((preBarrier & eCompute) || (preBarrier & eGraphics) || (preBarrier & eRaytracing))
{
mb.srcAccessMask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
if(preBarrier & eCompute)
srcStage |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
if(preBarrier & eGraphics)
srcStage |= VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT;
if(preBarrier & eRaytracing)
srcStage |= VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR;
}
if(preBarrier & eTransfer)
{
mb.srcAccessMask |= VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
srcStage |= VK_PIPELINE_STAGE_TRANSFER_BIT;
}
vkCmdPipelineBarrier(cmd, srcStage, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &mb, 0, nullptr, 0, nullptr);
}
uint32_t currentPipeline = (pipelineIndex == ~0u) ? 0 : pipelineIndex;
uint32_t count = (pipelineIndex == ~0u) ? pipelineCount : 1;
for(uint32_t i = 0; i < count; i++)
{
bind(cmd, constants, currentPipeline + i);
vkCmdDispatch(cmd, blockCount.x, blockCount.y, blockCount.z);
if(postBarrier != eNone)
{
VkMemoryBarrier mb{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
mb.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
VkPipelineStageFlags dstStage{};
if((postBarrier & eCompute) || (postBarrier & eGraphics) || (postBarrier & eRaytracing))
{
mb.dstAccessMask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
if(postBarrier & eCompute)
dstStage |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
if(postBarrier & eGraphics)
dstStage |= VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT;
if(postBarrier & eRaytracing)
dstStage |= VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR;
}
if(postBarrier & eTransfer)
{
mb.dstAccessMask |= VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
dstStage |= VK_PIPELINE_STAGE_TRANSFER_BIT;
}
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, dstStage, 0, 1, &mb, 0, nullptr, 0, nullptr);
}
}
}
void destroy(VkDevice device)
{
vkDestroyPipelineLayout(device, layout, nullptr);
for(uint32_t i = 0; i < pipelineCount; i++)
{
vkDestroyPipeline(device, pipelines[i], nullptr);
}
vkDestroyDescriptorSetLayout(device, dsetLayout, nullptr);
bufferInfos.clear();
accelInfos.clear();
accel.clear();
sampledImageInfos.clear();
writes.clear();
bindings.clear();
}
};
} // namespace nvvk

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,522 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef NV_VK_DEVICEINSTANCE_INCLUDED
#define NV_VK_DEVICEINSTANCE_INCLUDED
#include <string> // std::string
#include <string.h> // memcpy
#include <unordered_set>
#include <vector>
#include <functional>
#include <vulkan/vulkan_core.h>
#include "nsight_aftermath_vk.hpp"
static_assert(VK_HEADER_VERSION >= 261, "Vulkan SDK version needs to be 1.3.261.0 or greater");
namespace nvvk {
/** @DOC_START
To run a Vulkan application, you need to create the Vulkan instance and device.
This is done using the `nvvk::Context`, which wraps the creation of `VkInstance`
and `VkDevice`.
First, any application needs to specify how instance and device should be created:
Version, layers, instance and device extensions influence the features available.
This is done through a temporary, intermediate class that gathers
all the required conditions for the device creation.
@DOC_END */
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# struct ContextCreateInfo
This structure allows the application to specify a set of features
that are expected for the creation of
- VkInstance
- VkDevice
It is consumed by the `nvvk::Context::init` function.
Example on how to populate information in it :
```cpp
nvvk::ContextCreateInfo ctxInfo;
ctxInfo.setVersion(1, 2);
ctxInfo.addInstanceExtension(VK_KHR_SURFACE_EXTENSION_NAME, false);
ctxInfo.addInstanceExtension(VK_KHR_WIN32_SURFACE_EXTENSION_NAME, false);
ctxInfo.addDeviceExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, false);
// adding an extension with a feature struct:
//
VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR pipePropFeatures = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR};
// Be aware of the lifetime of the pointer of the feature struct.
// ctxInfo stores the pointer directly and context init functions use it for read & write access.
ctxInfo.addDeviceExtension(VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME, true, &pipePropFeatures);
// disabling a feature:
//
// This callback is called after the feature structs were filled with physical device information
// and prior logical device creation.
// The callback iterates over all feature structs, including those from
// the vulkan versions.
ctxInfo.fnDisableFeatures = [](VkStructureType sType, void *pFeatureStruct)
{
switch(sType){
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES:
{
auto* features11 = reinterpret_cast<VkPhysicalDeviceVulkan11Features*>(pFeatureStruct);
// at this point the struct is populated with what the device supports
// and therefore it is only legal to disable features, not enable them.
// let's say we wanted to disable multiview
features11->multiview = VK_FALSE;
}
break;
default:
break;
}
};
```
then you are ready to create and initialize `nvvk::Context`
> Note: In debug builds, the extension `VK_EXT_DEBUG_UTILS_EXTENSION_NAME` and the layer `VK_LAYER_KHRONOS_validation` are added to help find issues early.
@DOC_END */
static const VkDeviceDiagnosticsConfigFlagsNV defaultAftermathFlags =
(VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_RESOURCE_TRACKING_BIT_NV // Additional information about the resource related to a GPU virtual address
| VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_AUTOMATIC_CHECKPOINTS_BIT_NV // Automatic checkpoints for all draw calls (ADD OVERHEAD)
| VK_DEVICE_DIAGNOSTICS_CONFIG_ENABLE_SHADER_DEBUG_INFO_BIT_NV); // instructs the shader compiler to generate debug information (ADD OVERHEAD)
struct ContextCreateInfo
{
// aftermathFlags != 0 will enable GPU crash dumps when Aftermath is available via SUPPORT_AFTERMATH
// No-op when Aftermath is not available.
ContextCreateInfo(bool bUseValidation = true);
void setVersion(uint32_t major, uint32_t minor);
void addInstanceExtension(const char* name, bool optional = false);
void addInstanceLayer(const char* name, bool optional = false);
// Add an extension to be enabled at context creation time. If 'optional' is
// false, context creation will fail if the extension is not supported by the
// device. If the extension requires a feature struct, pass the initialized
// struct to 'pFeatureStruct'. If 'version' = 0: don't care, otherwise check
// against equality (useful for provisional exts)
//
// IMPORTANT: The 'pFeatureStruct' pointer will be stored and the object will
// later be written to! Make sure the pointer is still valid when
// Context::Init() gets called with the ContextCreateInfo object. All
// pFeatureStruct objects will be chained together and filled out with the
// actual device capabilities during Context::Init().
void addDeviceExtension(const char* name, bool optional = false, void* pFeatureStruct = nullptr, uint32_t version = 0);
void removeInstanceExtension(const char* name);
void removeInstanceLayer(const char* name);
void removeDeviceExtension(const char* name);
// By default the constructor requests three queues;
// if you want more or different setups, manipulate the requestedQueues vector
// or use this function.
void addRequestedQueue(VkQueueFlags flags, uint32_t count = 1, float priority = 1.0f);
// this callback is run after extension and version related feature structs were queried for their support
// from the physical device and prior using them for device creation. It allows custom logic for disabling
// certain features.
// Be aware that enabling a feature is not legal within this function, only disabling.
std::function<void(VkStructureType sType, void* pFeatureStruct)> fnDisableFeatures = nullptr;
// Configure additional device creation with these variables and functions
// use device groups
bool useDeviceGroups = false;
// which compatible device or device group to pick
// only used by All-in-one Context::init(...)
uint32_t compatibleDeviceIndex = 0;
// instance properties
std::string appEngine = "nvpro-sample";
std::string appTitle = "nvpro-sample";
// may impact performance, hence disabled by default
bool disableRobustBufferAccess = true;
// Information printed at Context::init time
bool verboseCompatibleDevices = true;
bool verboseUsed = true; // Print what is used
bool verboseAvailable = // Print what is available
#ifndef NDEBUG
true;
#else
false;
#endif
// Will enable GPU crash dumps when Aftermath is available.
// No-op when Aftermath has not been made available via SUPPORT_AFTERMATH in CMakeLists.txt
bool enableAftermath = true;
VkDeviceDiagnosticsConfigFlagsNV aftermathFlags = defaultAftermathFlags;
struct Entry
{
Entry(const char* entryName, bool isOptional = false, void* pointerFeatureStruct = nullptr, uint32_t checkVersion = 0)
: name(entryName)
, optional(isOptional)
, pFeatureStruct(pointerFeatureStruct)
, version(checkVersion)
{
}
std::string name;
bool optional{false};
void* pFeatureStruct{nullptr};
uint32_t version{0};
};
uint32_t apiMajor{1};
uint32_t apiMinor{1};
using EntryArray = std::vector<Entry>;
EntryArray instanceLayers;
EntryArray instanceExtensions;
EntryArray deviceExtensions;
void* deviceCreateInfoExt{nullptr};
void* instanceCreateInfoExt{nullptr};
struct QueueSetup
{
VkQueueFlags requiredFlags = 0;
uint32_t count = 0;
float priority = 1.0;
};
using QueueArray = std::vector<QueueSetup>;
// this array defines how many queues are required for the provided queue flags
// reset / add new entries if changes are desired
//
// ContextCreateInfo constructor adds 1 queue per default queue flag below
QueueArray requestedQueues;
// leave 0 and no default queue will be created
VkQueueFlags defaultQueueGCT = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
VkQueueFlags defaultQueueT = VK_QUEUE_TRANSFER_BIT;
VkQueueFlags defaultQueueC = VK_QUEUE_COMPUTE_BIT;
float defaultPriorityGCT = 1.0f;
float defaultPriorityT = 1.0f;
float defaultPriorityC = 1.0f;
};
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::Context
The nvvk::Context class helps creating the Vulkan instance and choosing the logical device that supports the mandatory extensions. First fill the `ContextCreateInfo` structure, then call:
```cpp
// Creating the Vulkan instance and device
nvvk::ContextCreateInfo ctxInfo;
... see above ...
nvvk::Context vkctx;
vkctx.init(ctxInfo);
// after init the ctxInfo is no longer needed
```
At this point, the class will have created the `VkInstance` and `VkDevice` according to the information passed. It also keeps track of, or has queried, the following:
* Physical device information that you can later query: `PhysicalDeviceInfo`, in which many `VkPhysicalDevice...` structures are stored
* `VkInstance` : the one instance being used for the program
* `VkPhysicalDevice` : physical device(s) used for the logical device creation. In case of more than one physical device, we have a std::vector for this purpose...
* `VkDevice` : the logical device instantiated
* `VkQueue` : By default, 3 queues are created, one per family: Graphics-Compute-Transfer, Compute and Transfer.
Any additional queue needs to be requested with `ContextCreateInfo::addRequestedQueue()`. This only records the best suitable queues,
it does not create them. To create the additional queues,
`Context::createQueue()` **must be called after** creating the Vulkan context (see the sketch after this list).
<br/>The following queues are always created and can be directly accessed without calling createQueue:
* `Queue m_queueGCT` : Graphics/Compute/Transfer Queue + family index
* `Queue m_queueT` : async Transfer Queue + family index
* `Queue m_queueC` : async Compute Queue + family index
* keeps track of which extensions are finally available
* implicitly hooks up the debug callback
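A minimal sketch of requesting and then creating an extra async compute queue (variable names are illustrative):
```cpp
nvvk::ContextCreateInfo ctxInfo;
ctxInfo.addRequestedQueue(VK_QUEUE_COMPUTE_BIT, 1, 0.5f);  // record the request before init
nvvk::Context vkctx;
vkctx.init(ctxInfo);
// additional queues can only be created after init
nvvk::Context::Queue extraCompute = vkctx.createQueue(VK_QUEUE_COMPUTE_BIT, "extraCompute", 0.5f);
```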
## Choosing the device
When there are multiple devices, the `init` method picks the first compatible device available, but it is also possible to choose another one.
```cpp
vkctx.initInstance(deviceInfo);
// Find all compatible devices
auto compatibleDevices = vkctx.getCompatibleDevices(deviceInfo);
assert(!compatibleDevices.empty());
// Use first compatible device
vkctx.initDevice(compatibleDevices[0], deviceInfo);
```
## Multi-GPU
When multiple graphics cards should be used as a single device, `ContextCreateInfo::useDeviceGroups` needs to be set to `true`.
The above methods will then transparently create the `VkDevice` using `VkDeviceGroupDeviceCreateInfo`.
This is especially useful for NVLink-connected cards.
@DOC_END */
class Context
{
public:
Context(Context const&) = delete;
Context& operator=(Context const&) = delete;
Context() = default;
// Vulkan == 1.1 used individual structs
// Vulkan >= 1.2 has per-version structs
struct Features11Old
{
VkPhysicalDeviceMultiviewFeatures multiview{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES};
VkPhysicalDevice16BitStorageFeatures t16BitStorage{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES};
VkPhysicalDeviceSamplerYcbcrConversionFeatures samplerYcbcrConversion{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES};
VkPhysicalDeviceProtectedMemoryFeatures protectedMemory{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES};
VkPhysicalDeviceShaderDrawParameterFeatures drawParameters{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES};
VkPhysicalDeviceVariablePointerFeatures variablePointers{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES};
Features11Old()
{
multiview.pNext = &t16BitStorage;
t16BitStorage.pNext = &samplerYcbcrConversion;
samplerYcbcrConversion.pNext = &protectedMemory;
protectedMemory.pNext = &drawParameters;
drawParameters.pNext = &variablePointers;
variablePointers.pNext = nullptr;
}
void read(const VkPhysicalDeviceVulkan11Features& features11)
{
multiview.multiview = features11.multiview;
multiview.multiviewGeometryShader = features11.multiviewGeometryShader;
multiview.multiviewTessellationShader = features11.multiviewTessellationShader;
t16BitStorage.storageBuffer16BitAccess = features11.storageBuffer16BitAccess;
t16BitStorage.storageInputOutput16 = features11.storageInputOutput16;
t16BitStorage.storagePushConstant16 = features11.storagePushConstant16;
t16BitStorage.uniformAndStorageBuffer16BitAccess = features11.uniformAndStorageBuffer16BitAccess;
samplerYcbcrConversion.samplerYcbcrConversion = features11.samplerYcbcrConversion;
protectedMemory.protectedMemory = features11.protectedMemory;
drawParameters.shaderDrawParameters = features11.shaderDrawParameters;
variablePointers.variablePointers = features11.variablePointers;
variablePointers.variablePointersStorageBuffer = features11.variablePointersStorageBuffer;
}
void write(VkPhysicalDeviceVulkan11Features& features11)
{
features11.multiview = multiview.multiview;
features11.multiviewGeometryShader = multiview.multiviewGeometryShader;
features11.multiviewTessellationShader = multiview.multiviewTessellationShader;
features11.storageBuffer16BitAccess = t16BitStorage.storageBuffer16BitAccess;
features11.storageInputOutput16 = t16BitStorage.storageInputOutput16;
features11.storagePushConstant16 = t16BitStorage.storagePushConstant16;
features11.uniformAndStorageBuffer16BitAccess = t16BitStorage.uniformAndStorageBuffer16BitAccess;
features11.samplerYcbcrConversion = samplerYcbcrConversion.samplerYcbcrConversion;
features11.protectedMemory = protectedMemory.protectedMemory;
features11.shaderDrawParameters = drawParameters.shaderDrawParameters;
features11.variablePointers = variablePointers.variablePointers;
features11.variablePointersStorageBuffer = variablePointers.variablePointersStorageBuffer;
}
};
struct Properties11Old
{
VkPhysicalDeviceMaintenance3Properties maintenance3{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES};
VkPhysicalDeviceIDProperties deviceID{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES};
VkPhysicalDeviceMultiviewProperties multiview{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES};
VkPhysicalDeviceProtectedMemoryProperties protectedMemory{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES};
VkPhysicalDevicePointClippingProperties pointClipping{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES};
VkPhysicalDeviceSubgroupProperties subgroup{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES};
Properties11Old()
{
maintenance3.pNext = &deviceID;
deviceID.pNext = &multiview;
multiview.pNext = &protectedMemory;
protectedMemory.pNext = &pointClipping;
pointClipping.pNext = &subgroup;
subgroup.pNext = nullptr;
}
void write(VkPhysicalDeviceVulkan11Properties& properties11)
{
memcpy(properties11.deviceLUID, deviceID.deviceLUID, sizeof(properties11.deviceLUID));
memcpy(properties11.deviceUUID, deviceID.deviceUUID, sizeof(properties11.deviceUUID));
memcpy(properties11.driverUUID, deviceID.driverUUID, sizeof(properties11.driverUUID));
properties11.deviceLUIDValid = deviceID.deviceLUIDValid;
properties11.deviceNodeMask = deviceID.deviceNodeMask;
properties11.subgroupSize = subgroup.subgroupSize;
properties11.subgroupSupportedStages = subgroup.supportedStages;
properties11.subgroupSupportedOperations = subgroup.supportedOperations;
properties11.subgroupQuadOperationsInAllStages = subgroup.quadOperationsInAllStages;
properties11.pointClippingBehavior = pointClipping.pointClippingBehavior;
properties11.maxMultiviewViewCount = multiview.maxMultiviewViewCount;
properties11.maxMultiviewInstanceIndex = multiview.maxMultiviewInstanceIndex;
properties11.protectedNoFault = protectedMemory.protectedNoFault;
properties11.maxPerSetDescriptors = maintenance3.maxPerSetDescriptors;
properties11.maxMemoryAllocationSize = maintenance3.maxMemoryAllocationSize;
}
};
// This struct holds all core feature information for a physical device
struct PhysicalDeviceInfo
{
VkPhysicalDeviceMemoryProperties memoryProperties{};
std::vector<VkQueueFamilyProperties> queueProperties;
VkPhysicalDeviceFeatures features10{};
VkPhysicalDeviceVulkan11Features features11{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES};
VkPhysicalDeviceVulkan12Features features12{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES};
VkPhysicalDeviceVulkan13Features features13{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES};
VkPhysicalDeviceProperties properties10{};
VkPhysicalDeviceVulkan11Properties properties11{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES};
VkPhysicalDeviceVulkan12Properties properties12{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES};
VkPhysicalDeviceVulkan13Properties properties13{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES};
};
struct Queue
{
VkQueue queue = VK_NULL_HANDLE;
uint32_t familyIndex = ~0U;
uint32_t queueIndex = ~0U;
float priority = 1.0f;
operator VkQueue() const { return queue; }
operator uint32_t() const { return familyIndex; }
};
VkInstance m_instance{VK_NULL_HANDLE};
VkDevice m_device{VK_NULL_HANDLE};
VkPhysicalDevice m_physicalDevice{VK_NULL_HANDLE};
PhysicalDeviceInfo m_physicalInfo;
uint32_t m_apiMajor = 0;
uint32_t m_apiMinor = 0;
// following queues are automatically created if appropriate ContextCreateInfo.defaultQueue??? is set
// and ContextCreateInfo::requestedQueues contains a compatible config.
Queue m_queueGCT; // for Graphics/Compute/Transfer
Queue m_queueT; // for pure async Transfer Queue
Queue m_queueC; // for async Compute
// additional queues must be created once through this function
// returns new Queue and pops entry from available Queues that were requested via info.requestedQueues
Queue createQueue(VkQueueFlags requiredFlags, const std::string& debugName, float priority = 1.0f);
operator VkDevice() const { return m_device; }
// All-in-one instance and device creation
bool init(const ContextCreateInfo& info);
void deinit();
// Individual object creation
bool initInstance(const ContextCreateInfo& info);
// deviceIndex is an index either into getPhysicalDevices or getPhysicalDeviceGroups
// depending on info.useDeviceGroups
bool initDevice(uint32_t deviceIndex, const ContextCreateInfo& info);
// Helpers
std::vector<uint32_t> getCompatibleDevices(const ContextCreateInfo& info);
std::vector<VkPhysicalDevice> getPhysicalDevices();
std::vector<VkPhysicalDeviceGroupProperties> getPhysicalDeviceGroups();
std::vector<VkExtensionProperties> getInstanceExtensions();
std::vector<VkLayerProperties> getInstanceLayers();
std::vector<VkExtensionProperties> getDeviceExtensions(VkPhysicalDevice physicalDevice);
void printPhysicalDeviceProperties(const VkPhysicalDeviceProperties& properties);
bool hasMandatoryExtensions(VkPhysicalDevice physicalDevice, const ContextCreateInfo& info, bool bVerbose);
// Returns whether the GCT queue supports present
bool setGCTQueueWithPresent(VkSurfaceKHR surface);
// true if the context has the optional extension activated
bool hasDeviceExtension(const char* name) const;
bool hasInstanceExtension(const char* name) const;
void ignoreDebugMessage(int32_t msgID) { m_dbgIgnoreMessages.insert(msgID); }
void setDebugSeverityFilterMask(int32_t severity) { m_dbgSeverity = severity; }
private:
struct QueueScore
{
uint32_t score = 0; // the lower the score, the more 'specialized' it is
uint32_t familyIndex = ~0U;
uint32_t queueIndex = ~0U;
float priority = 1.0f;
};
using QueueScoreList = std::vector<QueueScore>;
// This list is created from ContextCreateInfo::requestedQueues.
// It contains the most specialized queues for compatible flags first.
// Each Context::createQueue call finds a compatible item in this list
// and removes it upon success.
QueueScoreList m_availableQueues;
// optional maxFamilyCounts overrides the device's max queue count per queue family
// optional priorities overrides default priority 1.0 and must be sized physical device's queue family count * maxQueueCount
void initQueueList(QueueScoreList& list, const uint32_t* maxFamilyCounts, const float* priorities, uint32_t maxQueueCount) const;
QueueScore removeQueueListItem(QueueScoreList& list, VkQueueFlags flags, float priority) const;
static VKAPI_ATTR VkBool32 VKAPI_CALL debugMessengerCallback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
VkDebugUtilsMessageTypeFlagsEXT messageType,
const VkDebugUtilsMessengerCallbackDataEXT* callbackData,
void* userData);
std::vector<std::string> m_usedInstanceLayers;
std::vector<std::string> m_usedInstanceExtensions;
std::vector<std::string> m_usedDeviceExtensions;
// New Debug system
PFN_vkCreateDebugUtilsMessengerEXT m_createDebugUtilsMessengerEXT = nullptr;
PFN_vkDestroyDebugUtilsMessengerEXT m_destroyDebugUtilsMessengerEXT = nullptr;
VkDebugUtilsMessengerEXT m_dbgMessenger = nullptr;
std::unordered_set<int32_t> m_dbgIgnoreMessages;
uint32_t m_dbgSeverity{VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT};
// nSight Aftermath
GpuCrashTracker m_gpuCrashTracker;
void initDebugUtils();
bool hasDebugUtils() const { return m_createDebugUtilsMessengerEXT != nullptr; }
VkResult fillFilteredNameArray(std::vector<std::string>& used,
const std::vector<VkLayerProperties>& properties,
const ContextCreateInfo::EntryArray& requested);
VkResult fillFilteredNameArray(std::vector<std::string>& used,
const std::vector<VkExtensionProperties>& properties,
const ContextCreateInfo::EntryArray& requested,
std::vector<void*>& featureStructs);
std::vector<std::string> checkEntryArray(const std::vector<VkExtensionProperties>& properties,
const ContextCreateInfo::EntryArray& requested);
static void initPhysicalInfo(PhysicalDeviceInfo& info, VkPhysicalDevice physicalDevice, uint32_t versionMajor, uint32_t versionMinor);
};
} // namespace nvvk
#endif

View file

@ -0,0 +1,27 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "debug_util_vk.hpp"
namespace nvvk {
bool DebugUtil::s_enabled = false;
} // namespace nvvk

View file

@ -0,0 +1,213 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
/// # class DebugUtil
/// This is a companion utility to add debug information to an application.
/// See https://vulkan.lunarg.com/doc/sdk/1.1.114.0/windows/chunked_spec/chap39.html
/// - User-defined names for objects
/// - Logically annotate regions of command buffers
/// - Scoped command buffer labels to make things simpler
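///
/// A minimal usage sketch (`device`, `cmd` and `myBuffer` are assumed to exist in the application):
///   nvvk::DebugUtil::setEnabled(true);
///   nvvk::DebugUtil debug(device);
///   debug.setObjectName(myBuffer, "scene vertices");
///   auto scope = debug.scopeLabel(cmd, "draw scene");  // label ends when `scope` is destroyed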
#pragma once
#include <algorithm>
#include <string.h>
#include <string>
#include <vulkan/vulkan_core.h>
#include "nvh/nvprint.hpp"
namespace nvvk {
class DebugUtil
{
public:
DebugUtil() = default;
DebugUtil(VkDevice device)
: m_device(device)
{
}
static void setEnabled(bool state) { s_enabled = state; }
void setup(VkDevice device) { m_device = device; }
void setObjectName(const uint64_t object, const std::string& name, VkObjectType t)
{
if(s_enabled)
{
VkDebugUtilsObjectNameInfoEXT s{VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, nullptr, t, object, name.c_str()};
vkSetDebugUtilsObjectNameEXT(m_device, &s);
}
}
// clang-format off
void setObjectName(VkBuffer object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_BUFFER); }
void setObjectName(VkBufferView object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_BUFFER_VIEW); }
void setObjectName(VkCommandBuffer object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_COMMAND_BUFFER ); }
void setObjectName(VkCommandPool object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_COMMAND_POOL ); }
void setObjectName(VkDescriptorPool object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_DESCRIPTOR_POOL); }
void setObjectName(VkDescriptorSet object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_DESCRIPTOR_SET); }
void setObjectName(VkDescriptorSetLayout object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT); }
void setObjectName(VkDevice object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_DEVICE); }
void setObjectName(VkDeviceMemory object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_DEVICE_MEMORY); }
void setObjectName(VkFramebuffer object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_FRAMEBUFFER); }
void setObjectName(VkImage object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_IMAGE); }
void setObjectName(VkImageView object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_IMAGE_VIEW); }
void setObjectName(VkPipeline object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_PIPELINE); }
void setObjectName(VkPipelineLayout object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_PIPELINE_LAYOUT); }
void setObjectName(VkQueryPool object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_QUERY_POOL); }
void setObjectName(VkQueue object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_QUEUE); }
void setObjectName(VkRenderPass object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_RENDER_PASS); }
void setObjectName(VkSampler object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_SAMPLER); }
void setObjectName(VkSemaphore object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_SEMAPHORE); }
void setObjectName(VkShaderModule object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_SHADER_MODULE); }
void setObjectName(VkSwapchainKHR object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_SWAPCHAIN_KHR); }
#if VK_NV_ray_tracing
void setObjectName(VkAccelerationStructureNV object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV); }
#endif
#if VK_KHR_acceleration_structure
void setObjectName(VkAccelerationStructureKHR object, const std::string& name) { setObjectName((uint64_t)object, name, VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR); }
#endif
// clang-format on
//
//---------------------------------------------------------------------------
//
void beginLabel(VkCommandBuffer cmdBuf, const std::string& label)
{
if(s_enabled)
{
VkDebugUtilsLabelEXT s{VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, nullptr, label.c_str(), {1.0f, 1.0f, 1.0f, 1.0f}};
vkCmdBeginDebugUtilsLabelEXT(cmdBuf, &s);
}
}
void endLabel(VkCommandBuffer cmdBuf)
{
if(s_enabled)
{
vkCmdEndDebugUtilsLabelEXT(cmdBuf);
}
}
void insertLabel(VkCommandBuffer cmdBuf, const std::string& label)
{
if(s_enabled)
{
VkDebugUtilsLabelEXT s{VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, nullptr, label.c_str(), {1.0f, 1.0f, 1.0f, 1.0f}};
vkCmdInsertDebugUtilsLabelEXT(cmdBuf, &s);
}
}
//
// Begin and End Command Label MUST be balanced, this helps as it will always close the opened label
//
struct ScopedCmdLabel
{
ScopedCmdLabel(VkCommandBuffer cmdBuf, const std::string& label)
: m_cmdBuf(cmdBuf)
{
if(s_enabled)
{
VkDebugUtilsLabelEXT s{VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, nullptr, label.c_str(), {1.0f, 1.0f, 1.0f, 1.0f}};
vkCmdBeginDebugUtilsLabelEXT(cmdBuf, &s);
}
}
~ScopedCmdLabel()
{
if(s_enabled)
{
vkCmdEndDebugUtilsLabelEXT(m_cmdBuf);
}
}
void setLabel(const std::string& label)
{
if(s_enabled)
{
VkDebugUtilsLabelEXT s{VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, nullptr, label.c_str(), {1.0f, 1.0f, 1.0f, 1.0f}};
vkCmdInsertDebugUtilsLabelEXT(m_cmdBuf, &s);
}
}
private:
VkCommandBuffer m_cmdBuf;
};
ScopedCmdLabel scopeLabel(VkCommandBuffer cmdBuf, const std::string& label) { return ScopedCmdLabel(cmdBuf, label); }
private:
VkDevice m_device{VK_NULL_HANDLE};
static bool s_enabled;
};
//////////////////////////////////////////////////////////////////////////
/// Macros to help automatically naming variables.
/// Names will be in the form of MyClass::m_myBuffer (in example.cpp:123)
///
/// To use:
/// - The debug utility member MUST be named 'm_debug'
/// - Individual name: NAME_VK(m_myBuffer.buffer) or with an index NAME_IDX_VK(m_texture.image, i)
/// - Create/associate and name, instead of
/// pipeline = createPipeline();
/// NAME_VK(pipeline)
/// call
/// CREATE_NAMED_VK(pipeline , createPipeline());
/// - Scope functions can also be automatically named, at the beginning of a function
/// call LABEL_SCOPE_VK( commandBuffer )
///
///
// clang-format off
inline const char* fileNameSplitter(const char* n) { return std::max<const char*>(n, std::max(strrchr(n, '\\') + 1, strrchr(n, '/') + 1)); }
inline const char* upToLastSpace(const char* n) { return std::max<const char*>(n, strrchr(n, ' ') + 1); }
#define CLASS_NAME nvvk::upToLastSpace(typeid(*this).name())
#define NAME_FILE_LOCATION std::string(" in ") + std::string(nvvk::fileNameSplitter(__FILE__)) + std::string(":" S__LINE__ ")")
// Individual naming
#define NAME_VK(_x) m_debug.setObjectName(_x, (std::string(CLASS_NAME) + std::string("::") + std::string(#_x " (") + NAME_FILE_LOCATION).c_str())
#define NAME2_VK(_x, _s) m_debug.setObjectName(_x, (std::string(_s) + std::string(" (" #_x) + NAME_FILE_LOCATION).c_str())
#define NAME_IDX_VK(_x, _i) m_debug.setObjectName(_x, \
(std::string(CLASS_NAME) + std::string("::") + std::string(#_x " (" #_i "=") + std::to_string(_i) + std::string(", ") + NAME_FILE_LOCATION).c_str())
// Name in creation
#define CREATE_NAMED_VK(_x, _c) \
_x = _c; \
NAME_VK(_x);
#define CREATE_NAMED_IDX_VK(_x, _i, _c) \
_x = _c; \
NAME_IDX_VK(_x, _i);
// Running scope
#define LABEL_SCOPE_VK(_cmd) \
auto _scopeLabel = m_debug.scopeLabel(_cmd, std::string(CLASS_NAME) + std::string("::") + std::string(__func__) + std::string(", in ") \
+ std::string(nvvk::fileNameSplitter(__FILE__)) + std::string(":" S__LINE__ ")"))
// Variants of the above macros for when the debug member is not named 'm_debug' (Ex: m_myDbg->DBG_NAME(vulkan_obj); )
#define DBG_NAME(_x) \
setObjectName(_x, (std::string(CLASS_NAME) + std::string("::") + std::string(#_x " (") + NAME_FILE_LOCATION).c_str())
#define DBG_NAME_IDX(_x, _i) \
setObjectName(_x, (std::string(CLASS_NAME) + std::string("::") + std::string(#_x " (" #_i "=") + std::to_string(_i) \
+ std::string(", ") + NAME_FILE_LOCATION) \
.c_str())
#define DBG_SCOPE(_cmd) \
scopeLabel(_cmd, std::string(CLASS_NAME) + std::string("::") + std::string(__func__) + std::string(", in ") \
+ std::string(nvvk::fileNameSplitter(__FILE__)) + std::string(":" S__LINE__ ")"))
// clang-format on
} // namespace nvvk

View file

@ -0,0 +1,467 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "descriptorsets_vk.hpp"
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
void DescriptorSetContainer::init(VkDevice device)
{
assert(m_device == VK_NULL_HANDLE);
m_device = device;
}
void DescriptorSetContainer::setBindings(const std::vector<VkDescriptorSetLayoutBinding>& bindings)
{
m_bindings.setBindings(bindings);
}
void DescriptorSetContainer::addBinding(uint32_t binding,
VkDescriptorType descriptorType,
uint32_t descriptorCount,
VkShaderStageFlags stageFlags,
const VkSampler* pImmutableSamplers /*= nullptr*/)
{
m_bindings.addBinding(binding, descriptorType, descriptorCount, stageFlags, pImmutableSamplers);
}
void DescriptorSetContainer::addBinding(VkDescriptorSetLayoutBinding binding)
{
m_bindings.addBinding(binding);
}
void DescriptorSetContainer::setBindingFlags(uint32_t binding, VkDescriptorBindingFlags bindingFlag)
{
m_bindings.setBindingFlags(binding, bindingFlag);
}
VkDescriptorSetLayout DescriptorSetContainer::initLayout(VkDescriptorSetLayoutCreateFlags flags /*= 0*/, DescriptorSupport supportFlags)
{
assert(m_layout == VK_NULL_HANDLE);
m_layout = m_bindings.createLayout(m_device, flags, supportFlags);
return m_layout;
}
VkDescriptorPool DescriptorSetContainer::initPool(uint32_t numAllocatedSets)
{
assert(m_pool == VK_NULL_HANDLE);
assert(m_layout);
m_pool = m_bindings.createPool(m_device, numAllocatedSets);
allocateDescriptorSets(m_device, m_pool, m_layout, numAllocatedSets, m_descriptorSets);
return m_pool;
}
VkPipelineLayout DescriptorSetContainer::initPipeLayout(uint32_t numRanges /*= 0*/,
const VkPushConstantRange* ranges /*= nullptr*/,
VkPipelineLayoutCreateFlags flags /*= 0*/)
{
assert(m_pipelineLayout == VK_NULL_HANDLE);
assert(m_layout);
VkResult result;
VkPipelineLayoutCreateInfo layoutCreateInfo = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
layoutCreateInfo.setLayoutCount = 1;
layoutCreateInfo.pSetLayouts = &m_layout;
layoutCreateInfo.pushConstantRangeCount = numRanges;
layoutCreateInfo.pPushConstantRanges = ranges;
layoutCreateInfo.flags = flags;
result = vkCreatePipelineLayout(m_device, &layoutCreateInfo, nullptr, &m_pipelineLayout);
assert(result == VK_SUCCESS);
return m_pipelineLayout;
}
void DescriptorSetContainer::deinitPool()
{
if(!m_descriptorSets.empty())
{
m_descriptorSets.clear();
}
if(m_pool)
{
vkDestroyDescriptorPool(m_device, m_pool, nullptr);
m_pool = VK_NULL_HANDLE;
}
}
void DescriptorSetContainer::deinitLayout()
{
if(m_pipelineLayout)
{
vkDestroyPipelineLayout(m_device, m_pipelineLayout, nullptr);
m_pipelineLayout = VK_NULL_HANDLE;
}
if(m_layout)
{
vkDestroyDescriptorSetLayout(m_device, m_layout, nullptr);
m_layout = VK_NULL_HANDLE;
}
}
void DescriptorSetContainer::deinit()
{
deinitLayout();
deinitPool();
m_bindings.clear();
m_device = VK_NULL_HANDLE;
}
VkDescriptorSet DescriptorSetContainer::getSet(uint32_t dstSetIdx /*= 0*/) const
{
if(m_descriptorSets.empty())
{
return {};
}
return m_descriptorSets[dstSetIdx];
}
//////////////////////////////////////////////////////////////////////////
VkDescriptorSetLayout DescriptorSetBindings::createLayout(VkDevice device, VkDescriptorSetLayoutCreateFlags flags, DescriptorSupport supportFlags)
{
VkResult result;
VkDescriptorSetLayoutBindingFlagsCreateInfo bindingsInfo = {
isSet(supportFlags, DescriptorSupport::CORE_1_2) ? VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO :
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT};
// Pad binding flags to match bindings if any exist
if(!m_bindingFlags.empty() && m_bindingFlags.size() <= m_bindings.size())
{
m_bindingFlags.resize(m_bindings.size(), 0);
}
bindingsInfo.bindingCount = uint32_t(m_bindingFlags.size());
bindingsInfo.pBindingFlags = m_bindingFlags.data();
VkDescriptorSetLayoutCreateInfo createInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
createInfo.bindingCount = uint32_t(m_bindings.size());
createInfo.pBindings = m_bindings.data();
createInfo.flags = flags;
createInfo.pNext =
m_bindingFlags.empty() && !(isAnySet(supportFlags, (DescriptorSupport::CORE_1_2 | DescriptorSupport::INDEXING_EXT))) ?
nullptr :
&bindingsInfo;
VkDescriptorSetLayout descriptorSetLayout;
result = vkCreateDescriptorSetLayout(device, &createInfo, nullptr, &descriptorSetLayout);
assert(result == VK_SUCCESS);
return descriptorSetLayout;
}
void DescriptorSetBindings::addRequiredPoolSizes(std::vector<VkDescriptorPoolSize>& poolSizes, uint32_t numSets) const
{
for(auto it = m_bindings.cbegin(); it != m_bindings.cend(); ++it)
{
// Bindings can have a zero descriptor count, used for the layout, but don't reserve storage for them.
if(it->descriptorCount == 0)
{
continue;
}
bool found = false;
for(auto itpool = poolSizes.begin(); itpool != poolSizes.end(); ++itpool)
{
if(itpool->type == it->descriptorType)
{
itpool->descriptorCount += it->descriptorCount * numSets;
found = true;
break;
}
}
if(!found)
{
VkDescriptorPoolSize poolSize{};
poolSize.type = it->descriptorType;
poolSize.descriptorCount = it->descriptorCount * numSets;
poolSizes.push_back(poolSize);
}
}
}
VkDescriptorPool DescriptorSetBindings::createPool(VkDevice device, uint32_t maxSets /*= 1*/, VkDescriptorPoolCreateFlags flags /*= 0*/) const
{
VkResult result;
// setup poolsizes for each descriptorType
std::vector<VkDescriptorPoolSize> poolSizes;
addRequiredPoolSizes(poolSizes, maxSets);
VkDescriptorPool descrPool;
VkDescriptorPoolCreateInfo descrPoolInfo = {};
descrPoolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
descrPoolInfo.pNext = nullptr;
descrPoolInfo.maxSets = maxSets;
descrPoolInfo.poolSizeCount = uint32_t(poolSizes.size());
descrPoolInfo.pPoolSizes = poolSizes.data();
descrPoolInfo.flags = flags;
// scene pool
result = vkCreateDescriptorPool(device, &descrPoolInfo, nullptr, &descrPool);
assert(result == VK_SUCCESS);
return descrPool;
}
void DescriptorSetBindings::setBindingFlags(uint32_t binding, VkDescriptorBindingFlags bindingFlag)
{
for(size_t i = 0; i < m_bindings.size(); i++)
{
if(m_bindings[i].binding == binding)
{
if(m_bindingFlags.size() <= m_bindings.size())
{
m_bindingFlags.resize(m_bindings.size(), 0);
}
m_bindingFlags[i] = bindingFlag;
return;
}
}
assert(0 && "binding not found");
}
VkDescriptorType DescriptorSetBindings::getType(uint32_t binding) const
{
for(size_t i = 0; i < m_bindings.size(); i++)
{
if(m_bindings[i].binding == binding)
{
return m_bindings[i].descriptorType;
}
}
assert(0 && "binding not found");
return VK_DESCRIPTOR_TYPE_MAX_ENUM;
}
uint32_t DescriptorSetBindings::getCount(uint32_t binding) const
{
for(size_t i = 0; i < m_bindings.size(); i++)
{
if(m_bindings[i].binding == binding)
{
return m_bindings[i].descriptorCount;
}
}
assert(0 && "binding not found");
return ~0;
}
VkWriteDescriptorSet DescriptorSetBindings::makeWrite(VkDescriptorSet dstSet, uint32_t dstBinding, uint32_t arrayElement) const
{
VkWriteDescriptorSet writeSet = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
writeSet.descriptorType = VK_DESCRIPTOR_TYPE_MAX_ENUM;
for(size_t i = 0; i < m_bindings.size(); i++)
{
if(m_bindings[i].binding == dstBinding)
{
writeSet.descriptorCount = 1;
writeSet.descriptorType = m_bindings[i].descriptorType;
writeSet.dstBinding = dstBinding;
writeSet.dstSet = dstSet;
writeSet.dstArrayElement = arrayElement;
return writeSet;
}
}
assert(0 && "binding not found");
return writeSet;
}
VkWriteDescriptorSet DescriptorSetBindings::makeWriteArray(VkDescriptorSet dstSet, uint32_t dstBinding) const
{
VkWriteDescriptorSet writeSet = {VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
writeSet.descriptorType = VK_DESCRIPTOR_TYPE_MAX_ENUM;
for(size_t i = 0; i < m_bindings.size(); i++)
{
if(m_bindings[i].binding == dstBinding)
{
writeSet.descriptorCount = m_bindings[i].descriptorCount;
writeSet.descriptorType = m_bindings[i].descriptorType;
writeSet.dstBinding = dstBinding;
writeSet.dstSet = dstSet;
writeSet.dstArrayElement = 0;
return writeSet;
}
}
assert(0 && "binding not found");
return writeSet;
}
VkWriteDescriptorSet DescriptorSetBindings::makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkDescriptorImageInfo* pImageInfo,
uint32_t arrayElement) const
{
VkWriteDescriptorSet writeSet = makeWrite(dstSet, dstBinding, arrayElement);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER || writeSet.descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER
|| writeSet.descriptorType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE || writeSet.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE
|| writeSet.descriptorType == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT);
writeSet.pImageInfo = pImageInfo;
return writeSet;
}
VkWriteDescriptorSet DescriptorSetBindings::makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkDescriptorBufferInfo* pBufferInfo,
uint32_t arrayElement) const
{
VkWriteDescriptorSet writeSet = makeWrite(dstSet, dstBinding, arrayElement);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER || writeSet.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC
|| writeSet.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER
|| writeSet.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
writeSet.pBufferInfo = pBufferInfo;
return writeSet;
}
VkWriteDescriptorSet DescriptorSetBindings::makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkBufferView* pTexelBufferView,
uint32_t arrayElement) const
{
VkWriteDescriptorSet writeSet = makeWrite(dstSet, dstBinding, arrayElement);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER
|| writeSet.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
writeSet.pTexelBufferView = pTexelBufferView;
return writeSet;
}
#if VK_NV_ray_tracing
VkWriteDescriptorSet DescriptorSetBindings::makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetAccelerationStructureNV* pAccel,
uint32_t arrayElement) const
{
VkWriteDescriptorSet writeSet = makeWrite(dstSet, dstBinding, arrayElement);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_NV);
writeSet.pNext = pAccel;
return writeSet;
}
#endif
#if VK_KHR_acceleration_structure
VkWriteDescriptorSet DescriptorSetBindings::makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetAccelerationStructureKHR* pAccel,
uint32_t arrayElement) const
{
VkWriteDescriptorSet writeSet = makeWrite(dstSet, dstBinding, arrayElement);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
writeSet.pNext = pAccel;
return writeSet;
}
#endif
#if VK_EXT_inline_uniform_block
VkWriteDescriptorSet DescriptorSetBindings::makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetInlineUniformBlockEXT* pInline,
uint32_t arrayElement) const
{
VkWriteDescriptorSet writeSet = makeWrite(dstSet, dstBinding, arrayElement);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT);
writeSet.pNext = pInline;
return writeSet;
}
#endif
VkWriteDescriptorSet DescriptorSetBindings::makeWriteArray(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkDescriptorImageInfo* pImageInfo) const
{
VkWriteDescriptorSet writeSet = makeWriteArray(dstSet, dstBinding);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER || writeSet.descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER
|| writeSet.descriptorType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE || writeSet.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE
|| writeSet.descriptorType == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT);
writeSet.pImageInfo = pImageInfo;
assert(writeSet.descriptorCount > 0);  // Can have zero descriptors in the descriptor set layout, but can't write zero items.
return writeSet;
}
VkWriteDescriptorSet DescriptorSetBindings::makeWriteArray(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkDescriptorBufferInfo* pBufferInfo) const
{
VkWriteDescriptorSet writeSet = makeWriteArray(dstSet, dstBinding);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER || writeSet.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC
|| writeSet.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER
|| writeSet.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);
writeSet.pBufferInfo = pBufferInfo;
return writeSet;
}
VkWriteDescriptorSet DescriptorSetBindings::makeWriteArray(VkDescriptorSet dstSet, uint32_t dstBinding, const VkBufferView* pTexelBufferView) const
{
VkWriteDescriptorSet writeSet = makeWriteArray(dstSet, dstBinding);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
writeSet.pTexelBufferView = pTexelBufferView;
return writeSet;
}
#if VK_NV_ray_tracing
VkWriteDescriptorSet DescriptorSetBindings::makeWriteArray(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetAccelerationStructureNV* pAccel) const
{
VkWriteDescriptorSet writeSet = makeWriteArray(dstSet, dstBinding);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_NV);
writeSet.pNext = pAccel;
return writeSet;
}
#endif
#if VK_KHR_acceleration_structure
VkWriteDescriptorSet DescriptorSetBindings::makeWriteArray(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetAccelerationStructureKHR* pAccel) const
{
VkWriteDescriptorSet writeSet = makeWriteArray(dstSet, dstBinding);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR);
writeSet.pNext = pAccel;
return writeSet;
}
#endif
#if VK_EXT_inline_uniform_block
VkWriteDescriptorSet DescriptorSetBindings::makeWriteArray(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetInlineUniformBlockEXT* pInline) const
{
VkWriteDescriptorSet writeSet = makeWriteArray(dstSet, dstBinding);
assert(writeSet.descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT);
writeSet.pNext = pInline;
return writeSet;
}
#endif
} // namespace nvvk

View file

@ -0,0 +1,653 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <assert.h>
#include <platform.h>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace nvvk {
// utility for additional feature support
enum class DescriptorSupport : uint32_t
{
CORE_1_0 = 0, // VK Version 1.0
CORE_1_2 = 1, // VK Version 1.2 (adds descriptor_indexing)
INDEXING_EXT = 2, // VK_EXT_descriptor_indexing
};
using DescriptorSupport_t = std::underlying_type_t<DescriptorSupport>;
inline DescriptorSupport operator|(DescriptorSupport lhs, DescriptorSupport rhs)
{
return static_cast<DescriptorSupport>(static_cast<DescriptorSupport_t>(lhs) | static_cast<DescriptorSupport_t>(rhs));
}
inline DescriptorSupport operator&(DescriptorSupport lhs, DescriptorSupport rhs)
{
return static_cast<DescriptorSupport>(static_cast<DescriptorSupport_t>(lhs) & static_cast<DescriptorSupport_t>(rhs));
}
inline bool isSet(DescriptorSupport test, DescriptorSupport query)
{
return (test & query) == query;
}
inline bool isAnySet(DescriptorSupport test, DescriptorSupport query)
{
return (test & query) != DescriptorSupport::CORE_1_0;
}
/** @DOC_START
# functions in nvvk
- createDescriptorPool : wrappers for vkCreateDescriptorPool
- allocateDescriptorSet : allocates a single VkDescriptorSet
- allocateDescriptorSets : allocates multiple VkDescriptorSets
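A minimal usage sketch (a valid `device` and an existing `layout` are assumed; the pool sizes are illustrative):
```cpp
std::vector<VkDescriptorPoolSize> poolSizes = {
    {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 4},
    {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 8},
};
VkDescriptorPool pool = nvvk::createDescriptorPool(device, poolSizes, 4);

// one set, or several sets sharing the same layout
VkDescriptorSet              set = nvvk::allocateDescriptorSet(device, pool, layout);
std::vector<VkDescriptorSet> sets;
nvvk::allocateDescriptorSets(device, pool, layout, 3, sets);
```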
@DOC_END */
inline VkDescriptorPool createDescriptorPool(VkDevice device, size_t poolSizeCount, const VkDescriptorPoolSize* poolSizes, uint32_t maxSets)
{
VkResult result;
VkDescriptorPool descrPool;
VkDescriptorPoolCreateInfo descrPoolInfo = {};
descrPoolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
descrPoolInfo.pNext = nullptr;
descrPoolInfo.maxSets = maxSets;
descrPoolInfo.poolSizeCount = uint32_t(poolSizeCount);
descrPoolInfo.pPoolSizes = poolSizes;
// scene pool
result = vkCreateDescriptorPool(device, &descrPoolInfo, nullptr, &descrPool);
assert(result == VK_SUCCESS);
return descrPool;
}
inline VkDescriptorPool createDescriptorPool(VkDevice device, const std::vector<VkDescriptorPoolSize>& poolSizes, uint32_t maxSets)
{
return createDescriptorPool(device, poolSizes.size(), poolSizes.data(), maxSets);
}
inline VkDescriptorSet allocateDescriptorSet(VkDevice device, VkDescriptorPool pool, VkDescriptorSetLayout layout)
{
VkResult result;
VkDescriptorSetAllocateInfo allocInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
allocInfo.descriptorPool = pool;
allocInfo.descriptorSetCount = 1;
allocInfo.pSetLayouts = &layout;
VkDescriptorSet set;
result = vkAllocateDescriptorSets(device, &allocInfo, &set);
assert(result == VK_SUCCESS);
return set;
}
inline void allocateDescriptorSets(VkDevice device,
VkDescriptorPool pool,
VkDescriptorSetLayout layout,
uint32_t count,
std::vector<VkDescriptorSet>& sets)
{
sets.resize(count);
std::vector<VkDescriptorSetLayout> layouts(count, layout);
VkResult result;
VkDescriptorSetAllocateInfo allocInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
allocInfo.descriptorPool = pool;
allocInfo.descriptorSetCount = count;
allocInfo.pSetLayouts = layouts.data();
result = vkAllocateDescriptorSets(device, &allocInfo, sets.data());
assert(result == VK_SUCCESS);
}
/////////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::DescriptorSetBindings
nvvk::DescriptorSetBindings is a helper class that keeps a vector of `VkDescriptorSetLayoutBinding` for a single
`VkDescriptorSetLayout`. It provides helper functions to create the `VkDescriptorSetLayout` and a matching
`VkDescriptorPool` from this information, plus utilities to fill `VkWriteDescriptorSet` structures with the
binding information stored within the class.
As a convenience, when you make a VkWriteDescriptorSet you pass the binding slot rather than the
index of the binding's storage within this class. This costs a small linear search, but makes it
easy to change the content/order of bindings at creation time.
Example :
```cpp
DescriptorSetBindings binds;
binds.addBinding( VIEW_BINDING, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, VK_SHADER_STAGE_VERTEX_BIT);
binds.addBinding(XFORM_BINDING, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_VERTEX_BIT);
VkDescriptorSetLayout layout = binds.createLayout(device);
#if SINGLE_LAYOUT_POOL
// let's create a pool with 2 sets
VkDescriptorPool pool = binds.createPool(device, 2);
#else
// if you want to combine multiple layouts into a common pool
std::vector<VkDescriptorPoolSize> poolSizes;
bindsA.addRequiredPoolSizes(poolSizes, numSetsA);
bindsB.addRequiredPoolSizes(poolSizes, numSetsB);
VkDescriptorPool pool = nvvk::createDescriptorPool(device, poolSizes,
numSetsA + numSetsB);
#endif
// fill them
std::vector<VkWriteDescriptorSet> updates;
updates.push_back(binds.makeWrite(0, VIEW_BINDING, &view0BufferInfo));
updates.push_back(binds.makeWrite(1, VIEW_BINDING, &view1BufferInfo));
updates.push_back(binds.makeWrite(0, XFORM_BINDING, &xform0BufferInfo));
updates.push_back(binds.makeWrite(1, XFORM_BINDING, &xform1BufferInfo));
vkUpdateDescriptorSets(device, updates.size(), updates.data(), 0, nullptr);
```
@DOC_END */
class DescriptorSetBindings
{
public:
DescriptorSetBindings() = default;
DescriptorSetBindings(const std::vector<VkDescriptorSetLayoutBinding>& bindings)
: m_bindings(bindings)
{
}
// Add a binding to the descriptor set
void addBinding(uint32_t binding, // Slot to which the descriptor will be bound, corresponding to the layout
// binding index in the shader
VkDescriptorType type, // Type of the bound descriptor(s)
uint32_t count, // Number of descriptors
VkShaderStageFlags stageFlags, // Shader stages at which the bound resources will be available
const VkSampler* pImmutableSampler = nullptr // Corresponding sampler, in case of textures
)
{
m_bindings.push_back({binding, type, count, stageFlags, pImmutableSampler});
}
void addBinding(const VkDescriptorSetLayoutBinding& layoutBinding) { m_bindings.emplace_back(layoutBinding); }
void setBindings(const std::vector<VkDescriptorSetLayoutBinding>& bindings) { m_bindings = bindings; }
// requires DescriptorSupport::INDEXING_EXT or DescriptorSupport::CORE_1_2 on createLayout
void setBindingFlags(uint32_t binding, VkDescriptorBindingFlags bindingFlags);
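// e.g. a minimal sketch, assuming binding 2 is a variable-count array and the device supports Vulkan 1.2:
//   binds.setBindingFlags(2, VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT);
//   VkDescriptorSetLayout layout = binds.createLayout(device, 0, DescriptorSupport::CORE_1_2);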
void clear()
{
m_bindings.clear();
m_bindingFlags.clear();
}
bool empty() const { return m_bindings.empty(); }
size_t size() const { return m_bindings.size(); }
const VkDescriptorSetLayoutBinding* data() const { return m_bindings.data(); }
VkDescriptorType getType(uint32_t binding) const;
uint32_t getCount(uint32_t binding) const;
// Once the bindings have been added, this generates the descriptor layout corresponding to the
// bound resources.
VkDescriptorSetLayout createLayout(VkDevice device,
VkDescriptorSetLayoutCreateFlags flags = 0,
DescriptorSupport supportFlags = DescriptorSupport::CORE_1_0);
// Once the bindings have been added, this generates the descriptor pool with enough space to
// handle all the bound resources and allocate up to maxSets descriptor sets
VkDescriptorPool createPool(VkDevice device, uint32_t maxSets = 1, VkDescriptorPoolCreateFlags flags = 0) const;
// appends the required poolsizes for N sets
void addRequiredPoolSizes(std::vector<VkDescriptorPoolSize>& poolSizes, uint32_t numSets) const;
// provide single element
VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet, uint32_t dstBinding, uint32_t arrayElement = 0) const;
VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkDescriptorImageInfo* pImageInfo,
uint32_t arrayElement = 0) const;
VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkDescriptorBufferInfo* pBufferInfo,
uint32_t arrayElement = 0) const;
VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkBufferView* pTexelBufferView,
uint32_t arrayElement = 0) const;
#if VK_NV_ray_tracing
VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetAccelerationStructureNV* pAccel,
uint32_t arrayElement = 0) const;
#endif
#if VK_KHR_acceleration_structure
VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetAccelerationStructureKHR* pAccel,
uint32_t arrayElement = 0) const;
#endif
#if VK_EXT_inline_uniform_block
VkWriteDescriptorSet makeWrite(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetInlineUniformBlockEXT* pInlineUniform,
uint32_t arrayElement = 0) const;
#endif
// provide full array
VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet, uint32_t dstBinding) const;
VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet, uint32_t dstBinding, const VkDescriptorImageInfo* pImageInfo) const;
VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet, uint32_t dstBinding, const VkDescriptorBufferInfo* pBufferInfo) const;
VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet, uint32_t dstBinding, const VkBufferView* pTexelBufferView) const;
#if VK_NV_ray_tracing
VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetAccelerationStructureNV* pAccel) const;
#endif
#if VK_KHR_acceleration_structure
VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetAccelerationStructureKHR* pAccel) const;
#endif
#if VK_EXT_inline_uniform_block
VkWriteDescriptorSet makeWriteArray(VkDescriptorSet dstSet,
uint32_t dstBinding,
const VkWriteDescriptorSetInlineUniformBlockEXT* pInline) const;
#endif
protected:
std::vector<VkDescriptorSetLayoutBinding> m_bindings;
std::vector<VkDescriptorBindingFlags> m_bindingFlags;
};
/////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::DescriptorSetContainer
nvvk::DescriptorSetContainer is a container class that stores allocated DescriptorSets
as well as reflection, layout and pool for a single
VkDescriptorSetLayout.
Example:
```cpp
container.init(device, allocator);
// setup dset layouts
container.addBinding(0, UBO...)
container.addBinding(1, SSBO...)
container.initLayout();
// allocate descriptorsets
container.initPool(17);
// update descriptorsets
writeUpdates.push_back( container.makeWrite(0, 0, &..) );
writeUpdates.push_back( container.makeWrite(0, 1, &..) );
writeUpdates.push_back( container.makeWrite(1, 0, &..) );
writeUpdates.push_back( container.makeWrite(1, 1, &..) );
writeUpdates.push_back( container.makeWrite(2, 0, &..) );
writeUpdates.push_back( container.makeWrite(2, 1, &..) );
...
// at render time
vkCmdBindDescriptorSets(cmd, GRAPHICS, pipeLayout, 1, 1, container.getSets(7));
```
@DOC_END */
class DescriptorSetContainer
{
public:
DescriptorSetContainer(DescriptorSetContainer const&) = delete;
DescriptorSetContainer& operator=(DescriptorSetContainer const&) = delete;
DescriptorSetContainer() {}
DescriptorSetContainer(VkDevice device) { init(device); }
void init(VkDevice device);
~DescriptorSetContainer() { deinit(); }
void setBindings(const std::vector<VkDescriptorSetLayoutBinding>& bindings);
void addBinding(VkDescriptorSetLayoutBinding layoutBinding);
void addBinding(uint32_t binding,
VkDescriptorType descriptorType,
uint32_t descriptorCount,
VkShaderStageFlags stageFlags,
const VkSampler* pImmutableSamplers = nullptr);
// requires DescriptorSupport::INDEXING_EXT or DescriptorSupport::CORE_1_2 on initLayout
void setBindingFlags(uint32_t binding, VkDescriptorBindingFlags bindingFlags);
VkDescriptorSetLayout initLayout(VkDescriptorSetLayoutCreateFlags flags = 0,
DescriptorSupport supportFlags = DescriptorSupport::CORE_1_0);
// inits the pool and immediately allocates numAllocatedSets-many DescriptorSets
VkDescriptorPool initPool(uint32_t numAllocatedSets);
// optionally generates a pipelinelayout for the descriptorsetlayout
VkPipelineLayout initPipeLayout(uint32_t numRanges = 0,
const VkPushConstantRange* ranges = nullptr,
VkPipelineLayoutCreateFlags flags = 0);
void deinitPool();
void deinitLayout();
void deinit();
//////////////////////////////////////////////////////////////////////////
VkDescriptorSet getSet(uint32_t dstSetIdx = 0) const;
const VkDescriptorSet* getSets(uint32_t dstSetIdx = 0) const { return m_descriptorSets.data() + dstSetIdx; }
uint32_t getSetsCount() const { return static_cast<uint32_t>(m_descriptorSets.size()); }
const VkDescriptorSetLayout& getLayout() const { return m_layout; }
const VkPipelineLayout& getPipeLayout() const { return m_pipelineLayout; }
const DescriptorSetBindings& getBindings() const { return m_bindings; }
VkDevice getDevice() const { return m_device; }
//////////////////////////////////////////////////////////////////////////
// provide single element
VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, const VkDescriptorImageInfo* pImageInfo, uint32_t arrayElement = 0) const
{
return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, pImageInfo, arrayElement);
}
VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, const VkDescriptorBufferInfo* pBufferInfo, uint32_t arrayElement = 0) const
{
return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, pBufferInfo, arrayElement);
}
VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx, uint32_t dstBinding, const VkBufferView* pTexelBufferView, uint32_t arrayElement = 0) const
{
return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, pTexelBufferView, arrayElement);
}
#if VK_NV_ray_tracing
VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx,
uint32_t dstBinding,
const VkWriteDescriptorSetAccelerationStructureNV* pAccel,
uint32_t arrayElement = 0) const
{
return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, pAccel, arrayElement);
}
#endif
#if VK_KHR_acceleration_structure
VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx,
uint32_t dstBinding,
const VkWriteDescriptorSetAccelerationStructureKHR* pAccel,
uint32_t arrayElement = 0) const
{
return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, pAccel, arrayElement);
}
#endif
#if VK_EXT_inline_uniform_block
VkWriteDescriptorSet makeWrite(uint32_t dstSetIdx,
uint32_t dstBinding,
const VkWriteDescriptorSetInlineUniformBlockEXT* pInline,
uint32_t arrayElement = 0) const
{
return m_bindings.makeWrite(getSet(dstSetIdx), dstBinding, pInline, arrayElement);
}
#endif
// provide full array
VkWriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const VkDescriptorImageInfo* pImageInfo) const
{
return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, pImageInfo);
}
VkWriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const VkDescriptorBufferInfo* pBufferInfo) const
{
return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, pBufferInfo);
}
VkWriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const VkBufferView* pTexelBufferView) const
{
return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, pTexelBufferView);
}
#if VK_NV_ray_tracing
VkWriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const VkWriteDescriptorSetAccelerationStructureNV* pAccel) const
{
return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, pAccel);
}
#endif
#if VK_KHR_acceleration_structure
VkWriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const VkWriteDescriptorSetAccelerationStructureKHR* pAccel) const
{
return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, pAccel);
}
#endif
#if VK_EXT_inline_uniform_block
VkWriteDescriptorSet makeWriteArray(uint32_t dstSetIdx, uint32_t dstBinding, const VkWriteDescriptorSetInlineUniformBlockEXT* pInline) const
{
return m_bindings.makeWriteArray(getSet(dstSetIdx), dstBinding, pInline);
}
#endif
protected:
VkDevice m_device = VK_NULL_HANDLE;
VkDescriptorSetLayout m_layout = VK_NULL_HANDLE;
VkDescriptorPool m_pool = VK_NULL_HANDLE;
VkPipelineLayout m_pipelineLayout = VK_NULL_HANDLE;
std::vector<VkDescriptorSet> m_descriptorSets = {};
DescriptorSetBindings m_bindings = {};
};
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::TDescriptorSetContainer<SETS,PIPES=1>
nvvk::TDescriptorSetContainer is a templated version of DescriptorSetContainer that stores:
- SETS-many DescriptorSetContainers
- PIPES-many VkPipelineLayouts
The pipeline layouts are stored separately; the class does
not use the pipeline layouts of the embedded DescriptorSetContainers.
Example :
```cpp
// Usage, e.g. SETS = 2, PIPES = 2
container.init(device, allocator);
// setup dset layouts
container.at(0).addBinding(0, UBO...)
container.at(0).addBinding(1, SSBO...)
container.at(0).initLayout();
container.at(1).addBinding(0, COMBINED_SAMPLER...)
container.at(1).initLayout();
// pipe 0 uses set 0 alone
container.initPipeLayout(0, 1);
// pipe 1 uses sets 0, 1
container.initPipeLayout(1, 2);
// allocate descriptorsets
container.at(0).initPool(1);
container.at(1).initPool(16);
// update descriptorsets
writeUpdates.push_back(container.at(0).makeWrite(0, 0, &..));
writeUpdates.push_back(container.at(0).makeWrite(0, 1, &..));
writeUpdates.push_back(container.at(1).makeWrite(0, 0, &..));
writeUpdates.push_back(container.at(1).makeWrite(1, 0, &..));
writeUpdates.push_back(container.at(1).makeWrite(2, 0, &..));
...
// at render time
vkCmdBindDescriptorSets(cmd, GRAPHICS, container.getPipeLayout(0), 0, 1, container.at(0).getSets());
..
vkCmdBindDescriptorSets(cmd, GRAPHICS, container.getPipeLayout(1), 1, 1, container.at(1).getSets(7));
```
@DOC_END */
template <int SETS, int PIPES = 1>
class TDescriptorSetContainer
{
public:
TDescriptorSetContainer() {}
TDescriptorSetContainer(VkDevice device) { init(device); }
~TDescriptorSetContainer() { deinit(); }
void init(VkDevice device);
void deinit();
void deinitLayouts();
void deinitPools();
// pipelayout uses range of m_sets[0.. first null or SETS[
VkPipelineLayout initPipeLayout(uint32_t pipe,
uint32_t numRanges = 0,
const VkPushConstantRange* ranges = nullptr,
VkPipelineLayoutCreateFlags flags = 0);
// pipelayout uses range of m_sets[0..numDsets[
VkPipelineLayout initPipeLayout(uint32_t pipe,
uint32_t numDsets,
uint32_t numRanges = 0,
const VkPushConstantRange* ranges = nullptr,
VkPipelineLayoutCreateFlags flags = 0);
DescriptorSetContainer& at(uint32_t set) { return m_sets[set]; }
const DescriptorSetContainer& at(uint32_t set) const { return m_sets[set]; }
DescriptorSetContainer& operator[](uint32_t set) { return m_sets[set]; }
const DescriptorSetContainer& operator[](uint32_t set) const { return m_sets[set]; }
VkPipelineLayout getPipeLayout(uint32_t pipe = 0) const
{
assert(pipe < PIPES);
return m_pipelayouts[pipe];
}
protected:
VkPipelineLayout m_pipelayouts[PIPES] = {};
DescriptorSetContainer m_sets[SETS];
};
//////////////////////////////////////////////////////////////////////////
template <int SETS, int PIPES>
VkPipelineLayout TDescriptorSetContainer<SETS, PIPES>::initPipeLayout(uint32_t pipe,
uint32_t numDsets,
uint32_t numRanges /*= 0*/,
const VkPushConstantRange* ranges /*= nullptr*/,
VkPipelineLayoutCreateFlags flags /*= 0*/)
{
assert(pipe < uint32_t(PIPES));
assert(numDsets <= uint32_t(SETS));
assert(m_pipelayouts[pipe] == VK_NULL_HANDLE);
VkDevice device = m_sets[0].getDevice();
VkDescriptorSetLayout setLayouts[SETS];
for(uint32_t d = 0; d < numDsets; d++)
{
setLayouts[d] = m_sets[d].getLayout();
assert(setLayouts[d]);
}
VkResult result;
VkPipelineLayoutCreateInfo layoutCreateInfo = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
layoutCreateInfo.setLayoutCount = numDsets;
layoutCreateInfo.pSetLayouts = setLayouts;
layoutCreateInfo.pushConstantRangeCount = numRanges;
layoutCreateInfo.pPushConstantRanges = ranges;
layoutCreateInfo.flags = flags;
result = vkCreatePipelineLayout(device, &layoutCreateInfo, nullptr, &m_pipelayouts[pipe]);
assert(result == VK_SUCCESS);
return m_pipelayouts[pipe];
}
template <int SETS, int PIPES>
VkPipelineLayout TDescriptorSetContainer<SETS, PIPES>::initPipeLayout(uint32_t pipe,
uint32_t numRanges /*= 0*/,
const VkPushConstantRange* ranges /*= nullptr*/,
VkPipelineLayoutCreateFlags flags /*= 0*/)
{
assert(pipe < uint32_t(PIPES));
assert(m_pipelayouts[pipe] == VK_NULL_HANDLE);
VkDevice device = m_sets[0].getDevice();
VkDescriptorSetLayout setLayouts[SETS];
int used;
for(used = 0; used < SETS; used++)
{
setLayouts[used] = m_sets[used].getLayout();
if(!setLayouts[used])
break;
}
VkResult result;
VkPipelineLayoutCreateInfo layoutCreateInfo = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
layoutCreateInfo.setLayoutCount = uint32_t(used);
layoutCreateInfo.pSetLayouts = setLayouts;
layoutCreateInfo.pushConstantRangeCount = numRanges;
layoutCreateInfo.pPushConstantRanges = ranges;
layoutCreateInfo.flags = flags;
result = vkCreatePipelineLayout(device, &layoutCreateInfo, nullptr, &m_pipelayouts[pipe]);
assert(result == VK_SUCCESS);
return m_pipelayouts[pipe];
}
template <int SETS, int PIPES>
void TDescriptorSetContainer<SETS, PIPES>::deinitPools()
{
for(int d = 0; d < SETS; d++)
{
m_sets[d].deinitPool();
}
}
template <int SETS, int PIPES>
void TDescriptorSetContainer<SETS, PIPES>::deinitLayouts()
{
VkDevice device = m_sets[0].getDevice();
for(int p = 0; p < PIPES; p++)
{
if(m_pipelayouts[p])
{
vkDestroyPipelineLayout(device, m_pipelayouts[p], nullptr);
m_pipelayouts[p] = VK_NULL_HANDLE;
}
}
for(int d = 0; d < SETS; d++)
{
m_sets[d].deinitLayout();
}
}
template <int SETS, int PIPES>
void TDescriptorSetContainer<SETS, PIPES>::deinit()
{
deinitPools();
deinitLayouts();
}
template <int SETS, int PIPES>
void TDescriptorSetContainer<SETS, PIPES>::init(VkDevice device)
{
for(int d = 0; d < SETS; d++)
{
m_sets[d].init(device);
}
}
} // namespace nvvk

View file

@ -0,0 +1,62 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "dynamicrendering_vk.hpp"
namespace nvvk {
#ifdef VK_KHR_dynamic_rendering
// Helper for VK_KHR_dynamic_rendering
createRenderingInfo::createRenderingInfo(VkRect2D renderArea,
const std::vector<VkImageView>& colorViews,
const VkImageView& depthView,
VkAttachmentLoadOp colorLoadOp /*= VK_ATTACHMENT_LOAD_OP_CLEAR*/,
VkAttachmentLoadOp depthLoadOp /*= VK_ATTACHMENT_LOAD_OP_CLEAR*/,
VkClearColorValue clearColorValue /*= {0.f, 0.f, 0.f, 0.f}*/,
VkClearDepthStencilValue clearDepthValue /*= {1.f, 0U}*/,
VkRenderingFlagsKHR flags /*= 0*/)
: VkRenderingInfoKHR{VK_STRUCTURE_TYPE_RENDERING_INFO_KHR}
{
for(auto& cv : colorViews)
{
VkRenderingAttachmentInfoKHR colorAttachment{VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR};
colorAttachment.clearValue.color = clearColorValue;
colorAttachment.imageView = cv;
colorAttachment.imageLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL_KHR;
colorAttachment.loadOp = colorLoadOp;
colorAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
colorAttachments.emplace_back(colorAttachment);
}
depthStencilAttachment.imageView = depthView;
depthStencilAttachment.imageLayout = VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL_KHR;
depthStencilAttachment.loadOp = depthLoadOp;
depthStencilAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
depthStencilAttachment.clearValue.depthStencil = clearDepthValue;
this->renderArea = renderArea;
this->layerCount = 1;
this->colorAttachmentCount = static_cast<uint32_t>(colorAttachments.size());
this->pColorAttachments = colorAttachments.data();
this->pDepthAttachment = &depthStencilAttachment;
this->pStencilAttachment = &depthStencilAttachment;
this->flags = flags;
}
#endif
} // namespace nvvk

View file

@ -0,0 +1,51 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <vulkan/vulkan_core.h>
#include <vector>
namespace nvvk {
/** @DOC_START
# struct nvvk::createRenderingInfo
> Wrapper for VkRenderingInfoKHR
This struct is a wrapper around the VkRenderingInfoKHR structure, used to begin dynamic rendering with the given color and depth attachments.
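A minimal usage sketch (a command buffer `cmd`, a `renderArea`, and existing `colorView`/`depthView` image views are assumed):
```cpp
nvvk::createRenderingInfo renderingInfo(renderArea, {colorView}, depthView);
vkCmdBeginRenderingKHR(cmd, &renderingInfo);
// ... draw calls ...
vkCmdEndRenderingKHR(cmd);
```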
@DOC_END */
#ifdef VK_KHR_dynamic_rendering
struct createRenderingInfo : public VkRenderingInfoKHR
{
createRenderingInfo(VkRect2D renderArea,
const std::vector<VkImageView>& colorViews,
const VkImageView& depthView,
VkAttachmentLoadOp colorLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
VkAttachmentLoadOp depthLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
VkClearColorValue clearColorValue = {{0.f, 0.f, 0.f, 0.f}},
VkClearDepthStencilValue clearDepthValue = {1.f, 0U},
VkRenderingFlagsKHR flags = 0);
VkRenderingAttachmentInfoKHR depthStencilAttachment{VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR};
std::vector<VkRenderingAttachmentInfoKHR> colorAttachments;
};
#endif
} // namespace nvvk

View file

@ -0,0 +1,132 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "error_vk.hpp"
#include <nvh/nvprint.hpp>
namespace nvvk {
CheckResultCallback g_checkResultCallback;
void setCheckResultHook(const CheckResultCallback& callback)
{
g_checkResultCallback = callback;
}
const char* getResultString(VkResult result)
{
const char* resultString = "unknown";
#define STR(a) \
case a: \
resultString = #a; \
break;
switch(result)
{
STR(VK_SUCCESS);
STR(VK_NOT_READY);
STR(VK_TIMEOUT);
STR(VK_EVENT_SET);
STR(VK_EVENT_RESET);
STR(VK_INCOMPLETE);
STR(VK_ERROR_OUT_OF_HOST_MEMORY);
STR(VK_ERROR_OUT_OF_DEVICE_MEMORY);
STR(VK_ERROR_INITIALIZATION_FAILED);
STR(VK_ERROR_DEVICE_LOST);
STR(VK_ERROR_MEMORY_MAP_FAILED);
STR(VK_ERROR_LAYER_NOT_PRESENT);
STR(VK_ERROR_EXTENSION_NOT_PRESENT);
STR(VK_ERROR_FEATURE_NOT_PRESENT);
STR(VK_ERROR_INCOMPATIBLE_DRIVER);
STR(VK_ERROR_TOO_MANY_OBJECTS);
STR(VK_ERROR_FORMAT_NOT_SUPPORTED);
STR(VK_ERROR_FRAGMENTED_POOL);
STR(VK_ERROR_OUT_OF_POOL_MEMORY);
STR(VK_ERROR_INVALID_EXTERNAL_HANDLE);
STR(VK_ERROR_SURFACE_LOST_KHR);
STR(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
STR(VK_SUBOPTIMAL_KHR);
STR(VK_ERROR_OUT_OF_DATE_KHR);
STR(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
STR(VK_ERROR_VALIDATION_FAILED_EXT);
STR(VK_ERROR_INVALID_SHADER_NV);
STR(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
STR(VK_ERROR_FRAGMENTATION_EXT);
STR(VK_ERROR_NOT_PERMITTED_EXT);
STR(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT);
STR(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
}
#undef STR
return resultString;
}
bool checkResult(VkResult result, const char* message)
{
if(g_checkResultCallback)
return g_checkResultCallback(result, nullptr, -1, message);
if(result == VK_SUCCESS)
{
return false;
}
if(result < 0)
{
if(message)
{
LOGE("VkResult %d - %s - %s\n", result, getResultString(result), message);
}
else
{
LOGE("VkResult %d - %s\n", result, getResultString(result));
}
assert(!"Critical Vulkan Error");
return true;
}
return false;
}
//--------------------------------------------------------------------------------------------------
// Check the result of Vulkan and in case of error, provide a string about what happened
//
bool checkResult(VkResult result, const char* file, int32_t line)
{
if(g_checkResultCallback)
return g_checkResultCallback(result, file, line, nullptr);
if(result == VK_SUCCESS)
{
return false;
}
if(result < 0)
{
LOGE("%s(%d): Vulkan Error : %s\n", file, line, getResultString(result));
assert(!"Critical Vulkan Error");
return true;
}
return false;
}
} // namespace nvvk

View file

@ -0,0 +1,51 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# Function nvvk::checkResult
> Returns true on critical error result, logs errors.
Use `NVVK_CHECK(result)` to automatically log filename/linenumber.
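A minimal usage sketch (assuming a valid `device`):
```cpp
VkResult result = vkDeviceWaitIdle(device);
if(NVVK_CHECK(result))
{
  // a critical error was already logged with file/line; handle or abort here
}
```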
@DOC_END */
#pragma once
#include <cassert>
#include <functional>
#include <vulkan/vulkan_core.h>
namespace nvvk {
bool checkResult(VkResult result, const char* message = nullptr);
bool checkResult(VkResult result, const char* file, int32_t line);
/** @DOC_START
# Function nvvk::setCheckResultHook
> Allow replacing nvvk::checkResult() calls. E.g. to catch
`VK_ERROR_DEVICE_LOST` and wait for aftermath to write the crash dump.
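A minimal sketch of installing such a hook (the device-lost handling is a hypothetical placeholder):
```cpp
nvvk::setCheckResultHook([](VkResult result, const char* file, int32_t line, const char* message) {
  if(result == VK_ERROR_DEVICE_LOST)
  {
    // e.g. wait here for a crash-dump writer to finish (hypothetical), then report as critical
    return true;
  }
  return result < 0;  // any other negative VkResult is critical
});
```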
@DOC_END */
using CheckResultCallback = std::function<bool(VkResult, const char*, int32_t, const char*)>;
void setCheckResultHook(const CheckResultCallback& callback);
#ifndef NVVK_CHECK
#define NVVK_CHECK(result) nvvk::checkResult(result, __FILE__, __LINE__)
#endif
} // namespace nvvk

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,589 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <vulkan/vulkan.h>
/** @DOC_START
# Function load_VK_EXTENSIONS
> load_VK_EXTENSIONS : Vulkan Extension Loader
The extensions_vk files take care of loading and providing the symbols of the Vulkan C API extensions.
They are generated by `extensions_vk.py`, which covers all extensions found in vk.xml. See the script for details.
The framework triggers this implicitly in the `nvvk::Context` class, immediately after creating the device.
```cpp
// loads all known extensions
load_VK_EXTENSIONS(instance, vkGetInstanceProcAddr, device, vkGetDeviceProcAddr);
```
@DOC_END */
/* Load all available extensions */
void load_VK_EXTENSIONS(VkInstance instance, PFN_vkGetInstanceProcAddr getInstanceProcAddr, VkDevice device, PFN_vkGetDeviceProcAddr getDeviceProcAddr);
/* NVVK_GENERATE_DEFINE */
#if defined(VK_AMDX_shader_enqueue)
#define NVVK_HAS_VK_AMDX_shader_enqueue
#endif /* VK_AMDX_shader_enqueue */
#if defined(VK_AMD_buffer_marker)
#define NVVK_HAS_VK_AMD_buffer_marker
#endif /* VK_AMD_buffer_marker */
#if defined(VK_AMD_display_native_hdr)
#define NVVK_HAS_VK_AMD_display_native_hdr
#endif /* VK_AMD_display_native_hdr */
#if defined(VK_AMD_draw_indirect_count)
#define NVVK_HAS_VK_AMD_draw_indirect_count
#endif /* VK_AMD_draw_indirect_count */
#if defined(VK_AMD_shader_info)
#define NVVK_HAS_VK_AMD_shader_info
#endif /* VK_AMD_shader_info */
#if defined(VK_ANDROID_external_memory_android_hardware_buffer)
#define NVVK_HAS_VK_ANDROID_external_memory_android_hardware_buffer
#endif /* VK_ANDROID_external_memory_android_hardware_buffer */
#if defined(VK_EXT_acquire_drm_display)
#define NVVK_HAS_VK_EXT_acquire_drm_display
#endif /* VK_EXT_acquire_drm_display */
#if defined(VK_EXT_acquire_xlib_display)
#define NVVK_HAS_VK_EXT_acquire_xlib_display
#endif /* VK_EXT_acquire_xlib_display */
#if defined(VK_EXT_attachment_feedback_loop_dynamic_state)
#define NVVK_HAS_VK_EXT_attachment_feedback_loop_dynamic_state
#endif /* VK_EXT_attachment_feedback_loop_dynamic_state */
#if defined(VK_EXT_buffer_device_address)
#define NVVK_HAS_VK_EXT_buffer_device_address
#endif /* VK_EXT_buffer_device_address */
#if defined(VK_EXT_calibrated_timestamps)
#define NVVK_HAS_VK_EXT_calibrated_timestamps
#endif /* VK_EXT_calibrated_timestamps */
#if defined(VK_EXT_color_write_enable)
#define NVVK_HAS_VK_EXT_color_write_enable
#endif /* VK_EXT_color_write_enable */
#if defined(VK_EXT_conditional_rendering)
#define NVVK_HAS_VK_EXT_conditional_rendering
#endif /* VK_EXT_conditional_rendering */
#if defined(VK_EXT_debug_marker)
#define NVVK_HAS_VK_EXT_debug_marker
#endif /* VK_EXT_debug_marker */
#if defined(VK_EXT_debug_report)
#define NVVK_HAS_VK_EXT_debug_report
#endif /* VK_EXT_debug_report */
#if defined(VK_EXT_debug_utils)
#define NVVK_HAS_VK_EXT_debug_utils
#endif /* VK_EXT_debug_utils */
#if defined(VK_EXT_depth_bias_control)
#define NVVK_HAS_VK_EXT_depth_bias_control
#endif /* VK_EXT_depth_bias_control */
#if defined(VK_EXT_descriptor_buffer)
#define NVVK_HAS_VK_EXT_descriptor_buffer
#endif /* VK_EXT_descriptor_buffer */
#if defined(VK_EXT_descriptor_buffer) && (defined(VK_KHR_acceleration_structure) || defined(VK_NV_ray_tracing))
#define NVVK_HAS_VK_EXT_descriptor_buffer
#endif /* VK_EXT_descriptor_buffer && (VK_KHR_acceleration_structure || VK_NV_ray_tracing) */
#if defined(VK_EXT_device_fault)
#define NVVK_HAS_VK_EXT_device_fault
#endif /* VK_EXT_device_fault */
#if defined(VK_EXT_direct_mode_display)
#define NVVK_HAS_VK_EXT_direct_mode_display
#endif /* VK_EXT_direct_mode_display */
#if defined(VK_EXT_directfb_surface)
#define NVVK_HAS_VK_EXT_directfb_surface
#endif /* VK_EXT_directfb_surface */
#if defined(VK_EXT_discard_rectangles)
#define NVVK_HAS_VK_EXT_discard_rectangles
#endif /* VK_EXT_discard_rectangles */
#if defined(VK_EXT_discard_rectangles) && VK_EXT_DISCARD_RECTANGLES_SPEC_VERSION >= 2
#define NVVK_HAS_VK_EXT_discard_rectangles
#endif /* VK_EXT_discard_rectangles && VK_EXT_DISCARD_RECTANGLES_SPEC_VERSION >= 2 */
#if defined(VK_EXT_display_control)
#define NVVK_HAS_VK_EXT_display_control
#endif /* VK_EXT_display_control */
#if defined(VK_EXT_display_surface_counter)
#define NVVK_HAS_VK_EXT_display_surface_counter
#endif /* VK_EXT_display_surface_counter */
#if defined(VK_EXT_extended_dynamic_state)
#define NVVK_HAS_VK_EXT_extended_dynamic_state
#endif /* VK_EXT_extended_dynamic_state */
#if defined(VK_EXT_extended_dynamic_state2)
#define NVVK_HAS_VK_EXT_extended_dynamic_state2
#endif /* VK_EXT_extended_dynamic_state2 */
#if defined(VK_EXT_extended_dynamic_state3)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_clip_space_w_scaling)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_clip_space_w_scaling */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_viewport_swizzle)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_viewport_swizzle */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_fragment_coverage_to_color)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_fragment_coverage_to_color */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_framebuffer_mixed_samples)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_framebuffer_mixed_samples */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_shading_rate_image)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_shading_rate_image */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_representative_fragment_test)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_representative_fragment_test */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_coverage_reduction_mode)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_coverage_reduction_mode */
#if defined(VK_EXT_external_memory_host)
#define NVVK_HAS_VK_EXT_external_memory_host
#endif /* VK_EXT_external_memory_host */
#if defined(VK_EXT_full_screen_exclusive)
#define NVVK_HAS_VK_EXT_full_screen_exclusive
#endif /* VK_EXT_full_screen_exclusive */
#if defined(VK_EXT_full_screen_exclusive) && defined(VK_KHR_device_group)
#define NVVK_HAS_VK_EXT_full_screen_exclusive
#endif /* VK_EXT_full_screen_exclusive && VK_KHR_device_group */
#if defined(VK_EXT_full_screen_exclusive) && defined(VK_VERSION_1_1)
#define NVVK_HAS_VK_EXT_full_screen_exclusive
#endif /* VK_EXT_full_screen_exclusive && VK_VERSION_1_1 */
#if defined(VK_EXT_hdr_metadata)
#define NVVK_HAS_VK_EXT_hdr_metadata
#endif /* VK_EXT_hdr_metadata */
#if defined(VK_EXT_headless_surface)
#define NVVK_HAS_VK_EXT_headless_surface
#endif /* VK_EXT_headless_surface */
#if defined(VK_EXT_host_image_copy)
#define NVVK_HAS_VK_EXT_host_image_copy
#endif /* VK_EXT_host_image_copy */
#if defined(VK_EXT_host_query_reset)
#define NVVK_HAS_VK_EXT_host_query_reset
#endif /* VK_EXT_host_query_reset */
#if defined(VK_EXT_image_compression_control)
#define NVVK_HAS_VK_EXT_image_compression_control
#endif /* VK_EXT_image_compression_control */
#if defined(VK_EXT_image_drm_format_modifier)
#define NVVK_HAS_VK_EXT_image_drm_format_modifier
#endif /* VK_EXT_image_drm_format_modifier */
#if defined(VK_EXT_line_rasterization)
#define NVVK_HAS_VK_EXT_line_rasterization
#endif /* VK_EXT_line_rasterization */
#if defined(VK_EXT_mesh_shader)
#define NVVK_HAS_VK_EXT_mesh_shader
#endif /* VK_EXT_mesh_shader */
#if defined(VK_EXT_metal_objects)
#define NVVK_HAS_VK_EXT_metal_objects
#endif /* VK_EXT_metal_objects */
#if defined(VK_EXT_metal_surface)
#define NVVK_HAS_VK_EXT_metal_surface
#endif /* VK_EXT_metal_surface */
#if defined(VK_EXT_multi_draw)
#define NVVK_HAS_VK_EXT_multi_draw
#endif /* VK_EXT_multi_draw */
#if defined(VK_EXT_opacity_micromap)
#define NVVK_HAS_VK_EXT_opacity_micromap
#endif /* VK_EXT_opacity_micromap */
#if defined(VK_EXT_pageable_device_local_memory)
#define NVVK_HAS_VK_EXT_pageable_device_local_memory
#endif /* VK_EXT_pageable_device_local_memory */
#if defined(VK_EXT_pipeline_properties)
#define NVVK_HAS_VK_EXT_pipeline_properties
#endif /* VK_EXT_pipeline_properties */
#if defined(VK_EXT_private_data)
#define NVVK_HAS_VK_EXT_private_data
#endif /* VK_EXT_private_data */
#if defined(VK_EXT_sample_locations)
#define NVVK_HAS_VK_EXT_sample_locations
#endif /* VK_EXT_sample_locations */
#if defined(VK_EXT_shader_module_identifier)
#define NVVK_HAS_VK_EXT_shader_module_identifier
#endif /* VK_EXT_shader_module_identifier */
#if defined(VK_EXT_shader_object)
#define NVVK_HAS_VK_EXT_shader_object
#endif /* VK_EXT_shader_object */
#if defined(VK_EXT_shader_object) && defined(VK_NV_clip_space_w_scaling)
#define NVVK_HAS_VK_EXT_shader_object
#endif /* VK_EXT_shader_object && VK_NV_clip_space_w_scaling */
#if defined(VK_EXT_shader_object) && defined(VK_NV_viewport_swizzle)
#define NVVK_HAS_VK_EXT_shader_object
#endif /* VK_EXT_shader_object && VK_NV_viewport_swizzle */
#if defined(VK_EXT_shader_object) && defined(VK_NV_fragment_coverage_to_color)
#define NVVK_HAS_VK_EXT_shader_object
#endif /* VK_EXT_shader_object && VK_NV_fragment_coverage_to_color */
#if defined(VK_EXT_shader_object) && defined(VK_NV_framebuffer_mixed_samples)
#define NVVK_HAS_VK_EXT_shader_object
#endif /* VK_EXT_shader_object && VK_NV_framebuffer_mixed_samples */
#if defined(VK_EXT_shader_object) && defined(VK_NV_shading_rate_image)
#define NVVK_HAS_VK_EXT_shader_object
#endif /* VK_EXT_shader_object && VK_NV_shading_rate_image */
#if defined(VK_EXT_shader_object) && defined(VK_NV_representative_fragment_test)
#define NVVK_HAS_VK_EXT_shader_object
#endif /* VK_EXT_shader_object && VK_NV_representative_fragment_test */
#if defined(VK_EXT_shader_object) && defined(VK_NV_coverage_reduction_mode)
#define NVVK_HAS_VK_EXT_shader_object
#endif /* VK_EXT_shader_object && VK_NV_coverage_reduction_mode */
#if defined(VK_EXT_swapchain_maintenance1)
#define NVVK_HAS_VK_EXT_swapchain_maintenance1
#endif /* VK_EXT_swapchain_maintenance1 */
#if defined(VK_EXT_tooling_info)
#define NVVK_HAS_VK_EXT_tooling_info
#endif /* VK_EXT_tooling_info */
#if defined(VK_EXT_transform_feedback)
#define NVVK_HAS_VK_EXT_transform_feedback
#endif /* VK_EXT_transform_feedback */
#if defined(VK_EXT_validation_cache)
#define NVVK_HAS_VK_EXT_validation_cache
#endif /* VK_EXT_validation_cache */
#if defined(VK_EXT_vertex_input_dynamic_state)
#define NVVK_HAS_VK_EXT_vertex_input_dynamic_state
#endif /* VK_EXT_vertex_input_dynamic_state */
#if defined(VK_FUCHSIA_buffer_collection)
#define NVVK_HAS_VK_FUCHSIA_buffer_collection
#endif /* VK_FUCHSIA_buffer_collection */
#if defined(VK_FUCHSIA_external_memory)
#define NVVK_HAS_VK_FUCHSIA_external_memory
#endif /* VK_FUCHSIA_external_memory */
#if defined(VK_FUCHSIA_external_semaphore)
#define NVVK_HAS_VK_FUCHSIA_external_semaphore
#endif /* VK_FUCHSIA_external_semaphore */
#if defined(VK_FUCHSIA_imagepipe_surface)
#define NVVK_HAS_VK_FUCHSIA_imagepipe_surface
#endif /* VK_FUCHSIA_imagepipe_surface */
#if defined(VK_GGP_stream_descriptor_surface)
#define NVVK_HAS_VK_GGP_stream_descriptor_surface
#endif /* VK_GGP_stream_descriptor_surface */
#if defined(VK_GOOGLE_display_timing)
#define NVVK_HAS_VK_GOOGLE_display_timing
#endif /* VK_GOOGLE_display_timing */
#if defined(VK_HUAWEI_cluster_culling_shader)
#define NVVK_HAS_VK_HUAWEI_cluster_culling_shader
#endif /* VK_HUAWEI_cluster_culling_shader */
#if defined(VK_HUAWEI_invocation_mask)
#define NVVK_HAS_VK_HUAWEI_invocation_mask
#endif /* VK_HUAWEI_invocation_mask */
#if defined(VK_HUAWEI_subpass_shading)
#define NVVK_HAS_VK_HUAWEI_subpass_shading
#endif /* VK_HUAWEI_subpass_shading */
#if defined(VK_INTEL_performance_query)
#define NVVK_HAS_VK_INTEL_performance_query
#endif /* VK_INTEL_performance_query */
#if defined(VK_KHR_acceleration_structure)
#define NVVK_HAS_VK_KHR_acceleration_structure
#endif /* VK_KHR_acceleration_structure */
#if defined(VK_KHR_android_surface)
#define NVVK_HAS_VK_KHR_android_surface
#endif /* VK_KHR_android_surface */
#if defined(VK_KHR_bind_memory2)
#define NVVK_HAS_VK_KHR_bind_memory2
#endif /* VK_KHR_bind_memory2 */
#if defined(VK_KHR_buffer_device_address)
#define NVVK_HAS_VK_KHR_buffer_device_address
#endif /* VK_KHR_buffer_device_address */
#if defined(VK_KHR_calibrated_timestamps)
#define NVVK_HAS_VK_KHR_calibrated_timestamps
#endif /* VK_KHR_calibrated_timestamps */
#if defined(VK_KHR_cooperative_matrix)
#define NVVK_HAS_VK_KHR_cooperative_matrix
#endif /* VK_KHR_cooperative_matrix */
#if defined(VK_KHR_copy_commands2)
#define NVVK_HAS_VK_KHR_copy_commands2
#endif /* VK_KHR_copy_commands2 */
#if defined(VK_KHR_create_renderpass2)
#define NVVK_HAS_VK_KHR_create_renderpass2
#endif /* VK_KHR_create_renderpass2 */
#if defined(VK_KHR_deferred_host_operations)
#define NVVK_HAS_VK_KHR_deferred_host_operations
#endif /* VK_KHR_deferred_host_operations */
#if defined(VK_KHR_descriptor_update_template)
#define NVVK_HAS_VK_KHR_descriptor_update_template
#endif /* VK_KHR_descriptor_update_template */
#if defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor)
#define NVVK_HAS_VK_KHR_descriptor_update_template
#endif /* VK_KHR_descriptor_update_template && VK_KHR_push_descriptor */
#if defined(VK_KHR_device_group)
#define NVVK_HAS_VK_KHR_device_group
#endif /* VK_KHR_device_group */
#if defined(VK_KHR_device_group) && defined(VK_KHR_surface)
#define NVVK_HAS_VK_KHR_device_group
#endif /* VK_KHR_device_group && VK_KHR_surface */
#if defined(VK_KHR_device_group) && defined(VK_KHR_swapchain)
#define NVVK_HAS_VK_KHR_device_group
#endif /* VK_KHR_device_group && VK_KHR_swapchain */
#if defined(VK_KHR_device_group_creation)
#define NVVK_HAS_VK_KHR_device_group_creation
#endif /* VK_KHR_device_group_creation */
#if defined(VK_KHR_draw_indirect_count)
#define NVVK_HAS_VK_KHR_draw_indirect_count
#endif /* VK_KHR_draw_indirect_count */
#if defined(VK_KHR_dynamic_rendering)
#define NVVK_HAS_VK_KHR_dynamic_rendering
#endif /* VK_KHR_dynamic_rendering */
#if defined(VK_KHR_external_fence_capabilities)
#define NVVK_HAS_VK_KHR_external_fence_capabilities
#endif /* VK_KHR_external_fence_capabilities */
#if defined(VK_KHR_external_fence_fd)
#define NVVK_HAS_VK_KHR_external_fence_fd
#endif /* VK_KHR_external_fence_fd */
#if defined(VK_KHR_external_fence_win32)
#define NVVK_HAS_VK_KHR_external_fence_win32
#endif /* VK_KHR_external_fence_win32 */
#if defined(VK_KHR_external_memory_capabilities)
#define NVVK_HAS_VK_KHR_external_memory_capabilities
#endif /* VK_KHR_external_memory_capabilities */
#if defined(VK_KHR_external_memory_fd)
#define NVVK_HAS_VK_KHR_external_memory_fd
#endif /* VK_KHR_external_memory_fd */
#if defined(VK_KHR_external_memory_win32)
#define NVVK_HAS_VK_KHR_external_memory_win32
#endif /* VK_KHR_external_memory_win32 */
#if defined(VK_KHR_external_semaphore_capabilities)
#define NVVK_HAS_VK_KHR_external_semaphore_capabilities
#endif /* VK_KHR_external_semaphore_capabilities */
#if defined(VK_KHR_external_semaphore_fd)
#define NVVK_HAS_VK_KHR_external_semaphore_fd
#endif /* VK_KHR_external_semaphore_fd */
#if defined(VK_KHR_external_semaphore_win32)
#define NVVK_HAS_VK_KHR_external_semaphore_win32
#endif /* VK_KHR_external_semaphore_win32 */
#if defined(VK_KHR_fragment_shading_rate)
#define NVVK_HAS_VK_KHR_fragment_shading_rate
#endif /* VK_KHR_fragment_shading_rate */
#if defined(VK_KHR_get_memory_requirements2)
#define NVVK_HAS_VK_KHR_get_memory_requirements2
#endif /* VK_KHR_get_memory_requirements2 */
#if defined(VK_KHR_get_physical_device_properties2)
#define NVVK_HAS_VK_KHR_get_physical_device_properties2
#endif /* VK_KHR_get_physical_device_properties2 */
#if defined(VK_KHR_maintenance1)
#define NVVK_HAS_VK_KHR_maintenance1
#endif /* VK_KHR_maintenance1 */
#if defined(VK_KHR_maintenance3)
#define NVVK_HAS_VK_KHR_maintenance3
#endif /* VK_KHR_maintenance3 */
#if defined(VK_KHR_maintenance4)
#define NVVK_HAS_VK_KHR_maintenance4
#endif /* VK_KHR_maintenance4 */
#if defined(VK_KHR_maintenance5)
#define NVVK_HAS_VK_KHR_maintenance5
#endif /* VK_KHR_maintenance5 */
#if defined(VK_KHR_maintenance6)
#define NVVK_HAS_VK_KHR_maintenance6
#endif /* VK_KHR_maintenance6 */
#if defined(VK_KHR_maintenance6) && defined(VK_KHR_push_descriptor)
#define NVVK_HAS_VK_KHR_maintenance6
#endif /* VK_KHR_maintenance6 && VK_KHR_push_descriptor */
#if defined(VK_KHR_maintenance6) && defined(VK_EXT_descriptor_buffer)
#define NVVK_HAS_VK_KHR_maintenance6
#endif /* VK_KHR_maintenance6 && VK_EXT_descriptor_buffer */
#if defined(VK_KHR_map_memory2)
#define NVVK_HAS_VK_KHR_map_memory2
#endif /* VK_KHR_map_memory2 */
#if defined(VK_KHR_performance_query)
#define NVVK_HAS_VK_KHR_performance_query
#endif /* VK_KHR_performance_query */
#if defined(VK_KHR_pipeline_executable_properties)
#define NVVK_HAS_VK_KHR_pipeline_executable_properties
#endif /* VK_KHR_pipeline_executable_properties */
#if defined(VK_KHR_present_wait)
#define NVVK_HAS_VK_KHR_present_wait
#endif /* VK_KHR_present_wait */
#if defined(VK_KHR_push_descriptor)
#define NVVK_HAS_VK_KHR_push_descriptor
#endif /* VK_KHR_push_descriptor */
#if defined(VK_KHR_push_descriptor) && defined(VK_VERSION_1_1)
#define NVVK_HAS_VK_KHR_push_descriptor
#endif /* VK_KHR_push_descriptor && VK_VERSION_1_1 */
#if defined(VK_KHR_push_descriptor) && defined(VK_KHR_descriptor_update_template)
#define NVVK_HAS_VK_KHR_push_descriptor
#endif /* VK_KHR_push_descriptor && VK_KHR_descriptor_update_template */
#if defined(VK_KHR_ray_tracing_maintenance1) && defined(VK_KHR_ray_tracing_pipeline)
#define NVVK_HAS_VK_KHR_ray_tracing_maintenance1
#endif /* VK_KHR_ray_tracing_maintenance1 && VK_KHR_ray_tracing_pipeline */
#if defined(VK_KHR_ray_tracing_pipeline)
#define NVVK_HAS_VK_KHR_ray_tracing_pipeline
#endif /* VK_KHR_ray_tracing_pipeline */
#if defined(VK_KHR_sampler_ycbcr_conversion)
#define NVVK_HAS_VK_KHR_sampler_ycbcr_conversion
#endif /* VK_KHR_sampler_ycbcr_conversion */
#if defined(VK_KHR_shared_presentable_image)
#define NVVK_HAS_VK_KHR_shared_presentable_image
#endif /* VK_KHR_shared_presentable_image */
#if defined(VK_KHR_swapchain) && defined(VK_VERSION_1_1)
#define NVVK_HAS_VK_KHR_swapchain
#endif /* VK_KHR_swapchain && VK_VERSION_1_1 */
#if defined(VK_KHR_synchronization2)
#define NVVK_HAS_VK_KHR_synchronization2
#endif /* VK_KHR_synchronization2 */
#if defined(VK_KHR_synchronization2) && defined(VK_AMD_buffer_marker)
#define NVVK_HAS_VK_KHR_synchronization2
#endif /* VK_KHR_synchronization2 && VK_AMD_buffer_marker */
#if defined(VK_KHR_synchronization2) && defined(VK_NV_device_diagnostic_checkpoints)
#define NVVK_HAS_VK_KHR_synchronization2
#endif /* VK_KHR_synchronization2 && VK_NV_device_diagnostic_checkpoints */
#if defined(VK_KHR_timeline_semaphore)
#define NVVK_HAS_VK_KHR_timeline_semaphore
#endif /* VK_KHR_timeline_semaphore */
#if defined(VK_KHR_video_decode_queue)
#define NVVK_HAS_VK_KHR_video_decode_queue
#endif /* VK_KHR_video_decode_queue */
#if defined(VK_KHR_video_encode_queue)
#define NVVK_HAS_VK_KHR_video_encode_queue
#endif /* VK_KHR_video_encode_queue */
#if defined(VK_KHR_video_queue)
#define NVVK_HAS_VK_KHR_video_queue
#endif /* VK_KHR_video_queue */
#if defined(VK_MVK_ios_surface)
#define NVVK_HAS_VK_MVK_ios_surface
#endif /* VK_MVK_ios_surface */
#if defined(VK_MVK_macos_surface)
#define NVVK_HAS_VK_MVK_macos_surface
#endif /* VK_MVK_macos_surface */
#if defined(VK_NN_vi_surface)
#define NVVK_HAS_VK_NN_vi_surface
#endif /* VK_NN_vi_surface */
#if defined(VK_NVX_binary_import)
#define NVVK_HAS_VK_NVX_binary_import
#endif /* VK_NVX_binary_import */
#if defined(VK_NVX_image_view_handle)
#define NVVK_HAS_VK_NVX_image_view_handle
#endif /* VK_NVX_image_view_handle */
#if defined(VK_NV_acquire_winrt_display)
#define NVVK_HAS_VK_NV_acquire_winrt_display
#endif /* VK_NV_acquire_winrt_display */
#if defined(VK_NV_clip_space_w_scaling)
#define NVVK_HAS_VK_NV_clip_space_w_scaling
#endif /* VK_NV_clip_space_w_scaling */
#if defined(VK_NV_cooperative_matrix)
#define NVVK_HAS_VK_NV_cooperative_matrix
#endif /* VK_NV_cooperative_matrix */
#if defined(VK_NV_copy_memory_indirect)
#define NVVK_HAS_VK_NV_copy_memory_indirect
#endif /* VK_NV_copy_memory_indirect */
#if defined(VK_NV_coverage_reduction_mode)
#define NVVK_HAS_VK_NV_coverage_reduction_mode
#endif /* VK_NV_coverage_reduction_mode */
#if defined(VK_NV_cuda_kernel_launch)
#define NVVK_HAS_VK_NV_cuda_kernel_launch
#endif /* VK_NV_cuda_kernel_launch */
#if defined(VK_NV_device_diagnostic_checkpoints)
#define NVVK_HAS_VK_NV_device_diagnostic_checkpoints
#endif /* VK_NV_device_diagnostic_checkpoints */
#if defined(VK_NV_device_generated_commands)
#define NVVK_HAS_VK_NV_device_generated_commands
#endif /* VK_NV_device_generated_commands */
#if defined(VK_NV_device_generated_commands_compute)
#define NVVK_HAS_VK_NV_device_generated_commands_compute
#endif /* VK_NV_device_generated_commands_compute */
#if defined(VK_NV_external_memory_capabilities)
#define NVVK_HAS_VK_NV_external_memory_capabilities
#endif /* VK_NV_external_memory_capabilities */
#if defined(VK_NV_external_memory_rdma)
#define NVVK_HAS_VK_NV_external_memory_rdma
#endif /* VK_NV_external_memory_rdma */
#if defined(VK_NV_external_memory_win32)
#define NVVK_HAS_VK_NV_external_memory_win32
#endif /* VK_NV_external_memory_win32 */
#if defined(VK_NV_fragment_shading_rate_enums)
#define NVVK_HAS_VK_NV_fragment_shading_rate_enums
#endif /* VK_NV_fragment_shading_rate_enums */
#if defined(VK_NV_low_latency2)
#define NVVK_HAS_VK_NV_low_latency2
#endif /* VK_NV_low_latency2 */
#if defined(VK_NV_memory_decompression)
#define NVVK_HAS_VK_NV_memory_decompression
#endif /* VK_NV_memory_decompression */
#if defined(VK_NV_mesh_shader)
#define NVVK_HAS_VK_NV_mesh_shader
#endif /* VK_NV_mesh_shader */
#if defined(VK_NV_optical_flow)
#define NVVK_HAS_VK_NV_optical_flow
#endif /* VK_NV_optical_flow */
#if defined(VK_NV_ray_tracing)
#define NVVK_HAS_VK_NV_ray_tracing
#endif /* VK_NV_ray_tracing */
#if defined(VK_NV_scissor_exclusive) && VK_NV_SCISSOR_EXCLUSIVE_SPEC_VERSION >= 2
#define NVVK_HAS_VK_NV_scissor_exclusive
#endif /* VK_NV_scissor_exclusive && VK_NV_SCISSOR_EXCLUSIVE_SPEC_VERSION >= 2 */
#if defined(VK_NV_scissor_exclusive)
#define NVVK_HAS_VK_NV_scissor_exclusive
#endif /* VK_NV_scissor_exclusive */
#if defined(VK_NV_shading_rate_image)
#define NVVK_HAS_VK_NV_shading_rate_image
#endif /* VK_NV_shading_rate_image */
#if defined(VK_QCOM_tile_properties)
#define NVVK_HAS_VK_QCOM_tile_properties
#endif /* VK_QCOM_tile_properties */
#if defined(VK_QNX_external_memory_screen_buffer)
#define NVVK_HAS_VK_QNX_external_memory_screen_buffer
#endif /* VK_QNX_external_memory_screen_buffer */
#if defined(VK_QNX_screen_surface)
#define NVVK_HAS_VK_QNX_screen_surface
#endif /* VK_QNX_screen_surface */
#if defined(VK_VALVE_descriptor_set_host_mapping)
#define NVVK_HAS_VK_VALVE_descriptor_set_host_mapping
#endif /* VK_VALVE_descriptor_set_host_mapping */
#if defined(VK_EXT_extended_dynamic_state) || defined(VK_EXT_shader_object)
#define NVVK_HAS_VK_EXT_extended_dynamic_state
#endif /* VK_EXT_extended_dynamic_state || VK_EXT_shader_object */
#if defined(VK_EXT_extended_dynamic_state2) || defined(VK_EXT_shader_object)
#define NVVK_HAS_VK_EXT_extended_dynamic_state2
#endif /* VK_EXT_extended_dynamic_state2 || VK_EXT_shader_object */
#if defined(VK_EXT_extended_dynamic_state3) || defined(VK_EXT_shader_object)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 || VK_EXT_shader_object */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_clip_space_w_scaling) || defined(VK_EXT_shader_object) && defined(VK_NV_clip_space_w_scaling)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_clip_space_w_scaling || VK_EXT_shader_object && VK_NV_clip_space_w_scaling */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_viewport_swizzle) || defined(VK_EXT_shader_object) && defined(VK_NV_viewport_swizzle)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_viewport_swizzle || VK_EXT_shader_object && VK_NV_viewport_swizzle */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_fragment_coverage_to_color) || defined(VK_EXT_shader_object) && defined(VK_NV_fragment_coverage_to_color)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_fragment_coverage_to_color || VK_EXT_shader_object && VK_NV_fragment_coverage_to_color */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_framebuffer_mixed_samples) || defined(VK_EXT_shader_object) && defined(VK_NV_framebuffer_mixed_samples)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_framebuffer_mixed_samples || VK_EXT_shader_object && VK_NV_framebuffer_mixed_samples */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_shading_rate_image) || defined(VK_EXT_shader_object) && defined(VK_NV_shading_rate_image)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_shading_rate_image || VK_EXT_shader_object && VK_NV_shading_rate_image */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_representative_fragment_test) || defined(VK_EXT_shader_object) && defined(VK_NV_representative_fragment_test)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_representative_fragment_test || VK_EXT_shader_object && VK_NV_representative_fragment_test */
#if defined(VK_EXT_extended_dynamic_state3) && defined(VK_NV_coverage_reduction_mode) || defined(VK_EXT_shader_object) && defined(VK_NV_coverage_reduction_mode)
#define NVVK_HAS_VK_EXT_extended_dynamic_state3
#endif /* VK_EXT_extended_dynamic_state3 && VK_NV_coverage_reduction_mode || VK_EXT_shader_object && VK_NV_coverage_reduction_mode */
#if defined(VK_EXT_full_screen_exclusive) && defined(VK_KHR_device_group) || defined(VK_EXT_full_screen_exclusive) && defined(VK_VERSION_1_1)
#define NVVK_HAS_VK_EXT_full_screen_exclusive
#endif /* VK_EXT_full_screen_exclusive && VK_KHR_device_group || VK_EXT_full_screen_exclusive && VK_VERSION_1_1 */
#if defined(VK_EXT_host_image_copy) || defined(VK_EXT_image_compression_control)
#define NVVK_HAS_VK_EXT_host_image_copy
#endif /* VK_EXT_host_image_copy || VK_EXT_image_compression_control */
#if defined(VK_EXT_shader_object) || defined(VK_EXT_vertex_input_dynamic_state)
#define NVVK_HAS_VK_EXT_shader_object
#endif /* VK_EXT_shader_object || VK_EXT_vertex_input_dynamic_state */
#if defined(VK_KHR_descriptor_update_template) && defined(VK_KHR_push_descriptor) || defined(VK_KHR_push_descriptor) && defined(VK_VERSION_1_1) || defined(VK_KHR_push_descriptor) && defined(VK_KHR_descriptor_update_template)
#define NVVK_HAS_VK_KHR_descriptor_update_template
#endif /* VK_KHR_descriptor_update_template && VK_KHR_push_descriptor || VK_KHR_push_descriptor && VK_VERSION_1_1 || VK_KHR_push_descriptor && VK_KHR_descriptor_update_template */
#if defined(VK_KHR_device_group) && defined(VK_KHR_surface) || defined(VK_KHR_swapchain) && defined(VK_VERSION_1_1)
#define NVVK_HAS_VK_KHR_device_group
#endif /* VK_KHR_device_group && VK_KHR_surface || VK_KHR_swapchain && VK_VERSION_1_1 */
#if defined(VK_KHR_device_group) && defined(VK_KHR_swapchain) || defined(VK_KHR_swapchain) && defined(VK_VERSION_1_1)
#define NVVK_HAS_VK_KHR_device_group
#endif /* VK_KHR_device_group && VK_KHR_swapchain || VK_KHR_swapchain && VK_VERSION_1_1 */
/* NVVK_GENERATE_DEFINE */
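/*
 Illustrative usage sketch (not part of the generated output): application code can use these
 NVVK_HAS_* defines to compile optional paths only when the corresponding extension is present
 in the Vulkan headers in use. `cmd` and `taskCount` below are assumptions; the command itself
 is the standard VK_NV_mesh_shader entry point.

   #ifdef NVVK_HAS_VK_NV_mesh_shader
     vkCmdDrawMeshTasksNV(cmd, taskCount, 0);
   #endif
*/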

View file

@ -0,0 +1,445 @@
#
# Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION
# SPDX-License-Identifier: Apache-2.0
#
#
# The following script originates from Volk (https://github.com/zeux/volk) and was adapted to the needs
# of the nvpro-core samples.
#
# Copyright (c) 2018-2023 Arseny Kapoulkine
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
#!/usr/bin/python3
# This script generates the entry points for all Vulkan extensions.
# Code blocks are created and replace the content between paired
# 'NVVK_GENERATE_'<BLOCK_NAME> markers.
import argparse
import os.path
import urllib
import urllib.request
import xml.etree.ElementTree as etree
import re
from collections import OrderedDict
# Ignore these extensions because their commands are already exported by vulkan-1.lib
ExcludeList = [
"defined(VK_KHR_surface)",
"defined(VK_KHR_win32_surface)",
"defined(VK_KHR_xlib_surface)",
"defined(VK_KHR_wayland_surface)",
"defined(VK_KHR_xcb_surface)",
"defined(VK_KHR_display)",
"defined(VK_KHR_swapchain)",
"defined(VK_KHR_get_surface_capabilities2)",
"defined(VK_KHR_get_display_properties2)",
"defined(VK_KHR_display_swapchain)",
"VK_VERSION_1_0",
"VK_VERSION_1_1",
"VK_VERSION_1_2",
"VK_VERSION_1_3",
]
# Debugging - To be sure that the exclude list excludes all commands
# exported by vulkan-1, populate the list here. If a duplicate is found,
# the command name and the extension name will be printed out.
ExportedCommands = [] # dumpbin /EXPORTS vulkan-1.lib
# Commands that were added in newer extension revisions.
# Extensions such as VK_EXT_discard_rectangles have had specification revisions
# that added new commands. Since these commands should only be used if the
# extension's `VkExtensionProperties::specVersion` is high enough, this table
# tracks the first `specVersion` in which each newer command was introduced
# (as this information is not currently contained in vk.xml).
cmdversions = {
"vkCmdSetDiscardRectangleEnableEXT": 2,
"vkCmdSetDiscardRectangleModeEXT": 2,
"vkCmdSetExclusiveScissorEnableNV": 2,
}
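# Illustrative example (this mirrors the guard construction done further below): a command listed
# here, e.g. vkCmdSetDiscardRectangleEnableEXT, ends up behind a stricter guard such as
#   #if defined(VK_EXT_discard_rectangles) && VK_EXT_DISCARD_RECTANGLES_SPEC_VERSION >= 2
# instead of the plain extension guard used for the other commands of that extension.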
def parse_xml(path):
# Parsing the Vulkan 'vk.xml' document
file = urllib.request.urlopen(path) if path.startswith("http") else open(path, "r")
with file:
tree = etree.parse(file)
return tree
def patch_file(fileName, blocks):
# Find each section of NVVK_GENERATE_ and replace with block of text
result = []
block = None
scriptDir = os.path.dirname(os.path.realpath(__file__))
path = os.path.join(scriptDir, fileName)
with open(path, "r") as file:
for line in file.readlines():
if block:
if line == block:
result.append(line)
block = None
else:
result.append(line)
# C comment marker
if line.strip().startswith("/* NVVK_GENERATE_"):
block = line
result.append(blocks[line.strip()[17:-3]])
with open(path, "w", newline="\n") as file:
for line in result:
file.write(line)
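# For reference, the patched files contain paired, identical marker lines; everything between a
# pair is replaced with the generated block of the same name. A sketch of what patch_file expects:
#   /* NVVK_GENERATE_STATIC_PFN */
#   ...previously generated content, discarded on regeneration...
#   /* NVVK_GENERATE_STATIC_PFN */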
def is_descendant_type(types, name, base):
# Finding the base type of each type:
# <type category="handle" parent="VkPhysicalDevice"
# objtypeenum="VK_OBJECT_TYPE_DEVICE"><type>VK_DEFINE_HANDLE</type>(<name>VkDevice</name>)</type>
# <type category="handle" parent="VkDevice"
# objtypeenum="VK_OBJECT_TYPE_QUEUE"><type>VK_DEFINE_HANDLE</type>(<name>VkQueue</name>)</type>
if name == base:
return True
type = types.get(name)
if len(type) == 0:
return False
parents = type.get("parent")
if not parents:
return False
return any(
[is_descendant_type(types, parent, base) for parent in parents.split(",")]
)
def defined(key):
return "defined(" + key + ")"
def cdepends(key):
return (
re.sub(r"[a-zA-Z0-9_]+", lambda m: defined(m.group(0)), key)
.replace(",", " || ")
.replace("+", " && ")
)
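# Illustrative example: vk.xml uses ',' for OR and '+' for AND in its 'depends' shorthand, so
#   cdepends("VK_KHR_get_physical_device_properties2,VK_VERSION_1_1")
# returns "defined(VK_KHR_get_physical_device_properties2) || defined(VK_VERSION_1_1)".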
# Remove "defined(..)"
def remove_defined(input_string):
return re.sub(r"defined\((.*?)\)", r"\1", input_string)
def toStr(txt):
# Return the string if it exists, or '' if None
if txt:
return txt
return ""
def get_function(rtype, name, params):
# Build and return the wrapper function definition
fct_args = [] # incoming arguments
call_args = [] # call arguments
for p in params:
ptype = p.find("type")
pname = p.find("name")
papi = p.attrib.get("api")
# Avoid `vulkansc`
if not papi or papi == "vulkan":
fct_args.append(
"".join(
[
toStr(p.text),
ptype.text,
ptype.tail,
pname.text,
toStr(pname.tail),
]
)
) # 'const', 'vkSome', '*', 'some', '[2]'
call_args.append(pname.text)
# Function signature
fct = "VKAPI_ATTR " + rtype + " VKAPI_CALL " + name + "(\n"
# Arguments of the function
fct += "\t" + ", \n\t".join(fct_args) + ") \n"
fct += "{ \n "
# fct += ' assert(pfn_'+name+');\n'
# Check if the function is returning a value
if rtype != "void":
fct += "return "
fct += "pfn_" + name + "(" + ", ".join(call_args) + "); \n"
fct += "}\n"
return fct
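# For illustration (whitespace aside), the kind of trampoline this emits, here for the
# standard Vulkan command vkCmdSetCheckpointNV:
#   VKAPI_ATTR void VKAPI_CALL vkCmdSetCheckpointNV(
#       VkCommandBuffer commandBuffer,
#       const void* pCheckpointMarker)
#   {
#     pfn_vkCmdSetCheckpointNV(commandBuffer, pCheckpointMarker);
#   }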
def get_vk_xml_path(spec_arg):
"""
Find the Vulkan specification XML file by looking for (highest priority to
lowest) an incoming `spec` argument, a local copy within the Vulkan SDK,
or by downloading it from KhronosGroup/Vulkan-Docs.
"""
if spec_arg is not None:
return spec_arg
# VULKAN_SDK is a newer version of VK_SDK_PATH. The Linux Tarball Vulkan SDK
# instructions only say to set VULKAN_SDK - so VULKAN_SDK might exist while
# VK_SDK_PATH might not.
vulkan_sdk_env = os.getenv("VULKAN_SDK")
if vulkan_sdk_env is not None:
local_spec_path = os.path.normpath(
vulkan_sdk_env + "/share/vulkan/registry/vk.xml"
)
if os.path.isfile(local_spec_path):
return local_spec_path
# Ubuntu installations might not have VULKAN_SDK set, but have vk.xml in /usr.
if os.path.isfile("/usr/share/vulkan/registry/vk.xml"):
return "/usr/share/vulkan/registry/vk.xml"
print(
"Warning: no `spec` parameter was provided, and vk.xml could not be "
"found in the path given by the VULKAN_SDK environment variable or in "
"system folders. This script will download the latest copy of vk.xml "
"online, which may be incompatible with an installed Vulkan installation."
)
return "https://raw.githubusercontent.com/KhronosGroup/Vulkan-Docs/main/xml/vk.xml"
#
# MAIN Entry
#
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Generates entry points for Vulkan extensions in extensions_vk.cpp."
)
parser.add_argument(
"--beta",
action="store_true",
help="Includes provisional Vulkan extensions; these extensions are not guaranteed to be consistent across Vulkan SDK versions.",
)
parser.add_argument(
"spec",
type=str,
nargs="?",
help="Optional path to a vk.xml file to use to generate extensions. Otherwise, uses the vk.xml in the Vulkan SDK distribution specified in the VULKAN_SDK environment variable.",
)
args = parser.parse_args()
# Retrieving the XML file
specpath = get_vk_xml_path(args.spec)
spec = parse_xml(specpath)
print("Using " + specpath)
# CODE BLOCKS
blocks = {}
# CODE BLOCKS for generated code
block_keys = ("STATIC_PFN", "LOAD_PROC", "DECLARE", "DEFINE")
for key in block_keys:
blocks[key] = ""
# Retrieving the version of the Vulkan specification
version = spec.find('types/type[name="VK_HEADER_VERSION"]')
blocks["VERSION_INFO"] = (
"// Generated using Vulkan " + version.find("name").tail.strip() + "\n"
)
# Patching the files
patch_file("extensions_vk.cpp", blocks)
# Ordered list of commands per extension group
command_groups = OrderedDict()
instance_commands = set()
for feature in spec.findall("feature"):
api = feature.get("api")
if "vulkan" not in api.split(","):
continue
key = feature.get("name")
cmdrefs = feature.findall("require/command")
command_groups[key] = [cmdref.get("name") for cmdref in cmdrefs]
# Retrieve all extensions, sorted alphabetically
for ext in sorted(
spec.findall("extensions/extension"), key=lambda ext: ext.get("name")
):
# Only add the extension if 'vulkan' is part of the support attribute
supported = ext.get("supported")
if "vulkan" not in supported.split(","):
continue
# Discard beta extensions
if ext.get("provisional") == "true" and not args.beta:
continue
name = ext.get("name")
type = ext.get("type") # device or instance
for req in ext.findall("require"):
# Adding all commands for this extension
key = defined(name)
if req.get("feature"): # old-style XML depends specification
for i in req.get("feature").split(","):
key += " && " + defined(i)
if req.get("extension"): # old-style XML depends specification
for i in req.get("extension").split(","):
key += " && " + defined(i)
if req.get("depends"): # new-style XML depends specification
dep = cdepends(req.get("depends"))
key += " && " + ("(" + dep + ")" if "||" in dep else dep)
cmdrefs = req.findall("command")
# Add ifdef section and split commands with high version
for cmdref in cmdrefs:
ver = cmdversions.get(cmdref.get("name"))
if ver:
command_groups.setdefault(
key + " && " + name.upper() + "_SPEC_VERSION >= " + str(ver), []
).append(cmdref.get("name"))
else:
command_groups.setdefault(key, []).append(cmdref.get("name"))
# Adding commands that are 'instance' instead of 'device'
if type == "instance":
for cmdref in cmdrefs:
instance_commands.add(cmdref.get("name"))
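# At this point the keys of command_groups are either bare core-version names collected above
# (e.g. "VK_VERSION_1_2") or C preprocessor expressions built here, e.g.
# "defined(VK_NV_mesh_shader)", possibly extended with "&& defined(...)" dependency terms.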
# From a command, find which groups it belongs to
commands_to_groups = OrderedDict()
for group, cmdnames in command_groups.items():
for name in cmdnames:
commands_to_groups.setdefault(name, []).append(group)
for group, cmdnames in command_groups.items():
command_groups[group] = [
name for name in cmdnames if len(commands_to_groups[name]) == 1
]
for name, groups in commands_to_groups.items():
if len(groups) == 1:
continue
key = " || ".join([g for g in groups])
command_groups.setdefault(key, []).append(name)
# Finding the alias name for a function: <command
# name="vkGetPhysicalDeviceExternalBufferPropertiesKHR"
# alias="vkGetPhysicalDeviceExternalBufferProperties"/>
commands = {}
for cmd in spec.findall("commands/command"):
if not cmd.get("alias"):
name = cmd.findtext("proto/name")
commands[name] = cmd
for cmd in spec.findall("commands/command"):
if cmd.get("alias"):
name = cmd.get("name")
commands[name] = commands[cmd.get("alias")]
# Find all Vulkan types to be used by is_descendant_type
types = {}
for type in spec.findall("types/type"):
name = type.findtext("name")
if name:
types[name] = type
for key in block_keys:
blocks[key] = ""
# For each group, get the list of all commands
for group, cmdnames in command_groups.items():
# Skipping some extensions
if group in ExcludeList:
continue
ifdef = "#if " + group + "\n"
for key in block_keys:
blocks[key] += ifdef
# Name the NVVK_HAS_ define after the first part of the group
ext_name = group
if "&&" in group:
ext_name = group.split("&&")[0].strip()
elif "||" in group:
ext_name = group.split("||")[0].strip()
if ext_name is not None:
blocks["DEFINE"] += "#define NVVK_HAS_" + remove_defined(ext_name) + "\n"
# Getting all commands within the group
for name in sorted(cmdnames):
# Finding the 'alias' command
cmd = commands[name]
if name in ExportedCommands:
print("Command " + name + " from group " + group)
# Get the first argument type, which defines if it is an instance
# function
type = cmd.findtext("param[1]/type")
# Create the function declaration block
params = cmd.findall("param")
return_type = cmd.findtext("proto/type")
blocks["DECLARE"] += get_function(return_type, name, params)
# Loading proc address can be device or instance
if (
is_descendant_type(types, type, "VkDevice")
and name not in instance_commands
):
blocks["LOAD_PROC"] += (
" pfn_"
+ name
+ " = (PFN_"
+ name
+ ')getDeviceProcAddr(device, "'
+ name
+ '");\n'
)
elif is_descendant_type(types, type, "VkInstance"):
blocks["LOAD_PROC"] += (
" pfn_"
+ name
+ " = (PFN_"
+ name
+ ')getInstanceProcAddr(instance, "'
+ name
+ '");\n'
)
# Create the block of static function pointers
blocks["STATIC_PFN"] += "static PFN_" + name + " pfn_" + name + "= 0;\n"
# Adding the #endif or removing empty blocks
for key in block_keys:
if blocks[key].endswith(ifdef):
blocks[key] = blocks[key][: -len(ifdef)]
else:
blocks[key] += "#endif /* " + remove_defined(group) + " */\n"
# Patching the files
patch_file("extensions_vk.hpp", blocks)
patch_file("extensions_vk.cpp", blocks)

View file

@ -0,0 +1,327 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2020-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "gizmos_vk.hpp"
#include <glm/gtc/type_ptr.hpp>
namespace nvvk {
//#include "E:\temp\glsl\axis.vert.h"
static const uint32_t s_vert_spv[] = {
0x07230203, 0x00010500, 0x0008000a, 0x0000006e, 0x00000000, 0x00020011, 0x00000001, 0x0006000b, 0x00000001,
0x4c534c47, 0x6474732e, 0x3035342e, 0x00000000, 0x0003000e, 0x00000000, 0x00000001, 0x000b000f, 0x00000000,
0x00000004, 0x6e69616d, 0x00000000, 0x0000000c, 0x0000002e, 0x00000032, 0x0000003b, 0x00000041, 0x00000045,
0x00030003, 0x00000002, 0x000001c2, 0x00040005, 0x00000004, 0x6e69616d, 0x00000000, 0x00050005, 0x0000000c,
0x6f727261, 0x65765f77, 0x00007472, 0x00030005, 0x0000002c, 0x00736f70, 0x00060005, 0x0000002e, 0x565f6c67,
0x65747265, 0x646e4978, 0x00007865, 0x00070005, 0x00000032, 0x495f6c67, 0x6174736e, 0x4965636e, 0x7865646e,
0x00000000, 0x00050005, 0x00000039, 0x65746e69, 0x6c6f7072, 0x00746e61, 0x00050006, 0x00000039, 0x00000000,
0x6f6c6f43, 0x00000072, 0x00030005, 0x0000003b, 0x0074754f, 0x00060005, 0x0000003f, 0x505f6c67, 0x65567265,
0x78657472, 0x00000000, 0x00060006, 0x0000003f, 0x00000000, 0x505f6c67, 0x7469736f, 0x006e6f69, 0x00030005,
0x00000041, 0x00000000, 0x00060005, 0x00000043, 0x73755075, 0x6e6f4368, 0x6e617473, 0x00000074, 0x00060006,
0x00000043, 0x00000000, 0x6e617274, 0x726f6673, 0x0000006d, 0x00030005, 0x00000045, 0x00006370, 0x00040047,
0x0000002e, 0x0000000b, 0x0000002a, 0x00040047, 0x00000032, 0x0000000b, 0x0000002b, 0x00030047, 0x00000039,
0x00000002, 0x00040047, 0x0000003b, 0x0000001e, 0x00000000, 0x00050048, 0x0000003f, 0x00000000, 0x0000000b,
0x00000000, 0x00030047, 0x0000003f, 0x00000002, 0x00040048, 0x00000043, 0x00000000, 0x00000005, 0x00050048,
0x00000043, 0x00000000, 0x00000023, 0x00000000, 0x00050048, 0x00000043, 0x00000000, 0x00000007, 0x00000010,
0x00030047, 0x00000043, 0x00000002, 0x00020013, 0x00000002, 0x00030021, 0x00000003, 0x00000002, 0x00030016,
0x00000006, 0x00000020, 0x00040017, 0x00000007, 0x00000006, 0x00000003, 0x00040015, 0x00000008, 0x00000020,
0x00000000, 0x0004002b, 0x00000008, 0x00000009, 0x00000008, 0x0004001c, 0x0000000a, 0x00000007, 0x00000009,
0x00040020, 0x0000000b, 0x00000006, 0x0000000a, 0x0004003b, 0x0000000b, 0x0000000c, 0x00000006, 0x00040015,
0x0000000d, 0x00000020, 0x00000001, 0x0004002b, 0x0000000d, 0x0000000e, 0x00000000, 0x0004002b, 0x00000006,
0x0000000f, 0x3f800000, 0x0004002b, 0x00000006, 0x00000010, 0x00000000, 0x0006002c, 0x00000007, 0x00000011,
0x0000000f, 0x00000010, 0x00000010, 0x00040020, 0x00000012, 0x00000006, 0x00000007, 0x0004002b, 0x0000000d,
0x00000014, 0x00000001, 0x0004002b, 0x00000006, 0x00000015, 0x3f400000, 0x0004002b, 0x00000006, 0x00000016,
0x3dcccccd, 0x0006002c, 0x00000007, 0x00000017, 0x00000015, 0x00000016, 0x00000016, 0x0004002b, 0x0000000d,
0x00000019, 0x00000002, 0x0004002b, 0x00000006, 0x0000001a, 0xbdcccccd, 0x0006002c, 0x00000007, 0x0000001b,
0x00000015, 0x00000016, 0x0000001a, 0x0004002b, 0x0000000d, 0x0000001d, 0x00000003, 0x0006002c, 0x00000007,
0x0000001e, 0x00000015, 0x0000001a, 0x0000001a, 0x0004002b, 0x0000000d, 0x00000020, 0x00000004, 0x0006002c,
0x00000007, 0x00000021, 0x00000015, 0x0000001a, 0x00000016, 0x0004002b, 0x0000000d, 0x00000023, 0x00000005,
0x0004002b, 0x0000000d, 0x00000025, 0x00000006, 0x0006002c, 0x00000007, 0x00000026, 0x00000010, 0x00000010,
0x00000010, 0x0004002b, 0x0000000d, 0x00000028, 0x00000007, 0x0006002c, 0x00000007, 0x00000029, 0x00000015,
0x00000010, 0x00000010, 0x00040020, 0x0000002b, 0x00000007, 0x00000007, 0x00040020, 0x0000002d, 0x00000001,
0x0000000d, 0x0004003b, 0x0000002d, 0x0000002e, 0x00000001, 0x0004003b, 0x0000002d, 0x00000032, 0x00000001,
0x00020014, 0x00000034, 0x00040017, 0x00000038, 0x00000006, 0x00000004, 0x0003001e, 0x00000039, 0x00000038,
0x00040020, 0x0000003a, 0x00000003, 0x00000039, 0x0004003b, 0x0000003a, 0x0000003b, 0x00000003, 0x0007002c,
0x00000038, 0x0000003c, 0x0000000f, 0x00000010, 0x00000010, 0x0000000f, 0x00040020, 0x0000003d, 0x00000003,
0x00000038, 0x0003001e, 0x0000003f, 0x00000038, 0x00040020, 0x00000040, 0x00000003, 0x0000003f, 0x0004003b,
0x00000040, 0x00000041, 0x00000003, 0x00040018, 0x00000042, 0x00000038, 0x00000004, 0x0003001e, 0x00000043,
0x00000042, 0x00040020, 0x00000044, 0x00000009, 0x00000043, 0x0004003b, 0x00000044, 0x00000045, 0x00000009,
0x00040020, 0x00000046, 0x00000009, 0x00000042, 0x0007002c, 0x00000038, 0x00000055, 0x00000010, 0x0000000f,
0x00000010, 0x0000000f, 0x0007002c, 0x00000038, 0x00000062, 0x00000010, 0x00000010, 0x0000000f, 0x0000000f,
0x00050036, 0x00000002, 0x00000004, 0x00000000, 0x00000003, 0x000200f8, 0x00000005, 0x0004003b, 0x0000002b,
0x0000002c, 0x00000007, 0x00050041, 0x00000012, 0x00000013, 0x0000000c, 0x0000000e, 0x0003003e, 0x00000013,
0x00000011, 0x00050041, 0x00000012, 0x00000018, 0x0000000c, 0x00000014, 0x0003003e, 0x00000018, 0x00000017,
0x00050041, 0x00000012, 0x0000001c, 0x0000000c, 0x00000019, 0x0003003e, 0x0000001c, 0x0000001b, 0x00050041,
0x00000012, 0x0000001f, 0x0000000c, 0x0000001d, 0x0003003e, 0x0000001f, 0x0000001e, 0x00050041, 0x00000012,
0x00000022, 0x0000000c, 0x00000020, 0x0003003e, 0x00000022, 0x00000021, 0x00050041, 0x00000012, 0x00000024,
0x0000000c, 0x00000023, 0x0003003e, 0x00000024, 0x00000017, 0x00050041, 0x00000012, 0x00000027, 0x0000000c,
0x00000025, 0x0003003e, 0x00000027, 0x00000026, 0x00050041, 0x00000012, 0x0000002a, 0x0000000c, 0x00000028,
0x0003003e, 0x0000002a, 0x00000029, 0x0004003d, 0x0000000d, 0x0000002f, 0x0000002e, 0x00050041, 0x00000012,
0x00000030, 0x0000000c, 0x0000002f, 0x0004003d, 0x00000007, 0x00000031, 0x00000030, 0x0003003e, 0x0000002c,
0x00000031, 0x0004003d, 0x0000000d, 0x00000033, 0x00000032, 0x000500aa, 0x00000034, 0x00000035, 0x00000033,
0x0000000e, 0x000300f7, 0x00000037, 0x00000000, 0x000400fa, 0x00000035, 0x00000036, 0x00000050, 0x000200f8,
0x00000036, 0x00050041, 0x0000003d, 0x0000003e, 0x0000003b, 0x0000000e, 0x0003003e, 0x0000003e, 0x0000003c,
0x00050041, 0x00000046, 0x00000047, 0x00000045, 0x0000000e, 0x0004003d, 0x00000042, 0x00000048, 0x00000047,
0x0004003d, 0x00000007, 0x00000049, 0x0000002c, 0x00050051, 0x00000006, 0x0000004a, 0x00000049, 0x00000000,
0x00050051, 0x00000006, 0x0000004b, 0x00000049, 0x00000001, 0x00050051, 0x00000006, 0x0000004c, 0x00000049,
0x00000002, 0x00070050, 0x00000038, 0x0000004d, 0x0000004a, 0x0000004b, 0x0000004c, 0x0000000f, 0x00050091,
0x00000038, 0x0000004e, 0x00000048, 0x0000004d, 0x00050041, 0x0000003d, 0x0000004f, 0x00000041, 0x0000000e,
0x0003003e, 0x0000004f, 0x0000004e, 0x000200f9, 0x00000037, 0x000200f8, 0x00000050, 0x0004003d, 0x0000000d,
0x00000051, 0x00000032, 0x000500aa, 0x00000034, 0x00000052, 0x00000051, 0x00000014, 0x000300f7, 0x00000054,
0x00000000, 0x000400fa, 0x00000052, 0x00000053, 0x00000061, 0x000200f8, 0x00000053, 0x00050041, 0x0000003d,
0x00000056, 0x0000003b, 0x0000000e, 0x0003003e, 0x00000056, 0x00000055, 0x00050041, 0x00000046, 0x00000057,
0x00000045, 0x0000000e, 0x0004003d, 0x00000042, 0x00000058, 0x00000057, 0x0004003d, 0x00000007, 0x00000059,
0x0000002c, 0x0008004f, 0x00000007, 0x0000005a, 0x00000059, 0x00000059, 0x00000001, 0x00000000, 0x00000002,
0x00050051, 0x00000006, 0x0000005b, 0x0000005a, 0x00000000, 0x00050051, 0x00000006, 0x0000005c, 0x0000005a,
0x00000001, 0x00050051, 0x00000006, 0x0000005d, 0x0000005a, 0x00000002, 0x00070050, 0x00000038, 0x0000005e,
0x0000005b, 0x0000005c, 0x0000005d, 0x0000000f, 0x00050091, 0x00000038, 0x0000005f, 0x00000058, 0x0000005e,
0x00050041, 0x0000003d, 0x00000060, 0x00000041, 0x0000000e, 0x0003003e, 0x00000060, 0x0000005f, 0x000200f9,
0x00000054, 0x000200f8, 0x00000061, 0x00050041, 0x0000003d, 0x00000063, 0x0000003b, 0x0000000e, 0x0003003e,
0x00000063, 0x00000062, 0x00050041, 0x00000046, 0x00000064, 0x00000045, 0x0000000e, 0x0004003d, 0x00000042,
0x00000065, 0x00000064, 0x0004003d, 0x00000007, 0x00000066, 0x0000002c, 0x0008004f, 0x00000007, 0x00000067,
0x00000066, 0x00000066, 0x00000001, 0x00000002, 0x00000000, 0x00050051, 0x00000006, 0x00000068, 0x00000067,
0x00000000, 0x00050051, 0x00000006, 0x00000069, 0x00000067, 0x00000001, 0x00050051, 0x00000006, 0x0000006a,
0x00000067, 0x00000002, 0x00070050, 0x00000038, 0x0000006b, 0x00000068, 0x00000069, 0x0000006a, 0x0000000f,
0x00050091, 0x00000038, 0x0000006c, 0x00000065, 0x0000006b, 0x00050041, 0x0000003d, 0x0000006d, 0x00000041,
0x0000000e, 0x0003003e, 0x0000006d, 0x0000006c, 0x000200f9, 0x00000054, 0x000200f8, 0x00000054, 0x000200f9,
0x00000037, 0x000200f8, 0x00000037, 0x000100fd, 0x00010038};
//#include "E:\temp\glsl\axis.frag.h"
static const uint32_t s_frag_spv[] = {
0x07230203, 0x00010500, 0x0008000a, 0x00000012, 0x00000000, 0x00020011, 0x00000001, 0x0006000b, 0x00000001,
0x4c534c47, 0x6474732e, 0x3035342e, 0x00000000, 0x0003000e, 0x00000000, 0x00000001, 0x0007000f, 0x00000004,
0x00000004, 0x6e69616d, 0x00000000, 0x00000009, 0x0000000c, 0x00030010, 0x00000004, 0x00000007, 0x00030003,
0x00000002, 0x000001c2, 0x00040005, 0x00000004, 0x6e69616d, 0x00000000, 0x00040005, 0x00000009, 0x6c6f4366,
0x0000726f, 0x00050005, 0x0000000a, 0x65746e69, 0x6c6f7072, 0x00746e61, 0x00050006, 0x0000000a, 0x00000000,
0x6f6c6f43, 0x00000072, 0x00030005, 0x0000000c, 0x00006e49, 0x00040047, 0x00000009, 0x0000001e, 0x00000000,
0x00030047, 0x0000000a, 0x00000002, 0x00040047, 0x0000000c, 0x0000001e, 0x00000000, 0x00020013, 0x00000002,
0x00030021, 0x00000003, 0x00000002, 0x00030016, 0x00000006, 0x00000020, 0x00040017, 0x00000007, 0x00000006,
0x00000004, 0x00040020, 0x00000008, 0x00000003, 0x00000007, 0x0004003b, 0x00000008, 0x00000009, 0x00000003,
0x0003001e, 0x0000000a, 0x00000007, 0x00040020, 0x0000000b, 0x00000001, 0x0000000a, 0x0004003b, 0x0000000b,
0x0000000c, 0x00000001, 0x00040015, 0x0000000d, 0x00000020, 0x00000001, 0x0004002b, 0x0000000d, 0x0000000e,
0x00000000, 0x00040020, 0x0000000f, 0x00000001, 0x00000007, 0x00050036, 0x00000002, 0x00000004, 0x00000000,
0x00000003, 0x000200f8, 0x00000005, 0x00050041, 0x0000000f, 0x00000010, 0x0000000c, 0x0000000e, 0x0004003d,
0x00000007, 0x00000011, 0x00000010, 0x0003003e, 0x00000009, 0x00000011, 0x000100fd, 0x00010038};
//--------------------------------------------------------------------------------------------------
//
//
void AxisVK::display(VkCommandBuffer cmdBuf, const glm::mat4& transform, const VkExtent2D& screenSize)
{
vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineTriangleFan);
// Setup viewport:
VkViewport viewport{};
viewport.width = float(screenSize.width);
viewport.height = float(screenSize.height);
viewport.minDepth = 0;
viewport.maxDepth = 1;
VkRect2D rect;
rect.offset = VkOffset2D{0, 0};
rect.extent = VkExtent2D{screenSize.width, screenSize.height};
vkCmdSetViewport(cmdBuf, 0, 1, &viewport);
vkCmdSetScissor(cmdBuf, 0, 1, &rect);
// Set the orthographic matrix in the bottom left corner
{
const float pixelW = m_axisSize / screenSize.width;
const float pixelH = m_axisSize / screenSize.height;
const glm::mat4 matOrtho = {pixelW * .8f, 0.0f, 0.0f, 0.0f, //
0.0f, -pixelH * .8f, 0.0f, 0.0f, //
0.0f, 0.0f, -0.5f, 0.0f, //
-1.f + pixelW, 1.f - pixelH, 0.5f, 1.0f};
glm::mat4 modelView = transform;
modelView[3] = glm::vec4(0, 0, 0, 1);
modelView = matOrtho * modelView;
// Push the matrix to the shader
vkCmdPushConstants(cmdBuf, m_pipelineLayout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(glm::mat4), glm::value_ptr(modelView));
}
// Draw the tip of the arrow 3 times (one instance per axis); the shader flips the orientation and sets the color
vkCmdDraw(cmdBuf, 6, 3, 0, 0);
// Now draw the shaft of the arrow using the last 2 vertices of the buffer (offset 6)
vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipelineLines);
vkCmdDraw(cmdBuf, 2, 3, 6, 0);
}
void AxisVK::createAxisObject(CreateAxisInfo& info)
{
// The shader needs push constants: the transformation matrix
const VkPushConstantRange push_constants{VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(glm::mat4)};
VkPipelineLayoutCreateInfo layout_info{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
layout_info.pushConstantRangeCount = 1;
layout_info.pPushConstantRanges = &push_constants;
vkCreatePipelineLayout(m_device, &layout_info, nullptr, &m_pipelineLayout);
// Creation of the pipeline
VkShaderModule smVertex;
VkShaderModule smFrag;
VkShaderModuleCreateInfo createInfo{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO};
createInfo.codeSize = sizeof(s_vert_spv);
createInfo.pCode = s_vert_spv;
vkCreateShaderModule(m_device, &createInfo, nullptr, &smVertex);
createInfo.codeSize = sizeof(s_frag_spv);
createInfo.pCode = s_frag_spv;
vkCreateShaderModule(m_device, &createInfo, nullptr, &smFrag);
// Pipeline state
nvvk::GraphicsPipelineState gps;
gps.inputAssemblyState.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
gps.rasterizationState.cullMode = VK_CULL_MODE_NONE;
gps.depthStencilState.depthTestEnable = VK_TRUE;
gps.depthStencilState.stencilTestEnable = VK_FALSE;
gps.depthStencilState.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL;
// Creating the tips
nvvk::GraphicsPipelineGenerator gpg(m_device, m_pipelineLayout, info.renderPass, gps);
gpg.addShader(smVertex, VK_SHADER_STAGE_VERTEX_BIT);
gpg.addShader(smFrag, VK_SHADER_STAGE_FRAGMENT_BIT);
// Dynamic Rendering
VkPipelineRenderingCreateInfoKHR rfInfo{VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR};
if(info.renderPass == VK_NULL_HANDLE)
{
rfInfo.colorAttachmentCount = static_cast<uint32_t>(info.colorFormat.size());
rfInfo.pColorAttachmentFormats = info.colorFormat.data();
rfInfo.depthAttachmentFormat = info.depthFormat;
rfInfo.stencilAttachmentFormat = info.stencilFormat;
gpg.createInfo.pNext = &rfInfo;
}
m_pipelineTriangleFan = gpg.createPipeline();
// Creating the lines
gps.inputAssemblyState.topology = VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
m_pipelineLines = gpg.createPipeline();
vkDestroyShaderModule(m_device, smVertex, nullptr);
vkDestroyShaderModule(m_device, smFrag, nullptr);
}
// glsl_shader.vert, compiled with: (see comment)
/*************************************************
#version 450 core
// glslangValidator.exe --target-env vulkan1.2 --vn s_vert_spv -o axis.vert.h axis.vert
layout(push_constant) uniform uPushConstant
{
mat4 transform;
}
pc;
out gl_PerVertex
{
vec4 gl_Position;
};
layout(location = 0) out interpolant
{
vec4 Color;
}
Out;
// Arrow along the x axis
const float asize = 1.0f; // length of arrow
const float atip = 0.1f; // width of arrow tip
const float abase = 0.66f; // 0.25 == tip length
vec3 arrow_vert[8];
void main()
{
arrow_vert[0] = vec3(asize, 0, 0); // Tip
arrow_vert[1] = vec3(abase, atip, atip);
arrow_vert[2] = vec3(abase, atip, -atip);
arrow_vert[3] = vec3(abase, -atip, -atip);
arrow_vert[4] = vec3(abase, -atip, atip);
arrow_vert[5] = vec3(abase, atip, atip);
arrow_vert[6] = vec3(0, 0, 0); // To draw the line
arrow_vert[7] = vec3(abase, 0, 0); // To draw the line
// const float t = 0.04f;
// arrow_vert[6] = vec3(0, t, t); // To draw the line
// arrow_vert[7] = vec3(abase, t, t); // To draw the line
// arrow_vert[8] = vec3(0, -t, t); // To draw the line
// arrow_vert[9] = vec3(abase, -t, t); // To draw the line
// //
// arrow_vert[10] = vec3(0, -t, -t); // To draw the line
// arrow_vert[11] = vec3(abase, -t, -t); // To draw the line
// //
// arrow_vert[12] = vec3(0, t, -t); // To draw the line
// arrow_vert[13] = vec3(abase, t, -t); // To draw the line
//
// arrow_vert[14] = vec3(0, t, t); // To draw the line
// arrow_vert[15] = vec3(abase, t, t); // To draw the line
vec3 pos = arrow_vert[gl_VertexIndex];
// Out.Color = aColor;
if (gl_InstanceIndex == 0)
{
Out.Color = vec4(1, 0, 0, 1);
gl_Position = pc.transform * vec4(pos.xyz, 1);
}
else if (gl_InstanceIndex == 1)
{
Out.Color = vec4(0, 1, 0, 1);
gl_Position = pc.transform * vec4(pos.yxz, 1);
}
else
{
Out.Color = vec4(0, 0, 1, 1);
gl_Position = pc.transform * vec4(pos.yzx, 1);
}
}
*********************/
// glsl_shader.frag
/*************************************************
#version 450 core
// glslangValidator.exe --target-env vulkan1.2 --vn s_frag_spv -o axis.frag.h axis.frag
layout(location = 0) out vec4 fColor;
layout(location = 0) in interpolant
{
vec4 Color;
}
In;
void main()
{
fColor = In.Color;
}
*********************/
} // namespace nvvk

View file

@ -0,0 +1,101 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <array>
#include <vector>
#include <vulkan/vulkan_core.h>
#include <glm/glm.hpp>
#include "nvvk/pipeline_vk.hpp" // Using the Pipeline Generator Utility
namespace nvvk {
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# class nvvk::AxisVK
nvvk::AxisVK displays an axis gizmo representing the orientation of the camera in the bottom-left corner of the window.
- Initialize the axis using `init()`
- Call `display()` inside an inline rendering pass, as one of the last commands
Example:
```cpp
m_axis.display(cmdBuf, CameraManip.getMatrix(), windowSize);
```
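A fuller sketch (illustrative; `m_axis`, `m_device`, `renderPass`, `cmdBuf` and `windowSize` are assumptions):
```cpp
nvvk::AxisVK m_axis;
m_axis.init(m_device, renderPass);                            // once, at setup time
// per frame, inside the render pass, as one of the last commands:
m_axis.display(cmdBuf, CameraManip.getMatrix(), windowSize);
// at shutdown, before the device is destroyed:
m_axis.deinit();
```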
@DOC_END */
class AxisVK
{
public:
struct CreateAxisInfo
{
VkRenderPass renderPass{VK_NULL_HANDLE};
uint32_t subpass{0};
std::vector<VkFormat> colorFormat;
VkFormat depthFormat{};
VkFormat stencilFormat{};
float axisSize{50.f};
};
void init(VkDevice device, VkRenderPass renderPass, uint32_t subpass = 0, float axisSize = 50.f)
{
m_device = device;
m_axisSize = axisSize;
CreateAxisInfo info;
info.renderPass = renderPass;
info.subpass = subpass;
createAxisObject(info);
}
void init(VkDevice device, CreateAxisInfo info)
{
m_device = device;
m_axisSize = info.axisSize;
createAxisObject(info);
}
void deinit()
{
vkDestroyPipeline(m_device, m_pipelineTriangleFan, nullptr);
vkDestroyPipeline(m_device, m_pipelineLines, nullptr);
vkDestroyPipelineLayout(m_device, m_pipelineLayout, nullptr);
}
void display(VkCommandBuffer cmdBuf, const glm::mat4& transform, const VkExtent2D& screenSize);
void setAxisSize(float s) { m_axisSize = s; }
private:
void createAxisObject(CreateAxisInfo& info);
VkPipeline m_pipelineTriangleFan = {};
VkPipeline m_pipelineLines = {};
VkPipelineLayout m_pipelineLayout = {};
float m_axisSize = 50.f; // Size in pixel
VkDevice m_device{VK_NULL_HANDLE};
};
} // namespace nvvk

View file

@ -0,0 +1,318 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "images_vk.hpp"
#include <cassert>
namespace nvvk {
VkImageMemoryBarrier makeImageMemoryBarrier(VkImage img,
VkAccessFlags srcAccess,
VkAccessFlags dstAccess,
VkImageLayout oldLayout,
VkImageLayout newLayout,
VkImageAspectFlags aspectMask)
{
VkImageMemoryBarrier barrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
barrier.srcAccessMask = srcAccess;
barrier.dstAccessMask = dstAccess;
barrier.oldLayout = oldLayout;
barrier.newLayout = newLayout;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = img;
barrier.subresourceRange = {0};
barrier.subresourceRange.aspectMask = aspectMask;
barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS;
barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS;
return barrier;
}
///////////////////////////////////////////////////////////////////////////////
// Return the access flag for an image layout
VkAccessFlags accessFlagsForImageLayout(VkImageLayout layout)
{
switch(layout)
{
case VK_IMAGE_LAYOUT_PREINITIALIZED:
return VK_ACCESS_HOST_WRITE_BIT;
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
return VK_ACCESS_TRANSFER_WRITE_BIT;
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
return VK_ACCESS_TRANSFER_READ_BIT;
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
return VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
return VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
return VK_ACCESS_SHADER_READ_BIT;
default:
return VkAccessFlags();
}
}
VkPipelineStageFlags pipelineStageForLayout(VkImageLayout layout)
{
switch(layout)
{
case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
return VK_PIPELINE_STAGE_TRANSFER_BIT;
case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
return VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
return VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; // We do this to allow queues other than graphics
// return VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
return VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; // We do this to allow queues other than graphics
// return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
case VK_IMAGE_LAYOUT_PREINITIALIZED:
return VK_PIPELINE_STAGE_HOST_BIT;
case VK_IMAGE_LAYOUT_UNDEFINED:
return VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
default:
return VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
}
}
void cmdBarrierImageLayout(VkCommandBuffer cmdbuffer,
VkImage image,
VkImageLayout oldImageLayout,
VkImageLayout newImageLayout,
const VkImageSubresourceRange& subresourceRange)
{
// Create an image barrier to change the layout
VkImageMemoryBarrier imageMemoryBarrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
imageMemoryBarrier.oldLayout = oldImageLayout;
imageMemoryBarrier.newLayout = newImageLayout;
imageMemoryBarrier.image = image;
imageMemoryBarrier.subresourceRange = subresourceRange;
imageMemoryBarrier.srcAccessMask = accessFlagsForImageLayout(oldImageLayout);
imageMemoryBarrier.dstAccessMask = accessFlagsForImageLayout(newImageLayout);
// Fix for a validation issue - should be needed when VkImage sharing mode is VK_SHARING_MODE_EXCLUSIVE
// and the values of srcQueueFamilyIndex and dstQueueFamilyIndex are equal, no ownership transfer is performed,
// and the barrier operates as if they were both set to VK_QUEUE_FAMILY_IGNORED.
imageMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
imageMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
VkPipelineStageFlags srcStageMask = pipelineStageForLayout(oldImageLayout);
VkPipelineStageFlags destStageMask = pipelineStageForLayout(newImageLayout);
vkCmdPipelineBarrier(cmdbuffer, srcStageMask, destStageMask, 0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier);
}
void cmdBarrierImageLayout(VkCommandBuffer cmdbuffer, VkImage image, VkImageLayout oldImageLayout, VkImageLayout newImageLayout, VkImageAspectFlags aspectMask)
{
VkImageSubresourceRange subresourceRange;
subresourceRange.aspectMask = aspectMask;
subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS;
subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS;
subresourceRange.baseMipLevel = 0;
subresourceRange.baseArrayLayer = 0;
cmdBarrierImageLayout(cmdbuffer, image, oldImageLayout, newImageLayout, subresourceRange);
}
VkImageCreateInfo makeImage2DCreateInfo(const VkExtent2D& size, VkFormat format, VkImageUsageFlags usage, bool mipmaps)
{
VkImageCreateInfo icInfo = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
icInfo.imageType = VK_IMAGE_TYPE_2D;
icInfo.format = format;
icInfo.samples = VK_SAMPLE_COUNT_1_BIT;
icInfo.mipLevels = mipmaps ? mipLevels(size) : 1;
icInfo.arrayLayers = 1;
icInfo.extent.width = size.width;
icInfo.extent.height = size.height;
icInfo.extent.depth = 1;
icInfo.usage = usage | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
return icInfo;
}
VkImageViewCreateInfo makeImage2DViewCreateInfo(VkImage image,
VkFormat format /*= VK_FORMAT_R8G8B8A8_UNORM*/,
VkImageAspectFlags aspectFlags /*= VK_IMAGE_ASPECT_COLOR_BIT*/,
uint32_t levels /*= 1*/,
const void* pNextImageView /*= nullptr*/)
{
VkImageViewCreateInfo viewInfo{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
viewInfo.pNext = pNextImageView;
viewInfo.image = image;
viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
viewInfo.format = format;
viewInfo.subresourceRange.aspectMask = aspectFlags;
viewInfo.subresourceRange.baseMipLevel = 0;
viewInfo.subresourceRange.levelCount = levels;
viewInfo.subresourceRange.baseArrayLayer = 0;
viewInfo.subresourceRange.layerCount = 1;
return viewInfo;
}
VkImageViewCreateInfo makeImageViewCreateInfo(VkImage image, const VkImageCreateInfo& imageInfo, bool isCube)
{
VkImageViewCreateInfo viewInfo{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
viewInfo.pNext = nullptr;
viewInfo.image = image;
switch(imageInfo.imageType)
{
case VK_IMAGE_TYPE_1D:
viewInfo.viewType = VK_IMAGE_VIEW_TYPE_1D;
break;
case VK_IMAGE_TYPE_2D:
viewInfo.viewType = isCube ? VK_IMAGE_VIEW_TYPE_CUBE : VK_IMAGE_VIEW_TYPE_2D;
break;
case VK_IMAGE_TYPE_3D:
viewInfo.viewType = VK_IMAGE_VIEW_TYPE_3D;
break;
default:
assert(0);
}
viewInfo.format = imageInfo.format;
viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
viewInfo.subresourceRange.baseMipLevel = 0;
viewInfo.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS;
viewInfo.subresourceRange.baseArrayLayer = 0;
viewInfo.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS;
return viewInfo;
}
VkImageCreateInfo makeImage3DCreateInfo(const VkExtent3D& size, VkFormat format, VkImageUsageFlags usage, bool mipmaps)
{
VkImageCreateInfo icInfo = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
icInfo.imageType = VK_IMAGE_TYPE_3D;
icInfo.format = format;
icInfo.mipLevels = mipmaps ? mipLevels(size) : 1;
icInfo.arrayLayers = 1;
icInfo.samples = VK_SAMPLE_COUNT_1_BIT;
icInfo.extent.width = size.width;
icInfo.extent.height = size.height;
icInfo.extent.depth = size.depth;
icInfo.usage = usage | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
return icInfo;
}
VkImageCreateInfo makeImageCubeCreateInfo(const VkExtent2D& size, VkFormat format, VkImageUsageFlags usage, bool mipmaps)
{
VkImageCreateInfo icInfo{VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
icInfo.imageType = VK_IMAGE_TYPE_2D;
icInfo.format = format;
icInfo.mipLevels = mipmaps ? mipLevels(size) : 1;
icInfo.arrayLayers = 6;
icInfo.samples = VK_SAMPLE_COUNT_1_BIT;
icInfo.extent.width = size.width;
icInfo.extent.height = size.height;
icInfo.extent.depth = 1;
icInfo.usage = usage | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
icInfo.flags = VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
return icInfo;
}
// This mipmap generation relies on blitting.
// A more sophisticated version could be done with a compute shader;
// we will publish how to do this in the future.
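// Illustrative usage (a sketch; `cmdBuf`, `image` and `size` are assumptions): record into a
// command buffer after uploading mip level 0, e.g.
//   nvvk::cmdGenerateMipmaps(cmdBuf, image, VK_FORMAT_R8G8B8A8_UNORM, size, nvvk::mipLevels(size));
// The image is expected to be in the 'currentLayout' passed as the last parameter
// (VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL by default) and is returned to that layout afterwards.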
void cmdGenerateMipmaps(VkCommandBuffer cmdBuf, VkImage image, VkFormat imageFormat, const VkExtent2D& size, uint32_t levelCount, uint32_t layerCount, VkImageLayout currentLayout)
{
// Transition the top mip level to VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL and its access to VK_ACCESS_TRANSFER_READ_BIT
VkImageMemoryBarrier barrier{VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
barrier.subresourceRange.baseArrayLayer = 0;
barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
barrier.subresourceRange.baseMipLevel = 0;
barrier.subresourceRange.layerCount = layerCount;
barrier.subresourceRange.levelCount = 1;
barrier.image = image;
barrier.oldLayout = currentLayout;
barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
barrier.srcAccessMask = accessFlagsForImageLayout(currentLayout);
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
vkCmdPipelineBarrier(cmdBuf, pipelineStageForLayout(currentLayout), VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0,
nullptr, 1, &barrier);
if(levelCount > 1)
{
// transfer remaining mips to DST optimal
barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.subresourceRange.baseMipLevel = 1;
barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS;
vkCmdPipelineBarrier(cmdBuf, pipelineStageForLayout(currentLayout), VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr,
0, nullptr, 1, &barrier);
}
int32_t mipWidth = size.width;
int32_t mipHeight = size.height;
for(uint32_t i = 1; i < levelCount; i++)
{
VkImageBlit blit;
blit.srcOffsets[0] = {0, 0, 0};
blit.srcOffsets[1] = {mipWidth, mipHeight, 1};
blit.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
blit.srcSubresource.mipLevel = i - 1;
blit.srcSubresource.baseArrayLayer = 0;
blit.srcSubresource.layerCount = layerCount;
blit.dstOffsets[0] = {0, 0, 0};
blit.dstOffsets[1] = {mipWidth > 1 ? mipWidth / 2 : 1, mipHeight > 1 ? mipHeight / 2 : 1, 1};
blit.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
blit.dstSubresource.mipLevel = i;
blit.dstSubresource.baseArrayLayer = 0;
blit.dstSubresource.layerCount = layerCount;
vkCmdBlitImage(cmdBuf, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
&blit, VK_FILTER_LINEAR);
// Next
{
// Transition the current mip level to VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, to be used as the source for the next one.
barrier.subresourceRange.baseMipLevel = i;
barrier.subresourceRange.levelCount = 1;
barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0,
nullptr, 1, &barrier);
}
if(mipWidth > 1)
mipWidth /= 2;
if(mipHeight > 1)
mipHeight /= 2;
}
// Transition all miplevels (now in VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) back to currentLayout
barrier.subresourceRange.baseMipLevel = 0;
barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS;
barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
barrier.newLayout = currentLayout;
barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
barrier.dstAccessMask = accessFlagsForImageLayout(currentLayout);
vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, pipelineStageForLayout(currentLayout), 0, 0, nullptr, 0,
nullptr, 1, &barrier);
}
} // namespace nvvk

View file

@ -0,0 +1,123 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <vulkan/vulkan_core.h>
#include <algorithm>
#include <cmath>
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# functions in nvvk
- makeImageMemoryBarrier : returns VkImageMemoryBarrier for an image based on provided layouts and access flags.
- mipLevels : returns the number of mip levels for a 2d/3d extent
- accessFlagsForImageLayout : helps resource transitions
- pipelineStageForLayout : helps resource transitions
- cmdBarrierImageLayout : inserts a barrier for an image layout transition
- cmdGenerateMipmaps : basic mipmap creation for images (meant for one-shot operations)
- makeImage2DCreateInfo : aids 2d image creation
- makeImage3DCreateInfo : aids 3d image creation
- makeImageCubeCreateInfo : aids cube image creation
- makeImageViewCreateInfo : aids common image view creation, derives info from VkImageCreateInfo
- makeImage2DViewCreateInfo : aids 2d image view creation
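Example (an illustrative sketch; `image`, `cmdBuf` and `size` are assumptions, and the actual
image creation/memory binding is done elsewhere, e.g. by a resource allocator):
```cpp
VkImageCreateInfo info = nvvk::makeImage2DCreateInfo(size, VK_FORMAT_R8G8B8A8_UNORM,
                                                     VK_IMAGE_USAGE_SAMPLED_BIT, true /*mipmaps*/);
// ... create `image` from `info`, bind memory and upload mip level 0 ...
nvvk::cmdBarrierImageLayout(cmdBuf, image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
nvvk::cmdGenerateMipmaps(cmdBuf, image, info.format, size, info.mipLevels);
```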
@DOC_END */
VkImageMemoryBarrier makeImageMemoryBarrier(VkImage image,
VkAccessFlags srcAccess,
VkAccessFlags dstAccess,
VkImageLayout oldLayout,
VkImageLayout newLayout,
VkImageAspectFlags aspectMask = VK_IMAGE_ASPECT_COLOR_BIT);
//--------------------------------------------------------------------------------------------------
inline uint32_t mipLevels(VkExtent2D extent)
{
return static_cast<uint32_t>(std::floor(std::log2(std::max(extent.width, extent.height)))) + 1;
}
inline uint32_t mipLevels(VkExtent3D extent)
{
return static_cast<uint32_t>(std::floor(std::log2(std::max(extent.width, extent.height)))) + 1;
}
//--------------------------------------------------------------------------------------------------
// Transition Pipeline Layout tools
VkAccessFlags accessFlagsForImageLayout(VkImageLayout layout);
VkPipelineStageFlags pipelineStageForLayout(VkImageLayout layout);
void cmdBarrierImageLayout(VkCommandBuffer cmdbuffer,
VkImage image,
VkImageLayout oldImageLayout,
VkImageLayout newImageLayout,
const VkImageSubresourceRange& subresourceRange);
void cmdBarrierImageLayout(VkCommandBuffer cmdbuffer, VkImage image, VkImageLayout oldImageLayout, VkImageLayout newImageLayout, VkImageAspectFlags aspectMask);
inline void cmdBarrierImageLayout(VkCommandBuffer cmdbuffer, VkImage image, VkImageLayout oldImageLayout, VkImageLayout newImageLayout)
{
cmdBarrierImageLayout(cmdbuffer, image, oldImageLayout, newImageLayout, VK_IMAGE_ASPECT_COLOR_BIT);
}
VkImageCreateInfo makeImage3DCreateInfo(const VkExtent3D& size,
VkFormat format = VK_FORMAT_R8G8B8A8_UNORM,
VkImageUsageFlags usage = VK_IMAGE_USAGE_SAMPLED_BIT,
bool mipmaps = false);
VkImageCreateInfo makeImage2DCreateInfo(const VkExtent2D& size,
VkFormat format = VK_FORMAT_R8G8B8A8_UNORM,
VkImageUsageFlags usage = VK_IMAGE_USAGE_SAMPLED_BIT,
bool mipmaps = false);
VkImageCreateInfo makeImageCubeCreateInfo(const VkExtent2D& size,
VkFormat format = VK_FORMAT_R8G8B8A8_UNORM,
VkImageUsageFlags usage = VK_IMAGE_USAGE_SAMPLED_BIT,
bool mipmaps = false);
// derives format and view type from imageInfo, special case for IMAGE_2D to treat as cube
// view enables all mips and layers
VkImageViewCreateInfo makeImageViewCreateInfo(VkImage image, const VkImageCreateInfo& imageInfo, bool isCube = false);
VkImageViewCreateInfo makeImage2DViewCreateInfo(VkImage image,
VkFormat format = VK_FORMAT_R8G8B8A8_UNORM,
VkImageAspectFlags aspectFlags = VK_IMAGE_ASPECT_COLOR_BIT,
uint32_t levels = VK_REMAINING_MIP_LEVELS,
const void* pNextImageView = nullptr);
void cmdGenerateMipmaps(VkCommandBuffer cmdBuf,
VkImage image,
VkFormat imageFormat,
const VkExtent2D& size,
uint32_t levelCount,
uint32_t layerCount = 1,
VkImageLayout currentLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
} // namespace nvvk

View file

@ -0,0 +1,186 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "memallocator_dedicated_vk.hpp"
#include "error_vk.hpp"
#include "debug_util_vk.hpp"
#include <cassert>
namespace nvvk {
class DedicatedMemoryHandle : public MemHandleBase
{
public:
DedicatedMemoryHandle() = default;
DedicatedMemoryHandle(const DedicatedMemoryHandle&) = default;
DedicatedMemoryHandle(DedicatedMemoryHandle&&) = default;
DedicatedMemoryHandle& operator=(const DedicatedMemoryHandle&) = default;
DedicatedMemoryHandle& operator=(DedicatedMemoryHandle&&) = default;
VkDeviceMemory getMemory() const { return m_memory; }
VkDeviceSize getSize() const { return m_size; }
private:
friend class DedicatedMemoryAllocator;
DedicatedMemoryHandle(VkDeviceMemory memory, VkDeviceSize size)
: m_memory(memory)
, m_size(size)
{
}
VkDeviceMemory m_memory;
VkDeviceSize m_size;
};
DedicatedMemoryHandle* castDedicatedMemoryHandle(MemHandle memHandle)
{
if(!memHandle)
return nullptr;
#ifndef NDEBUG
  auto dedicatedMemHandle = dynamic_cast<DedicatedMemoryHandle*>(memHandle);
  assert(dedicatedMemHandle);
#else
  auto dedicatedMemHandle = static_cast<DedicatedMemoryHandle*>(memHandle);
#endif
return dedicatedMemHandle;
}
DedicatedMemoryAllocator::DedicatedMemoryAllocator(VkDevice device, VkPhysicalDevice physDevice)
{
init(device, physDevice);
}
DedicatedMemoryAllocator::~DedicatedMemoryAllocator()
{
deinit();
}
bool DedicatedMemoryAllocator::init(VkDevice device, VkPhysicalDevice physDevice)
{
m_device = device;
m_physicalDevice = physDevice;
vkGetPhysicalDeviceMemoryProperties(m_physicalDevice, &m_physicalMemoryProperties);
return true;
}
void DedicatedMemoryAllocator::deinit()
{
m_device = NULL;
}
MemHandle DedicatedMemoryAllocator::allocMemory(const MemAllocateInfo& allocInfo, VkResult* pResult)
{
MemAllocateInfo localInfo(allocInfo);
localInfo.setAllocationFlags(allocInfo.getAllocationFlags() | m_flags);
BakedAllocateInfo bakedInfo;
fillBakedAllocateInfo(m_physicalMemoryProperties, localInfo, bakedInfo);
VkDeviceMemory memory = VK_NULL_HANDLE;
VkResult result = vkAllocateMemory(m_device, &bakedInfo.memAllocInfo, nullptr, &memory);
NVVK_CHECK(result);
if(pResult)
{
*pResult = result;
}
if(result == VK_SUCCESS)
{
auto dedicatedMemHandle = new DedicatedMemoryHandle(memory, bakedInfo.memAllocInfo.allocationSize);
if(!allocInfo.getDebugName().empty())
{
const MemInfo& memInfo = getMemoryInfo(dedicatedMemHandle);
nvvk::DebugUtil(m_device).setObjectName(memInfo.memory, localInfo.getDebugName());
}
return dedicatedMemHandle;
}
else
{
return NullMemHandle;
}
}
void DedicatedMemoryAllocator::freeMemory(MemHandle memHandle)
{
if(!memHandle)
return;
auto dedicatedHandle = castDedicatedMemoryHandle(memHandle);
vkFreeMemory(m_device, dedicatedHandle->getMemory(), nullptr);
delete dedicatedHandle;
return;
}
MemAllocator::MemInfo DedicatedMemoryAllocator::getMemoryInfo(MemHandle memHandle) const
{
auto dedicatedHandle = castDedicatedMemoryHandle(memHandle);
return MemInfo{dedicatedHandle->getMemory(), 0, dedicatedHandle->getSize()};
}
void* DedicatedMemoryAllocator::map(MemHandle memHandle, VkDeviceSize offset, VkDeviceSize size, VkResult* pResult)
{
auto dedicatedHandle = castDedicatedMemoryHandle(memHandle);
void* ptr = nullptr;
VkResult result = vkMapMemory(m_device, dedicatedHandle->getMemory(), offset, size, 0 /*VkMemoryFlags*/, &ptr);
NVVK_CHECK(result);
if(pResult)
{
*pResult = result;
}
return ptr;
}
void DedicatedMemoryAllocator::unmap(MemHandle memHandle)
{
auto dedicatedHandle = castDedicatedMemoryHandle(memHandle);
vkUnmapMemory(m_device, dedicatedHandle->getMemory());
}
VkDevice DedicatedMemoryAllocator::getDevice() const
{
return m_device;
}
VkPhysicalDevice DedicatedMemoryAllocator::getPhysicalDevice() const
{
return m_physicalDevice;
}
void DedicatedMemoryAllocator::setAllocateFlags(VkMemoryAllocateFlags flags)
{
m_flags = flags;
}
} // namespace nvvk

View file

@ -0,0 +1,62 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include "memallocator_vk.hpp"
namespace nvvk {
/** @DOC_START
# class nvvk::DedicatedMemoryAllocator
nvvk::DedicatedMemoryAllocator is a simple implementation of the MemAllocator interface, using
one VkDeviceMemory allocation per allocMemory() call. This simplicity comes at the cost of
speed (vkAllocateMemory tends to be very slow) and of operating system resources,
as some OSs limit the number of physical memory allocations per process.
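A minimal usage sketch (it assumes `device`, `physicalDevice` and a created `buffer` already exist):
```cpp
nvvk::DedicatedMemoryAllocator memAllocator(device, physicalDevice);
nvvk::MemAllocateInfo          allocInfo(device, buffer);  // derive size/alignment/type from the buffer
nvvk::MemHandle                handle = memAllocator.allocMemory(allocInfo);
nvvk::MemAllocator::MemInfo    info   = memAllocator.getMemoryInfo(handle);
vkBindBufferMemory(device, buffer, info.memory, info.offset);
// ... use the buffer ...
memAllocator.freeMemory(handle);
```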
@DOC_END */
class DedicatedMemoryAllocator : public MemAllocator
{
public:
DedicatedMemoryAllocator() = default;
explicit DedicatedMemoryAllocator(VkDevice device, VkPhysicalDevice physDevice);
virtual ~DedicatedMemoryAllocator();
bool init(VkDevice device, VkPhysicalDevice physDevice);
void deinit();
virtual MemHandle allocMemory(const MemAllocateInfo& allocInfo, VkResult* pResult = nullptr) override;
virtual void freeMemory(MemHandle memHandle) override;
virtual MemInfo getMemoryInfo(MemHandle memHandle) const override;
virtual void* map(MemHandle memHandle, VkDeviceSize offset = 0, VkDeviceSize size = VK_WHOLE_SIZE, VkResult* pResult = nullptr) override;
virtual void unmap(MemHandle memHandle) override;
virtual VkDevice getDevice() const override;
virtual VkPhysicalDevice getPhysicalDevice() const override;
void setAllocateFlags(VkMemoryAllocateFlags flags);
private:
VkDevice m_device{NULL};
VkPhysicalDevice m_physicalDevice{NULL};
VkPhysicalDeviceMemoryProperties m_physicalMemoryProperties;
VkMemoryAllocateFlags m_flags{0};
};
} // namespace nvvk

View file

@ -0,0 +1,71 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include "memallocator_vk.hpp"
#include "memorymanagement_vk.hpp"
namespace nvvk {
class DeviceMemoryAllocator;
/** @DOC_START
# class nvvk::DMAMemoryAllocator
nvvk::DMAMemoryAllocator uses nvvk::DeviceMemoryAllocator internally.
nvvk::DeviceMemoryAllocator derives from nvvk::MemAllocator as well, so this class is for those who prefer the reduced wrapper interface.
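A minimal sketch wrapping an already initialized nvvk::DeviceMemoryAllocator (here assumed as `deviceAllocator`, with `device` and `image` existing elsewhere):
```cpp
nvvk::DMAMemoryAllocator memAllocator(&deviceAllocator);
nvvk::MemAllocateInfo    allocInfo(device, image);  // tiling-optimal image allocation
nvvk::MemHandle          handle = memAllocator.allocMemory(allocInfo);
nvvk::AllocationID       id     = memAllocator.getAllocationID(handle);  // DMA-specific utility
// ... bind and use ...
memAllocator.freeMemory(handle);
```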
@DOC_END */
class DMAMemoryAllocator : public MemAllocator
{
public:
DMAMemoryAllocator() = default;
explicit DMAMemoryAllocator(nvvk::DeviceMemoryAllocator* dma) { init(dma); }
virtual ~DMAMemoryAllocator() { deinit(); }
bool init(nvvk::DeviceMemoryAllocator* dma)
{
m_dma = dma;
return m_dma != nullptr;
}
void deinit() { m_dma = nullptr; }
// Implement MemAllocator interface
virtual MemHandle allocMemory(const MemAllocateInfo& allocInfo, VkResult* pResult = nullptr) override
{
return m_dma->allocMemory(allocInfo, pResult);
}
virtual void freeMemory(MemHandle memHandle) override { return m_dma->freeMemory(memHandle); }
virtual MemInfo getMemoryInfo(MemHandle memHandle) const override { return m_dma->getMemoryInfo(memHandle); }
virtual void* map(MemHandle memHandle, VkDeviceSize offset = 0, VkDeviceSize size = VK_WHOLE_SIZE, VkResult* pResult = nullptr) override
{
return m_dma->map(memHandle, offset, size, pResult);
}
virtual void unmap(MemHandle memHandle) override { return m_dma->unmap(memHandle); }
virtual VkDevice getDevice() const override { return m_dma->getDevice(); }
virtual VkPhysicalDevice getPhysicalDevice() const override { return m_dma->getPhysicalDevice(); }
// Utility function
AllocationID getAllocationID(MemHandle memHandle) const { return m_dma->getAllocationID(memHandle); }
private:
nvvk::DeviceMemoryAllocator* m_dma;
};
} // namespace nvvk

View file

@ -0,0 +1,201 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "memallocator_vk.hpp"
#include <cassert>
namespace nvvk {
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
MemAllocateInfo::MemAllocateInfo(const VkMemoryRequirements& memReqs, VkMemoryPropertyFlags memProps, bool isTilingOptimal)
: m_memReqs(memReqs)
, m_memProps(memProps)
, m_isTilingOptimal(isTilingOptimal)
{
}
MemAllocateInfo::MemAllocateInfo(VkDevice device, VkBuffer buffer, VkMemoryPropertyFlags memProps)
{
VkBufferMemoryRequirementsInfo2 bufferReqs = {VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, nullptr, buffer};
VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS};
VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, &dedicatedRegs};
vkGetBufferMemoryRequirements2(device, &bufferReqs, &memReqs);
m_memReqs = memReqs.memoryRequirements;
m_memProps = memProps;
if(dedicatedRegs.requiresDedicatedAllocation)
{
setDedicatedBuffer(buffer);
}
setTilingOptimal(false);
}
MemAllocateInfo::MemAllocateInfo(VkDevice device, VkImage image, VkMemoryPropertyFlags memProps, bool allowDedicatedAllocation)
{
VkImageMemoryRequirementsInfo2 imageReqs = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, nullptr, image};
VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS};
VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, &dedicatedRegs};
vkGetImageMemoryRequirements2(device, &imageReqs, &memReqs);
m_memReqs = memReqs.memoryRequirements;
m_memProps = memProps;
if(dedicatedRegs.requiresDedicatedAllocation || (dedicatedRegs.prefersDedicatedAllocation && allowDedicatedAllocation))
{
setDedicatedImage(image);
}
setTilingOptimal(true);
}
MemAllocateInfo& MemAllocateInfo::setDedicatedImage(VkImage image)
{
assert(!m_dedicatedBuffer);
m_dedicatedImage = image;
return *this;
}
MemAllocateInfo& MemAllocateInfo::setDedicatedBuffer(VkBuffer buffer)
{
assert(!m_dedicatedImage);
m_dedicatedBuffer = buffer;
return *this;
}
MemAllocateInfo& MemAllocateInfo::setAllocationFlags(VkMemoryAllocateFlags flags)
{
m_allocateFlags |= flags;
return *this;
}
MemAllocateInfo& MemAllocateInfo::setDeviceMask(uint32_t mask)
{
m_deviceMask = mask;
return *this;
}
MemAllocateInfo& MemAllocateInfo::setDebugName(const std::string& name)
{
m_debugName = name;
return *this;
}
MemAllocateInfo& MemAllocateInfo::setExportable(bool exportable)
{
m_isExportable = exportable;
return *this;
}
// Determines which heap to allocate from
MemAllocateInfo& MemAllocateInfo::setMemoryProperties(VkMemoryPropertyFlags flags)
{
m_memProps = flags;
return *this;
}
// Determines size and alignment
MemAllocateInfo& MemAllocateInfo::setMemoryRequirements(VkMemoryRequirements requirements)
{
m_memReqs = requirements;
return *this;
}
MemAllocateInfo& MemAllocateInfo::setTilingOptimal(bool isTilingOptimal)
{
m_isTilingOptimal = isTilingOptimal;
return *this;
}
MemAllocateInfo& MemAllocateInfo::setPriority(const float priority /*= 0.5f*/)
{
m_priority = priority;
return *this;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
uint32_t getMemoryType(const VkPhysicalDeviceMemoryProperties& memoryProperties, uint32_t typeBits, const VkMemoryPropertyFlags& properties)
{
for(uint32_t i = 0; i < memoryProperties.memoryTypeCount; i++)
{
if(((typeBits & (1 << i)) > 0) && (memoryProperties.memoryTypes[i].propertyFlags & properties) == properties)
{
return i;
}
}
assert(0);
return ~0u;
}
bool fillBakedAllocateInfo(const VkPhysicalDeviceMemoryProperties& physMemProps, const MemAllocateInfo& info, BakedAllocateInfo& baked)
{
baked.memAllocInfo.allocationSize = info.getMemoryRequirements().size;
baked.memAllocInfo.memoryTypeIndex =
getMemoryType(physMemProps, info.getMemoryRequirements().memoryTypeBits, info.getMemoryProperties());
// Put it last in the chain, so we can directly pass it into the DeviceMemoryAllocator::alloc function
if(info.getDedicatedBuffer() || info.getDedicatedImage())
{
baked.dedicatedInfo.pNext = baked.memAllocInfo.pNext;
baked.memAllocInfo.pNext = &baked.dedicatedInfo;
baked.dedicatedInfo.buffer = info.getDedicatedBuffer();
baked.dedicatedInfo.image = info.getDedicatedImage();
}
if(info.getExportable())
{
baked.exportInfo.pNext = baked.memAllocInfo.pNext;
baked.memAllocInfo.pNext = &baked.exportInfo;
#ifdef WIN32
baked.exportInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
#else
baked.exportInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
#endif
}
if(info.getDeviceMask() || info.getAllocationFlags())
{
baked.flagsInfo.pNext = baked.memAllocInfo.pNext;
baked.memAllocInfo.pNext = &baked.flagsInfo;
baked.flagsInfo.flags = info.getAllocationFlags();
baked.flagsInfo.deviceMask = info.getDeviceMask();
if(baked.flagsInfo.deviceMask)
{
baked.flagsInfo.flags |= VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT;
}
}
return true;
}
} // namespace nvvk

View file

@ -0,0 +1,203 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <vulkan/vulkan_core.h>
#include <string>
namespace nvvk {
class MemHandleBase;
typedef MemHandleBase* MemHandle;
static const MemHandle NullMemHandle = nullptr;
/** @DOC_START
# class nvvk::MemHandle
nvvk::MemHandle represents a memory allocation or sub-allocation from the
generic nvvk::MemAllocator interface. Prefer `nvvk::NullMemHandle` over plain
'NULL' when resetting a handle, as MemHandle may change to a non-pointer type in the future.
# class nvvk::MemAllocateInfo
nvvk::MemAllocateInfo collects almost all parameters a Vulkan allocation could potentially need.
This keeps MemAllocator's interface simple and extensible.
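A short sketch of the builder-style setters (it assumes `memReqs` was queried beforehand; the debug name is just an example):
```cpp
nvvk::MemAllocateInfo allocInfo(memReqs, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
allocInfo.setAllocationFlags(VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)
    .setPriority(1.0f)
    .setDebugName("scene vertex buffer");
```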
@DOC_END */
class MemAllocateInfo
{
public:
MemAllocateInfo(const VkMemoryRequirements& memReqs, // determine size, alignment and memory type
VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, // determine device_local, host_visible, host coherent etc...
                  bool isTilingOptimal = false  // determine if the allocation is going to be used for a VK_IMAGE_TILING_OPTIMAL image
);
// Convenience constructor that infers the allocation information directly from the buffer object
MemAllocateInfo(VkDevice device, VkBuffer buffer, VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
// Convenience constructor that infers the allocation information directly from the image object.
// If the driver _prefers_ a dedicated allocation for this particular image and allowDedicatedAllocation is true, a dedicated allocation will be requested.
// If the driver _requires_ a dedicated allocation, a dedicated allocation will be requested regardless of 'allowDedicatedAllocation'.
MemAllocateInfo(VkDevice device,
VkImage image,
VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
bool allowDedicatedAllocation = true);
// Determines which heap to allocate from
MemAllocateInfo& setMemoryProperties(VkMemoryPropertyFlags flags);
// Determines size and alignment
MemAllocateInfo& setMemoryRequirements(VkMemoryRequirements requirements);
// TilingOptimal should be set for images. The allocator may choose to separate linear and tiling allocations
MemAllocateInfo& setTilingOptimal(bool isTilingOptimal);
// The allocation will be dedicated for the given image
MemAllocateInfo& setDedicatedImage(VkImage image);
// The allocation will be dedicated for the given buffer
MemAllocateInfo& setDedicatedBuffer(VkBuffer buffer);
// Set additional allocation flags
MemAllocateInfo& setAllocationFlags(VkMemoryAllocateFlags flags);
// Set the device mask for the allocation, redirect allocations to specific device(s) in the device group
MemAllocateInfo& setDeviceMask(uint32_t mask);
// Set a name for the allocation (only useful for dedicated allocations or allocators)
MemAllocateInfo& setDebugName(const std::string& name);
// Make the allocation exportable
MemAllocateInfo& setExportable(bool exportable);
// Prioritize the allocation (values 0.0 - 1.0); this may guide eviction strategies
MemAllocateInfo& setPriority(const float priority = 0.5f);
VkImage getDedicatedImage() const { return m_dedicatedImage; }
VkBuffer getDedicatedBuffer() const { return m_dedicatedBuffer; }
VkMemoryAllocateFlags getAllocationFlags() const { return m_allocateFlags; }
uint32_t getDeviceMask() const { return m_deviceMask; }
bool getTilingOptimal() const { return m_isTilingOptimal; }
const VkMemoryRequirements& getMemoryRequirements() const { return m_memReqs; }
const VkMemoryPropertyFlags& getMemoryProperties() const { return m_memProps; }
std::string getDebugName() const { return m_debugName; }
bool getExportable() const { return m_isExportable; }
float getPriority() const { return m_priority; }
private:
VkBuffer m_dedicatedBuffer{VK_NULL_HANDLE};
VkImage m_dedicatedImage{VK_NULL_HANDLE};
VkMemoryAllocateFlags m_allocateFlags{0};
uint32_t m_deviceMask{0};
VkMemoryRequirements m_memReqs{0, 0, 0};
VkMemoryPropertyFlags m_memProps{0};
float m_priority{0.5f};
std::string m_debugName;
bool m_isTilingOptimal{false};
bool m_isExportable{false};
};
// BakedAllocateInfo is a group of allocation relevant Vulkan allocation structures,
// which will be filled out and linked via pNext-> to be used directly via vkAllocateMemory.
struct BakedAllocateInfo
{
BakedAllocateInfo() = default;
  // Copy and move operations are deleted on purpose: the struct stores addresses of its own
  // members in other members (via the pNext chain), so a copied or moved object would point
  // to wrong or out-of-scope addresses.
  BakedAllocateInfo(BakedAllocateInfo&& other) = delete;
  BakedAllocateInfo& operator=(BakedAllocateInfo&& other) = delete;
  BakedAllocateInfo(const BakedAllocateInfo&) = delete;
  BakedAllocateInfo& operator=(const BakedAllocateInfo&) = delete;
VkMemoryAllocateInfo memAllocInfo{VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
VkMemoryAllocateFlagsInfo flagsInfo{VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO};
VkMemoryDedicatedAllocateInfo dedicatedInfo{VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO};
VkExportMemoryAllocateInfo exportInfo{VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO};
};
bool fillBakedAllocateInfo(const VkPhysicalDeviceMemoryProperties& physMemProps, const MemAllocateInfo& info, BakedAllocateInfo& baked);
uint32_t getMemoryType(const VkPhysicalDeviceMemoryProperties& memoryProperties, uint32_t typeBits, const VkMemoryPropertyFlags& properties);
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::MemAllocator
nvvk::MemAllocator is a Vulkan memory allocator interface extensively used by ResourceAllocator.
It provides means to allocate, free, map and unmap pieces of Vulkan device memory.
Concrete implementations derive from nvvk::MemAllocator.
They can implement the allocator functionality themselves or act as an adapter to another
memory allocator implementation.
A nvvk::MemAllocator hands out opaque 'MemHandles'. The implementation of the MemAllocator interface
may choose any type of payload to store in a MemHandle. A MemHandle's relevant information can be
retrieved via getMemoryInfo().
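The interface is used the same way for every concrete allocator; a minimal sketch (assuming `memAllocator` references some MemAllocator implementation and `allocInfo` was filled as above):
```cpp
nvvk::MemHandle handle = memAllocator.allocMemory(allocInfo);
if(handle != nvvk::NullMemHandle)
{
  float* data = memAllocator.mapT<float>(handle);  // typed convenience mapping
  // ... fill data ...
  memAllocator.unmap(handle);
  memAllocator.freeMemory(handle);
}
```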
@DOC_END */
class MemAllocator
{
public:
struct MemInfo
{
VkDeviceMemory memory;
VkDeviceSize offset;
VkDeviceSize size;
};
// Allocate a piece of memory according to the requirements of allocInfo.
// may return NullMemHandle on error (provide pResult for details)
virtual MemHandle allocMemory(const MemAllocateInfo& allocInfo, VkResult* pResult = nullptr) = 0;
// Free the memory backing 'memHandle'.
// memHandle may be nullptr;
virtual void freeMemory(MemHandle memHandle) = 0;
// Retrieve detailed information about 'memHandle'
virtual MemInfo getMemoryInfo(MemHandle memHandle) const = 0;
// Maps device memory to system memory.
// If 'memHandle' already refers to a suballocation 'offset' will be applied on top of the
// suballocation's offset inside the device memory.
// may return nullptr on error (provide pResult for details)
virtual void* map(MemHandle memHandle, VkDeviceSize offset = 0, VkDeviceSize size = VK_WHOLE_SIZE, VkResult* pResult = nullptr) = 0;
// Unmap memHandle
virtual void unmap(MemHandle memHandle) = 0;
// Convenience function to allow mapping straight to a typed pointer.
template <class T>
T* mapT(MemHandle memHandle, VkResult* pResult = nullptr)
{
return (T*)map(memHandle, 0, VK_WHOLE_SIZE, pResult);
}
virtual VkDevice getDevice() const = 0;
virtual VkPhysicalDevice getPhysicalDevice() const = 0;
// Make sure the dtor is virtual
virtual ~MemAllocator() = default;
};
// Base class for memory handles
// Individual allocators will derive from it and fill the handles with their own data.
class MemHandleBase
{
public:
virtual ~MemHandleBase() = default; // force the class to become virtual
};
} // namespace nvvk

View file

@ -0,0 +1,97 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "vk_mem_alloc.h"
#ifndef MEMALLOCATOR_VMA_H_INCLUDED
#define MEMALLOCATOR_VMA_H_INCLUDED
#include "memallocator_vk.hpp"
#include "resourceallocator_vk.hpp"
namespace nvvk {
/** @DOC_START
# class nvvk::VMAMemoryAllocator
nvvk::VMAMemoryAllocator using the GPUOpen [Vulkan Memory Allocator](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator) underneath.
As VMA comes as a header-only library, when using it you'll have to:
1) provide _add_package_VMA() in your CMakeLists.txt
2) put these lines into one of your compilation units:
```cpp
#define VMA_IMPLEMENTATION
#include "vk_mem_alloc.h"
```
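Once VMA is available, the allocator simply adapts an existing VmaAllocator; a minimal sketch (assuming `vma` was created with vmaCreateAllocator and `allocInfo` is a filled nvvk::MemAllocateInfo):
```cpp
nvvk::VMAMemoryAllocator memAllocator(device, physicalDevice, vma);
nvvk::MemHandle          handle = memAllocator.allocMemory(allocInfo);
```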
@DOC_END */
class VMAMemoryAllocator : public MemAllocator
{
public:
VMAMemoryAllocator() = default;
inline explicit VMAMemoryAllocator(VkDevice device, VkPhysicalDevice physicalDevice, VmaAllocator vma);
inline virtual ~VMAMemoryAllocator();
inline bool init(VkDevice device, VkPhysicalDevice physicalDevice, VmaAllocator vma);
inline void deinit();
inline MemHandle allocMemory(const MemAllocateInfo& allocInfo, VkResult* pResult = nullptr) override;
inline void freeMemory(MemHandle memHandle) override;
inline MemInfo getMemoryInfo(MemHandle memHandle) const override;
inline void* map(MemHandle memHandle, VkDeviceSize offset = 0, VkDeviceSize size = VK_WHOLE_SIZE, VkResult* pResult = nullptr) override;
inline void unmap(MemHandle memHandle) override;
inline VkDevice getDevice() const override;
inline VkPhysicalDevice getPhysicalDevice() const override;
inline void findLeak(uint64_t leakID) { m_leakID = leakID; }
private:
VmaAllocator m_vma{0};
VkDevice m_device{nullptr};
VkPhysicalDevice m_physicalDevice{nullptr};
uint64_t m_leakID{~0U};
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::ResourceAllocatorVma
nvvk::ResourceAllocatorVma is a convenience class that creates, initializes and owns a VmaAllocator
and an associated nvvk::MemAllocator object.
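A minimal setup sketch (assuming `instance`, `device` and `physicalDevice` already exist):
```cpp
nvvk::ResourceAllocatorVma allocator;
allocator.init(instance, device, physicalDevice);
// ... create buffers and images through the ResourceAllocator interface ...
allocator.deinit();
```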
@DOC_END */
class ResourceAllocatorVma : public ResourceAllocator
{
public:
ResourceAllocatorVma() = default;
ResourceAllocatorVma(VkInstance instance, VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
virtual ~ResourceAllocatorVma();
void init(VkInstance instance, VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
void deinit();
protected:
VmaAllocator m_vma{nullptr};
std::unique_ptr<MemAllocator> m_memAlloc;
};
} // namespace nvvk
#include "memallocator_vma_vk.inl"
#endif

View file

@ -0,0 +1,249 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "vk_mem_alloc.h"
#include "error_vk.hpp"
#if defined(LINUX)
#include <signal.h> // LINUX SIGTRAP
#endif
namespace nvvk {
//--------------------------------------------------------------------------------------------------
// Converter utility from Vulkan memory property to VMA
//
static inline VmaMemoryUsage vkToVmaMemoryUsage(VkMemoryPropertyFlags flags)
{
if((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
return VMA_MEMORY_USAGE_GPU_ONLY;
else if((flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) == VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
return VMA_MEMORY_USAGE_CPU_ONLY;
else if((flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
return VMA_MEMORY_USAGE_CPU_TO_GPU;
return VMA_MEMORY_USAGE_UNKNOWN;
}
class VMAMemoryHandle : public MemHandleBase
{
public:
VMAMemoryHandle() = default;
VMAMemoryHandle(const VMAMemoryHandle&) = default;
VMAMemoryHandle(VMAMemoryHandle&&) = default;
VmaAllocation getAllocation() const { return m_allocation; }
private:
friend class VMAMemoryAllocator;
VMAMemoryHandle(VmaAllocation allocation)
: m_allocation(allocation)
{
}
VmaAllocation m_allocation;
};
inline VMAMemoryHandle* castVMAMemoryHandle(MemHandle memHandle)
{
if(!memHandle)
return nullptr;
#ifndef NDEBUG
  auto vmaMemHandle = dynamic_cast<VMAMemoryHandle*>(memHandle);
  assert(vmaMemHandle);
#else
  auto vmaMemHandle = static_cast<VMAMemoryHandle*>(memHandle);
#endif
return vmaMemHandle;
}
inline VMAMemoryAllocator::VMAMemoryAllocator(VkDevice device, VkPhysicalDevice physicalDevice, VmaAllocator vma)
{
init(device, physicalDevice, vma);
}
inline VMAMemoryAllocator::~VMAMemoryAllocator()
{
deinit();
}
inline bool VMAMemoryAllocator::init(VkDevice device, VkPhysicalDevice physicalDevice, VmaAllocator vma)
{
m_device = device;
m_physicalDevice = physicalDevice;
m_vma = vma;
return true;
}
inline void VMAMemoryAllocator::deinit()
{
m_vma = 0;
}
inline MemHandle VMAMemoryAllocator::allocMemory(const MemAllocateInfo& allocInfo, VkResult* pResult)
{
VmaAllocationCreateInfo vmaAllocInfo = {};
vmaAllocInfo.usage = vkToVmaMemoryUsage(allocInfo.getMemoryProperties());
if(allocInfo.getDedicatedBuffer() || allocInfo.getDedicatedImage())
{
vmaAllocInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
}
vmaAllocInfo.priority = allocInfo.getPriority();
// Not supported by VMA
assert(!allocInfo.getExportable());
assert(!allocInfo.getDeviceMask());
VmaAllocationInfo allocationDetail;
VmaAllocation allocation = nullptr;
VkResult result = vmaAllocateMemory(m_vma, &allocInfo.getMemoryRequirements(), &vmaAllocInfo, &allocation, &allocationDetail);
#ifndef NDEBUG
// !! VMA leaks finder!!
// Call findLeak with the value showing in the leak report.
// Add : #define VMA_DEBUG_LOG(format, ...) do { printf(format, __VA_ARGS__); printf("\n"); } while(false)
// - in the app where VMA_IMPLEMENTATION is defined, to have a leak report
static uint64_t counter{0};
if(counter == m_leakID)
{
bool stop_here = true;
#if defined(_MSVC_LANG)
__debugbreak();
#elif defined(LINUX)
raise(SIGTRAP);
#endif
}
if (result == VK_SUCCESS)
{
std::string allocID = std::to_string(counter++);
vmaSetAllocationName(m_vma, allocation, allocID.c_str());
}
#endif // !NDEBUG
NVVK_CHECK(result);
if(pResult)
{
*pResult = result;
}
if(result == VK_SUCCESS)
{
return new VMAMemoryHandle(allocation);
}
else
{
return NullMemHandle;
}
}
inline void VMAMemoryAllocator::freeMemory(MemHandle memHandle)
{
if(!memHandle)
return;
auto vmaHandle = castVMAMemoryHandle(memHandle);
vmaFreeMemory(m_vma, vmaHandle->getAllocation());
}
inline MemAllocator::MemInfo VMAMemoryAllocator::getMemoryInfo(MemHandle memHandle) const
{
auto vmaHandle = castVMAMemoryHandle(memHandle);
VmaAllocationInfo allocInfo;
vmaGetAllocationInfo(m_vma, vmaHandle->getAllocation(), &allocInfo);
MemInfo memInfo;
memInfo.memory = allocInfo.deviceMemory;
memInfo.offset = allocInfo.offset;
memInfo.size = allocInfo.size;
return memInfo;
}
inline void* VMAMemoryAllocator::map(MemHandle memHandle, VkDeviceSize offset, VkDeviceSize size, VkResult* pResult)
{
auto vmaHandle = castVMAMemoryHandle(memHandle);
void* ptr;
VkResult result = vmaMapMemory(m_vma, vmaHandle->getAllocation(), &ptr);
NVVK_CHECK(result);
if(pResult)
{
*pResult = result;
}
return ptr;
}
inline void VMAMemoryAllocator::unmap(MemHandle memHandle)
{
auto vmaHandle = castVMAMemoryHandle(memHandle);
vmaUnmapMemory(m_vma, vmaHandle->getAllocation());
}
inline VkDevice VMAMemoryAllocator::getDevice() const
{
return m_device;
}
inline VkPhysicalDevice VMAMemoryAllocator::getPhysicalDevice() const
{
return m_physicalDevice;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
inline ResourceAllocatorVma::ResourceAllocatorVma(VkInstance instance, VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize)
{
  init(instance, device, physicalDevice, stagingBlockSize);
}
inline ResourceAllocatorVma::~ResourceAllocatorVma()
{
deinit();
}
inline void ResourceAllocatorVma::init(VkInstance instance, VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize)
{
VmaAllocatorCreateInfo allocatorInfo = {};
allocatorInfo.physicalDevice = physicalDevice;
allocatorInfo.device = device;
allocatorInfo.instance = instance;
allocatorInfo.flags = VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT;
vmaCreateAllocator(&allocatorInfo, &m_vma);
m_memAlloc.reset(new VMAMemoryAllocator(device, physicalDevice, m_vma));
ResourceAllocator::init(device, physicalDevice, m_memAlloc.get(), stagingBlockSize);
}
inline void ResourceAllocatorVma::deinit()
{
ResourceAllocator::deinit();
m_memAlloc.reset();
vmaDestroyAllocator(m_vma);
m_vma = nullptr;
}
} // namespace nvvk

View file

@ -0,0 +1,887 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include <algorithm>
#include <string>
#include "debug_util_vk.hpp"
#include "error_vk.hpp"
#include "memorymanagement_vk.hpp"
#include "nvh/nvprint.hpp"
namespace nvvk {
bool getMemoryInfo(const VkPhysicalDeviceMemoryProperties& memoryProperties,
const VkMemoryRequirements& memReqs,
VkMemoryPropertyFlags properties,
VkMemoryAllocateInfo& memInfo,
bool preferDevice)
{
memInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
memInfo.pNext = nullptr;
if(!memReqs.size)
{
memInfo.allocationSize = 0;
memInfo.memoryTypeIndex = ~0;
return true;
}
// Find an available memory type that satisfies the requested properties.
for(uint32_t memoryTypeIndex = 0; memoryTypeIndex < memoryProperties.memoryTypeCount; ++memoryTypeIndex)
{
    if((memReqs.memoryTypeBits & (1 << memoryTypeIndex))
       // either the requested properties are non-zero and this memory type provides all of them
       && ((properties && (memoryProperties.memoryTypes[memoryTypeIndex].propertyFlags & properties) == properties)
           // or this memory type's flags match the requested properties exactly (covers the zero case)
           || (memoryProperties.memoryTypes[memoryTypeIndex].propertyFlags == properties)))
{
memInfo.allocationSize = memReqs.size;
memInfo.memoryTypeIndex = memoryTypeIndex;
return true;
}
}
// special case zero flag logic
if(properties == 0)
{
// prefer something with host visible
return getMemoryInfo(memoryProperties, memReqs,
preferDevice ? VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, memInfo);
}
return false;
}
//////////////////////////////////////////////////////////////////////////
class DMAMemoryAllocator;
class DMAMemoryHandle : public nvvk::MemHandleBase
{
public:
DMAMemoryHandle() = default;
DMAMemoryHandle(const DMAMemoryHandle&) = default;
DMAMemoryHandle(DMAMemoryHandle&&) = default;
DMAMemoryHandle& operator=(const DMAMemoryHandle&) = default;
DMAMemoryHandle& operator=(DMAMemoryHandle&&) = default;
const AllocationID& getAllocationID() const { return m_allocation; };
private:
friend class nvvk::DeviceMemoryAllocator;
DMAMemoryHandle(const AllocationID& allocation)
: m_allocation(allocation)
{
}
AllocationID m_allocation;
};
DMAMemoryHandle* castDMAMemoryHandle(MemHandle memHandle)
{
if(!memHandle)
return nullptr;
#ifndef NDEBUG
  auto dmaMemHandle = dynamic_cast<DMAMemoryHandle*>(memHandle);
  assert(dmaMemHandle);
#else
  auto dmaMemHandle = static_cast<DMAMemoryHandle*>(memHandle);
#endif
return dmaMemHandle;
}
MemHandle DeviceMemoryAllocator::allocMemory(const MemAllocateInfo& allocInfo, VkResult* pResult)
{
BakedAllocateInfo bakedInfo;
fillBakedAllocateInfo(getMemoryProperties(), allocInfo, bakedInfo);
State state = m_defaultState;
state.allocateDeviceMask |= bakedInfo.flagsInfo.deviceMask;
state.allocateFlags |= bakedInfo.flagsInfo.flags;
state.priority = allocInfo.getPriority();
VkResult result;
bool isDedicatedAllocation = allocInfo.getDedicatedBuffer() || allocInfo.getDedicatedImage();
auto dmaHandle = allocInternal(allocInfo.getMemoryRequirements(), allocInfo.getMemoryProperties(),
!allocInfo.getTilingOptimal() /*isLinear*/,
isDedicatedAllocation ? &bakedInfo.dedicatedInfo : nullptr, result, true, state);
if(pResult)
{
*pResult = result;
}
if(dmaHandle)
{
DMAMemoryHandle* dmaMemHandle = new DMAMemoryHandle(dmaHandle);
// Cannot do this, it would override the DeviceMemoryManager's chosen block buffer name
// if(!allocInfo.getDebugName().empty())
// {
// const MemInfo& memInfo = getMemoryInfo(dmaMemHandle);
// nvvk::DebugUtil(m_dma.getDevice()).setObjectName(memInfo.memory, allocInfo.getDebugName());
// }
return dmaMemHandle;
}
else
{
return NullMemHandle;
}
}
void DeviceMemoryAllocator::freeMemory(MemHandle memHandle)
{
if(!memHandle)
return;
auto dmaHandle = castDMAMemoryHandle(memHandle);
assert(dmaHandle);
free(dmaHandle->getAllocationID());
delete dmaHandle;
return;
}
MemAllocator::MemInfo DeviceMemoryAllocator::getMemoryInfo(MemHandle memHandle) const
{
MemInfo info;
auto dmaHandle = castDMAMemoryHandle(memHandle);
assert(dmaHandle);
auto& allocInfo = getAllocation(dmaHandle->getAllocationID());
info.memory = allocInfo.mem;
info.offset = allocInfo.offset;
info.size = allocInfo.size;
return info;
};
nvvk::AllocationID DeviceMemoryAllocator::getAllocationID(MemHandle memHandle) const
{
auto dmaHandle = castDMAMemoryHandle(memHandle);
assert(dmaHandle);
return dmaHandle->getAllocationID();
}
void* DeviceMemoryAllocator::map(MemHandle memHandle, VkDeviceSize offset, VkDeviceSize size, VkResult* pResult)
{
auto dmaHandle = castDMAMemoryHandle(memHandle);
assert(dmaHandle);
void* ptr = map(dmaHandle->getAllocationID(), pResult);
return ptr;
}
void DeviceMemoryAllocator::unmap(MemHandle memHandle)
{
auto dmaHandle = castDMAMemoryHandle(memHandle);
assert(dmaHandle);
unmap(dmaHandle->getAllocationID());
}
const VkMemoryDedicatedAllocateInfo* DeviceMemoryAllocator::DEDICATED_PROXY =
(const VkMemoryDedicatedAllocateInfo*)&DeviceMemoryAllocator::DEDICATED_PROXY;
int DeviceMemoryAllocator::s_allocDebugBias = 0;
//#define DEBUG_ALLOCID 8
nvvk::AllocationID DeviceMemoryAllocator::createID(Allocation& allocation, BlockID block, uint32_t blockOffset, uint32_t blockSize)
{
// find free slot
if(m_freeAllocationIndex != INVALID_ID_INDEX)
{
uint32_t index = m_freeAllocationIndex;
m_freeAllocationIndex = m_allocations[index].id.instantiate((uint32_t)index);
m_allocations[index].allocation = allocation;
m_allocations[index].block = block;
m_allocations[index].blockOffset = blockOffset;
m_allocations[index].blockSize = blockSize;
#if DEBUG_ALLOCID
// debug some specific id, useful to track allocation leaks
if(index == DEBUG_ALLOCID)
{
int breakHere = 0;
breakHere = breakHere;
}
#endif
return m_allocations[index].id;
}
// otherwise push to end
AllocationInfo info;
info.allocation = allocation;
info.id.instantiate((uint32_t)m_allocations.size());
info.block = block;
info.blockOffset = blockOffset;
info.blockSize = blockSize;
m_allocations.push_back(info);
#if DEBUG_ALLOCID
// debug some specific id, useful to track allocation leaks
if(info.id.index == DEBUG_ALLOCID)
{
int breakHere = 0;
breakHere = breakHere;
}
#endif
return info.id;
}
void DeviceMemoryAllocator::destroyID(AllocationID id)
{
assert(m_allocations[id.index].id.isEqual(id));
#if DEBUG_ALLOCID
// debug some specific id, useful to track allocation leaks
if(id.index == DEBUG_ALLOCID)
{
int breakHere = 0;
breakHere = breakHere;
}
#endif
// setup for free list
m_allocations[id.index].id.instantiate(m_freeAllocationIndex);
m_freeAllocationIndex = id.index;
}
const float DeviceMemoryAllocator::DEFAULT_PRIORITY = 0.5f;
void DeviceMemoryAllocator::init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize blockSize, VkDeviceSize maxSize)
{
assert(!m_device);
m_device = device;
m_physicalDevice = physicalDevice;
// always default to NVVK_DEFAULT_MEMORY_BLOCKSIZE
m_blockSize = blockSize ? blockSize : NVVK_DEFAULT_MEMORY_BLOCKSIZE;
vkGetPhysicalDeviceMemoryProperties(physicalDevice, &m_memoryProperties);
// Retrieving the max allocation size, can be lowered with maxSize
VkPhysicalDeviceProperties2 prop2{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2};
VkPhysicalDeviceMaintenance3Properties vkProp{VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES};
prop2.pNext = &vkProp;
vkGetPhysicalDeviceProperties2(physicalDevice, &prop2);
m_maxAllocationSize = maxSize > 0 ? std::min(maxSize, vkProp.maxMemoryAllocationSize) : vkProp.maxMemoryAllocationSize;
assert(m_blocks.empty());
assert(m_allocations.empty());
}
void DeviceMemoryAllocator::freeAll()
{
for(const auto& it : m_blocks)
{
if(!it.mem)
continue;
if(it.mapped)
{
vkUnmapMemory(m_device, it.mem);
}
vkFreeMemory(m_device, it.mem, nullptr);
}
m_allocations.clear();
m_blocks.clear();
resizeBlocks(0);
m_freeBlockIndex = INVALID_ID_INDEX;
m_freeAllocationIndex = INVALID_ID_INDEX;
}
void DeviceMemoryAllocator::deinit()
{
if(!m_device)
return;
for(const auto& it : m_blocks)
{
if(it.mapped)
{
assert("not all blocks were unmapped properly");
if(it.mem)
{
vkUnmapMemory(m_device, it.mem);
}
}
if(it.mem)
{
if(it.isFirst && m_keepFirst)
{
vkFreeMemory(m_device, it.mem, nullptr);
}
else
{
assert("not all blocks were freed properly");
}
}
}
for(size_t i = 0; i < m_allocations.size(); i++)
{
if(m_allocations[i].id.index == (uint32_t)i)
{
assert(0 && i && "AllocationID not freed");
// set DEBUG_ALLOCID define further up to trace this id
}
}
m_allocations.clear();
m_blocks.clear();
resizeBlocks(0);
m_freeBlockIndex = INVALID_ID_INDEX;
m_freeAllocationIndex = INVALID_ID_INDEX;
m_device = VK_NULL_HANDLE;
}
VkDeviceSize DeviceMemoryAllocator::getMaxAllocationSize() const
{
return m_maxAllocationSize;
}
float DeviceMemoryAllocator::getUtilization(VkDeviceSize& allocatedSize, VkDeviceSize& usedSize) const
{
allocatedSize = m_allocatedSize;
usedSize = m_usedSize;
return float(double(usedSize) / double(allocatedSize));
}
void DeviceMemoryAllocator::nvprintReport() const
{
VkDeviceSize used[VK_MAX_MEMORY_HEAPS] = {0};
VkDeviceSize allocated[VK_MAX_MEMORY_HEAPS] = {0};
uint32_t active[VK_MAX_MEMORY_HEAPS] = {0};
uint32_t dedicated[VK_MAX_MEMORY_HEAPS] = {0};
uint32_t linear[VK_MAX_MEMORY_HEAPS] = {0};
uint32_t dedicatedSum = 0;
uint32_t linearSum = 0;
for(const auto& block : m_blocks)
{
if(block.mem)
{
uint32_t heapIndex = m_memoryProperties.memoryTypes[block.memoryTypeIndex].heapIndex;
used[heapIndex] += block.usedSize;
allocated[heapIndex] += block.allocationSize;
active[heapIndex]++;
linear[heapIndex] += block.isLinear ? 1 : 0;
dedicated[heapIndex] += block.isDedicated ? 1 : 0;
linearSum += block.isLinear ? 1 : 0;
dedicatedSum += block.isDedicated ? 1 : 0;
}
}
LOGI("nvvk::DeviceMemoryAllocator %p\n", this);
{
LOGI(" count : dedicated, linear, all (device-local)\n");
}
for(uint32_t i = 0; i < m_memoryProperties.memoryHeapCount; i++)
{
LOGI(" heap%d : %9d, %6d, %4d (%d)\n", i, dedicated[i], linear[i], active[i],
(m_memoryProperties.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) ? 1 : 0);
}
{
LOGI(" total : %9d, %6d, %4d\n", dedicatedSum, linearSum, m_activeBlockCount);
LOGI(" size : used / allocated / available KB (device-local)\n");
}
for(uint32_t i = 0; i < m_memoryProperties.memoryHeapCount; i++)
{
LOGI(" heap%d : %9d / %9d / %9d (%d)\n", i, uint32_t((used[i] + 1023) / 1024),
uint32_t((allocated[i] + 1023) / 1024), uint32_t((m_memoryProperties.memoryHeaps[i].size + 1023) / 1024),
(m_memoryProperties.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) ? 1 : 0);
}
{
LOGI(" total : %9d / %9d KB (%d percent)\n\n", uint32_t((m_usedSize + 1023) / 1024),
uint32_t((m_allocatedSize + 1023) / 1024), uint32_t(double(m_usedSize) * 100.0 / double(m_allocatedSize)));
}
}
void DeviceMemoryAllocator::getTypeStats(uint32_t count[VK_MAX_MEMORY_TYPES],
VkDeviceSize used[VK_MAX_MEMORY_TYPES],
VkDeviceSize allocated[VK_MAX_MEMORY_TYPES]) const
{
  memset(count, 0, sizeof(count[0]) * VK_MAX_MEMORY_TYPES);
  memset(used, 0, sizeof(used[0]) * VK_MAX_MEMORY_TYPES);
  memset(allocated, 0, sizeof(allocated[0]) * VK_MAX_MEMORY_TYPES);
for(const auto& block : m_blocks)
{
if(block.mem)
{
count[block.memoryTypeIndex]++;
used[block.memoryTypeIndex] += block.usedSize;
allocated[block.memoryTypeIndex] += block.allocationSize;
}
}
}
VkDevice DeviceMemoryAllocator::getDevice() const
{
return m_device;
}
VkPhysicalDevice DeviceMemoryAllocator::getPhysicalDevice() const
{
return m_physicalDevice;
}
const nvvk::Allocation& DeviceMemoryAllocator::getAllocation(AllocationID id) const
{
assert(m_allocations[id.index].id.isEqual(id));
return m_allocations[id.index].allocation;
}
const VkPhysicalDeviceMemoryProperties& DeviceMemoryAllocator::getMemoryProperties() const
{
return m_memoryProperties;
}
AllocationID DeviceMemoryAllocator::allocInternal(const VkMemoryRequirements& memReqs,
VkMemoryPropertyFlags memProps,
bool isLinear,
const VkMemoryDedicatedAllocateInfo* dedicated,
VkResult& result,
bool preferDevice,
const State& state)
{
VkMemoryAllocateInfo memInfo;
result = VK_SUCCESS;
// Fill out allocation info structure
if(memReqs.size > m_maxAllocationSize || !nvvk::getMemoryInfo(m_memoryProperties, memReqs, memProps, memInfo, preferDevice))
{
result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
return AllocationID();
}
float priority = m_supportsPriority ? state.priority : DEFAULT_PRIORITY;
bool isFirst = !dedicated;
bool mappable = (memProps & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0;
if(!dedicated)
{
// First try to find an existing memory block that we can use
for(uint32_t i = 0; i < (uint32_t)m_blocks.size(); i++)
{
Block& block = m_blocks[i];
// Ignore invalid or blocks with the wrong memory type
if(!block.mem || block.memoryTypeIndex != memInfo.memoryTypeIndex || isLinear != block.isLinear
|| block.priority != priority || block.allocateFlags != state.allocateFlags
|| block.allocateDeviceMask != state.allocateDeviceMask || (!block.mappable && mappable))
{
continue;
}
// if there is a compatible block, we are not "first" of a kind
isFirst = false;
uint32_t blockSize;
uint32_t blockOffset;
uint32_t offset;
// Look for a block which has enough free space available
if(block.range.subAllocate((uint32_t)memReqs.size, (uint32_t)memReqs.alignment, blockOffset, offset, blockSize))
{
block.allocationCount++;
block.usedSize += blockSize;
Allocation allocation;
allocation.mem = block.mem;
allocation.offset = offset;
allocation.size = memReqs.size;
m_usedSize += blockSize;
return createID(allocation, block.id, blockOffset, blockSize);
}
}
}
// find available blockID or create new one
BlockID id;
if(m_freeBlockIndex != INVALID_ID_INDEX)
{
Block& block = m_blocks[m_freeBlockIndex];
m_freeBlockIndex = block.id.instantiate(m_freeBlockIndex);
id = block.id;
}
else
{
uint32_t newIndex = (uint32_t)m_blocks.size();
m_blocks.resize(m_blocks.size() + 1);
resizeBlocks(newIndex + 1);
Block& block = m_blocks[newIndex];
block.id.instantiate(newIndex);
id = block.id;
}
Block& block = m_blocks[id.index];
// enforce custom block under certain conditions
if(dedicated == DEDICATED_PROXY || memReqs.size > ((m_blockSize * 2) / 3))
{
block.allocationSize = memReqs.size;
}
else if(dedicated)
{
block.allocationSize = memReqs.size;
memInfo.pNext = dedicated;
}
else
{
block.allocationSize = std::max(m_blockSize, memReqs.size);
}
VkMemoryPriorityAllocateInfoEXT memPriority = {VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT};
if(priority != DEFAULT_PRIORITY)
{
memPriority.pNext = memInfo.pNext;
memPriority.priority = priority;
memInfo.pNext = &memPriority;
}
VkMemoryAllocateFlagsInfo memFlags = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO};
if(state.allocateFlags)
{
memFlags.pNext = memInfo.pNext;
memFlags.deviceMask = state.allocateDeviceMask;
memFlags.flags = state.allocateFlags;
memInfo.pNext = &memFlags;
}
block.allocationSize = block.range.alignedSize((uint32_t)block.allocationSize);
block.priority = priority;
block.memoryTypeIndex = memInfo.memoryTypeIndex;
block.range.init((uint32_t)block.allocationSize);
block.isLinear = isLinear;
block.isFirst = isFirst;
block.isDedicated = dedicated != nullptr;
block.allocateFlags = state.allocateFlags;
block.allocateDeviceMask = state.allocateDeviceMask;
// set allocationSize from aligned block.allocationSize
memInfo.allocationSize = block.allocationSize;
result = allocBlockMemory(id, memInfo, block.mem);
if(result == VK_SUCCESS)
{
nvvk::DebugUtil(m_device).setObjectName(block.mem, m_debugName);
m_allocatedSize += block.allocationSize;
uint32_t offset;
uint32_t blockSize;
uint32_t blockOffset;
block.range.subAllocate((uint32_t)memReqs.size, (uint32_t)memReqs.alignment, blockOffset, offset, blockSize);
block.allocationCount = 1;
block.usedSize = blockSize;
block.mapCount = 0;
block.mapped = nullptr;
block.mappable = mappable;
Allocation allocation;
allocation.mem = block.mem;
allocation.offset = offset;
allocation.size = memReqs.size;
m_usedSize += blockSize;
m_activeBlockCount++;
return createID(allocation, id, blockOffset, blockSize);
}
else
{
// make block free
m_freeBlockIndex = block.id.instantiate(m_freeBlockIndex);
if(result == VK_ERROR_OUT_OF_DEVICE_MEMORY
&& ((memProps == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) || (memProps == 0 && preferDevice)))
{
// downgrade memory property to zero and/or not preferDevice
LOGW("downgrade memory\n");
return allocInternal(memReqs, 0, isLinear, dedicated, result, !preferDevice, state);
}
else
{
LOGE("could not allocate memory: VkResult %d\n", result);
return AllocationID();
}
}
}
void DeviceMemoryAllocator::free(AllocationID allocationID)
{
const AllocationInfo& info = getInfo(allocationID);
Block& block = getBlock(info.block);
destroyID(allocationID);
m_usedSize -= info.blockSize;
block.range.subFree(info.blockOffset, info.blockSize);
block.allocationCount--;
block.usedSize -= info.blockSize;
if(block.allocationCount == 0 && !(block.isFirst && m_keepFirst))
{
assert(block.usedSize == 0);
assert(!block.mapped);
freeBlockMemory(info.block, block.mem);
block.mem = VK_NULL_HANDLE;
block.isFirst = false;
m_allocatedSize -= block.allocationSize;
block.range.deinit();
m_freeBlockIndex = block.id.instantiate(m_freeBlockIndex);
m_activeBlockCount--;
}
}
void* DeviceMemoryAllocator::map(AllocationID allocationID, VkResult* pResult)
{
const AllocationInfo& info = getInfo(allocationID);
Block& block = getBlock(info.block);
assert(block.mappable);
block.mapCount++;
if(!block.mapped)
{
VkResult result = vkMapMemory(m_device, block.mem, 0, block.allocationSize, 0, (void**)&block.mapped);
if(pResult)
{
*pResult = result;
}
}
return block.mapped + info.allocation.offset;
}
void DeviceMemoryAllocator::unmap(AllocationID allocationID)
{
const AllocationInfo& info = getInfo(allocationID);
Block& block = getBlock(info.block);
assert(block.mapped);
if(--block.mapCount == 0)
{
block.mapped = nullptr;
vkUnmapMemory(m_device, block.mem);
}
}
VkImage DeviceMemoryAllocator::createImage(const VkImageCreateInfo& createInfo,
AllocationID& allocationID,
VkMemoryPropertyFlags memProps,
VkResult& result)
{
VkImage image;
assert(createInfo.extent.width && createInfo.extent.height && createInfo.extent.depth);
result = createImageInternal(m_device, &createInfo, &image);
if(result != VK_SUCCESS)
return VK_NULL_HANDLE;
VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS};
VkImageMemoryRequirementsInfo2 imageReqs = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2};
imageReqs.image = image;
memReqs.pNext = &dedicatedRegs;
vkGetImageMemoryRequirements2(m_device, &imageReqs, &memReqs);
VkBool32 useDedicated = m_forceDedicatedAllocation || dedicatedRegs.prefersDedicatedAllocation;
VkMemoryDedicatedAllocateInfo dedicatedInfo = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO};
dedicatedInfo.image = image;
allocationID = alloc(memReqs.memoryRequirements, memProps, createInfo.tiling == VK_IMAGE_TILING_LINEAR,
useDedicated ? &dedicatedInfo : nullptr);
Allocation allocation = allocationID.isValid() ? getAllocation(allocationID) : Allocation();
if(allocation.mem == VK_NULL_HANDLE)
{
vkDestroyImage(m_device, image, nullptr);
result = VK_ERROR_OUT_OF_POOL_MEMORY;
return VK_NULL_HANDLE;
}
VkBindImageMemoryInfo bindInfos = {VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO};
bindInfos.image = image;
bindInfos.memory = allocation.mem;
bindInfos.memoryOffset = allocation.offset;
result = vkBindImageMemory2(m_device, 1, &bindInfos);
if(result != VK_SUCCESS)
{
vkDestroyImage(m_device, image, nullptr);
return VK_NULL_HANDLE;
}
return image;
}
VkBuffer DeviceMemoryAllocator::createBuffer(const VkBufferCreateInfo& createInfo,
AllocationID& allocationID,
VkMemoryPropertyFlags memProps,
VkResult& result)
{
VkBuffer buffer;
assert(createInfo.size);
result = createBufferInternal(m_device, &createInfo, &buffer);
if(result != VK_SUCCESS)
{
return VK_NULL_HANDLE;
}
VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS};
VkBufferMemoryRequirementsInfo2 bufferReqs = {VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2};
bufferReqs.buffer = buffer;
memReqs.pNext = &dedicatedRegs;
vkGetBufferMemoryRequirements2(m_device, &bufferReqs, &memReqs);
// for buffers don't use "preferred", but only requires
VkBool32 useDedicated = m_forceDedicatedAllocation || dedicatedRegs.requiresDedicatedAllocation;
VkMemoryDedicatedAllocateInfo dedicatedInfo = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO};
dedicatedInfo.buffer = buffer;
allocationID = alloc(memReqs.memoryRequirements, memProps, true, useDedicated ? &dedicatedInfo : nullptr);
Allocation allocation = allocationID.isValid() ? getAllocation(allocationID) : Allocation();
if(allocation.mem == VK_NULL_HANDLE)
{
vkDestroyBuffer(m_device, buffer, nullptr);
result = VK_ERROR_OUT_OF_POOL_MEMORY;
return VK_NULL_HANDLE;
}
VkBindBufferMemoryInfo bindInfos = {VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO};
bindInfos.buffer = buffer;
bindInfos.memory = allocation.mem;
bindInfos.memoryOffset = allocation.offset;
result = vkBindBufferMemory2(m_device, 1, &bindInfos);
if(result != VK_SUCCESS)
{
vkDestroyBuffer(m_device, buffer, nullptr);
return VK_NULL_HANDLE;
}
return buffer;
}
VkBuffer DeviceMemoryAllocator::createBuffer(VkDeviceSize size,
VkBufferUsageFlags usage,
AllocationID& allocationID,
VkMemoryPropertyFlags memProps,
VkResult& result)
{
VkBufferCreateInfo createInfo = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
createInfo.usage = usage | m_defaultBufferUsageFlags | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
createInfo.size = size;
return createBuffer(createInfo, allocationID, memProps, result);
}
#if VK_NV_ray_tracing
VkAccelerationStructureNV DeviceMemoryAllocator::createAccStructure(const VkAccelerationStructureCreateInfoNV& createInfo,
AllocationID& allocationID,
VkMemoryPropertyFlags memProps,
VkResult& result)
{
VkAccelerationStructureNV accel;
result = vkCreateAccelerationStructureNV(m_device, &createInfo, nullptr, &accel);
if(result != VK_SUCCESS)
{
return VK_NULL_HANDLE;
}
VkMemoryRequirements2 memReqs = {VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
VkAccelerationStructureMemoryRequirementsInfoNV memInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV};
memInfo.accelerationStructure = accel;
vkGetAccelerationStructureMemoryRequirementsNV(m_device, &memInfo, &memReqs);
allocationID = alloc(memReqs.memoryRequirements, memProps, true, m_forceDedicatedAllocation ? DEDICATED_PROXY : nullptr);
Allocation allocation = allocationID.isValid() ? getAllocation(allocationID) : Allocation();
if(allocation.mem == VK_NULL_HANDLE)
{
vkDestroyAccelerationStructureNV(m_device, accel, nullptr);
result = VK_ERROR_OUT_OF_POOL_MEMORY;
return VK_NULL_HANDLE;
}
VkBindAccelerationStructureMemoryInfoNV bind = {VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV};
bind.accelerationStructure = accel;
bind.memory = allocation.mem;
bind.memoryOffset = allocation.offset;
assert(allocation.offset % memReqs.memoryRequirements.alignment == 0);
result = vkBindAccelerationStructureMemoryNV(m_device, 1, &bind);
if(result != VK_SUCCESS)
{
vkDestroyAccelerationStructureNV(m_device, accel, nullptr);
free(allocationID);
allocationID = AllocationID();
return VK_NULL_HANDLE;
}
return accel;
}
#endif
} // namespace nvvk

View file

@ -0,0 +1,549 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <cassert>
#include <vector>
#include <string>
#include <nvvk/memallocator_vk.hpp>
#include <nvh/trangeallocator.hpp>
#include <vulkan/vulkan_core.h>
namespace nvvk {
#define NVVK_DEFAULT_MEMORY_BLOCKSIZE (VkDeviceSize(128) * 1024 * 1024)
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
This framework assumes that memory heaps exist that support:
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
for uploading data to the device
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & VK_MEMORY_PROPERTY_HOST_CACHED_BIT
for downloading data from the device
This is typical on all major desktop platforms and vendors.
See http://vulkan.gpuinfo.org for information on various devices and platforms.
# functions in nvvk
* getMemoryInfo : fills the VkMemoryAllocateInfo based on device's memory properties and memory requirements and property flags. Returns `true` on success.
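A hedged usage sketch (assuming an existing `device`, `buffer` and queried `memoryProperties`; these names are placeholders, not part of the library):
```cpp
VkMemoryRequirements memReqs;
vkGetBufferMemoryRequirements(device, buffer, &memReqs);

VkMemoryAllocateInfo memInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
if(nvvk::getMemoryInfo(memoryProperties, memReqs, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, memInfo))
{
  VkDeviceMemory mem = VK_NULL_HANDLE;
  vkAllocateMemory(device, &memInfo, nullptr, &mem);
  vkBindBufferMemory(device, buffer, mem, 0);
}
```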
@DOC_END */
// returns true on success
bool getMemoryInfo(const VkPhysicalDeviceMemoryProperties& memoryProperties,
const VkMemoryRequirements& memReqs,
VkMemoryPropertyFlags properties,
VkMemoryAllocateInfo& memInfo,
bool preferDevice = true); // special case when the requested properties are unsupported: prefer device-local memory if true, otherwise fall back to host memory
//////////////////////////////////////////////////////////////////////////
static const uint32_t INVALID_ID_INDEX = ~0;
struct Allocation
{
VkDeviceMemory mem = VK_NULL_HANDLE;
VkDeviceSize offset = 0;
VkDeviceSize size = 0;
};
class AllocationID
{
friend class DeviceMemoryAllocator;
private:
uint32_t index = INVALID_ID_INDEX;
uint32_t generation = 0;
void invalidate() { index = INVALID_ID_INDEX; }
uint32_t instantiate(uint32_t newIndex)
{
uint32_t oldIndex = index;
index = newIndex;
generation++;
return oldIndex;
}
public:
bool isValid() const { return index != INVALID_ID_INDEX; }
bool isEqual(const AllocationID& other) const { return index == other.index && generation == other.generation; }
operator bool() const { return isValid(); }
friend bool operator==(const AllocationID& lhs, const AllocationID& rhs) { return rhs.isEqual(lhs); }
};
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::DeviceMemoryAllocator
The nvvk::DeviceMemoryAllocator allocates and manages device memory in fixed-size memory blocks.
It implements the nvvk::MemAllocator interface.
It sub-allocates from the blocks, and can re-use memory if it finds empty
regions. Because of the fixed-block usage, you can directly create resources
and don't need a phase to compute the allocation sizes first.
It will create compatible chunks according to the memory requirements and
usage flags. Therefore you can easily create mappable host allocations
and delete them after usage, without interfering with device-side allocations.
An `AllocationID` is returned rather than the allocation details directly, which
one can query separately.
Several utility functions are provided to handle the binding of memory
directly with the resource creation of buffers, images and acceleration
structures. These utilities also make implicit use of Vulkan's dedicated
allocation mechanism.
We recommend the use of the nvvk::ResourceAllocator class,
rather than the various create functions provided here, as we may deprecate them.
> **WARNING** : The memory manager serves as proof of concept for some key concepts
> however it is not meant for production use and it currently lacks de-fragmentation logic
> as well. You may want to look at [VMA](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator)
> for a more production-focused solution.
You can derive from this class and overload a few functions to alter the
chunk allocation behavior.
Example :
```cpp
nvvk::DeviceMemoryAllocator memAllocator;
memAllocator.init(device, physicalDevice);
// low-level
aid = memAllocator.alloc(memRequirements,...);
...
memAllocator.free(aid);
// utility wrapper
buffer = memAllocator.createBuffer(bufferSize, bufferUsage, bufferAid);
...
memAllocator.free(bufferAid);
// It is also possible to not track individual resources
// and free everything in one go. However, this is
// not recommended for general purpose use.
bufferA = memAllocator.createBuffer(sizeA, usageA);
bufferB = memAllocator.createBuffer(sizeB, usageB);
...
memAllocator.freeAll();
```
@DOC_END */
class DeviceMemoryAllocator : public MemAllocator
{
public:
static const float DEFAULT_PRIORITY;
DeviceMemoryAllocator(DeviceMemoryAllocator const&) = delete;
DeviceMemoryAllocator& operator=(DeviceMemoryAllocator const&) = delete;
virtual ~DeviceMemoryAllocator()
{
#ifndef NDEBUG
// If all memory was released properly, no blocks should be alive at this point
assert(m_blocks.empty() || m_keepFirst);
#endif
deinit();
}
// system related
DeviceMemoryAllocator() { m_debugName = "nvvk::DeviceMemoryAllocator:" + std::to_string((uint64_t)this); }
DeviceMemoryAllocator(VkDevice device,
VkPhysicalDevice physicalDevice,
VkDeviceSize blockSize = NVVK_DEFAULT_MEMORY_BLOCKSIZE,
VkDeviceSize maxSize = 0)
{
init(device, physicalDevice, blockSize, maxSize);
}
void init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize blockSize = NVVK_DEFAULT_MEMORY_BLOCKSIZE, VkDeviceSize maxSize = 0);
void setDebugName(const std::string& name) { m_debugName = name; }
// requires VK_EXT_memory_priority, default is false
void setPrioritySupported(bool state) { m_supportsPriority = state; }
// frees all blocks independent of individual allocations
// use only if you know the lifetime of all resources from this allocator.
void freeAll();
// asserts on all resources being freed properly
void deinit();
// get utilization of block allocations
float getUtilization(VkDeviceSize& allocatedSize, VkDeviceSize& usedSize) const;
// get total amount of active blocks / VkDeviceMemory allocations
uint32_t getActiveBlockCount() const { return m_activeBlockCount; }
// dump detailed stats via nvprintfLevel(LOGLEVEL_INFO, ...)
void nvprintReport() const;
void getTypeStats(uint32_t count[VK_MAX_MEMORY_TYPES],
VkDeviceSize used[VK_MAX_MEMORY_TYPES],
VkDeviceSize allocated[VK_MAX_MEMORY_TYPES]) const;
const VkPhysicalDeviceMemoryProperties& getMemoryProperties() const;
VkDeviceSize getMaxAllocationSize() const;
//////////////////////////////////////////////////////////////////////////
// Implement MemAllocator interface
virtual MemHandle allocMemory(const MemAllocateInfo& allocInfo, VkResult* pResult = nullptr) override;
virtual void freeMemory(MemHandle memHandle) override;
virtual MemInfo getMemoryInfo(MemHandle memHandle) const override;
virtual void* map(MemHandle memHandle, VkDeviceSize offset = 0, VkDeviceSize size = VK_WHOLE_SIZE, VkResult* pResult = nullptr) override;
virtual void unmap(MemHandle memHandle) override;
virtual VkDevice getDevice() const override;
virtual VkPhysicalDevice getPhysicalDevice() const override;
AllocationID getAllocationID(MemHandle memHandle) const;
//////////////////////////////////////////////////////////////////////////
struct State
{
float priority = DEFAULT_PRIORITY;
VkMemoryAllocateFlags allocateFlags = 0;
uint32_t allocateDeviceMask = 0;
};
// subsequent allocations (and creates) will use the provided priority
// ignored if setPrioritySupported is not enabled
float setPriority(float priority = DEFAULT_PRIORITY)
{
float old = m_defaultState.priority;
m_defaultState.priority = priority;
return old;
}
float getPriority() const { return m_defaultState.priority; }
// subsequent allocations (and creates) will use the provided flags
void setAllocateFlags(VkMemoryAllocateFlags flags, bool enabled)
{
if(enabled)
{
m_defaultState.allocateFlags |= flags;
}
else
{
m_defaultState.allocateFlags &= ~flags;
}
}
void setAllocateDeviceMask(uint32_t allocateDeviceMask, bool enabled)
{
if(enabled)
{
m_defaultState.allocateDeviceMask |= allocateDeviceMask;
}
else
{
m_defaultState.allocateDeviceMask &= ~allocateDeviceMask;
}
}
VkMemoryAllocateFlags getAllocateFlags() const { return m_defaultState.allocateFlags; }
uint32_t getAllocateDeviceMask() const { return m_defaultState.allocateDeviceMask; }
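// Usage sketch (illustrative assumption, not a fixed recipe): request device addresses and a
// higher priority only for the allocations that follow, then restore the defaults, e.g.
//   memAllocator.setAllocateFlags(VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, true);
//   memAllocator.setPriority(1.0f);
//   buffer = memAllocator.createBuffer(size, usage, bufferAid);
//   memAllocator.setAllocateFlags(VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, false);
//   memAllocator.setPriority();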
// make individual raw allocations.
// there are also utilities below that combine the creation of buffers/images etc.
// with binding their memory.
AllocationID alloc(const VkMemoryRequirements& memReqs,
VkMemoryPropertyFlags memProps,
bool isLinear, // buffers are linear, optimal tiling textures are not
const VkMemoryDedicatedAllocateInfo* dedicated,
VkResult& result)
{
return allocInternal(memReqs, memProps, isLinear, dedicated, result, true, m_defaultState);
}
// make individual raw allocations.
// there are also utilities below that combine the creation of buffers/images etc.
// with binding their memory.
AllocationID alloc(const VkMemoryRequirements& memReqs,
VkMemoryPropertyFlags memProps,
bool isLinear, // buffers are linear, optimal tiling textures are not
const VkMemoryDedicatedAllocateInfo* dedicated,
State& state,
VkResult& result)
{
return allocInternal(memReqs, memProps, isLinear, dedicated, result, true, state);
}
AllocationID alloc(const VkMemoryRequirements& memReqs,
VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
bool isLinear = true, // buffers are linear, optimal tiling textures are not
const VkMemoryDedicatedAllocateInfo* dedicated = nullptr)
{
VkResult result;
return allocInternal(memReqs, memProps, isLinear, dedicated, result, true, m_defaultState);
}
// unless you use the freeAll mechanism, each allocation must be freed individually
void free(AllocationID allocationID);
// returns the detailed information from an allocationID
const Allocation& getAllocation(AllocationID id) const;
// map/unmap calls can be nested, but must be paired;
// internally the Vulkan mapping is kept active as long as at least one map is outstanding
void* map(AllocationID allocationID, VkResult* pResult = nullptr);
void unmap(AllocationID allocationID);
template <class T>
T* mapT(AllocationID allocationID, VkResult* pResult = nullptr)
{
return (T*)map(allocationID, pResult);
}
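// Usage sketch (assumes a host-visible allocation `aid` and source data `src`/`srcSize`):
//   float* ptr = memAllocator.mapT<float>(aid);
//   memcpy(ptr, src, srcSize);
//   memAllocator.unmap(aid);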
//////////////////////////////////////////////////////////////////////////
// utility functions to create resources and bind their memory directly
// subsequent creates will use dedicated allocations (mostly for debugging purposes)
inline void setForceDedicatedAllocation(bool state) { m_forceDedicatedAllocation = state; }
// subsequent createBuffers will also use these flags
inline void setDefaultBufferUsageFlags(VkBufferUsageFlags usage) { m_defaultBufferUsageFlags = usage; }
VkImage createImage(const VkImageCreateInfo& createInfo, AllocationID& allocationID, VkMemoryPropertyFlags memProps, VkResult& result);
VkImage createImage(const VkImageCreateInfo& createInfo, AllocationID& allocationID, VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
{
VkResult result;
return createImage(createInfo, allocationID, memProps, result);
}
VkImage createImage(const VkImageCreateInfo& createInfo, VkMemoryPropertyFlags memProps, VkResult& result)
{
AllocationID id;
return createImage(createInfo, id, memProps, result);
}
VkImage createImage(const VkImageCreateInfo& createInfo, VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
{
AllocationID id;
return createImage(createInfo, id, memProps);
}
VkBuffer createBuffer(const VkBufferCreateInfo& createInfo, AllocationID& allocationID, VkMemoryPropertyFlags memProps, VkResult& result);
VkBuffer createBuffer(const VkBufferCreateInfo& createInfo, AllocationID& allocationID, VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
{
VkResult result;
return createBuffer(createInfo, allocationID, memProps, result);
}
VkBuffer createBuffer(const VkBufferCreateInfo& createInfo, VkMemoryPropertyFlags memProps, VkResult& result)
{
AllocationID id;
return createBuffer(createInfo, id, memProps, result);
}
VkBuffer createBuffer(const VkBufferCreateInfo& createInfo, VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
{
AllocationID id;
return createBuffer(createInfo, id, memProps);
}
VkBuffer createBuffer(VkDeviceSize size,
VkBufferUsageFlags usage, // combined with m_defaultBufferUsageFlags and VK_BUFFER_USAGE_TRANSFER_DST_BIT
AllocationID& allocationID,
VkMemoryPropertyFlags memProps,
VkResult& result);
VkBuffer createBuffer(VkDeviceSize size,
VkBufferUsageFlags usage, // combined with m_defaultBufferUsageFlags and VK_BUFFER_USAGE_TRANSFER_DST_BIT
AllocationID& allocationID,
VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
{
VkResult result;
return createBuffer(size, usage, allocationID, memProps, result);
}
#if VK_NV_ray_tracing
VkAccelerationStructureNV createAccStructure(const VkAccelerationStructureCreateInfoNV& createInfo,
AllocationID& allocationID,
VkMemoryPropertyFlags memProps,
VkResult& result);
VkAccelerationStructureNV createAccStructure(const VkAccelerationStructureCreateInfoNV& createInfo,
AllocationID& allocationID,
VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
{
VkResult result;
return createAccStructure(createInfo, allocationID, memProps, result);
}
#endif
protected:
static const VkMemoryDedicatedAllocateInfo* DEDICATED_PROXY;
static int s_allocDebugBias;
struct BlockID
{
uint32_t index = INVALID_ID_INDEX;
uint32_t generation = 0;
bool isEqual(const BlockID& other) const { return index == other.index && generation == other.generation; }
uint32_t instantiate(uint32_t newIndex)
{
uint32_t oldIndex = index;
index = newIndex;
generation++;
return oldIndex;
}
friend bool operator==(const BlockID& lhs, const BlockID& rhs) { return rhs.isEqual(lhs); }
};
struct Block
{
BlockID id{}; // index to self, or next free item
VkDeviceMemory mem = VK_NULL_HANDLE;
nvh::TRangeAllocator<256> range;
VkDeviceSize allocationSize = 0;
VkDeviceSize usedSize = 0;
// to avoid management of pages via limits::bufferImageGranularity,
// a memory block is either fully linear, or non-linear
bool isLinear = false;
bool isDedicated = false;
bool isFirst = false; // first memory block of a type
float priority = 0.0f;
VkMemoryAllocateFlags allocateFlags{};
uint32_t allocateDeviceMask = 0;
uint32_t memoryTypeIndex = 0;
uint32_t allocationCount = 0;
uint32_t mapCount = 0;
uint32_t mappable = 0;
uint8_t* mapped = nullptr;
Block& operator=(Block&&) = default;
Block(Block&&) = default;
Block(const Block&) = default;
Block() = default;
};
struct AllocationInfo
{
AllocationID id{}; // index to self, or next free item
Allocation allocation{};
uint32_t blockOffset = 0;
uint32_t blockSize = 0;
BlockID block{};
};
VkDevice m_device = VK_NULL_HANDLE;
VkDeviceSize m_blockSize = 0;
VkDeviceSize m_allocatedSize = 0;
VkDeviceSize m_usedSize = 0;
VkDeviceSize m_maxAllocationSize = 0;
std::vector<Block> m_blocks;
std::vector<AllocationInfo> m_allocations;
// linked-list to next free allocation
uint32_t m_freeAllocationIndex = INVALID_ID_INDEX;
// linked-list to next free block
uint32_t m_freeBlockIndex = INVALID_ID_INDEX;
uint32_t m_activeBlockCount = 0;
VkPhysicalDeviceMemoryProperties m_memoryProperties;
VkPhysicalDevice m_physicalDevice = VK_NULL_HANDLE;
State m_defaultState;
VkBufferUsageFlags m_defaultBufferUsageFlags = 0;
bool m_forceDedicatedAllocation = false;
bool m_supportsPriority = false;
// heuristic that doesn't immediately free the first memory block of a specific memory type
bool m_keepFirst = true;
std::string m_debugName;
AllocationID allocInternal(const VkMemoryRequirements& memReqs,
VkMemoryPropertyFlags memProps,
bool isLinear, // buffers are linear, optimal tiling textures are not
const VkMemoryDedicatedAllocateInfo* dedicated,
VkResult& result,
bool preferDevice,
const State& state);
AllocationID createID(Allocation& allocation, BlockID block, uint32_t blockOffset, uint32_t blockSize);
void destroyID(AllocationID id);
const AllocationInfo& getInfo(AllocationID id) const
{
assert(m_allocations[id.index].id.isEqual(id));
return m_allocations[id.index];
}
Block& getBlock(BlockID id)
{
Block& block = m_blocks[id.index];
assert(block.id.isEqual(id));
return block;
}
//////////////////////////////////////////////////////////////////////////
// For derived memory allocators you can do special purpose operations via overloading these functions.
// A typical use-case would be exporting/importing the memory to another API.
virtual VkResult allocBlockMemory(BlockID id, VkMemoryAllocateInfo& memInfo, VkDeviceMemory& deviceMemory)
{
//s_allocDebugBias++;
return vkAllocateMemory(m_device, &memInfo, nullptr, &deviceMemory);
}
virtual void freeBlockMemory(BlockID id, VkDeviceMemory deviceMemory)
{
//s_allocDebugBias--;
vkFreeMemory(m_device, deviceMemory, nullptr);
}
virtual void resizeBlocks(uint32_t count) {}
virtual VkResult createBufferInternal(VkDevice device, const VkBufferCreateInfo* info, VkBuffer* buffer)
{
return vkCreateBuffer(device, info, nullptr, buffer);
}
virtual VkResult createImageInternal(VkDevice device, const VkImageCreateInfo* info, VkImage* image)
{
return vkCreateImage(device, info, nullptr, image);
}
};
} // namespace nvvk

View file

@ -0,0 +1,162 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#if NVP_SUPPORTS_OPENGL
#include <vulkan/vulkan.h>
#include "memorymanagement_vkgl.hpp"
#ifdef LINUX
#include <unistd.h>
#endif
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
VkExternalMemoryHandleTypeFlags DeviceMemoryAllocatorGL::getExternalMemoryHandleTypeFlags()
{
#ifdef VK_USE_PLATFORM_WIN32_KHR
return VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
#else
return VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
#endif
}
VkResult DeviceMemoryAllocatorGL::allocBlockMemory(BlockID id, VkMemoryAllocateInfo& memInfo, VkDeviceMemory& deviceMemory)
{
BlockGL& blockGL = m_blockGLs[id.index];
bool isDedicated = false;
const StructChain* extChain = (const StructChain*)memInfo.pNext;
while(extChain)
{
if(extChain->sType == VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO)
{
isDedicated = true;
break;
}
extChain = extChain->pNext;
}
// prepare memory allocation for export
VkExportMemoryAllocateInfo exportInfo = {VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO};
exportInfo.handleTypes = getExternalMemoryHandleTypeFlags();
exportInfo.pNext = memInfo.pNext;
memInfo.pNext = &exportInfo;
VkResult result = vkAllocateMemory(m_device, &memInfo, nullptr, &deviceMemory);
if(result != VK_SUCCESS)
{
return result;
}
// get the OS handle (warning: we must not forget to close it later)
#ifdef VK_USE_PLATFORM_WIN32_KHR
VkMemoryGetWin32HandleInfoKHR memGetHandle = {VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR};
memGetHandle.memory = deviceMemory;
memGetHandle.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
result = vkGetMemoryWin32HandleKHR(m_device, &memGetHandle, &blockGL.handle);
#else
VkMemoryGetFdInfoKHR memGetHandle = {VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR};
memGetHandle.memory = deviceMemory;
memGetHandle.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
result = vkGetMemoryFdKHR(m_device, &memGetHandle, &blockGL.handle);
#endif
if(result != VK_SUCCESS)
{
return result;
}
// import into GL
GLint param = isDedicated ? GL_TRUE : GL_FALSE;
glCreateMemoryObjectsEXT(1, &blockGL.memoryObject);
glMemoryObjectParameterivEXT(blockGL.memoryObject, GL_DEDICATED_MEMORY_OBJECT_EXT, &param);
#ifdef VK_USE_PLATFORM_WIN32_KHR
glImportMemoryWin32HandleEXT(blockGL.memoryObject, memInfo.allocationSize, GL_HANDLE_TYPE_OPAQUE_WIN32_EXT, blockGL.handle);
#else
glImportMemoryFdEXT(blockGL.memoryObject, memInfo.allocationSize, GL_HANDLE_TYPE_OPAQUE_FD_EXT, blockGL.handle);
// the Fd got consumed
blockGL.handle = -1;
#endif
return result;
}
void DeviceMemoryAllocatorGL::freeBlockMemory(BlockID id, VkDeviceMemory deviceMemory)
{
BlockGL& blockGL = m_blockGLs[id.index];
// free vulkan memory
vkFreeMemory(m_device, deviceMemory, nullptr);
glDeleteMemoryObjectsEXT(1, &blockGL.memoryObject);
blockGL.memoryObject = 0;
// don't forget the OS handle: it is ref-counted and can leak memory!
#ifdef VK_USE_PLATFORM_WIN32_KHR
CloseHandle(blockGL.handle);
blockGL.handle = NULL;
#else
if(blockGL.handle != -1)
{
close(blockGL.handle);
blockGL.handle = -1;
}
#endif
}
void DeviceMemoryAllocatorGL::resizeBlocks(uint32_t count)
{
if(count == 0)
{
m_blockGLs.clear();
}
else
{
m_blockGLs.resize(count);
}
}
VkResult DeviceMemoryAllocatorGL::createBufferInternal(VkDevice device, const VkBufferCreateInfo* info, VkBuffer* buffer)
{
VkBufferCreateInfo infoNew = *info;
VkExternalMemoryBufferCreateInfo external = {VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO};
external.handleTypes = getExternalMemoryHandleTypeFlags();
external.pNext = infoNew.pNext;
infoNew.pNext = &external;
return vkCreateBuffer(device, &infoNew, nullptr, buffer);
}
VkResult DeviceMemoryAllocatorGL::createImageInternal(VkDevice device, const VkImageCreateInfo* info, VkImage* image)
{
VkImageCreateInfo infoNew = *info;
VkExternalMemoryImageCreateInfo external = {VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO};
external.handleTypes = getExternalMemoryHandleTypeFlags();
external.pNext = infoNew.pNext;
infoNew.pNext = &external;
return vkCreateImage(device, &infoNew, nullptr, image);
}
} // namespace nvvk
#endif

View file

@ -0,0 +1,112 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#if NVP_SUPPORTS_OPENGL
/** @DOC_START
This file contains helpers for resource interoperability between OpenGL and Vulkan.
They only exist if the nvpro_core project is compiled with both Vulkan AND OpenGL support.
@DOC_END */
#pragma once
#include <nvgl/extensions_gl.hpp>
#include <nvvk/images_vk.hpp>
#include <nvvk/memorymanagement_vk.hpp>
#include <vulkan/vulkan_core.h>
namespace nvvk {
struct AllocationGL
{
GLuint memoryObject = 0;
GLuint64 offset = 0;
GLuint64 size = 0;
};
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::DeviceMemoryAllocatorGL
nvvk::DeviceMemoryAllocatorGL is derived from nvvk::DeviceMemoryAllocator. It uses Vulkan memory that is
exported and directly imported into OpenGL. Requires GL_EXT_memory_object.
It is used just like the base class; in addition, the function `getAllocationGL` returns the
imported GL memory object for a given allocation.
Look at the source of nvvk::AllocatorDmaGL for usage.
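A minimal usage sketch (the buffer `size`, `usage` flags and the `memAllocatorGL` instance are assumed placeholders; the GL calls come from GL_EXT_memory_object):
```cpp
nvvk::AllocationID aid;
VkBuffer buffer = memAllocatorGL.createBuffer(size, usage, aid);

// import the exported Vulkan memory into a GL buffer object
nvvk::AllocationGL allocGL = memAllocatorGL.getAllocationGL(aid);
GLuint glBuffer = 0;
glCreateBuffers(1, &glBuffer);
glNamedBufferStorageMemEXT(glBuffer, GLsizeiptr(allocGL.size), allocGL.memoryObject, allocGL.offset);
```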
@DOC_END */
class DeviceMemoryAllocatorGL : public DeviceMemoryAllocator
{
public:
DeviceMemoryAllocatorGL() {}
DeviceMemoryAllocatorGL(VkDevice device,
VkPhysicalDevice physicalDevice,
VkDeviceSize blockSize = NVVK_DEFAULT_MEMORY_BLOCKSIZE,
VkDeviceSize maxSize = 0)
: DeviceMemoryAllocator(device, physicalDevice, blockSize, maxSize)
{
}
AllocationGL getAllocationGL(AllocationID aid) const
{
AllocationGL alloc;
const AllocationInfo& info = getInfo(aid);
alloc.memoryObject = m_blockGLs[info.block.index].memoryObject;
alloc.offset = info.allocation.offset;
alloc.size = info.allocation.size;
return alloc;
}
static VkExternalMemoryHandleTypeFlags getExternalMemoryHandleTypeFlags();
protected:
struct BlockGL
{
#ifdef WIN32
void* handle = nullptr;
#else
int handle = -1;
#endif
GLuint memoryObject = 0;
};
std::vector<BlockGL> m_blockGLs;
struct StructChain
{
VkStructureType sType;
const StructChain* pNext;
};
VkResult allocBlockMemory(BlockID id, VkMemoryAllocateInfo& memInfo, VkDeviceMemory& deviceMemory) override;
void freeBlockMemory(BlockID id, VkDeviceMemory deviceMemory) override;
void resizeBlocks(uint32_t count) override;
VkResult createBufferInternal(VkDevice device, const VkBufferCreateInfo* info, VkBuffer* buffer) override;
VkResult createImageInternal(VkDevice device, const VkImageCreateInfo* info, VkImage* image) override;
};
} // namespace nvvk
#endif

View file

@ -0,0 +1,691 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "nsight_aftermath_vk.hpp"
#if defined(NVVK_SUPPORTS_AFTERMATH) && defined(NVP_SUPPORTS_VULKANSDK)
#include <vulkan/vulkan.h> // needed so GFSDK_Aftermath_SpirvCode gets declared
#include "nvh/nvprint.hpp"
#include "nvp/perproject_globals.hpp"
#include "nvp/nvpsystem.hpp"
#include "GFSDK_Aftermath.h"
#include "GFSDK_Aftermath_GpuCrashDump.h"
#include "GFSDK_Aftermath_GpuCrashDumpDecoding.h"
#include <array>
#include <cassert>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <sstream>
#include <string>
#include <vector>
//--------------------------------------------------------------------------------------------------
// Some std::to_string overloads for some Nsight Aftermath API types.
//
namespace std {
template <typename T>
inline std::string to_hex_string(T n)
{
std::stringstream stream;
stream << std::setfill('0') << std::setw(2 * sizeof(T)) << std::hex << n;
return stream.str();
}
inline std::string to_string(GFSDK_Aftermath_Result result)
{
return std::string("0x") + to_hex_string(static_cast<uint32_t>(result));
}
inline std::string to_string(const GFSDK_Aftermath_ShaderDebugInfoIdentifier& identifier)
{
return to_hex_string(identifier.id[0]) + "-" + to_hex_string(identifier.id[1]);
}
inline std::string to_string(const GFSDK_Aftermath_ShaderBinaryHash& hash)
{
return to_hex_string(hash.hash);
}
} // namespace std
//*********************************************************
// Helper for comparing shader hashes and debug info identifier.
//
// Helper for comparing GFSDK_Aftermath_ShaderDebugInfoIdentifier.
inline bool operator<(const GFSDK_Aftermath_ShaderDebugInfoIdentifier& lhs, const GFSDK_Aftermath_ShaderDebugInfoIdentifier& rhs)
{
if(lhs.id[0] == rhs.id[0])
{
return lhs.id[1] < rhs.id[1];
}
return lhs.id[0] < rhs.id[0];
}
// Helper for comparing GFSDK_Aftermath_ShaderBinaryHash.
inline bool operator<(const GFSDK_Aftermath_ShaderBinaryHash& lhs, const GFSDK_Aftermath_ShaderBinaryHash& rhs)
{
return lhs.hash < rhs.hash;
}
// Helper for comparing GFSDK_Aftermath_ShaderDebugName.
inline bool operator<(const GFSDK_Aftermath_ShaderDebugName& lhs, const GFSDK_Aftermath_ShaderDebugName& rhs)
{
return strncmp(lhs.name, rhs.name, sizeof(lhs.name)) < 0;
}
//*********************************************************
// Helper for checking Nsight Aftermath failures.
//
inline std::string AftermathErrorMessage(GFSDK_Aftermath_Result result)
{
switch(result)
{
case GFSDK_Aftermath_Result_FAIL_DriverVersionNotSupported:
return "Unsupported driver version - requires an NVIDIA R495 display driver or newer.";
default:
return "Aftermath Error 0x" + std::to_hex_string(result);
}
}
// Helper macro for checking Nsight Aftermath results and throwing exception
// in case of a failure.
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#define VC_EXTRALEAN
#include <windows.h>
#define AFTERMATH_CHECK_ERROR(FC) \
[&]() { \
GFSDK_Aftermath_Result _result = FC; \
if(!GFSDK_Aftermath_SUCCEED(_result)) \
{ \
MessageBoxA(0, AftermathErrorMessage(_result).c_str(), "Aftermath Error", MB_OK); \
exit(1); \
} \
}()
#else
#define AFTERMATH_CHECK_ERROR(FC) \
[&]() { \
GFSDK_Aftermath_Result _result = FC; \
if(!GFSDK_Aftermath_SUCCEED(_result)) \
{ \
printf("%s\n", AftermathErrorMessage(_result).c_str()); \
fflush(stdout); \
exit(1); \
} \
}()
#endif
namespace nvvk {
//*********************************************************
// Implements GPU crash dump tracking using the Nsight
// Aftermath API.
//
class GpuCrashTrackerImpl
{
public:
// keep four frames worth of marker history
const static unsigned int c_markerFrameHistory = 4;
typedef std::array<std::map<uint64_t, std::string>, c_markerFrameHistory> MarkerMap;
GpuCrashTrackerImpl(const MarkerMap& markerMap);
~GpuCrashTrackerImpl();
// Initialize the GPU crash dump tracker.
void initialize();
// Track a shader compiled with -g
void addShaderBinary(std::vector<uint32_t>& data);
// Track an optimized shader with additional debug information
void addShaderBinaryWithDebugInfo(std::vector<uint32_t>& data, std::vector<uint32_t>& strippedData);
private:
//*********************************************************
// Callback handlers for GPU crash dumps and related data.
//
// Handler for GPU crash dump callbacks.
void onCrashDump(const void* pGpuCrashDump, const uint32_t gpuCrashDumpSize);
// Handler for shader debug information callbacks.
void onShaderDebugInfo(const void* pShaderDebugInfo, const uint32_t shaderDebugInfoSize);
// Handler for GPU crash dump description callbacks.
static void onDescription(PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription addDescription);
// Handler for app-managed marker resolve callback
void onResolveMarker(const void* pMarker, void** resolvedMarkerData, uint32_t* markerSize);
//*********************************************************
// Helpers for writing a GPU crash dump and debug information
// data to files.
//
// Helper for writing a GPU crash dump to a file.
void writeGpuCrashDumpToFile(const void* pGpuCrashDump, const uint32_t gpuCrashDumpSize);
// Helper for writing shader debug information to a file
static void writeShaderDebugInformationToFile(GFSDK_Aftermath_ShaderDebugInfoIdentifier identifier,
const void* pShaderDebugInfo,
const uint32_t shaderDebugInfoSize);
//*********************************************************
// Helpers for decoding GPU crash dump to JSON.
//
// Handler for shader debug info lookup callbacks.
void onShaderDebugInfoLookup(const GFSDK_Aftermath_ShaderDebugInfoIdentifier& identifier,
PFN_GFSDK_Aftermath_SetData setShaderDebugInfo) const;
// Handler for shader lookup callbacks.
void onShaderLookup(const GFSDK_Aftermath_ShaderBinaryHash& shaderHash, PFN_GFSDK_Aftermath_SetData setShaderBinary) const;
// Handler for shader source debug info lookup callbacks.
void onShaderSourceDebugInfoLookup(const GFSDK_Aftermath_ShaderDebugName& shaderDebugName,
PFN_GFSDK_Aftermath_SetData setShaderBinary) const;
//*********************************************************
// Static callback wrappers.
//
// GPU crash dump callback.
static void gpuCrashDumpCallback(const void* pGpuCrashDump, const uint32_t gpuCrashDumpSize, void* pUserData);
// Shader debug information callback.
static void shaderDebugInfoCallback(const void* pShaderDebugInfo, const uint32_t shaderDebugInfoSize, void* pUserData);
// GPU crash dump description callback.
static void crashDumpDescriptionCallback(PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription addDescription, void* pUserData);
// App-managed marker resolve callback
static void resolveMarkerCallback(const void* pMarker, const uint32_t markerDataSize, void* pUserData, void** resolvedMarkerData, uint32_t* markerSize);
// Shader debug information lookup callback.
static void shaderDebugInfoLookupCallback(const GFSDK_Aftermath_ShaderDebugInfoIdentifier* pIdentifier,
PFN_GFSDK_Aftermath_SetData setShaderDebugInfo,
void* pUserData);
// Shader lookup callback.
static void shaderLookupCallback(const GFSDK_Aftermath_ShaderBinaryHash* pShaderHash,
PFN_GFSDK_Aftermath_SetData setShaderBinary,
void* pUserData);
// Shader source debug info lookup callback.
static void shaderSourceDebugInfoLookupCallback(const GFSDK_Aftermath_ShaderDebugName* pShaderDebugName,
PFN_GFSDK_Aftermath_SetData setShaderBinary,
void* pUserData);
//*********************************************************
// GPU crash tracker state.
//
// Is the GPU crash dump tracker initialized?
bool m_initialized;
// For thread-safe access of GPU crash tracker state.
mutable std::mutex m_mutex;
// List of Shader Debug Information by ShaderDebugInfoIdentifier.
std::map<GFSDK_Aftermath_ShaderDebugInfoIdentifier, std::vector<uint8_t>> m_shaderDebugInfo;
// App-managed marker tracking
const MarkerMap& m_markerMap;
//*********************************************************
// Shader database.
//
// Find a shader bytecode binary by shader hash.
bool findShaderBinary(const GFSDK_Aftermath_ShaderBinaryHash& shaderHash, std::vector<uint32_t>& shader) const;
// Find a source shader debug info by shader debug name generated by the DXC compiler.
bool findShaderBinaryWithDebugData(const GFSDK_Aftermath_ShaderDebugName& shaderDebugName, std::vector<uint32_t>& shader) const;
// List of shader binaries by ShaderBinaryHash.
std::map<GFSDK_Aftermath_ShaderBinaryHash, std::vector<uint32_t>> m_shaderBinaries;
// List of available shader binaries with source debug information by ShaderDebugName.
std::map<GFSDK_Aftermath_ShaderDebugName, std::vector<uint32_t>> m_shaderBinariesWithDebugInfo;
};
//*********************************************************
// GpuCrashTrackerImpl implementation
//*********************************************************
GpuCrashTrackerImpl::GpuCrashTrackerImpl(const MarkerMap& markerMap)
: m_initialized(false)
, m_markerMap(markerMap)
{
}
GpuCrashTrackerImpl::~GpuCrashTrackerImpl()
{
// If initialized, disable GPU crash dumps
if(m_initialized)
{
GFSDK_Aftermath_DisableGpuCrashDumps();
}
}
// Initialize the GPU Crash Dump Tracker
void GpuCrashTrackerImpl::initialize()
{
// Enable GPU crash dumps and set up the callbacks for crash dump notifications,
// shader debug information notifications, and providing additional crash
// dump description data. Only the crash dump callback is mandatory. The other two
// callbacks are optional and can be omitted, by passing nullptr, if the corresponding
// functionality is not used.
// The DeferDebugInfoCallbacks flag enables caching of shader debug information data
// in memory. If the flag is set, ShaderDebugInfoCallback will be called only
// in the event of a crash, right before GpuCrashDumpCallback. If the flag is not set,
// ShaderDebugInfoCallback will be called for every shader that is compiled.
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_EnableGpuCrashDumps(
GFSDK_Aftermath_Version_API, GFSDK_Aftermath_GpuCrashDumpWatchedApiFlags_Vulkan,
GFSDK_Aftermath_GpuCrashDumpFeatureFlags_DeferDebugInfoCallbacks, // Let the Nsight Aftermath library cache shader debug information.
gpuCrashDumpCallback, // Register callback for GPU crash dumps.
shaderDebugInfoCallback, // Register callback for shader debug information.
crashDumpDescriptionCallback, // Register callback for GPU crash dump description.
resolveMarkerCallback, // Register callback for resolving application-managed markers.
this)); // Set the GpuCrashTrackerImpl object as user data for the above callbacks.
m_initialized = true;
}
// Handler for GPU crash dump callbacks from Nsight Aftermath
void GpuCrashTrackerImpl::onCrashDump(const void* pGpuCrashDump, const uint32_t gpuCrashDumpSize)
{
// Make sure only one thread at a time...
std::lock_guard<std::mutex> lock(m_mutex);
// Write to file for later in-depth analysis with Nsight Graphics.
writeGpuCrashDumpToFile(pGpuCrashDump, gpuCrashDumpSize);
}
// Handler for shader debug information callbacks
void GpuCrashTrackerImpl::onShaderDebugInfo(const void* pShaderDebugInfo, const uint32_t shaderDebugInfoSize)
{
// Make sure only one thread at a time...
std::lock_guard<std::mutex> lock(m_mutex);
// Get shader debug information identifier
GFSDK_Aftermath_ShaderDebugInfoIdentifier identifier = {};
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_GetShaderDebugInfoIdentifier(GFSDK_Aftermath_Version_API, pShaderDebugInfo,
shaderDebugInfoSize, &identifier));
// Store information for decoding of GPU crash dumps with shader address mapping
// from within the application.
std::vector<uint8_t> data((uint8_t*)pShaderDebugInfo, (uint8_t*)pShaderDebugInfo + shaderDebugInfoSize);
m_shaderDebugInfo[identifier].swap(data);
// Write to file for later in-depth analysis of crash dumps with Nsight Graphics
writeShaderDebugInformationToFile(identifier, pShaderDebugInfo, shaderDebugInfoSize);
}
// Handler for GPU crash dump description callbacks
void GpuCrashTrackerImpl::onDescription(PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription addDescription)
{
// Add some basic description about the crash. This is called after the GPU crash happens, but before
// the actual GPU crash dump callback. The provided data is included in the crash dump and can be
// retrieved using GFSDK_Aftermath_GpuCrashDump_GetDescription().
addDescription(GFSDK_Aftermath_GpuCrashDumpDescriptionKey_ApplicationName, getProjectName().c_str());
addDescription(GFSDK_Aftermath_GpuCrashDumpDescriptionKey_ApplicationVersion, "v1.0");
}
// Handler for app-managed marker resolve callback
void GpuCrashTrackerImpl::onResolveMarker(const void* pMarker, void** resolvedMarkerData, uint32_t* markerSize)
{
// Important: the pointer passed back via resolvedMarkerData must remain valid after this function returns.
// Using references for all of the m_markerMap accesses ensures that the pointers refer to the persistent data.
for(const auto& map : m_markerMap)
{
const auto& found_marker = map.find((uint64_t)pMarker);
if(found_marker != map.end())
{
const std::string& marker_data = found_marker->second;
// std::string::data() will return a valid pointer until the string is next modified
// we don't modify the string after calling data() here, so the pointer should remain valid
*resolvedMarkerData = (void*)marker_data.data();
*markerSize = static_cast<uint32_t>(marker_data.length());
return;
}
}
}
// Helper for writing a GPU crash dump to a file
void GpuCrashTrackerImpl::writeGpuCrashDumpToFile(const void* pGpuCrashDump, const uint32_t gpuCrashDumpSize)
{
// Create a GPU crash dump decoder object for the GPU crash dump.
GFSDK_Aftermath_GpuCrashDump_Decoder decoder = {};
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_GpuCrashDump_CreateDecoder(GFSDK_Aftermath_Version_API, pGpuCrashDump,
gpuCrashDumpSize, &decoder));
// Use the decoder object to read basic information, like application
// name, PID, etc. from the GPU crash dump.
GFSDK_Aftermath_GpuCrashDump_BaseInfo baseInfo = {};
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_GpuCrashDump_GetBaseInfo(decoder, &baseInfo));
// Use the decoder object to query the application name that was set
// in the GPU crash dump description.
uint32_t applicationNameLength = 0;
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_GpuCrashDump_GetDescriptionSize(decoder, GFSDK_Aftermath_GpuCrashDumpDescriptionKey_ApplicationName,
&applicationNameLength));
std::vector<char> applicationName(applicationNameLength, '\0');
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_GpuCrashDump_GetDescription(decoder, GFSDK_Aftermath_GpuCrashDumpDescriptionKey_ApplicationName,
uint32_t(applicationName.size()), applicationName.data()));
// Create a unique file name for writing the crash dump data to a file.
// Note: due to an Nsight Aftermath bug (will be fixed in an upcoming
// driver release) we may see redundant crash dumps. As a workaround,
// attach a unique count to each generated file name.
static int count = 0;
const std::string base_file_name =
std::string(applicationName.data()) + "-" + std::to_string(baseInfo.pid) + "-" + std::to_string(++count);
// Write the crash dump data to a file using the .nv-gpudmp extension
// registered with Nsight Graphics.
const std::string crash_dump_file_name = base_file_name + ".nv-gpudmp";
const std::filesystem::path crash_dump_file_path(std::filesystem::absolute(crash_dump_file_name));
LOGE("\n--------------------------------------------------------------\n");
LOGE("Writing Aftermath dump file to:\n %s", crash_dump_file_path.string().c_str());
LOGE("\n--------------------------------------------------------------\n");
std::ofstream dump_file(crash_dump_file_path, std::ios::out | std::ios::binary);
if(dump_file)
{
dump_file.write(static_cast<const char*>(pGpuCrashDump), gpuCrashDumpSize);
dump_file.close();
}
// Decode the crash dump to a JSON string.
// Step 1: Generate the JSON and get the size.
uint32_t jsonSize = 0;
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_GpuCrashDump_GenerateJSON(
decoder, GFSDK_Aftermath_GpuCrashDumpDecoderFlags_ALL_INFO, GFSDK_Aftermath_GpuCrashDumpFormatterFlags_NONE,
shaderDebugInfoLookupCallback, shaderLookupCallback, shaderSourceDebugInfoLookupCallback, this, &jsonSize));
// Step 2: Allocate a buffer and fetch the generated JSON.
std::vector<char> json(jsonSize);
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_GpuCrashDump_GetJSON(decoder, uint32_t(json.size()), json.data()));
// Write the crash dump data as JSON to a file.
const std::string json_file_name = crash_dump_file_name + ".json";
const std::filesystem::path json_file_path(std::filesystem::absolute(json_file_name));
LOGE("\n--------------------------------------------------------------\n");
LOGE("Writing JSON dump file to:\n %s", json_file_path.string().c_str());
LOGE("\n--------------------------------------------------------------\n");
std::ofstream json_file(json_file_path, std::ios::out | std::ios::binary);
if(json_file)
{
// Write the JSON to the file (excluding string termination)
json_file.write(json.data(), json.size() - 1);
json_file.close();
}
// Destroy the GPU crash dump decoder object.
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_GpuCrashDump_DestroyDecoder(decoder));
}
// Helper for writing shader debug information to a file
void GpuCrashTrackerImpl::writeShaderDebugInformationToFile(GFSDK_Aftermath_ShaderDebugInfoIdentifier identifier,
const void* pShaderDebugInfo,
const uint32_t shaderDebugInfoSize)
{
// Create a unique file name.
const std::string file_path = "shader-" + std::to_string(identifier) + ".nvdbg";
std::ofstream f(file_path, std::ios::out | std::ios::binary);
if(f)
{
f.write(static_cast<const char*>(pShaderDebugInfo), shaderDebugInfoSize);
}
}
// Handler for shader debug information lookup callbacks.
// This is used by the JSON decoder for mapping shader instruction
// addresses to SPIR-V IL lines or GLSL source lines.
void GpuCrashTrackerImpl::onShaderDebugInfoLookup(const GFSDK_Aftermath_ShaderDebugInfoIdentifier& identifier,
PFN_GFSDK_Aftermath_SetData setShaderDebugInfo) const
{
// Search the list of shader debug information blobs received earlier.
auto i_debug_info = m_shaderDebugInfo.find(identifier);
if(i_debug_info == m_shaderDebugInfo.end())
{
// Early exit, nothing found. No need to call setShaderDebugInfo.
return;
}
// Let the GPU crash dump decoder know about the shader debug information
// that was found.
setShaderDebugInfo(i_debug_info->second.data(), static_cast<uint32_t>(i_debug_info->second.size()));
}
// Handler for shader lookup callbacks.
// This is used by the JSON decoder for mapping shader instruction
// addresses to SPIR-V IL lines or GLSL source lines.
// NOTE: If the application loads stripped shader binaries (i.e. --strip-all in spirv-remap),
// Aftermath will require access to both the stripped and the not stripped
// shader binaries.
void GpuCrashTrackerImpl::onShaderLookup(const GFSDK_Aftermath_ShaderBinaryHash& shaderHash, PFN_GFSDK_Aftermath_SetData setShaderBinary) const
{
// Find shader binary data for the shader hash in the shader database.
std::vector<uint32_t> shader_binary;
if(!findShaderBinary(shaderHash, shader_binary))
{
// Early exit, nothing found. No need to call setShaderBinary.
return;
}
// Let the GPU crash dump decoder know about the shader data
// that was found.
setShaderBinary(shader_binary.data(), sizeof(uint32_t) * static_cast<uint32_t>(shader_binary.size()));
}
// Handler for shader source debug info lookup callbacks.
// This is used by the JSON decoder for mapping shader instruction addresses to
// GLSL source lines, if the shaders used by the application were compiled with
// separate debug info data files.
void GpuCrashTrackerImpl::onShaderSourceDebugInfoLookup(const GFSDK_Aftermath_ShaderDebugName& shaderDebugName,
PFN_GFSDK_Aftermath_SetData setShaderBinary) const
{
// Find source debug info for the shader DebugName in the shader database.
std::vector<uint32_t> shader_binary;
if(!findShaderBinaryWithDebugData(shaderDebugName, shader_binary))
{
// Early exit, nothing found. No need to call setShaderBinary.
return;
}
// Let the GPU crash dump decoder know about the shader debug data that was
// found.
setShaderBinary(shader_binary.data(), sizeof(uint32_t) * static_cast<uint32_t>(shader_binary.size()));
}
// Static callback wrapper for OnCrashDump
void GpuCrashTrackerImpl::gpuCrashDumpCallback(const void* pGpuCrashDump, const uint32_t gpuCrashDumpSize, void* pUserData)
{
auto* p_gpu_crash_tracker = reinterpret_cast<GpuCrashTrackerImpl*>(pUserData);
p_gpu_crash_tracker->onCrashDump(pGpuCrashDump, gpuCrashDumpSize);
}
// Static callback wrapper for OnShaderDebugInfo
void GpuCrashTrackerImpl::shaderDebugInfoCallback(const void* pShaderDebugInfo, const uint32_t shaderDebugInfoSize, void* pUserData)
{
auto* p_gpu_crash_tracker = reinterpret_cast<GpuCrashTrackerImpl*>(pUserData);
p_gpu_crash_tracker->onShaderDebugInfo(pShaderDebugInfo, shaderDebugInfoSize);
}
// Static callback wrapper for OnDescription
void GpuCrashTrackerImpl::crashDumpDescriptionCallback(PFN_GFSDK_Aftermath_AddGpuCrashDumpDescription addDescription, void* pUserData)
{
auto* p_gpu_crash_tracker = reinterpret_cast<GpuCrashTrackerImpl*>(pUserData);
p_gpu_crash_tracker->onDescription(addDescription);
}
// Static callback wrapper for OnResolveMarker
void GpuCrashTrackerImpl::resolveMarkerCallback(const void* pMarker,
const uint32_t markerDataSize,
void* pUserData,
void** resolvedMarkerData,
uint32_t* markerSize)
{
auto* p_gpu_crash_tracker = reinterpret_cast<GpuCrashTrackerImpl*>(pUserData);
p_gpu_crash_tracker->onResolveMarker(pMarker, resolvedMarkerData, markerSize);
}
// Static callback wrapper for OnShaderDebugInfoLookup
void GpuCrashTrackerImpl::shaderDebugInfoLookupCallback(const GFSDK_Aftermath_ShaderDebugInfoIdentifier* pIdentifier,
PFN_GFSDK_Aftermath_SetData setShaderDebugInfo,
void* pUserData)
{
auto* p_gpu_crash_tracker = reinterpret_cast<GpuCrashTrackerImpl*>(pUserData);
p_gpu_crash_tracker->onShaderDebugInfoLookup(*pIdentifier, setShaderDebugInfo);
}
// Static callback wrapper for OnShaderLookup
void GpuCrashTrackerImpl::shaderLookupCallback(const GFSDK_Aftermath_ShaderBinaryHash* pShaderHash,
PFN_GFSDK_Aftermath_SetData setShaderBinary,
void* pUserData)
{
auto* p_gpu_crash_tracker = reinterpret_cast<GpuCrashTrackerImpl*>(pUserData);
p_gpu_crash_tracker->onShaderLookup(*pShaderHash, setShaderBinary);
}
// Static callback wrapper for OnShaderSourceDebugInfoLookup
void GpuCrashTrackerImpl::shaderSourceDebugInfoLookupCallback(const GFSDK_Aftermath_ShaderDebugName* pShaderDebugName,
PFN_GFSDK_Aftermath_SetData setShaderBinary,
void* pUserData)
{
auto* p_gpu_crash_tracker = reinterpret_cast<GpuCrashTrackerImpl*>(pUserData);
p_gpu_crash_tracker->onShaderSourceDebugInfoLookup(*pShaderDebugName, setShaderBinary);
}
void GpuCrashTrackerImpl::addShaderBinary(std::vector<uint32_t>& data)
{
// Create shader hash for the shader
const GFSDK_Aftermath_SpirvCode shader{data.data(), static_cast<uint32_t>(data.size())};
GFSDK_Aftermath_ShaderBinaryHash shaderHash{};
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_GetShaderHashSpirv(GFSDK_Aftermath_Version_API, &shader, &shaderHash));
// Store the data for shader mapping when decoding GPU crash dumps.
// cf. FindShaderBinary()
m_shaderBinaries[shaderHash] = data;
}
void GpuCrashTrackerImpl::addShaderBinaryWithDebugInfo(std::vector<uint32_t>& data, std::vector<uint32_t>& strippedData)
{
// Generate shader debug name.
GFSDK_Aftermath_ShaderDebugName debugName{};
const GFSDK_Aftermath_SpirvCode shader{data.data(), static_cast<uint32_t>(data.size())};
const GFSDK_Aftermath_SpirvCode strippedShader{strippedData.data(), static_cast<uint32_t>(strippedData.size())};
AFTERMATH_CHECK_ERROR(GFSDK_Aftermath_GetShaderDebugNameSpirv(GFSDK_Aftermath_Version_API, &shader, &strippedShader, &debugName));
// Store the data for shader instruction address mapping when decoding GPU crash dumps.
// cf. FindShaderBinaryWithDebugData()
m_shaderBinariesWithDebugInfo[debugName] = data;
}
// Find a shader binary by shader hash.
bool GpuCrashTrackerImpl::findShaderBinary(const GFSDK_Aftermath_ShaderBinaryHash& shaderHash, std::vector<uint32_t>& shader) const
{
// Find shader binary data for the shader hash
auto i_shader = m_shaderBinaries.find(shaderHash);
if(i_shader == m_shaderBinaries.end())
{
// Nothing found.
return false;
}
shader = i_shader->second;
return true;
}
// Find a shader binary with debug information by shader debug name.
bool GpuCrashTrackerImpl::findShaderBinaryWithDebugData(const GFSDK_Aftermath_ShaderDebugName& shaderDebugName,
std::vector<uint32_t>& shader) const
{
// Find shader binary for the shader debug name.
auto i_shader = m_shaderBinariesWithDebugInfo.find(shaderDebugName);
if(i_shader == m_shaderBinariesWithDebugInfo.end())
{
// Nothing found.
return false;
}
shader = i_shader->second;
return true;
}
// Global marker map
static GpuCrashTrackerImpl::MarkerMap g_marker_map;
GpuCrashTracker::GpuCrashTracker()
: m_pimpl(new GpuCrashTrackerImpl(g_marker_map))
{
}
GpuCrashTracker::~GpuCrashTracker()
{
delete m_pimpl;
}
void GpuCrashTracker::initialize()
{
m_pimpl->initialize();
}
} // namespace nvvk
#else
namespace nvvk {
GpuCrashTracker::GpuCrashTracker()
: m_pimpl(nullptr)
{
}
GpuCrashTracker::~GpuCrashTracker() {}
void GpuCrashTracker::initialize() {}
} // namespace nvvk
#endif

View file

@ -0,0 +1,35 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
namespace nvvk {
//// @DOC_SKIP
class GpuCrashTracker
{
public:
GpuCrashTracker();
~GpuCrashTracker();
void initialize(); // Initialize the GPU crash dump tracker.
private:
class GpuCrashTrackerImpl* m_pimpl;
};
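// Usage sketch (an assumption of typical integration, not mandated by this header):
// construct the tracker and call initialize() early, typically before creating the VkDevice,
// so the Nsight Aftermath crash-dump callbacks are registered for the whole run, e.g.
//   nvvk::GpuCrashTracker crashTracker;
//   crashTracker.initialize();
//   // ... create instance/device and render; dump files are written if the GPU crashes ...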
} //namespace nvvk

View file

@ -0,0 +1,222 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "pipeline_vk.hpp"
#include <inttypes.h>
#include <nvh/nvprint.hpp>
namespace nvvk {
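// Note (assumption derived from VK_KHR_pipeline_executable_properties): statistics and internal
// representations are only available if the pipeline was created with
// VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR (and, for dumpPipelineInternals,
// VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) and the extension is enabled, e.g.
//   pipelineCreateInfo.flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;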
void nvprintPipelineStats(VkDevice device, VkPipeline pipeline, const char* name, bool verbose /*= false*/)
{
VkPipelineInfoKHR pipeInfo = {VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR};
pipeInfo.pipeline = pipeline;
if(!pipeline)
return;
std::vector<VkPipelineExecutablePropertiesKHR> props;
uint32_t executableCount = 0;
vkGetPipelineExecutablePropertiesKHR(device, &pipeInfo, &executableCount, nullptr);
props.resize(executableCount, {VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR});
vkGetPipelineExecutablePropertiesKHR(device, &pipeInfo, &executableCount, props.data());
nvprintfLevel(LOGLEVEL_STATS, "VkPipeline stats for %p, %s\n", pipeline, name);
nvprintfLevel(LOGLEVEL_STATS, "---------------------------\n");
for(uint32_t i = 0; i < executableCount; i++)
{
const VkPipelineExecutablePropertiesKHR& prop = props[i];
nvprintfLevel(LOGLEVEL_STATS, "- Executable: %s\n", prop.name);
if(verbose)
nvprintfLevel(LOGLEVEL_STATS, " (%s)\n", prop.description);
nvprintfLevel(LOGLEVEL_STATS, " - stages: 0x%08X\n", prop.stages);
nvprintfLevel(LOGLEVEL_STATS, " - subgroupSize: %2d\n", prop.subgroupSize);
VkPipelineExecutableInfoKHR execInfo = {VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR};
execInfo.pipeline = pipeline;
execInfo.executableIndex = i;
uint32_t statsCount = 0;
std::vector<VkPipelineExecutableStatisticKHR> stats;
vkGetPipelineExecutableStatisticsKHR(device, &execInfo, &statsCount, nullptr);
stats.resize(statsCount, {VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_STATISTIC_KHR});
vkGetPipelineExecutableStatisticsKHR(device, &execInfo, &statsCount, stats.data());
for(uint32_t s = 0; s < statsCount; s++)
{
const VkPipelineExecutableStatisticKHR& stat = stats[s];
switch(stat.format)
{
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR:
nvprintfLevel(LOGLEVEL_STATS, " - %s: %d\n", stat.name, stat.value.b32);
break;
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
nvprintfLevel(LOGLEVEL_STATS, " - %s: %" PRIi64 "\n", stat.name, stat.value.i64);
break;
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
nvprintfLevel(LOGLEVEL_STATS, " - %s: %" PRIu64 "\n", stat.name, stat.value.u64);
break;
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
nvprintfLevel(LOGLEVEL_STATS, " - %s: %f\n", stat.name, stat.value.f64);
break;
}
if(verbose)
nvprintfLevel(LOGLEVEL_STATS, " (%s)\n", stat.description);
}
}
nvprintfLevel(LOGLEVEL_STATS, "\n");
}
void dumpPipelineStats(VkDevice device, VkPipeline pipeline, const char* fileName)
{
VkPipelineInfoKHR pipeInfo = {VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR};
pipeInfo.pipeline = pipeline;
if(!pipeline)
return;
FILE* fdump = fopen(fileName, "wt");
if(!fdump)
return;
std::vector<VkPipelineExecutablePropertiesKHR> props;
uint32_t executableCount = 0;
vkGetPipelineExecutablePropertiesKHR(device, &pipeInfo, &executableCount, nullptr);
props.resize(executableCount, {VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR});
vkGetPipelineExecutablePropertiesKHR(device, &pipeInfo, &executableCount, props.data());
fprintf(fdump, "VkPipeline stats for %p\n", pipeline);
fprintf(fdump, "-----------------------\n");
for(uint32_t i = 0; i < executableCount; i++)
{
const VkPipelineExecutablePropertiesKHR& prop = props[i];
fprintf(fdump, "- Executable: %s\n", prop.name);
fprintf(fdump, " (%s)\n", prop.description);
fprintf(fdump, " - stages: 0x%08X\n", prop.stages);
fprintf(fdump, " - subgroupSize: %2d\n", prop.subgroupSize);
VkPipelineExecutableInfoKHR execInfo = {VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR};
execInfo.pipeline = pipeline;
execInfo.executableIndex = i;
uint32_t statsCount = 0;
std::vector<VkPipelineExecutableStatisticKHR> stats;
vkGetPipelineExecutableStatisticsKHR(device, &execInfo, &statsCount, nullptr);
stats.resize(statsCount, {VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_STATISTIC_KHR});
vkGetPipelineExecutableStatisticsKHR(device, &execInfo, &statsCount, stats.data());
for(uint32_t s = 0; s < statsCount; s++)
{
const VkPipelineExecutableStatisticKHR& stat = stats[s];
switch(stat.format)
{
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR:
fprintf(fdump, " - %s: %d\n", stat.name, stat.value.b32);
break;
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
fprintf(fdump, " - %s: %" PRIi64 "\n", stat.name, stat.value.i64);
break;
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
fprintf(fdump, " - %s: %" PRIu64 "\n", stat.name, stat.value.u64);
break;
case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
fprintf(fdump, " - %s: %f\n", stat.name, stat.value.f64);
break;
}
fprintf(fdump, " (%s)\n", stat.description);
}
}
fprintf(fdump, "\n");
fclose(fdump);
}
static inline std::string stringFormat(const char* msg, ...)
{
char text[1024];
va_list list;
if(msg == 0)
return std::string();
va_start(list, msg);
vsnprintf(text, sizeof(text), msg, list);
va_end(list);
return std::string(text);
}
void dumpPipelineInternals(VkDevice device, VkPipeline pipeline, const char* baseFileName)
{
VkPipelineInfoKHR pipeInfo = {VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR};
pipeInfo.pipeline = pipeline;
if(!pipeline)
return;
std::vector<VkPipelineExecutablePropertiesKHR> props;
uint32_t executableCount = 0;
vkGetPipelineExecutablePropertiesKHR(device, &pipeInfo, &executableCount, nullptr);
props.resize(executableCount, {VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR});
vkGetPipelineExecutablePropertiesKHR(device, &pipeInfo, &executableCount, props.data());
for(uint32_t e = 0; e < executableCount; e++)
{
const VkPipelineExecutablePropertiesKHR& prop = props[e];
VkPipelineExecutableInfoKHR execInfo = {VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR};
execInfo.pipeline = pipeline;
execInfo.executableIndex = e;
uint32_t internalCount = 0;
vkGetPipelineExecutableInternalRepresentationsKHR(device, &execInfo, &internalCount, nullptr);
if(internalCount)
{
std::vector<VkPipelineExecutableInternalRepresentationKHR> internals(
internalCount, {VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INTERNAL_REPRESENTATION_KHR});
vkGetPipelineExecutableInternalRepresentationsKHR(device, &execInfo, &internalCount, internals.data());
size_t offset = 0;
for(uint32_t i = 0; i < internalCount; i++)
{
offset += internals[i].dataSize;
}
std::vector<uint8_t> rawBytes(offset);
offset = 0;
for(uint32_t i = 0; i < internalCount; i++)
{
internals[i].pData = &rawBytes[offset];
offset += internals[i].dataSize;
}
vkGetPipelineExecutableInternalRepresentationsKHR(device, &execInfo, &internalCount, internals.data());
for(uint32_t i = 0; i < internalCount; i++)
{
bool isText = strstr(internals[i].name, "text") != nullptr;
std::string fileName = std::string(baseFileName) + "." + std::string(prop.name) + stringFormat(".%d.", e)
+ internals[i].name + stringFormat(".%d.%s", i, isText ? "txt" : "bin");
FILE* f = fopen(fileName.c_str(), "wb");
if(f)
{
fwrite(internals[i].pData, internals[i].dataSize, 1, f);
fclose(f);
}
}
}
}
}
} // namespace nvvk

View file

@@ -0,0 +1,767 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <cassert>
#include <iterator>
#include <string>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace nvvk {
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# functions in nvvk
- nvprintPipelineStats : prints stats of the pipeline using VK_KHR_pipeline_executable_properties (don't forget to enable extension and set VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR)
- dumpPipelineStats : dumps stats of the pipeline using VK_KHR_pipeline_executable_properties to a text file (don't forget to enable extension and set VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR)
- dumpPipelineInternals : dumps the pipeline's internal representations (e.g. shader binaries/ISA) using VK_KHR_pipeline_executable_properties to multiple files (don't forget to enable extension and set VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)
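Example of usage (a minimal sketch; `device` and `pipeline` are placeholders for your own handles, and the extension plus the capture flags must have been enabled at device/pipeline creation):
```cpp
// Device creation: enable VK_KHR_pipeline_executable_properties.
// Pipeline creation: add VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR
// (plus VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR for dumpPipelineInternals).
nvvk::nvprintPipelineStats(device, pipeline, "my pipeline");
nvvk::dumpPipelineStats(device, pipeline, "pipeline_stats.txt");
nvvk::dumpPipelineInternals(device, pipeline, "pipeline_internals");
```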
@DOC_END */
// nvprints stats to LOGLEVEL_STATS stream
void nvprintPipelineStats(VkDevice device, VkPipeline pipeline, const char* name, bool verbose = false);
// writes stats into single file
void dumpPipelineStats(VkDevice device, VkPipeline pipeline, const char* fileName);
// creates multiple files, one for each pipe executable and representation.
// The baseFilename will get appended along the lines of ".some details.bin"
void dumpPipelineInternals(VkDevice device, VkPipeline pipeline, const char* baseFileName);
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# struct nvvk::GraphicsPipelineState
Most graphics pipelines share similar state, so the helper `GraphicsPipelineState` holds all the elements and
initializes the structures with sensible defaults: triangle-list topology, depth test enabled with a
less-or-equal compare, dynamic state for viewport and scissor, one color attachment with a full write mask,
and a line width of 1 pixel.
Example of usage :
```cpp
nvvk::GraphicsPipelineState pipelineState;
pipelineState.depthStencilState.depthTestEnable = VK_TRUE;
pipelineState.rasterizationState.cullMode = VK_CULL_MODE_NONE;
pipelineState.addBindingDescription({0, sizeof(Vertex)});
pipelineState.addAttributeDescriptions({
{0, 0, VK_FORMAT_R32G32B32_SFLOAT, static_cast<uint32_t>(offsetof(Vertex, pos))},
{1, 0, VK_FORMAT_R32G32B32_SFLOAT, static_cast<uint32_t>(offsetof(Vertex, nrm))},
{2, 0, VK_FORMAT_R32G32B32_SFLOAT, static_cast<uint32_t>(offsetof(Vertex, col))}});
```
@DOC_END */
struct GraphicsPipelineState
{
// Initialize the state to common values: triangle list topology, depth test enabled,
// dynamic viewport and scissor, one render target, blending disabled
GraphicsPipelineState()
{
rasterizationState.flags = {};
rasterizationState.depthClampEnable = {};
rasterizationState.rasterizerDiscardEnable = {};
setValue(rasterizationState.polygonMode, VK_POLYGON_MODE_FILL);
setValue(rasterizationState.cullMode, VK_CULL_MODE_BACK_BIT);
setValue(rasterizationState.frontFace, VK_FRONT_FACE_COUNTER_CLOCKWISE);
rasterizationState.depthBiasEnable = {};
rasterizationState.depthBiasConstantFactor = {};
rasterizationState.depthBiasClamp = {};
rasterizationState.depthBiasSlopeFactor = {};
rasterizationState.lineWidth = 1.f;
inputAssemblyState.flags = {};
setValue(inputAssemblyState.topology, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST);
inputAssemblyState.primitiveRestartEnable = {};
colorBlendState.flags = {};
colorBlendState.logicOpEnable = {};
setValue(colorBlendState.logicOp, VK_LOGIC_OP_CLEAR);
colorBlendState.attachmentCount = {};
colorBlendState.pAttachments = {};
for(int i = 0; i < 4; i++)
{
colorBlendState.blendConstants[i] = 0.f;
}
dynamicState.flags = {};
dynamicState.dynamicStateCount = {};
dynamicState.pDynamicStates = {};
vertexInputState.flags = {};
vertexInputState.vertexBindingDescriptionCount = {};
vertexInputState.pVertexBindingDescriptions = {};
vertexInputState.vertexAttributeDescriptionCount = {};
vertexInputState.pVertexAttributeDescriptions = {};
viewportState.flags = {};
viewportState.viewportCount = {};
viewportState.pViewports = {};
viewportState.scissorCount = {};
viewportState.pScissors = {};
depthStencilState.flags = {};
depthStencilState.depthTestEnable = VK_TRUE;
depthStencilState.depthWriteEnable = VK_TRUE;
setValue(depthStencilState.depthCompareOp, VK_COMPARE_OP_LESS_OR_EQUAL);
depthStencilState.depthBoundsTestEnable = {};
depthStencilState.stencilTestEnable = {};
setValue(depthStencilState.front, VkStencilOpState());
setValue(depthStencilState.back, VkStencilOpState());
depthStencilState.minDepthBounds = {};
depthStencilState.maxDepthBounds = {};
setValue(multisampleState.rasterizationSamples, VK_SAMPLE_COUNT_1_BIT);
}
GraphicsPipelineState(const GraphicsPipelineState& src) = default;
// Attach the pointer values of the structures to the internal arrays
void update()
{
colorBlendState.attachmentCount = (uint32_t)blendAttachmentStates.size();
colorBlendState.pAttachments = blendAttachmentStates.data();
dynamicState.dynamicStateCount = (uint32_t)dynamicStateEnables.size();
dynamicState.pDynamicStates = dynamicStateEnables.data();
vertexInputState.vertexAttributeDescriptionCount = static_cast<uint32_t>(attributeDescriptions.size());
vertexInputState.vertexBindingDescriptionCount = static_cast<uint32_t>(bindingDescriptions.size());
vertexInputState.pVertexBindingDescriptions = bindingDescriptions.data();
vertexInputState.pVertexAttributeDescriptions = attributeDescriptions.data();
if(viewports.empty())
{
viewportState.viewportCount = 1;
viewportState.pViewports = nullptr;
}
else
{
viewportState.viewportCount = (uint32_t)viewports.size();
viewportState.pViewports = viewports.data();
}
if(scissors.empty())
{
viewportState.scissorCount = 1;
viewportState.pScissors = nullptr;
}
else
{
viewportState.scissorCount = (uint32_t)scissors.size();
viewportState.pScissors = scissors.data();
}
}
static inline VkPipelineColorBlendAttachmentState makePipelineColorBlendAttachmentState(
VkColorComponentFlags colorWriteMask_ = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT
| VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
VkBool32 blendEnable_ = 0,
VkBlendFactor srcColorBlendFactor_ = VK_BLEND_FACTOR_ZERO,
VkBlendFactor dstColorBlendFactor_ = VK_BLEND_FACTOR_ZERO,
VkBlendOp colorBlendOp_ = VK_BLEND_OP_ADD,
VkBlendFactor srcAlphaBlendFactor_ = VK_BLEND_FACTOR_ZERO,
VkBlendFactor dstAlphaBlendFactor_ = VK_BLEND_FACTOR_ZERO,
VkBlendOp alphaBlendOp_ = VK_BLEND_OP_ADD)
{
VkPipelineColorBlendAttachmentState res;
res.blendEnable = blendEnable_;
res.srcColorBlendFactor = srcColorBlendFactor_;
res.dstColorBlendFactor = dstColorBlendFactor_;
res.colorBlendOp = colorBlendOp_;
res.srcAlphaBlendFactor = srcAlphaBlendFactor_;
res.dstAlphaBlendFactor = dstAlphaBlendFactor_;
res.alphaBlendOp = alphaBlendOp_;
res.colorWriteMask = colorWriteMask_;
return res;
}
static inline VkVertexInputBindingDescription makeVertexInputBinding(uint32_t binding, uint32_t stride, VkVertexInputRate rate = VK_VERTEX_INPUT_RATE_VERTEX)
{
VkVertexInputBindingDescription vertexBinding;
vertexBinding.binding = binding;
vertexBinding.inputRate = rate;
vertexBinding.stride = stride;
return vertexBinding;
}
static inline VkVertexInputAttributeDescription makeVertexInputAttribute(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset)
{
VkVertexInputAttributeDescription attrib;
attrib.binding = binding;
attrib.location = location;
attrib.format = format;
attrib.offset = offset;
return attrib;
}
void clearBlendAttachmentStates() { blendAttachmentStates.clear(); }
void setBlendAttachmentCount(uint32_t attachmentCount) { blendAttachmentStates.resize(attachmentCount); }
void setBlendAttachmentState(uint32_t attachment, const VkPipelineColorBlendAttachmentState& blendState)
{
assert(attachment < blendAttachmentStates.size());
if(attachment < blendAttachmentStates.size())
{
blendAttachmentStates[attachment] = blendState;
}
}
void setBlendAttachmentColorMask(uint32_t attachment, VkColorComponentFlags mask)
{
assert(attachment < blendAttachmentStates.size());
if(attachment < blendAttachmentStates.size())
{
blendAttachmentStates[attachment].colorWriteMask = mask;
}
}
uint32_t addBlendAttachmentState(const VkPipelineColorBlendAttachmentState& blendState)
{
blendAttachmentStates.push_back(blendState);
return (uint32_t)(blendAttachmentStates.size() - 1);
}
void clearDynamicStateEnables() { dynamicStateEnables.clear(); }
void setDynamicStateEnablesCount(uint32_t dynamicStateCount) { dynamicStateEnables.resize(dynamicStateCount); }
void setDynamicStateEnable(uint32_t state, VkDynamicState dynamicState)
{
assert(state < dynamicStateEnables.size());
if(state < dynamicStateEnables.size())
{
dynamicStateEnables[state] = dynamicState;
}
}
uint32_t addDynamicStateEnable(VkDynamicState dynamicState)
{
dynamicStateEnables.push_back(dynamicState);
return (uint32_t)(dynamicStateEnables.size() - 1);
}
void clearBindingDescriptions() { bindingDescriptions.clear(); }
void setBindingDescriptionsCount(uint32_t bindingDescriptionCount)
{
bindingDescriptions.resize(bindingDescriptionCount);
}
void setBindingDescription(uint32_t binding, VkVertexInputBindingDescription bindingDescription)
{
assert(binding < bindingDescriptions.size());
if(binding < bindingDescriptions.size())
{
bindingDescriptions[binding] = bindingDescription;
}
}
uint32_t addBindingDescription(const VkVertexInputBindingDescription& bindingDescription)
{
bindingDescriptions.push_back(bindingDescription);
return (uint32_t)(bindingDescriptions.size() - 1);
}
void addBindingDescriptions(const std::vector<VkVertexInputBindingDescription>& bindingDescriptions_)
{
bindingDescriptions.insert(bindingDescriptions.end(), bindingDescriptions_.begin(), bindingDescriptions_.end());
}
void clearAttributeDescriptions() { attributeDescriptions.clear(); }
void setAttributeDescriptionsCount(uint32_t attributeDescriptionCount)
{
attributeDescriptions.resize(attributeDescriptionCount);
}
void setAttributeDescription(uint32_t attribute, const VkVertexInputAttributeDescription& attributeDescription)
{
assert(attribute < attributeDescriptions.size());
if(attribute < attributeDescriptions.size())
{
attributeDescriptions[attribute] = attributeDescription;
}
}
uint32_t addAttributeDescription(const VkVertexInputAttributeDescription& attributeDescription)
{
attributeDescriptions.push_back(attributeDescription);
return (uint32_t)(attributeDescriptions.size() - 1);
}
void addAttributeDescriptions(const std::vector<VkVertexInputAttributeDescription>& attributeDescriptions_)
{
attributeDescriptions.insert(attributeDescriptions.end(), attributeDescriptions_.begin(), attributeDescriptions_.end());
}
void clearViewports() { viewports.clear(); }
void setViewportsCount(uint32_t viewportCount) { viewports.resize(viewportCount); }
void setViewport(uint32_t attribute, VkViewport viewport)
{
assert(attribute < viewports.size());
if(attribute < viewports.size())
{
viewports[attribute] = viewport;
}
}
uint32_t addViewport(VkViewport viewport)
{
viewports.push_back(viewport);
return (uint32_t)(viewports.size() - 1);
}
void clearScissors() { scissors.clear(); }
void setScissorsCount(uint32_t scissorCount) { scissors.resize(scissorCount); }
void setScissor(uint32_t attribute, VkRect2D scissor)
{
assert(attribute < scissors.size());
if(attribute < scissors.size())
{
scissors[attribute] = scissor;
}
}
uint32_t addScissor(VkRect2D scissor)
{
scissors.push_back(scissor);
return (uint32_t)(scissors.size() - 1);
}
VkPipelineInputAssemblyStateCreateInfo inputAssemblyState{VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO};
VkPipelineRasterizationStateCreateInfo rasterizationState{VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO};
VkPipelineMultisampleStateCreateInfo multisampleState{VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO};
VkPipelineDepthStencilStateCreateInfo depthStencilState{VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO};
VkPipelineViewportStateCreateInfo viewportState{VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO};
VkPipelineDynamicStateCreateInfo dynamicState{VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO};
VkPipelineColorBlendStateCreateInfo colorBlendState{VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO};
VkPipelineVertexInputStateCreateInfo vertexInputState{VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO};
protected:
std::vector<VkPipelineColorBlendAttachmentState> blendAttachmentStates{makePipelineColorBlendAttachmentState()};
std::vector<VkDynamicState> dynamicStateEnables = {VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR};
std::vector<VkVertexInputBindingDescription> bindingDescriptions;
std::vector<VkVertexInputAttributeDescription> attributeDescriptions;
std::vector<VkViewport> viewports;
std::vector<VkRect2D> scissors;
// Helper that casts values so the same code works with either the C or C++ (vulkan.hpp) struct types
template <class T, class U>
void setValue(T& target, const U& val)
{
target = (T)(val);
}
};
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# struct nvvk::GraphicsPipelineGenerator
The graphics pipeline generator takes a GraphicsPipelineState object and pipeline-specific information such as
the render pass and pipeline layout to generate the final pipeline.
Example of usage :
```cpp
nvvk::GraphicsPipelineState pipelineState;
...
nvvk::GraphicsPipelineGenerator pipelineGenerator(m_device, m_pipelineLayout, m_renderPass, pipelineState);
pipelineGenerator.addShader(readFile("spv/vert_shader.vert.spv"), VkShaderStageFlagBits::eVertex);
pipelineGenerator.addShader(readFile("spv/frag_shader.frag.spv"), VkShaderStageFlagBits::eFragment);
m_pipeline = pipelineGenerator.createPipeline();
```
@DOC_END */
struct GraphicsPipelineGenerator
{
public:
GraphicsPipelineGenerator(GraphicsPipelineState& pipelineState_)
: pipelineState(pipelineState_)
{
init();
}
GraphicsPipelineGenerator(const GraphicsPipelineGenerator& src)
: createInfo(src.createInfo)
, device(src.device)
, pipelineCache(src.pipelineCache)
, pipelineState(src.pipelineState)
{
init();
}
GraphicsPipelineGenerator(VkDevice device_, const VkPipelineLayout& layout, const VkRenderPass& renderPass, GraphicsPipelineState& pipelineState_)
: device(device_)
, pipelineState(pipelineState_)
{
createInfo.layout = layout;
createInfo.renderPass = renderPass;
init();
}
// For VK_KHR_dynamic_rendering
using PipelineRenderingCreateInfo = VkPipelineRenderingCreateInfo;
GraphicsPipelineGenerator(VkDevice device_,
const VkPipelineLayout& layout,
const PipelineRenderingCreateInfo& pipelineRenderingCreateInfo,
GraphicsPipelineState& pipelineState_)
: device(device_)
, pipelineState(pipelineState_)
{
createInfo.layout = layout;
setPipelineRenderingCreateInfo(pipelineRenderingCreateInfo);
init();
}
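// Minimal sketch of the dynamic-rendering path (hypothetical `device`, `pipelineLayout`, `pipelineState`,
// `colorFormat` and `depthFormat`; assumes the dynamicRendering feature is enabled on the device):
//   VkPipelineRenderingCreateInfo renderingInfo{VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO};
//   renderingInfo.colorAttachmentCount    = 1;
//   renderingInfo.pColorAttachmentFormats = &colorFormat;
//   renderingInfo.depthAttachmentFormat   = depthFormat;
//   nvvk::GraphicsPipelineGenerator gen(device, pipelineLayout, renderingInfo, pipelineState);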
const GraphicsPipelineGenerator& operator=(const GraphicsPipelineGenerator& src)
{
device = src.device;
pipelineState = src.pipelineState;
createInfo = src.createInfo;
pipelineCache = src.pipelineCache;
init();
return *this;
}
void setDevice(VkDevice device_) { device = device_; }
void setRenderPass(VkRenderPass renderPass)
{
createInfo.renderPass = renderPass;
createInfo.pNext = nullptr;
}
void setPipelineRenderingCreateInfo(const PipelineRenderingCreateInfo& pipelineRenderingCreateInfo)
{
// Deep copy
assert(pipelineRenderingCreateInfo.pNext == nullptr); // Update deep copy if needed.
dynamicRenderingInfo = pipelineRenderingCreateInfo;
if(dynamicRenderingInfo.colorAttachmentCount != 0)
{
dynamicRenderingColorFormats.assign(dynamicRenderingInfo.pColorAttachmentFormats,
dynamicRenderingInfo.pColorAttachmentFormats + dynamicRenderingInfo.colorAttachmentCount);
dynamicRenderingInfo.pColorAttachmentFormats = dynamicRenderingColorFormats.data();
}
// Set VkGraphicsPipelineCreateInfo::pNext to point to deep copy of extension struct.
// NB: Will have to change if more than 1 extension struct needs to be supported.
createInfo.pNext = &dynamicRenderingInfo;
}
void setLayout(VkPipelineLayout layout) { createInfo.layout = layout; }
~GraphicsPipelineGenerator() { destroyShaderModules(); }
VkPipelineShaderStageCreateInfo& addShader(const std::string& code, VkShaderStageFlagBits stage, const char* entryPoint = "main")
{
std::vector<char> v;
std::copy(code.begin(), code.end(), std::back_inserter(v));
return addShader(v, stage, entryPoint);
}
template <typename T>
VkPipelineShaderStageCreateInfo& addShader(const std::vector<T>& code, VkShaderStageFlagBits stage, const char* entryPoint = "main")
{
VkShaderModuleCreateInfo createInfo{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO};
createInfo.codeSize = sizeof(T) * code.size();
createInfo.pCode = reinterpret_cast<const uint32_t*>(code.data());
VkShaderModule shaderModule;
vkCreateShaderModule(device, &createInfo, nullptr, &shaderModule);
temporaryModules.push_back(shaderModule);
return addShader(shaderModule, stage, entryPoint);
}
VkPipelineShaderStageCreateInfo& addShader(VkShaderModule shaderModule, VkShaderStageFlagBits stage, const char* entryPoint = "main")
{
VkPipelineShaderStageCreateInfo shaderStage{VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO};
shaderStage.stage = (VkShaderStageFlagBits)stage;
shaderStage.module = shaderModule;
shaderStage.pName = entryPoint;
shaderStages.push_back(shaderStage);
return shaderStages.back();
}
void clearShaders()
{
shaderStages.clear();
destroyShaderModules();
}
VkShaderModule getShaderModule(size_t index) const
{
if(index < shaderStages.size())
return shaderStages[index].module;
return VK_NULL_HANDLE;
}
VkPipeline createPipeline(const VkPipelineCache& cache)
{
update();
VkPipeline pipeline;
vkCreateGraphicsPipelines(device, cache, 1, (VkGraphicsPipelineCreateInfo*)&createInfo, nullptr, &pipeline);
return pipeline;
}
VkPipeline createPipeline() { return createPipeline(pipelineCache); }
void destroyShaderModules()
{
for(const auto& shaderModule : temporaryModules)
{
vkDestroyShaderModule(device, shaderModule, nullptr);
}
temporaryModules.clear();
}
void update()
{
createInfo.stageCount = static_cast<uint32_t>(shaderStages.size());
createInfo.pStages = shaderStages.data();
pipelineState.update();
}
VkGraphicsPipelineCreateInfo createInfo{VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO};
private:
VkDevice device;
VkPipelineCache pipelineCache{};
std::vector<VkPipelineShaderStageCreateInfo> shaderStages;
std::vector<VkShaderModule> temporaryModules;
std::vector<VkFormat> dynamicRenderingColorFormats;
GraphicsPipelineState& pipelineState;
PipelineRenderingCreateInfo dynamicRenderingInfo{VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO};
void init()
{
createInfo.pRasterizationState = &pipelineState.rasterizationState;
createInfo.pInputAssemblyState = &pipelineState.inputAssemblyState;
createInfo.pColorBlendState = &pipelineState.colorBlendState;
createInfo.pMultisampleState = &pipelineState.multisampleState;
createInfo.pViewportState = &pipelineState.viewportState;
createInfo.pDepthStencilState = &pipelineState.depthStencilState;
createInfo.pDynamicState = &pipelineState.dynamicState;
createInfo.pVertexInputState = &pipelineState.vertexInputState;
}
// Helper that casts values so the same code works with either the C or C++ (vulkan.hpp) struct types
template <class T, class U>
void setValue(T& target, const U& val)
{
target = (T)(val);
}
};
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# class nvvk::GraphicsPipelineGeneratorCombined
In many cases the application creates one state object per pipeline. For convenience,
nvvk::GraphicsPipelineGeneratorCombined combines both the state and the generator into a single object.
Example of usage :
```cpp
nvvk::GraphicsPipelineGeneratorCombined pipelineGenerator(m_device, m_pipelineLayout, m_renderPass);
pipelineGenerator.depthStencilState.depthTestEnable = VK_TRUE;
pipelineGenerator.rasterizationState.cullMode = VK_CULL_MODE_NONE;
pipelineGenerator.addBindingDescription({0, sizeof(Vertex)});
pipelineGenerator.addAttributeDescriptions({
{0, 0, VK_FORMAT_R32G32B32_SFLOAT, static_cast<uint32_t>(offsetof(Vertex, pos))},
{1, 0, VK_FORMAT_R32G32B32_SFLOAT, static_cast<uint32_t>(offsetof(Vertex, nrm))},
{2, 0, VK_FORMAT_R32G32B32_SFLOAT, static_cast<uint32_t>(offsetof(Vertex, col))}});
pipelineGenerator.addShader(readFile("spv/vert_shader.vert.spv"), VK_SHADER_STAGE_VERTEX_BIT);
pipelineGenerator.addShader(readFile("spv/frag_shader.frag.spv"), VK_SHADER_STAGE_FRAGMENT_BIT);
m_pipeline = pipelineGenerator.createPipeline();
```
@DOC_END */
struct GraphicsPipelineGeneratorCombined : public GraphicsPipelineState, public GraphicsPipelineGenerator
{
GraphicsPipelineGeneratorCombined(VkDevice device_, const VkPipelineLayout& layout, const VkRenderPass& renderPass)
: GraphicsPipelineState()
, GraphicsPipelineGenerator(device_, layout, renderPass, *this)
{
}
};
//--------------------------------------------------------------------------------------------------
/** @DOC_START
# struct nvvk::GraphicShaderObjectPipeline
This is a helper to set the full dynamic graphics pipeline state when using shader objects
- Set the pipeline state as you would do for a regular pipeline
- Call cmdSetPipelineState to set the pipeline state in the command buffer
Example of usage :
```cpp
// Member of the class
nvvk::GraphicShaderObjectPipeline m_shaderObjPipeline;
// Creation of the dynamic graphic pipeline
m_shaderObjPipeline.rasterizationState.cullMode = VK_CULL_MODE_NONE;
m_shaderObjPipeline.addBindingDescriptions({{0, sizeof(nvh::PrimitiveVertex)}});
m_shaderObjPipeline.addAttributeDescriptions({
{0, 0, VK_FORMAT_R32G32B32_SFLOAT, static_cast<uint32_t>(offsetof(nvh::PrimitiveVertex, p))}, // Position
{1, 0, VK_FORMAT_R32G32B32_SFLOAT, static_cast<uint32_t>(offsetof(nvh::PrimitiveVertex, n))}, // Normal
});
m_shaderObjPipeline.update();
// In the drawing
m_shaderObjPipeline.setViewportScissor(m_app->getViewportSize());
m_shaderObjPipeline.cmdSetPipelineState(cmd);
```
@DOC_END */
struct GraphicShaderObjectPipeline : GraphicsPipelineState
{
VkSampleMask sampleMask{~0U};
std::vector<VkVertexInputBindingDescription2EXT> vertexBindingDescriptions2;
std::vector<VkColorBlendEquationEXT> colorBlendEquationState;
std::vector<VkBool32> colorBlendEnables;
std::vector<VkColorComponentFlags> colorWriteMasks;
std::vector<VkVertexInputAttributeDescription2EXT> vertexAttributeDescriptions2;
GraphicShaderObjectPipeline()
{
viewports.resize(1); // There should be at least one viewport
scissors.resize(1); //
}
// Set the viewport and scissor to the full extent
void setViewportScissor(const VkExtent2D& extent)
{
viewports[0].x = 0;
viewports[0].y = 0;
viewports[0].width = float(extent.width);
viewports[0].height = float(extent.height);
viewports[0].minDepth = 0;
viewports[0].maxDepth = 1;
scissors[0].offset = {0, 0};
scissors[0].extent = extent;
}
// Update the internal state
void update()
{
GraphicsPipelineState::update();
multisampleState.pSampleMask = &sampleMask;
vertexBindingDescriptions2.resize(vertexInputState.vertexBindingDescriptionCount);
for(uint32_t i = 0; i < vertexInputState.vertexBindingDescriptionCount; i++)
{
vertexBindingDescriptions2[i].sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT;
vertexBindingDescriptions2[i].binding = vertexInputState.pVertexBindingDescriptions[i].binding;
vertexBindingDescriptions2[i].inputRate = vertexInputState.pVertexBindingDescriptions[i].inputRate;
vertexBindingDescriptions2[i].stride = vertexInputState.pVertexBindingDescriptions[i].stride;
vertexBindingDescriptions2[i].divisor = 1;
}
vertexAttributeDescriptions2.resize(vertexInputState.vertexAttributeDescriptionCount);
for(uint32_t i = 0; i < vertexInputState.vertexAttributeDescriptionCount; i++)
{
vertexAttributeDescriptions2[i].sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT;
vertexAttributeDescriptions2[i].binding = vertexInputState.pVertexAttributeDescriptions[i].binding;
vertexAttributeDescriptions2[i].format = vertexInputState.pVertexAttributeDescriptions[i].format;
vertexAttributeDescriptions2[i].location = vertexInputState.pVertexAttributeDescriptions[i].location;
vertexAttributeDescriptions2[i].offset = vertexInputState.pVertexAttributeDescriptions[i].offset;
}
colorBlendEquationState.resize(colorBlendState.attachmentCount);
colorBlendEnables.resize(colorBlendState.attachmentCount);
colorWriteMasks.resize(colorBlendState.attachmentCount);
for(uint32_t i = 0; i < colorBlendState.attachmentCount; i++)
{
colorBlendEquationState[i].srcColorBlendFactor = colorBlendState.pAttachments[i].srcColorBlendFactor;
colorBlendEquationState[i].dstColorBlendFactor = colorBlendState.pAttachments[i].dstColorBlendFactor;
colorBlendEquationState[i].colorBlendOp = colorBlendState.pAttachments[i].colorBlendOp;
colorBlendEquationState[i].srcAlphaBlendFactor = colorBlendState.pAttachments[i].srcAlphaBlendFactor;
colorBlendEquationState[i].dstAlphaBlendFactor = colorBlendState.pAttachments[i].dstAlphaBlendFactor;
colorBlendEquationState[i].alphaBlendOp = colorBlendState.pAttachments[i].alphaBlendOp;
colorBlendEnables[i] = colorBlendState.pAttachments[i].blendEnable;
colorWriteMasks[i] = colorBlendState.pAttachments[i].colorWriteMask;
}
}
// Set the pipeline state in the command buffer
void cmdSetPipelineState(VkCommandBuffer cmd)
{
vkCmdSetViewportWithCount(cmd, viewportState.viewportCount, viewportState.pViewports);
vkCmdSetScissorWithCount(cmd, viewportState.scissorCount, viewportState.pScissors);
vkCmdSetLineWidth(cmd, rasterizationState.lineWidth);
vkCmdSetDepthBias(cmd, rasterizationState.depthBiasConstantFactor, rasterizationState.depthBiasClamp,
rasterizationState.depthBiasSlopeFactor);
vkCmdSetCullMode(cmd, rasterizationState.cullMode);
vkCmdSetFrontFace(cmd, rasterizationState.frontFace);
vkCmdSetDepthBiasEnable(cmd, rasterizationState.depthBiasEnable);
vkCmdSetRasterizerDiscardEnable(cmd, rasterizationState.rasterizerDiscardEnable);
vkCmdSetDepthClampEnableEXT(cmd, rasterizationState.depthClampEnable);
vkCmdSetPolygonModeEXT(cmd, rasterizationState.polygonMode);
vkCmdSetBlendConstants(cmd, colorBlendState.blendConstants);
vkCmdSetDepthBounds(cmd, depthStencilState.minDepthBounds, depthStencilState.maxDepthBounds);
vkCmdSetDepthBoundsTestEnable(cmd, depthStencilState.depthBoundsTestEnable);
vkCmdSetDepthCompareOp(cmd, depthStencilState.depthCompareOp);
vkCmdSetDepthTestEnable(cmd, depthStencilState.depthTestEnable);
vkCmdSetDepthWriteEnable(cmd, depthStencilState.depthWriteEnable);
vkCmdSetStencilTestEnable(cmd, depthStencilState.stencilTestEnable);
vkCmdSetPrimitiveRestartEnable(cmd, inputAssemblyState.primitiveRestartEnable);
vkCmdSetPrimitiveTopology(cmd, inputAssemblyState.topology);
vkCmdSetRasterizationSamplesEXT(cmd, multisampleState.rasterizationSamples);
vkCmdSetSampleMaskEXT(cmd, multisampleState.rasterizationSamples, multisampleState.pSampleMask);
vkCmdSetAlphaToCoverageEnableEXT(cmd, multisampleState.alphaToCoverageEnable);
vkCmdSetAlphaToOneEnableEXT(cmd, multisampleState.alphaToOneEnable);
vkCmdSetVertexInputEXT(cmd, vertexInputState.vertexBindingDescriptionCount, vertexBindingDescriptions2.data(),
vertexInputState.vertexAttributeDescriptionCount, vertexAttributeDescriptions2.data());
vkCmdSetColorBlendEquationEXT(cmd, 0, colorBlendState.attachmentCount, colorBlendEquationState.data());
vkCmdSetColorBlendEnableEXT(cmd, 0, colorBlendState.attachmentCount, colorBlendEnables.data());
vkCmdSetColorWriteMaskEXT(cmd, 0, colorBlendState.attachmentCount, colorWriteMasks.data());
vkCmdSetLogicOpEnableEXT(cmd, colorBlendState.logicOpEnable);
}
};
} // namespace nvvk

View file

@@ -0,0 +1,200 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "profiler_vk.hpp"
#include "debug_util_vk.hpp"
#include "error_vk.hpp"
#include <assert.h>
//////////////////////////////////////////////////////////////////////////
namespace nvvk {
void ProfilerVK::init(VkDevice device, VkPhysicalDevice physicalDevice, int queueFamilyIndex)
{
assert(!m_device);
m_device = device;
#if 0
m_useCoreHostReset = supportsCoreHostReset;
#endif
VkPhysicalDeviceProperties properties;
vkGetPhysicalDeviceProperties(physicalDevice, &properties);
m_frequency = properties.limits.timestampPeriod;
std::vector<VkQueueFamilyProperties> queueProperties;
uint32_t queueFamilyCount = 0;
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, nullptr);
queueProperties.resize(queueFamilyCount);
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, &queueFamilyCount, queueProperties.data());
uint32_t validBits = queueProperties[queueFamilyIndex].timestampValidBits;
m_queueFamilyMask = validBits == 64 ? uint64_t(-1) : ((uint64_t(1) << validBits) - uint64_t(1));
resize();
}
void ProfilerVK::deinit()
{
if(m_queryPool)
{
vkDestroyQueryPool(m_device, m_queryPool, nullptr);
m_queryPool = VK_NULL_HANDLE;
}
m_device = VK_NULL_HANDLE;
}
void ProfilerVK::setLabelUsage(bool state)
{
m_useLabels = state;
}
void ProfilerVK::resize()
{
if(getRequiredTimers() < m_queryPoolSize)
return;
if(m_queryPool)
{
// FIXME we may lose results this way
// not exactly efficient, but when the timer count changes a lot we have a slow frame anyway
// cleaner would be allocating more pools
VkResult result = vkDeviceWaitIdle(m_device);
if(nvvk::checkResult(result, __FILE__, __LINE__))
{
exit(-1);
}
vkDestroyQueryPool(m_device, m_queryPool, nullptr);
}
VkQueryPoolCreateInfo createInfo = {VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO};
createInfo.queryType = VK_QUERY_TYPE_TIMESTAMP;
createInfo.queryCount = getRequiredTimers();
m_queryPoolSize = createInfo.queryCount;
VkResult res = vkCreateQueryPool(m_device, &createInfo, nullptr, &m_queryPool);
assert(res == VK_SUCCESS);
nvvk::DebugUtil(m_device).setObjectName(m_queryPool, m_debugName);
}
nvh::Profiler::SectionID ProfilerVK::beginSection(const char* name, VkCommandBuffer cmd, bool singleShot, bool useHostReset)
{
nvh::Profiler::gpuTimeProvider_fn fnProvider = [&](SectionID i, uint32_t queryFrame, double& gpuTime) {
return getSectionTime(i, queryFrame, gpuTime);
};
SectionID slot = Profiler::beginSection(name, "VK ", fnProvider, singleShot);
if(getRequiredTimers() > m_queryPoolSize)
{
resize();
}
if(m_useLabels)
{
VkDebugUtilsLabelEXT label = {VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT};
label.pLabelName = name;
label.color[1] = 1.0f;
vkCmdBeginDebugUtilsLabelEXT(cmd, &label);
}
#if 0
else if(m_useMarkers)
{
VkDebugMarkerMarkerInfoEXT marker = {VK_STRUCTURE_TYPE_DEBUG_MARKER_MARKER_INFO_EXT};
marker.pMarkerName = name;
vkCmdDebugMarkerBeginEXT(cmd, &marker);
}
#endif
uint32_t idx = getTimerIdx(slot, getSubFrame(slot), true);
if(useHostReset)
{
#if 0
if(m_useCoreHostReset)
{
vkResetQueryPool(m_device, m_queryPool, idx, 2);
}
else
#endif
{
vkResetQueryPoolEXT(m_device, m_queryPool, idx, 2);
}
}
else
{
// not ideal to do this per query
vkCmdResetQueryPool(cmd, m_queryPool, idx, 2);
}
// log timestamp
vkCmdWriteTimestamp(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_queryPool, idx);
return slot;
}
void ProfilerVK::endSection(SectionID slot, VkCommandBuffer cmd)
{
uint32_t idx = getTimerIdx(slot, getSubFrame(slot), false);
vkCmdWriteTimestamp(cmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_queryPool, idx);
if(m_useLabels)
{
vkCmdEndDebugUtilsLabelEXT(cmd);
}
#if 0
else if(m_useMarkers)
{
vkCmdDebugMarkerEndEXT(cmd);
}
#endif
Profiler::endSection(slot);
}
bool ProfilerVK::getSectionTime(SectionID i, uint32_t queryFrame, double& gpuTime)
{
bool isRecurring = isSectionRecurring(i);
uint32_t idxBegin = getTimerIdx(i, queryFrame, true);
uint32_t idxEnd = getTimerIdx(i, queryFrame, false);
assert(idxEnd == idxBegin + 1);
uint64_t times[2];
VkResult result = vkGetQueryPoolResults(m_device, m_queryPool, idxBegin, 2, sizeof(uint64_t) * 2, times, sizeof(uint64_t),
VK_QUERY_RESULT_64_BIT | (isRecurring ? VK_QUERY_RESULT_WAIT_BIT : 0));
// Validation layer bug: it complains if VK_QUERY_RESULT_WAIT_BIT is not provided, even if we
// already wait on another fence for the work that wrote the query.
// Fixed with VK SDK 1.1.126, but we keep the old logic here.
if(result == VK_SUCCESS)
{
uint64_t mask = m_queueFamilyMask;
gpuTime = (double((times[1] & mask) - (times[0] & mask)) * double(m_frequency)) / double(1000);
return true;
}
else
{
return false;
}
}
} // namespace nvvk

View file

@@ -0,0 +1,168 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include "nvh/profiler.hpp"
#include <string>
#include <vulkan/vulkan_core.h>
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::ProfilerVK
nvvk::ProfilerVK derives from nvh::Profiler and uses vkCmdWriteTimestamp
to measure the gpu time within a section.
If profiler.setLabelUsage(true) was used, it will wrap each section in a
vkCmdBeginDebugUtilsLabelEXT / vkCmdEndDebugUtilsLabelEXT pair so that the
sections show up in tools like Nsight Graphics and RenderDoc.
Currently the command buffers must support vkCmdResetQueryPool as well.
When multiple queues are used there could be problems with the "nesting"
of sections. In that case multiple profilers, one per queue, are most
likely better.
Example:
```cpp
nvvk::ProfilerVK profiler;
std::string profilerStats;
profiler.init(device, physicalDevice, queueFamilyIndex);
profiler.setLabelUsage(true); // depends on VK_EXT_debug_utils
while(true)
{
profiler.beginFrame();
... setup frame ...
{
// use the Section class to time the scope
auto sec = profiler.timeRecurring("draw", cmd);
vkCmdDraw(cmd, ...);
}
... submit cmd buffer ...
profiler.endFrame();
// generic print to string
profiler.print(profilerStats);
// or access data directly
nvh::Profiler::TimerInfo info;
if( profiler.getTimerInfo("draw", info)) {
// do some updates
updateProfilerUi("draw", info.gpu.average);
}
}
```
@DOC_END */
class ProfilerVK : public nvh::Profiler
{
public:
// hostReset usage depends on VK_EXT_host_query_reset
// mandatory for transfer-only queues
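// e.g. (sketch, assuming the extension is enabled): auto sec = profiler.timeRecurring("copy", transferCmd, /*hostReset=*/true);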
//////////////////////////////////////////////////////////////////////////
// utility class to call begin/end within local scope
class Section
{
public:
Section(ProfilerVK& profiler, const char* name, VkCommandBuffer cmd, bool singleShot = false, bool hostReset = false)
: m_profiler(profiler)
{
m_id = profiler.beginSection(name, cmd, singleShot, hostReset);
m_cmd = cmd;
}
~Section() { m_profiler.endSection(m_id, m_cmd); }
private:
SectionID m_id;
VkCommandBuffer m_cmd;
ProfilerVK& m_profiler;
};
// recurring, must be within beginFrame/endFrame
Section timeRecurring(const char* name, VkCommandBuffer cmd, bool hostReset = false)
{
return Section(*this, name, cmd, false, hostReset);
}
// singleShot, results are available after FRAME_DELAY many endFrame
Section timeSingle(const char* name, VkCommandBuffer cmd, bool hostReset = false)
{
return Section(*this, name, cmd, true, hostReset);
}
//////////////////////////////////////////////////////////////////////////
ProfilerVK(nvh::Profiler* masterProfiler = nullptr)
: Profiler(masterProfiler)
{
m_debugName = "nvvk::ProfilerVK:" + std::to_string((uint64_t)this);
}
ProfilerVK(VkDevice device, VkPhysicalDevice physicalDevice, nvh::Profiler* masterProfiler = nullptr)
: Profiler(masterProfiler)
{
init(device, physicalDevice);
}
~ProfilerVK() { deinit(); }
void init(VkDevice device, VkPhysicalDevice physicalDevice, int queueFamilyIndex = 0);
void deinit();
void setDebugName(const std::string& name) { m_debugName = name; }
// enable debug label per section, requires VK_EXT_debug_utils
void setLabelUsage(bool state);
SectionID beginSection(const char* name, VkCommandBuffer cmd, bool singleShot = false, bool hostReset = false);
void endSection(SectionID slot, VkCommandBuffer cmd);
bool getSectionTime(SectionID i, uint32_t queryFrame, double& gpuTime);
private:
void resize();
bool m_useLabels = false;
#if 0
bool m_useCoreHostReset = false;
#endif
VkDevice m_device = VK_NULL_HANDLE;
VkQueryPool m_queryPool = VK_NULL_HANDLE;
uint32_t m_queryPoolSize = 0;
float m_frequency = 1.0f;
uint64_t m_queueFamilyMask = ~0;
std::string m_debugName;
};
} // namespace nvvk

View file

@@ -0,0 +1,417 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
/** @DOC_START
# class nvvk::RayPickerKHR
nvvk::RayPickerKHR is a utility to get hit information under a screen coordinate.
The information returned is:
- origin and direction in world space
- hitT, the distance of the hit along the ray direction
- primitiveID, instanceID and instanceCustomIndex
- the barycentric coordinates in the triangle
Setting up:
- call setup() once with the Vulkan device, physical device, queue family index and allocator
- call setTlas with the TLAS previously built
Getting results, for example, on mouse down:
- fill the PickInfo structure
- call run()
- call getResult() to get all the information above
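Example of issuing a pick (a minimal sketch; `cmdBuf`, the camera matrices and the normalized mouse
coordinates are assumed to come from the application):
```cpp
nvvk::RayPickerKHR::PickInfo pickInfo;
pickInfo.modelViewInv   = glm::inverse(viewMatrix);
pickInfo.perspectiveInv = glm::inverse(projMatrix);
pickInfo.pickX          = mouseX / float(viewportWidth);   // normalized [0,1]
pickInfo.pickY          = mouseY / float(viewportHeight);
m_picker.run(cmdBuf, pickInfo);  // record into a command buffer, then submit it
```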
Example to set the camera interest point
```cpp
RayPickerKHR::PickResult pr = m_picker.getResult();
if(pr.instanceID != ~0) // Hit something
{
glm::vec3 worldPos = pr.worldRayOrigin + pr.worldRayDirection * pr.hitT;
glm::vec3 eye, center, up;
CameraManip.getLookat(eye, center, up);
CameraManip.setLookat(eye, worldPos, up, false); // Nice with CameraManip.updateAnim();
}
```
@DOC_END */
#include <glm/glm.hpp>
#include "nvvk/commands_vk.hpp"
#include "nvvk/debug_util_vk.hpp"
#include "nvvk/descriptorsets_vk.hpp"
#include "nvvk/resourceallocator_vk.hpp"
#include "nvvk/shaders_vk.hpp"
#include "nvvk/context_vk.hpp"
namespace nvvk {
struct RayPickerKHR
{
public:
struct PickInfo
{
glm::mat4 modelViewInv; // inverse model view matrix
glm::mat4 perspectiveInv; // inverse perspective matrix
float pickX{0}; // normalized X position
float pickY{0}; // normalized Y position
} m_pickInfo;
struct PickResult
{
glm::vec4 worldRayOrigin{0, 0, 0, 0};
glm::vec4 worldRayDirection{0, 0, 0, 0};
float hitT{0};
int primitiveID{0};
int instanceID{~0};
int instanceCustomIndex{0};
glm::vec3 baryCoord{0, 0, 0};
};
RayPickerKHR() = default;
RayPickerKHR(nvvk::Context* ctx, nvvk::ResourceAllocator* allocator, uint32_t queueFamilyIndex = 0)
{
setup(ctx->m_device, ctx->m_physicalDevice, queueFamilyIndex, allocator);
}
void setup(const VkDevice& device, const VkPhysicalDevice& physicalDevice, uint32_t queueFamilyIndex, nvvk::ResourceAllocator* allocator)
{
m_physicalDevice = physicalDevice;
m_device = device;
m_queueFamilyIndex = queueFamilyIndex;
m_debug.setup(device);
m_alloc = allocator;
createOutputResult();
createDescriptorSet();
createPipeline();
}
// tlas : top acceleration structure
void setTlas(const VkAccelerationStructureKHR& tlas)
{
VkWriteDescriptorSetAccelerationStructureKHR descAsInfo{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR};
descAsInfo.accelerationStructureCount = 1;
descAsInfo.pAccelerationStructures = &tlas;
VkDescriptorBufferInfo pickDesc{m_pickResult.buffer, 0, VK_WHOLE_SIZE};
std::vector<VkWriteDescriptorSet> writes;
writes.emplace_back(m_binding.makeWrite(m_descSet, 0, &descAsInfo));
writes.emplace_back(m_binding.makeWrite(m_descSet, 1, &pickDesc));
vkUpdateDescriptorSets(m_device, static_cast<uint32_t>(writes.size()), writes.data(), 0, nullptr);
}
bool isValid() { return m_pipeline != VK_NULL_HANDLE; }
void run(const VkCommandBuffer& cmdBuf, const PickInfo& pickInfo)
{
m_pickInfo = pickInfo;
vkCmdBindPipeline(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipeline);
vkCmdBindDescriptorSets(cmdBuf, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipelineLayout, 0, 1, &m_descSet, 0, nullptr);
vkCmdPushConstants(cmdBuf, m_pipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(PickInfo), &m_pickInfo);
vkCmdDispatch(cmdBuf, 1, 1, 1); // one pixel
// Wait for result
VkBufferMemoryBarrier bmb{VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER};
bmb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
bmb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
bmb.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
bmb.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
bmb.buffer = m_pickResult.buffer;
bmb.size = VK_WHOLE_SIZE;
vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_DEPENDENCY_DEVICE_GROUP_BIT, 0, nullptr, 1, &bmb, 0, nullptr);
}
PickResult getResult()
{
PickResult pr;
void* mapped = m_alloc->map(m_pickResult);
memcpy(&pr, mapped, sizeof(PickResult));
m_alloc->unmap(m_pickResult);
return pr;
}
void destroy()
{
m_alloc->destroy(m_pickResult);
m_alloc->destroy(m_sbtBuffer);
vkDestroyDescriptorSetLayout(m_device, m_descSetLayout, nullptr);
vkDestroyDescriptorPool(m_device, m_descPool, nullptr);
vkDestroyPipeline(m_device, m_pipeline, nullptr);
vkDestroyPipelineLayout(m_device, m_pipelineLayout, nullptr);
m_pickResult = nvvk::Buffer();
m_descSetLayout = VK_NULL_HANDLE;
m_descSet = VK_NULL_HANDLE;
m_pipelineLayout = VK_NULL_HANDLE;
m_pipeline = VK_NULL_HANDLE;
m_descPool = VK_NULL_HANDLE;
}
private:
nvvk::Buffer m_pickResult;
nvvk::Buffer m_sbtBuffer;
nvvk::DescriptorSetBindings m_binding;
VkDescriptorPool m_descPool{VK_NULL_HANDLE};
VkDescriptorSetLayout m_descSetLayout{VK_NULL_HANDLE};
VkDescriptorSet m_descSet{VK_NULL_HANDLE};
VkPipelineLayout m_pipelineLayout{VK_NULL_HANDLE};
VkPipeline m_pipeline{VK_NULL_HANDLE};
VkPhysicalDevice m_physicalDevice{VK_NULL_HANDLE};
VkDevice m_device{VK_NULL_HANDLE};
uint32_t m_queueFamilyIndex{0};
nvvk::ResourceAllocator* m_alloc{nullptr};
nvvk::DebugUtil m_debug;
void createOutputResult()
{
nvvk::CommandPool sCmd(m_device, m_queueFamilyIndex);
VkCommandBuffer cmdBuf = sCmd.createCommandBuffer();
PickResult presult{};
m_pickResult = m_alloc->createBuffer(cmdBuf, sizeof(PickResult), &presult,
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
sCmd.submitAndWait(cmdBuf);
m_alloc->finalizeAndReleaseStaging();
NAME_VK(m_pickResult.buffer);
}
void createDescriptorSet()
{
vkDestroyDescriptorSetLayout(m_device, m_descSetLayout, nullptr);
vkDestroyDescriptorPool(m_device, m_descPool, nullptr);
m_binding.clear();
m_binding.addBinding(0, VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, 1, VK_SHADER_STAGE_COMPUTE_BIT);
m_binding.addBinding(1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT);
m_descPool = m_binding.createPool(m_device);
m_descSetLayout = m_binding.createLayout(m_device);
VkDescriptorSetAllocateInfo allocateInfo{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
allocateInfo.descriptorPool = m_descPool;
allocateInfo.descriptorSetCount = 1;
allocateInfo.pSetLayouts = &m_descSetLayout;
vkAllocateDescriptorSets(m_device, &allocateInfo, &m_descSet);
}
void createPipeline()
{
vkDestroyPipeline(m_device, m_pipeline, nullptr);
vkDestroyPipelineLayout(m_device, m_pipelineLayout, nullptr);
VkPushConstantRange pushConstant{VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(PickInfo)};
VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
pipelineLayoutCreateInfo.setLayoutCount = 1;
pipelineLayoutCreateInfo.pSetLayouts = &m_descSetLayout;
pipelineLayoutCreateInfo.pushConstantRangeCount = 1;
pipelineLayoutCreateInfo.pPushConstantRanges = &pushConstant;
vkCreatePipelineLayout(m_device, &pipelineLayoutCreateInfo, nullptr, &m_pipelineLayout);
NAME_VK(m_pipelineLayout);
VkComputePipelineCreateInfo computePipelineCreateInfo{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
computePipelineCreateInfo.layout = m_pipelineLayout;
computePipelineCreateInfo.stage = nvvk::createShaderStageInfo(m_device, getSpirV(), VK_SHADER_STAGE_COMPUTE_BIT);
vkCreateComputePipelines(m_device, {}, 1, &computePipelineCreateInfo, nullptr, &m_pipeline);
NAME_VK(m_pipeline);
vkDestroyShaderModule(m_device, computePipelineCreateInfo.stage.module, nullptr);
}
const std::vector<uint32_t> getSpirV()
{ // glslangValidator.exe --target-env vulkan1.2 --variable-name pick
//const uint32_t pick[] =
return {0x07230203, 0x00010500, 0x0008000a, 0x00000089, 0x00000000, 0x00020011, 0x00000001, 0x00020011, 0x00001178,
0x0006000a, 0x5f565053, 0x5f52484b, 0x5f796172, 0x72657571, 0x00000079, 0x0006000b, 0x00000001, 0x4c534c47,
0x6474732e, 0x3035342e, 0x00000000, 0x0003000e, 0x00000000, 0x00000001, 0x0008000f, 0x00000005, 0x00000004,
0x6e69616d, 0x00000000, 0x0000000e, 0x00000047, 0x0000005f, 0x00060010, 0x00000004, 0x00000011, 0x00000001,
0x00000001, 0x00000001, 0x00030003, 0x00000002, 0x000001cc, 0x00060004, 0x455f4c47, 0x725f5458, 0x715f7961,
0x79726575, 0x00000000, 0x00040005, 0x00000004, 0x6e69616d, 0x00000000, 0x00050005, 0x00000009, 0x65786970,
0x6e65436c, 0x00726574, 0x00050005, 0x0000000c, 0x736e6f43, 0x746e6174, 0x00000073, 0x00070006, 0x0000000c,
0x00000000, 0x65646f6d, 0x6569566c, 0x766e4977, 0x00000000, 0x00070006, 0x0000000c, 0x00000001, 0x73726570,
0x74636570, 0x49657669, 0x0000766e, 0x00050006, 0x0000000c, 0x00000002, 0x6b636970, 0x00000058, 0x00050006,
0x0000000c, 0x00000003, 0x6b636970, 0x00000059, 0x00030005, 0x0000000e, 0x00000000, 0x00030005, 0x00000018,
0x00000064, 0x00040005, 0x00000020, 0x6769726f, 0x00006e69, 0x00040005, 0x00000028, 0x67726174, 0x00007465,
0x00050005, 0x00000036, 0x65726964, 0x6f697463, 0x0000006e, 0x00050005, 0x00000044, 0x51796172, 0x79726575,
0x00000000, 0x00050005, 0x00000047, 0x4c706f74, 0x6c657665, 0x00005341, 0x00030005, 0x00000058, 0x00746968,
0x00050005, 0x0000005c, 0x6b636950, 0x75736552, 0x0000746c, 0x00070006, 0x0000005c, 0x00000000, 0x6c726f77,
0x79615264, 0x6769724f, 0x00006e69, 0x00080006, 0x0000005c, 0x00000001, 0x6c726f77, 0x79615264, 0x65726944,
0x6f697463, 0x0000006e, 0x00050006, 0x0000005c, 0x00000002, 0x54746968, 0x00000000, 0x00060006, 0x0000005c,
0x00000003, 0x6d697270, 0x76697469, 0x00444965, 0x00060006, 0x0000005c, 0x00000004, 0x74736e69, 0x65636e61,
0x00004449, 0x00080006, 0x0000005c, 0x00000005, 0x74736e69, 0x65636e61, 0x74737543, 0x6e496d6f, 0x00786564,
0x00060006, 0x0000005c, 0x00000006, 0x79726162, 0x726f6f43, 0x00000064, 0x00050005, 0x0000005d, 0x7365725f,
0x50746c75, 0x006b6369, 0x00060006, 0x0000005d, 0x00000000, 0x75736572, 0x6950746c, 0x00006b63, 0x00030005,
0x0000005f, 0x00000000, 0x00040005, 0x00000079, 0x79726162, 0x00000000, 0x00040048, 0x0000000c, 0x00000000,
0x00000005, 0x00050048, 0x0000000c, 0x00000000, 0x00000023, 0x00000000, 0x00050048, 0x0000000c, 0x00000000,
0x00000007, 0x00000010, 0x00040048, 0x0000000c, 0x00000001, 0x00000005, 0x00050048, 0x0000000c, 0x00000001,
0x00000023, 0x00000040, 0x00050048, 0x0000000c, 0x00000001, 0x00000007, 0x00000010, 0x00050048, 0x0000000c,
0x00000002, 0x00000023, 0x00000080, 0x00050048, 0x0000000c, 0x00000003, 0x00000023, 0x00000084, 0x00030047,
0x0000000c, 0x00000002, 0x00040047, 0x00000047, 0x00000022, 0x00000000, 0x00040047, 0x00000047, 0x00000021,
0x00000000, 0x00050048, 0x0000005c, 0x00000000, 0x00000023, 0x00000000, 0x00050048, 0x0000005c, 0x00000001,
0x00000023, 0x00000010, 0x00050048, 0x0000005c, 0x00000002, 0x00000023, 0x00000020, 0x00050048, 0x0000005c,
0x00000003, 0x00000023, 0x00000024, 0x00050048, 0x0000005c, 0x00000004, 0x00000023, 0x00000028, 0x00050048,
0x0000005c, 0x00000005, 0x00000023, 0x0000002c, 0x00050048, 0x0000005c, 0x00000006, 0x00000023, 0x00000030,
0x00050048, 0x0000005d, 0x00000000, 0x00000023, 0x00000000, 0x00030047, 0x0000005d, 0x00000002, 0x00040047,
0x0000005f, 0x00000022, 0x00000000, 0x00040047, 0x0000005f, 0x00000021, 0x00000001, 0x00020013, 0x00000002,
0x00030021, 0x00000003, 0x00000002, 0x00030016, 0x00000006, 0x00000020, 0x00040017, 0x00000007, 0x00000006,
0x00000002, 0x00040020, 0x00000008, 0x00000007, 0x00000007, 0x00040017, 0x0000000a, 0x00000006, 0x00000004,
0x00040018, 0x0000000b, 0x0000000a, 0x00000004, 0x0006001e, 0x0000000c, 0x0000000b, 0x0000000b, 0x00000006,
0x00000006, 0x00040020, 0x0000000d, 0x00000009, 0x0000000c, 0x0004003b, 0x0000000d, 0x0000000e, 0x00000009,
0x00040015, 0x0000000f, 0x00000020, 0x00000001, 0x0004002b, 0x0000000f, 0x00000010, 0x00000002, 0x00040020,
0x00000011, 0x00000009, 0x00000006, 0x0004002b, 0x0000000f, 0x00000014, 0x00000003, 0x0004002b, 0x00000006,
0x0000001a, 0x40000000, 0x0004002b, 0x00000006, 0x0000001c, 0x3f800000, 0x00040020, 0x0000001f, 0x00000007,
0x0000000a, 0x0004002b, 0x0000000f, 0x00000021, 0x00000000, 0x00040020, 0x00000022, 0x00000009, 0x0000000b,
0x0004002b, 0x00000006, 0x00000025, 0x00000000, 0x0007002c, 0x0000000a, 0x00000026, 0x00000025, 0x00000025,
0x00000025, 0x0000001c, 0x0004002b, 0x0000000f, 0x00000029, 0x00000001, 0x00040015, 0x0000002c, 0x00000020,
0x00000000, 0x0004002b, 0x0000002c, 0x0000002d, 0x00000000, 0x00040020, 0x0000002e, 0x00000007, 0x00000006,
0x0004002b, 0x0000002c, 0x00000031, 0x00000001, 0x00040017, 0x00000039, 0x00000006, 0x00000003, 0x00021178,
0x00000042, 0x00040020, 0x00000043, 0x00000007, 0x00000042, 0x000214dd, 0x00000045, 0x00040020, 0x00000046,
0x00000000, 0x00000045, 0x0004003b, 0x00000046, 0x00000047, 0x00000000, 0x0004002b, 0x0000002c, 0x00000049,
0x000000ff, 0x0004002b, 0x00000006, 0x0000004c, 0x3727c5ac, 0x0004002b, 0x00000006, 0x0000004f, 0x749dc5ae,
0x00020014, 0x00000055, 0x00040020, 0x00000057, 0x00000007, 0x00000055, 0x00030029, 0x00000055, 0x00000059,
0x0009001e, 0x0000005c, 0x0000000a, 0x0000000a, 0x00000006, 0x0000000f, 0x0000000f, 0x0000000f, 0x00000039,
0x0003001e, 0x0000005d, 0x0000005c, 0x00040020, 0x0000005e, 0x0000000c, 0x0000005d, 0x0004003b, 0x0000005e,
0x0000005f, 0x0000000c, 0x00040020, 0x00000061, 0x0000000c, 0x0000000a, 0x00040020, 0x00000066, 0x0000000c,
0x00000006, 0x00040020, 0x00000069, 0x0000000c, 0x0000000f, 0x0004002b, 0x0000000f, 0x0000006b, 0x00000004,
0x00040020, 0x0000006d, 0x00000007, 0x0000000f, 0x0004002b, 0x0000000f, 0x00000073, 0xffffffff, 0x0004002b,
0x0000000f, 0x00000076, 0x00000005, 0x0004002b, 0x0000000f, 0x0000007b, 0x00000006, 0x00040020, 0x00000087,
0x0000000c, 0x00000039, 0x00050036, 0x00000002, 0x00000004, 0x00000000, 0x00000003, 0x000200f8, 0x00000005,
0x0004003b, 0x00000008, 0x00000009, 0x00000007, 0x0004003b, 0x00000008, 0x00000018, 0x00000007, 0x0004003b,
0x0000001f, 0x00000020, 0x00000007, 0x0004003b, 0x0000001f, 0x00000028, 0x00000007, 0x0004003b, 0x0000001f,
0x00000036, 0x00000007, 0x0004003b, 0x00000043, 0x00000044, 0x00000007, 0x0004003b, 0x00000057, 0x00000058,
0x00000007, 0x0004003b, 0x0000006d, 0x0000006e, 0x00000007, 0x0004003b, 0x00000008, 0x00000079, 0x00000007,
0x00050041, 0x00000011, 0x00000012, 0x0000000e, 0x00000010, 0x0004003d, 0x00000006, 0x00000013, 0x00000012,
0x00050041, 0x00000011, 0x00000015, 0x0000000e, 0x00000014, 0x0004003d, 0x00000006, 0x00000016, 0x00000015,
0x00050050, 0x00000007, 0x00000017, 0x00000013, 0x00000016, 0x0003003e, 0x00000009, 0x00000017, 0x0004003d,
0x00000007, 0x00000019, 0x00000009, 0x0005008e, 0x00000007, 0x0000001b, 0x00000019, 0x0000001a, 0x00050050,
0x00000007, 0x0000001d, 0x0000001c, 0x0000001c, 0x00050083, 0x00000007, 0x0000001e, 0x0000001b, 0x0000001d,
0x0003003e, 0x00000018, 0x0000001e, 0x00050041, 0x00000022, 0x00000023, 0x0000000e, 0x00000021, 0x0004003d,
0x0000000b, 0x00000024, 0x00000023, 0x00050091, 0x0000000a, 0x00000027, 0x00000024, 0x00000026, 0x0003003e,
0x00000020, 0x00000027, 0x00050041, 0x00000022, 0x0000002a, 0x0000000e, 0x00000029, 0x0004003d, 0x0000000b,
0x0000002b, 0x0000002a, 0x00050041, 0x0000002e, 0x0000002f, 0x00000018, 0x0000002d, 0x0004003d, 0x00000006,
0x00000030, 0x0000002f, 0x00050041, 0x0000002e, 0x00000032, 0x00000018, 0x00000031, 0x0004003d, 0x00000006,
0x00000033, 0x00000032, 0x00070050, 0x0000000a, 0x00000034, 0x00000030, 0x00000033, 0x0000001c, 0x0000001c,
0x00050091, 0x0000000a, 0x00000035, 0x0000002b, 0x00000034, 0x0003003e, 0x00000028, 0x00000035, 0x00050041,
0x00000022, 0x00000037, 0x0000000e, 0x00000021, 0x0004003d, 0x0000000b, 0x00000038, 0x00000037, 0x0004003d,
0x0000000a, 0x0000003a, 0x00000028, 0x0008004f, 0x00000039, 0x0000003b, 0x0000003a, 0x0000003a, 0x00000000,
0x00000001, 0x00000002, 0x0006000c, 0x00000039, 0x0000003c, 0x00000001, 0x00000045, 0x0000003b, 0x00050051,
0x00000006, 0x0000003d, 0x0000003c, 0x00000000, 0x00050051, 0x00000006, 0x0000003e, 0x0000003c, 0x00000001,
0x00050051, 0x00000006, 0x0000003f, 0x0000003c, 0x00000002, 0x00070050, 0x0000000a, 0x00000040, 0x0000003d,
0x0000003e, 0x0000003f, 0x00000025, 0x00050091, 0x0000000a, 0x00000041, 0x00000038, 0x00000040, 0x0003003e,
0x00000036, 0x00000041, 0x0004003d, 0x00000045, 0x00000048, 0x00000047, 0x0004003d, 0x0000000a, 0x0000004a,
0x00000020, 0x0008004f, 0x00000039, 0x0000004b, 0x0000004a, 0x0000004a, 0x00000000, 0x00000001, 0x00000002,
0x0004003d, 0x0000000a, 0x0000004d, 0x00000036, 0x0008004f, 0x00000039, 0x0000004e, 0x0000004d, 0x0000004d,
0x00000000, 0x00000001, 0x00000002, 0x00091179, 0x00000044, 0x00000048, 0x0000002d, 0x00000049, 0x0000004b,
0x0000004c, 0x0000004e, 0x0000004f, 0x000200f9, 0x00000050, 0x000200f8, 0x00000050, 0x000400f6, 0x00000052,
0x00000053, 0x00000000, 0x000200f9, 0x00000054, 0x000200f8, 0x00000054, 0x0004117d, 0x00000055, 0x00000056,
0x00000044, 0x000400fa, 0x00000056, 0x00000051, 0x00000052, 0x000200f8, 0x00000051, 0x0002117c, 0x00000044,
0x000200f9, 0x00000053, 0x000200f8, 0x00000053, 0x000200f9, 0x00000050, 0x000200f8, 0x00000052, 0x0005117f,
0x0000002c, 0x0000005a, 0x00000044, 0x00000029, 0x000500ab, 0x00000055, 0x0000005b, 0x0000005a, 0x0000002d,
0x0003003e, 0x00000058, 0x0000005b, 0x0004003d, 0x0000000a, 0x00000060, 0x00000020, 0x00060041, 0x00000061,
0x00000062, 0x0000005f, 0x00000021, 0x00000021, 0x0003003e, 0x00000062, 0x00000060, 0x0004003d, 0x0000000a,
0x00000063, 0x00000036, 0x00060041, 0x00000061, 0x00000064, 0x0000005f, 0x00000021, 0x00000029, 0x0003003e,
0x00000064, 0x00000063, 0x00051782, 0x00000006, 0x00000065, 0x00000044, 0x00000029, 0x00060041, 0x00000066,
0x00000067, 0x0000005f, 0x00000021, 0x00000010, 0x0003003e, 0x00000067, 0x00000065, 0x00051787, 0x0000000f,
0x00000068, 0x00000044, 0x00000029, 0x00060041, 0x00000069, 0x0000006a, 0x0000005f, 0x00000021, 0x00000014,
0x0003003e, 0x0000006a, 0x00000068, 0x0004003d, 0x00000055, 0x0000006c, 0x00000058, 0x000300f7, 0x00000070,
0x00000000, 0x000400fa, 0x0000006c, 0x0000006f, 0x00000072, 0x000200f8, 0x0000006f, 0x00051784, 0x0000000f,
0x00000071, 0x00000044, 0x00000029, 0x0003003e, 0x0000006e, 0x00000071, 0x000200f9, 0x00000070, 0x000200f8,
0x00000072, 0x0003003e, 0x0000006e, 0x00000073, 0x000200f9, 0x00000070, 0x000200f8, 0x00000070, 0x0004003d,
0x0000000f, 0x00000074, 0x0000006e, 0x00060041, 0x00000069, 0x00000075, 0x0000005f, 0x00000021, 0x0000006b,
0x0003003e, 0x00000075, 0x00000074, 0x00051783, 0x0000000f, 0x00000077, 0x00000044, 0x00000029, 0x00060041,
0x00000069, 0x00000078, 0x0000005f, 0x00000021, 0x00000076, 0x0003003e, 0x00000078, 0x00000077, 0x00051788,
0x00000007, 0x0000007a, 0x00000044, 0x00000029, 0x0003003e, 0x00000079, 0x0000007a, 0x00050041, 0x0000002e,
0x0000007c, 0x00000079, 0x0000002d, 0x0004003d, 0x00000006, 0x0000007d, 0x0000007c, 0x00050083, 0x00000006,
0x0000007e, 0x0000001c, 0x0000007d, 0x00050041, 0x0000002e, 0x0000007f, 0x00000079, 0x00000031, 0x0004003d,
0x00000006, 0x00000080, 0x0000007f, 0x00050083, 0x00000006, 0x00000081, 0x0000007e, 0x00000080, 0x00050041,
0x0000002e, 0x00000082, 0x00000079, 0x0000002d, 0x0004003d, 0x00000006, 0x00000083, 0x00000082, 0x00050041,
0x0000002e, 0x00000084, 0x00000079, 0x00000031, 0x0004003d, 0x00000006, 0x00000085, 0x00000084, 0x00060050,
0x00000039, 0x00000086, 0x00000081, 0x00000083, 0x00000085, 0x00060041, 0x00000087, 0x00000088, 0x0000005f,
0x00000021, 0x0000007b, 0x0003003e, 0x00000088, 0x00000086, 0x000100fd, 0x00010038};
}
std::string getGlsl()
{
return R"(
#version 460
#extension GL_EXT_ray_query : require
// clang-format off
struct PickResult
{
vec4 worldRayOrigin;
vec4 worldRayDirection;
float hitT;
int primitiveID;
int instanceID;
int instanceCustomIndex;
vec3 baryCoord;
};
layout(set = 0, binding = 0) uniform accelerationStructureEXT topLevelAS;
layout(set = 0, binding = 1) buffer _resultPick { PickResult resultPick; };
layout(push_constant) uniform Constants
{
mat4 modelViewInv;
mat4 perspectiveInv;
float pickX; // normalized
float pickY;
};
void main()
{
const vec2 pixelCenter = vec2(pickX, pickY);
vec2 d = pixelCenter * 2.0 - 1.0;
vec4 origin = modelViewInv * vec4(0, 0, 0, 1);
vec4 target = perspectiveInv * vec4(d.x, d.y, 1, 1);
vec4 direction = modelViewInv * vec4(normalize(target.xyz), 0);
rayQueryEXT rayQuery;
rayQueryInitializeEXT(rayQuery, topLevelAS, 0, 0xff, origin.xyz, 0.00001, direction.xyz, 1e32);
while(rayQueryProceedEXT(rayQuery)) {rayQueryConfirmIntersectionEXT(rayQuery); }
bool hit = (rayQueryGetIntersectionTypeEXT(rayQuery, true) != gl_RayQueryCommittedIntersectionNoneEXT);
resultPick.worldRayOrigin = origin;
resultPick.worldRayDirection = direction;
resultPick.hitT = rayQueryGetIntersectionTEXT(rayQuery, true);
resultPick.primitiveID = rayQueryGetIntersectionPrimitiveIndexEXT(rayQuery, true);
resultPick.instanceID = hit ? rayQueryGetIntersectionInstanceIdEXT(rayQuery, true) : ~0;
resultPick.instanceCustomIndex = rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, true);
vec2 bary = rayQueryGetIntersectionBarycentricsEXT(rayQuery, true);
resultPick.baryCoord = vec3(1.0 - bary.x - bary.y, bary.x, bary.y);
}
// clang-format on
)";
}
};
} // namespace nvvk


@@ -0,0 +1,429 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "raytraceKHR_vk.hpp"
#include <cinttypes>
#include <numeric>
#include "nvh/timesampler.hpp"
//--------------------------------------------------------------------------------------------------
// Initializing the allocator and querying the raytracing properties
//
void nvvk::RaytracingBuilderKHR::setup(const VkDevice& device, nvvk::ResourceAllocator* allocator, uint32_t queueIndex)
{
m_device = device;
m_queueIndex = queueIndex;
m_debug.setup(device);
m_alloc = allocator;
}
//--------------------------------------------------------------------------------------------------
// Destroying all allocations
//
void nvvk::RaytracingBuilderKHR::destroy()
{
if(m_alloc)
{
for(auto& b : m_blas)
{
m_alloc->destroy(b);
}
m_alloc->destroy(m_tlas);
}
m_blas.clear();
}
//--------------------------------------------------------------------------------------------------
// Returning the constructed top-level acceleration structure
//
VkAccelerationStructureKHR nvvk::RaytracingBuilderKHR::getAccelerationStructure() const
{
return m_tlas.accel;
}
//--------------------------------------------------------------------------------------------------
// Return the device address of a Blas previously created.
//
VkDeviceAddress nvvk::RaytracingBuilderKHR::getBlasDeviceAddress(uint32_t blasId)
{
assert(size_t(blasId) < m_blas.size());
VkAccelerationStructureDeviceAddressInfoKHR addressInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR};
addressInfo.accelerationStructure = m_blas[blasId].accel;
return vkGetAccelerationStructureDeviceAddressKHR(m_device, &addressInfo);
}
//--------------------------------------------------------------------------------------------------
// Create all the BLAS from the vector of BlasInput
// - There will be one BLAS per entry of `input`, i.e. input.size() BLASs in total
// - The resulting BLASs (along with the inputs used to build them) are stored in m_blas
//   and can be referenced by index.
// - If `flags` contains the 'allow compaction' bit, the BLASs will be compacted
//
void nvvk::RaytracingBuilderKHR::buildBlas(const std::vector<BlasInput>& input, VkBuildAccelerationStructureFlagsKHR flags)
{
m_cmdPool.init(m_device, m_queueIndex);
auto nbBlas = static_cast<uint32_t>(input.size());
VkDeviceSize asTotalSize{0}; // Memory size of all allocated BLAS
uint32_t nbCompactions{0}; // Nb of BLAS requesting compaction
VkDeviceSize maxScratchSize{0}; // Largest scratch size
// Preparing the information for the acceleration build commands.
std::vector<BuildAccelerationStructure> buildAs(nbBlas);
for(uint32_t idx = 0; idx < nbBlas; idx++)
{
// Filling partially the VkAccelerationStructureBuildGeometryInfoKHR for querying the build sizes.
// Other information will be filled in the createBlas (see #2)
buildAs[idx].buildInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
buildAs[idx].buildInfo.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
buildAs[idx].buildInfo.flags = input[idx].flags | flags;
buildAs[idx].buildInfo.geometryCount = static_cast<uint32_t>(input[idx].asGeometry.size());
buildAs[idx].buildInfo.pGeometries = input[idx].asGeometry.data();
// Build range information
buildAs[idx].rangeInfo = input[idx].asBuildOffsetInfo.data();
// Finding sizes to create acceleration structures and scratch
std::vector<uint32_t> maxPrimCount(input[idx].asBuildOffsetInfo.size());
for(auto tt = 0; tt < input[idx].asBuildOffsetInfo.size(); tt++)
maxPrimCount[tt] = input[idx].asBuildOffsetInfo[tt].primitiveCount; // Number of primitives/triangles
vkGetAccelerationStructureBuildSizesKHR(m_device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,
&buildAs[idx].buildInfo, maxPrimCount.data(), &buildAs[idx].sizeInfo);
// Extra info
asTotalSize += buildAs[idx].sizeInfo.accelerationStructureSize;
maxScratchSize = std::max(maxScratchSize, buildAs[idx].sizeInfo.buildScratchSize);
nbCompactions += hasFlag(buildAs[idx].buildInfo.flags, VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR);
}
// Allocate the scratch buffers holding the temporary data of the acceleration structure builder
nvvk::Buffer scratchBuffer =
m_alloc->createBuffer(maxScratchSize, VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
VkBufferDeviceAddressInfo bufferInfo{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, nullptr, scratchBuffer.buffer};
VkDeviceAddress scratchAddress = vkGetBufferDeviceAddress(m_device, &bufferInfo);
NAME_VK(scratchBuffer.buffer);
// Allocate a query pool for storing the needed size for every BLAS compaction.
VkQueryPool queryPool{VK_NULL_HANDLE};
if(nbCompactions > 0) // Is compaction requested?
{
assert(nbCompactions == nbBlas); // Don't allow mix of on/off compaction
VkQueryPoolCreateInfo qpci{VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO};
qpci.queryCount = nbBlas;
qpci.queryType = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR;
vkCreateQueryPool(m_device, &qpci, nullptr, &queryPool);
}
// Batching creation/compaction of BLAS to stay within a restricted amount of memory
std::vector<uint32_t> indices; // Indices of the BLAS to create
VkDeviceSize batchSize{0};
VkDeviceSize batchLimit{256'000'000}; // 256 MB
for(uint32_t idx = 0; idx < nbBlas; idx++)
{
indices.push_back(idx);
batchSize += buildAs[idx].sizeInfo.accelerationStructureSize;
// Over the limit or last BLAS element
if(batchSize >= batchLimit || idx == nbBlas - 1)
{
VkCommandBuffer cmdBuf = m_cmdPool.createCommandBuffer();
cmdCreateBlas(cmdBuf, indices, buildAs, scratchAddress, queryPool);
m_cmdPool.submitAndWait(cmdBuf);
if(queryPool)
{
VkCommandBuffer cmdBuf = m_cmdPool.createCommandBuffer();
cmdCompactBlas(cmdBuf, indices, buildAs, queryPool);
m_cmdPool.submitAndWait(cmdBuf); // Submit command buffer and call vkQueueWaitIdle
// Destroy the non-compacted version
destroyNonCompacted(indices, buildAs);
}
// Reset
batchSize = 0;
indices.clear();
}
}
// Logging reduction
if(queryPool)
{
VkDeviceSize compactSize = std::accumulate(buildAs.begin(), buildAs.end(), 0ULL, [](const auto& a, const auto& b) {
return a + b.sizeInfo.accelerationStructureSize;
});
const float fractionSmaller = (asTotalSize == 0) ? 0 : (asTotalSize - compactSize) / float(asTotalSize);
LOGI("%sRT BLAS: reducing from: %" PRIu64 " to: %" PRIu64 " = %" PRIu64 " (%2.2f%s smaller) \n",
nvh::ScopedTimer::indent().c_str(), asTotalSize, compactSize, asTotalSize - compactSize, fractionSmaller * 100.f, "%");
}
// Keeping all the created acceleration structures
for(auto& b : buildAs)
{
m_blas.emplace_back(b.as);
}
// Clean up
vkDestroyQueryPool(m_device, queryPool, nullptr);
m_alloc->finalizeAndReleaseStaging();
m_alloc->destroy(scratchBuffer);
m_cmdPool.deinit();
}
//--------------------------------------------------------------------------------------------------
// Creating the bottom level acceleration structure for all indices of `buildAs` vector.
// The array of BuildAccelerationStructure was created in buildBlas and the vector of
// indices limits the number of BLAS to create at once. This limits the amount of
// memory needed when compacting the BLAS.
void nvvk::RaytracingBuilderKHR::cmdCreateBlas(VkCommandBuffer cmdBuf,
std::vector<uint32_t> indices,
std::vector<BuildAccelerationStructure>& buildAs,
VkDeviceAddress scratchAddress,
VkQueryPool queryPool)
{
if(queryPool) // For querying the compaction size
vkResetQueryPool(m_device, queryPool, 0, static_cast<uint32_t>(indices.size()));
uint32_t queryCnt{0};
for(const auto& idx : indices)
{
// Actual allocation of buffer and acceleration structure.
VkAccelerationStructureCreateInfoKHR createInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR};
createInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
createInfo.size = buildAs[idx].sizeInfo.accelerationStructureSize; // Will be used to allocate memory.
buildAs[idx].as = m_alloc->createAcceleration(createInfo);
NAME_IDX_VK(buildAs[idx].as.accel, idx);
NAME_IDX_VK(buildAs[idx].as.buffer.buffer, idx);
// BuildInfo #2 part
buildAs[idx].buildInfo.dstAccelerationStructure = buildAs[idx].as.accel; // Setting where the build lands
buildAs[idx].buildInfo.scratchData.deviceAddress = scratchAddress; // All build are using the same scratch buffer
// Building the bottom-level-acceleration-structure
vkCmdBuildAccelerationStructuresKHR(cmdBuf, 1, &buildAs[idx].buildInfo, &buildAs[idx].rangeInfo);
// Since the scratch buffer is reused across builds, we need a barrier to ensure one build
// is finished before starting the next one.
VkMemoryBarrier barrier{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
barrier.srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
barrier.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, 0, 1, &barrier, 0, nullptr, 0, nullptr);
if(queryPool)
{
// Add a query to find the 'real' amount of memory needed, use for compaction
vkCmdWriteAccelerationStructuresPropertiesKHR(cmdBuf, 1, &buildAs[idx].buildInfo.dstAccelerationStructure,
VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, queryPool, queryCnt++);
}
}
}
//--------------------------------------------------------------------------------------------------
// Create a new, compacted acceleration structure and buffer based on the sizes
// retrieved by the query, and replace the original acceleration structure with it.
void nvvk::RaytracingBuilderKHR::cmdCompactBlas(VkCommandBuffer cmdBuf,
std::vector<uint32_t> indices,
std::vector<BuildAccelerationStructure>& buildAs,
VkQueryPool queryPool)
{
uint32_t queryCtn{0};
// Get the compacted size result back
std::vector<VkDeviceSize> compactSizes(static_cast<uint32_t>(indices.size()));
vkGetQueryPoolResults(m_device, queryPool, 0, (uint32_t)compactSizes.size(), compactSizes.size() * sizeof(VkDeviceSize),
compactSizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_WAIT_BIT);
for(auto idx : indices)
{
buildAs[idx].cleanupAS = buildAs[idx].as; // previous AS to destroy
buildAs[idx].sizeInfo.accelerationStructureSize = compactSizes[queryCtn++]; // new reduced size
// Creating a compact version of the AS
VkAccelerationStructureCreateInfoKHR asCreateInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR};
asCreateInfo.size = buildAs[idx].sizeInfo.accelerationStructureSize;
asCreateInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
buildAs[idx].as = m_alloc->createAcceleration(asCreateInfo);
NAME_IDX_VK(buildAs[idx].as.accel, idx);
NAME_IDX_VK(buildAs[idx].as.buffer.buffer, idx);
// Copy the original BLAS to a compact version
VkCopyAccelerationStructureInfoKHR copyInfo{VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR};
copyInfo.src = buildAs[idx].buildInfo.dstAccelerationStructure;
copyInfo.dst = buildAs[idx].as.accel;
copyInfo.mode = VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR;
vkCmdCopyAccelerationStructureKHR(cmdBuf, &copyInfo);
}
}
//--------------------------------------------------------------------------------------------------
// Destroy all the non-compacted acceleration structures
//
void nvvk::RaytracingBuilderKHR::destroyNonCompacted(std::vector<uint32_t> indices, std::vector<BuildAccelerationStructure>& buildAs)
{
for(auto& i : indices)
{
m_alloc->destroy(buildAs[i].cleanupAS);
}
}
void nvvk::RaytracingBuilderKHR::buildTlas(const std::vector<VkAccelerationStructureInstanceKHR>& instances,
VkBuildAccelerationStructureFlagsKHR flags /*= VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR*/,
bool update /*= false*/)
{
buildTlas(instances, flags, update, false);
}
#ifdef VK_NV_ray_tracing_motion_blur
void nvvk::RaytracingBuilderKHR::buildTlas(const std::vector<VkAccelerationStructureMotionInstanceNV>& instances,
VkBuildAccelerationStructureFlagsKHR flags /*= VK_BUILD_ACCELERATION_STRUCTURE_MOTION_BIT_NV*/,
bool update /*= false*/)
{
buildTlas(instances, flags, update, true);
}
#endif
//--------------------------------------------------------------------------------------------------
// Low level of Tlas creation - see buildTlas
//
void nvvk::RaytracingBuilderKHR::cmdCreateTlas(VkCommandBuffer cmdBuf,
uint32_t countInstance,
VkDeviceAddress instBufferAddr,
nvvk::Buffer& scratchBuffer,
VkBuildAccelerationStructureFlagsKHR flags,
bool update,
bool motion)
{
// Wraps a device pointer to the above uploaded instances.
VkAccelerationStructureGeometryInstancesDataKHR instancesVk{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR};
instancesVk.data.deviceAddress = instBufferAddr;
// Put the above into a VkAccelerationStructureGeometryKHR. We need to put the instances struct in a union and label it as instance data.
VkAccelerationStructureGeometryKHR topASGeometry{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR};
topASGeometry.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR;
topASGeometry.geometry.instances = instancesVk;
// Find sizes
VkAccelerationStructureBuildGeometryInfoKHR buildInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR};
buildInfo.flags = flags;
buildInfo.geometryCount = 1;
buildInfo.pGeometries = &topASGeometry;
buildInfo.mode = update ? VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR : VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
buildInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
buildInfo.srcAccelerationStructure = VK_NULL_HANDLE;
VkAccelerationStructureBuildSizesInfoKHR sizeInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR};
vkGetAccelerationStructureBuildSizesKHR(m_device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &buildInfo,
&countInstance, &sizeInfo);
#ifdef VK_NV_ray_tracing_motion_blur
VkAccelerationStructureMotionInfoNV motionInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MOTION_INFO_NV};
motionInfo.maxInstances = countInstance;
#endif
// Create TLAS
if(update == false)
{
VkAccelerationStructureCreateInfoKHR createInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR};
createInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
createInfo.size = sizeInfo.accelerationStructureSize;
#ifdef VK_NV_ray_tracing_motion_blur
if(motion)
{
createInfo.createFlags = VK_ACCELERATION_STRUCTURE_CREATE_MOTION_BIT_NV;
createInfo.pNext = &motionInfo;
}
#endif
m_tlas = m_alloc->createAcceleration(createInfo);
NAME_VK(m_tlas.accel);
NAME_VK(m_tlas.buffer.buffer);
}
// Allocate the scratch memory
scratchBuffer = m_alloc->createBuffer(sizeInfo.buildScratchSize,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
VkBufferDeviceAddressInfo bufferInfo{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, nullptr, scratchBuffer.buffer};
VkDeviceAddress scratchAddress = vkGetBufferDeviceAddress(m_device, &bufferInfo);
NAME_VK(scratchBuffer.buffer);
// Update build information
buildInfo.srcAccelerationStructure = update ? m_tlas.accel : VK_NULL_HANDLE;
buildInfo.dstAccelerationStructure = m_tlas.accel;
buildInfo.scratchData.deviceAddress = scratchAddress;
// Build Offsets info: n instances
VkAccelerationStructureBuildRangeInfoKHR buildOffsetInfo{countInstance, 0, 0, 0};
const VkAccelerationStructureBuildRangeInfoKHR* pBuildOffsetInfo = &buildOffsetInfo;
// Build the TLAS
vkCmdBuildAccelerationStructuresKHR(cmdBuf, 1, &buildInfo, &pBuildOffsetInfo);
}
//--------------------------------------------------------------------------------------------------
// Refit BLAS number blasIdx from updated buffer contents.
//
void nvvk::RaytracingBuilderKHR::updateBlas(uint32_t blasIdx, BlasInput& blas, VkBuildAccelerationStructureFlagsKHR flags)
{
assert(size_t(blasIdx) < m_blas.size());
// Preparing all build information, acceleration is filled later
VkAccelerationStructureBuildGeometryInfoKHR buildInfos{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR};
buildInfos.flags = flags;
buildInfos.geometryCount = (uint32_t)blas.asGeometry.size();
buildInfos.pGeometries = blas.asGeometry.data();
buildInfos.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR; // UPDATE
buildInfos.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
buildInfos.srcAccelerationStructure = m_blas[blasIdx].accel; // UPDATE
buildInfos.dstAccelerationStructure = m_blas[blasIdx].accel;
// Find size to build on the device
std::vector<uint32_t> maxPrimCount(blas.asBuildOffsetInfo.size());
for(auto tt = 0; tt < blas.asBuildOffsetInfo.size(); tt++)
maxPrimCount[tt] = blas.asBuildOffsetInfo[tt].primitiveCount; // Number of primitives/triangles
VkAccelerationStructureBuildSizesInfoKHR sizeInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR};
vkGetAccelerationStructureBuildSizesKHR(m_device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &buildInfos,
maxPrimCount.data(), &sizeInfo);
// Allocate the scratch buffer and set the scratch info
nvvk::Buffer scratchBuffer =
m_alloc->createBuffer(sizeInfo.buildScratchSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
VkBufferDeviceAddressInfo bufferInfo{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO};
bufferInfo.buffer = scratchBuffer.buffer;
buildInfos.scratchData.deviceAddress = vkGetBufferDeviceAddress(m_device, &bufferInfo);
NAME_VK(scratchBuffer.buffer);
std::vector<const VkAccelerationStructureBuildRangeInfoKHR*> pBuildOffset(blas.asBuildOffsetInfo.size());
for(size_t i = 0; i < blas.asBuildOffsetInfo.size(); i++)
pBuildOffset[i] = &blas.asBuildOffsetInfo[i];
// Create a one-time command buffer to rebuild (refit) the BLAS on the device
nvvk::CommandPool genCmdBuf(m_device, m_queueIndex);
VkCommandBuffer cmdBuf = genCmdBuf.createCommandBuffer();
// Update the acceleration structure. The UPDATE mode together with matching
// src/dst handles makes the existing BLAS be refitted in place.
vkCmdBuildAccelerationStructuresKHR(cmdBuf, 1, &buildInfos, pBuildOffset.data());
genCmdBuf.submitAndWait(cmdBuf);
m_alloc->destroy(scratchBuffer);
}


@@ -0,0 +1,230 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
/** @DOC_START
# class nvvk::RaytracingBuilderKHR
> nvvk::RaytracingBuilderKHR provides base functionality for building ray tracing acceleration structures
This class acts as an owning container for a single top-level acceleration
structure referencing any number of bottom-level acceleration structures.
We provide functions for building (on the device) an array of BLASs and a
single TLAS from vectors of BlasInput and Instance, respectively, and
a destroy function for cleaning up the created acceleration structures.
Generally, we reference BLASs by their index in the stored BLAS array,
rather than using raw device pointers as the pure Vulkan acceleration
structure API uses.
This class does not support replacing acceleration structures once
built, but you can update the acceleration structures. For educational
purposes, this class prioritizes (relative) understandability over
performance, so vkQueueWaitIdle is implicitly used everywhere.
# Setup and Usage
```cpp
// Borrow a VkDevice and memory allocator pointer (must remain
// valid throughout our use of the ray trace builder), and
// instantiate an unspecified queue of the given family for use.
m_rtBuilder.setup(device, memoryAllocator, queueIndex);
// You create a vector of RaytracingBuilderKHR::BlasInput and then
// pass it to buildBlas.
std::vector<RaytracingBuilderKHR::BlasInput> inputs = // ...
m_rtBuilder.buildBlas(inputs);
// You create a vector of VkAccelerationStructureInstanceKHR and pass it to
// buildTlas. Each instance references one of the BLASs built above through
// its acceleration structure device address (see getBlasDeviceAddress).
std::vector<VkAccelerationStructureInstanceKHR> instances = // ...
m_rtBuilder.buildTlas(instances);
// Retrieve the handle to the acceleration structure.
const VkAccelerationStructureKHR tlas = m_rtBuilder.getAccelerationStructure();
```
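For reference, here is a minimal sketch of filling one BlasInput for a single
triangle-mesh geometry. The buffer device addresses, vertex/index counts and the
stride below are assumptions of this example, not members of this class:
```cpp
VkAccelerationStructureGeometryTrianglesDataKHR triangles{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR};
triangles.vertexFormat             = VK_FORMAT_R32G32B32_SFLOAT;
triangles.vertexData.deviceAddress = vertexBufferAddress;  // assumed VkDeviceAddress of the vertex buffer
triangles.vertexStride             = 3 * sizeof(float);
triangles.maxVertex                = vertexCount - 1;      // assumed number of vertices
triangles.indexType                = VK_INDEX_TYPE_UINT32;
triangles.indexData.deviceAddress  = indexBufferAddress;   // assumed VkDeviceAddress of the index buffer

VkAccelerationStructureGeometryKHR geometry{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR};
geometry.geometryType       = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
geometry.flags              = VK_GEOMETRY_OPAQUE_BIT_KHR;
geometry.geometry.triangles = triangles;

VkAccelerationStructureBuildRangeInfoKHR offset{};
offset.primitiveCount = indexCount / 3;                    // assumed number of triangles

RaytracingBuilderKHR::BlasInput blasInput;
blasInput.asGeometry.emplace_back(geometry);
blasInput.asBuildOffsetInfo.emplace_back(offset);
```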
@DOC_END */
#include <mutex>
#include <vulkan/vulkan_core.h>
#if VK_KHR_acceleration_structure
#include "resourceallocator_vk.hpp"
#include "commands_vk.hpp" // this is only needed here to satisfy some samples that rely on it
#include "debug_util_vk.hpp"
#include "nvh/nvprint.hpp" // this is only needed here to satisfy some samples that rely on it
#include <glm/glm.hpp>
#include <type_traits>
namespace nvvk {
// Convert a Mat4x4 to the matrix required by acceleration structures
inline VkTransformMatrixKHR toTransformMatrixKHR(glm::mat4 matrix)
{
// VkTransformMatrixKHR uses a row-major memory layout, while glm::mat4
// uses a column-major memory layout. We transpose the matrix so we can
// memcpy the matrix's data directly.
glm::mat4 temp = glm::transpose(matrix);
VkTransformMatrixKHR out_matrix;
memcpy(&out_matrix, &temp, sizeof(VkTransformMatrixKHR));
return out_matrix;
}
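// As an illustration only (not part of this header), a TLAS instance entry is
// typically filled like this; `modelMatrix` and `blasAddress` are assumed
// application-side values:
//
//   VkAccelerationStructureInstanceKHR inst{};
//   inst.transform                      = toTransformMatrixKHR(modelMatrix);
//   inst.instanceCustomIndex            = 0;
//   inst.mask                           = 0xFF;
//   inst.flags                          = VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR;
//   inst.accelerationStructureReference = blasAddress;  // e.g. from getBlasDeviceAddress()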
// Ray tracing BLAS and TLAS builder
class RaytracingBuilderKHR
{
public:
// Inputs used to build Bottom-level acceleration structure.
// You manage the lifetime of the buffer(s) referenced by the VkAccelerationStructureGeometryKHRs within.
// In particular, you must make sure they are still valid and not being modified when the BLAS is built or updated.
struct BlasInput
{
// Data used to build acceleration structure geometry
std::vector<VkAccelerationStructureGeometryKHR> asGeometry;
std::vector<VkAccelerationStructureBuildRangeInfoKHR> asBuildOffsetInfo;
VkBuildAccelerationStructureFlagsKHR flags{0};
};
// Initializing the allocator and querying the raytracing properties
void setup(const VkDevice& device, nvvk::ResourceAllocator* allocator, uint32_t queueIndex);
// Destroying all allocations
void destroy();
// Returning the constructed top-level acceleration structure
VkAccelerationStructureKHR getAccelerationStructure() const;
// Return the acceleration structure device address of the BLAS with the given index
VkDeviceAddress getBlasDeviceAddress(uint32_t blasId);
// Create all the BLAS from the vector of BlasInput
void buildBlas(const std::vector<BlasInput>& input,
VkBuildAccelerationStructureFlagsKHR flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR);
// Refit BLAS number blasIdx from updated buffer contents.
void updateBlas(uint32_t blasIdx, BlasInput& blas, VkBuildAccelerationStructureFlagsKHR flags);
// Build TLAS for static acceleration structures
void buildTlas(const std::vector<VkAccelerationStructureInstanceKHR>& instances,
VkBuildAccelerationStructureFlagsKHR flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR,
bool update = false);
#ifdef VK_NV_ray_tracing_motion_blur
// Build TLAS for mix of motion and static acceleration structures
void buildTlas(const std::vector<VkAccelerationStructureMotionInstanceNV>& instances,
VkBuildAccelerationStructureFlagsKHR flags = VK_BUILD_ACCELERATION_STRUCTURE_MOTION_BIT_NV,
bool update = false);
#endif
// Build TLAS from an array of VkAccelerationStructureInstanceKHR
// - Use motion=true with VkAccelerationStructureMotionInstanceNV
// - The resulting TLAS will be stored in m_tlas
// - Set update=true to rebuild the TLAS with updated matrices; the build flags must then include the 'allow update' bit
template <class T>
void buildTlas(const std::vector<T>& instances,
VkBuildAccelerationStructureFlagsKHR flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR,
bool update = false,
bool motion = false)
{
// Cannot call buildTlas twice except to update.
assert(m_tlas.accel == VK_NULL_HANDLE || update);
uint32_t countInstance = static_cast<uint32_t>(instances.size());
// Command buffer to create the TLAS
nvvk::CommandPool genCmdBuf(m_device, m_queueIndex);
VkCommandBuffer cmdBuf = genCmdBuf.createCommandBuffer();
// Create a buffer holding the actual instance data (matrices++) for use by the AS builder
nvvk::Buffer instancesBuffer; // Buffer of instances containing the matrices and BLAS ids
instancesBuffer = m_alloc->createBuffer(cmdBuf, instances,
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT
| VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR);
NAME_VK(instancesBuffer.buffer);
VkBufferDeviceAddressInfo bufferInfo{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, nullptr, instancesBuffer.buffer};
VkDeviceAddress instBufferAddr = vkGetBufferDeviceAddress(m_device, &bufferInfo);
// Make sure the instance buffer copy has completed before triggering the acceleration structure build
VkMemoryBarrier barrier{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
0, 1, &barrier, 0, nullptr, 0, nullptr);
// Creating the TLAS
nvvk::Buffer scratchBuffer;
cmdCreateTlas(cmdBuf, countInstance, instBufferAddr, scratchBuffer, flags, update, motion);
// Finalizing and destroying temporary data
genCmdBuf.submitAndWait(cmdBuf); // queueWaitIdle inside.
m_alloc->finalizeAndReleaseStaging();
m_alloc->destroy(scratchBuffer);
m_alloc->destroy(instancesBuffer);
}
// Creating the TLAS, called by buildTlas
void cmdCreateTlas(VkCommandBuffer cmdBuf, // Command buffer
uint32_t countInstance, // number of instances
VkDeviceAddress instBufferAddr, // Buffer address of instances
nvvk::Buffer& scratchBuffer, // Scratch buffer for construction
VkBuildAccelerationStructureFlagsKHR flags, // Build creation flag
bool update, // Update == animation
bool motion // Motion Blur
);
protected:
std::vector<nvvk::AccelKHR> m_blas; // Bottom-level acceleration structure
nvvk::AccelKHR m_tlas; // Top-level acceleration structure
// Setup
VkDevice m_device{VK_NULL_HANDLE};
uint32_t m_queueIndex{0};
nvvk::ResourceAllocator* m_alloc{nullptr};
nvvk::DebugUtil m_debug;
nvvk::CommandPool m_cmdPool;
struct BuildAccelerationStructure
{
VkAccelerationStructureBuildGeometryInfoKHR buildInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR};
VkAccelerationStructureBuildSizesInfoKHR sizeInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR};
const VkAccelerationStructureBuildRangeInfoKHR* rangeInfo;
nvvk::AccelKHR as; // result acceleration structure
nvvk::AccelKHR cleanupAS;
};
void cmdCreateBlas(VkCommandBuffer cmdBuf,
std::vector<uint32_t> indices,
std::vector<BuildAccelerationStructure>& buildAs,
VkDeviceAddress scratchAddress,
VkQueryPool queryPool);
void cmdCompactBlas(VkCommandBuffer cmdBuf, std::vector<uint32_t> indices, std::vector<BuildAccelerationStructure>& buildAs, VkQueryPool queryPool);
void destroyNonCompacted(std::vector<uint32_t> indices, std::vector<BuildAccelerationStructure>& buildAs);
bool hasFlag(VkFlags item, VkFlags flag) { return (item & flag) == flag; }
};
} // namespace nvvk
#else
#error This include requires VK_KHR_acceleration_structure support in the Vulkan SDK.
#endif


@@ -0,0 +1,347 @@
#include "raytraceNV_vk.hpp"
#include <cinttypes>
void nvvk::RaytracingBuilderNV::setup(VkDevice device, nvvk::ResourceAllocator* allocator, uint32_t queueIndex)
{
m_device = device;
m_queueIndex = queueIndex;
m_debug.setup(device);
m_alloc = allocator;
}
void nvvk::RaytracingBuilderNV::destroy()
{
for(auto& b : m_blas)
{
m_alloc->destroy(b.as);
}
m_alloc->destroy(m_tlas.as);
m_alloc->destroy(m_instBuffer);
}
VkAccelerationStructureNV nvvk::RaytracingBuilderNV::getAccelerationStructure() const
{
return m_tlas.as.accel;
}
void nvvk::RaytracingBuilderNV::buildBlas(const std::vector<std::vector<VkGeometryNV>>& geoms, VkBuildAccelerationStructureFlagsNV flags)
{
m_blas.resize(geoms.size());
VkDeviceSize maxScratch{0};
// Is compaction requested?
bool doCompaction = (flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_NV)
== VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_NV;
std::vector<VkDeviceSize> originalSizes;
originalSizes.resize(m_blas.size());
// Iterate over the groups of geometries, creating one BLAS for each group
for(size_t i = 0; i < geoms.size(); i++)
{
Blas& blas{m_blas[i]};
// Set the geometries that will be part of the BLAS
blas.asInfo.geometryCount = static_cast<uint32_t>(geoms[i].size());
blas.asInfo.pGeometries = geoms[i].data();
blas.asInfo.flags = flags;
VkAccelerationStructureCreateInfoNV createinfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_NV};
createinfo.info = blas.asInfo;
// Create an acceleration structure identifier and allocate memory to store the
// resulting structure data
blas.as = m_alloc->createAcceleration(createinfo);
m_debug.setObjectName(blas.as.accel, (std::string("Blas" + std::to_string(i)).c_str()));
// Estimate the amount of scratch memory required to build the BLAS, and update the
// size of the scratch buffer that will be allocated to sequentially build all BLASes
VkAccelerationStructureMemoryRequirementsInfoNV memoryRequirementsInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV};
memoryRequirementsInfo.type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_BUILD_SCRATCH_NV;
memoryRequirementsInfo.accelerationStructure = blas.as.accel;
VkMemoryRequirements2 reqMem{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
vkGetAccelerationStructureMemoryRequirementsNV(m_device, &memoryRequirementsInfo, &reqMem);
VkDeviceSize scratchSize = reqMem.memoryRequirements.size;
// Original size
memoryRequirementsInfo.type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_NV;
vkGetAccelerationStructureMemoryRequirementsNV(m_device, &memoryRequirementsInfo, &reqMem);
originalSizes[i] = reqMem.memoryRequirements.size;
maxScratch = std::max(maxScratch, scratchSize);
}
// Allocate the scratch buffers holding the temporary data of the acceleration structure builder
nvvk::Buffer scratchBuffer = m_alloc->createBuffer(maxScratch, VK_BUFFER_USAGE_RAY_TRACING_BIT_NV);
// Query size of compact BLAS
VkQueryPoolCreateInfo qpci{VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO};
qpci.queryCount = (uint32_t)m_blas.size();
qpci.queryType = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_NV;
VkQueryPool queryPool;
vkCreateQueryPool(m_device, &qpci, nullptr, &queryPool);
// Create a command buffer containing all the BLAS builds
nvvk::CommandPool genCmdBuf(m_device, m_queueIndex);
int ctr{0};
std::vector<VkCommandBuffer> allCmdBufs;
allCmdBufs.reserve(m_blas.size());
for(auto& blas : m_blas)
{
VkCommandBuffer cmdBuf = genCmdBuf.createCommandBuffer();
allCmdBufs.push_back(cmdBuf);
vkCmdBuildAccelerationStructureNV(cmdBuf, &blas.asInfo, nullptr, 0, VK_FALSE, blas.as.accel, nullptr, scratchBuffer.buffer, 0);
// Since the scratch buffer is reused across builds, we need a barrier to ensure one build
// is finished before starting the next one
VkMemoryBarrier barrier{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
barrier.srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_NV;
barrier.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_NV;
vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_NV,
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_NV, 0, 1, &barrier, 0, nullptr, 0, nullptr);
// Query the compact size
if(doCompaction)
{
vkCmdWriteAccelerationStructuresPropertiesNV(cmdBuf, 1, &blas.as.accel,
VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_NV, queryPool, ctr++);
}
}
genCmdBuf.submitAndWait(allCmdBufs);
allCmdBufs.clear();
// Compacting all BLAS
if(doCompaction)
{
VkCommandBuffer cmdBuf = genCmdBuf.createCommandBuffer();
// Get the size result back
std::vector<VkDeviceSize> compactSizes(m_blas.size());
vkGetQueryPoolResults(m_device, queryPool, 0, (uint32_t)compactSizes.size(), compactSizes.size() * sizeof(VkDeviceSize),
compactSizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_WAIT_BIT);
// Compacting
std::vector<nvvk::AccelNV> cleanupAS(m_blas.size());
uint32_t totOriginalSize{0}, totCompactSize{0};
for(int i = 0; i < m_blas.size(); i++)
{
LOGI("Reducing %i, from %" PRIu64 " to %" PRIu64 " \n", i, originalSizes[i], compactSizes[i]);
totOriginalSize += (uint32_t)originalSizes[i];
totCompactSize += (uint32_t)compactSizes[i];
// Creating a compact version of the AS
VkAccelerationStructureInfoNV asInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV};
asInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV;
asInfo.flags = flags;
VkAccelerationStructureCreateInfoNV asCreateInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_NV};
asCreateInfo.compactedSize = compactSizes[i];
asCreateInfo.info = asInfo;
auto as = m_alloc->createAcceleration(asCreateInfo);
// Copy the original BLAS to a compact version
vkCmdCopyAccelerationStructureNV(cmdBuf, as.accel, m_blas[i].as.accel, VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_NV);
cleanupAS[i] = m_blas[i].as;
m_blas[i].as = as;
}
genCmdBuf.submitAndWait(cmdBuf);
// Destroying the previous version
for(auto as : cleanupAS)
m_alloc->destroy(as);
LOGI("------------------\n");
const float fractionSmaller = (totOriginalSize == 0) ? 0 : (totOriginalSize - totCompactSize) / float(totOriginalSize);
LOGI("Total: %d -> %d = %d (%2.2f%s smaller) \n", totOriginalSize, totCompactSize, totOriginalSize - totCompactSize,
fractionSmaller * 100.f, "%");
}
vkDestroyQueryPool(m_device, queryPool, nullptr);
m_alloc->destroy(scratchBuffer);
m_alloc->finalizeAndReleaseStaging();
}
VkGeometryInstanceNV nvvk::RaytracingBuilderNV::instanceToVkGeometryInstanceNV(const nvvk::RaytracingBuilderNV::Instance& instance)
{
Blas& blas{m_blas[instance.blasId]};
// For each BLAS, fetch the acceleration structure handle that will allow the builder to
// directly access it from the device
uint64_t asHandle = 0;
vkGetAccelerationStructureHandleNV(m_device, blas.as.accel, sizeof(uint64_t), &asHandle);
VkGeometryInstanceNV gInst{};
// The matrices for the instance transforms are row-major, instead of column-major in the
// rest of the application
glm::mat4 transp = glm::transpose(instance.transform);
// The gInst.transform value only contains 12 values, corresponding to a 4x3 matrix, hence
// saving the last row that is anyway always (0,0,0,1). Since the matrix is row-major,
// we simply copy the first 12 values of the original 4x4 matrix
memcpy(gInst.transform, &transp, sizeof(gInst.transform));
gInst.instanceId = instance.instanceId;
gInst.mask = instance.mask;
gInst.hitGroupId = instance.hitGroupId;
gInst.flags = static_cast<uint32_t>(instance.flags);
gInst.accelerationStructureHandle = asHandle;
return gInst;
}
void nvvk::RaytracingBuilderNV::buildTlas(const std::vector<nvvk::RaytracingBuilderNV::Instance>& instances,
VkBuildAccelerationStructureFlagsNV flags)
{
// Set the instance count required to determine how much memory the TLAS will use
m_tlas.asInfo.instanceCount = static_cast<uint32_t>(instances.size());
m_tlas.asInfo.flags = flags;
VkAccelerationStructureCreateInfoNV accelerationStructureInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_NV};
accelerationStructureInfo.info = m_tlas.asInfo;
// Create the acceleration structure object and allocate the memory required to hold the TLAS data
m_tlas.as = m_alloc->createAcceleration(accelerationStructureInfo);
m_debug.setObjectName(m_tlas.as.accel, "Tlas");
// Compute the amount of scratch memory required by the acceleration structure builder
VkAccelerationStructureMemoryRequirementsInfoNV memoryRequirementsInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV};
memoryRequirementsInfo.type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_BUILD_SCRATCH_NV;
memoryRequirementsInfo.accelerationStructure = m_tlas.as.accel;
VkMemoryRequirements2 reqMem{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
vkGetAccelerationStructureMemoryRequirementsNV(m_device, &memoryRequirementsInfo, &reqMem);
VkDeviceSize scratchSize = reqMem.memoryRequirements.size;
// Allocate the scratch memory
nvvk::Buffer scratchBuffer = m_alloc->createBuffer(scratchSize, VK_BUFFER_USAGE_RAY_TRACING_BIT_NV);
// For each instance, build the corresponding instance descriptor
std::vector<VkGeometryInstanceNV> geometryInstances;
geometryInstances.reserve(instances.size());
for(const auto& inst : instances)
{
geometryInstances.push_back(instanceToVkGeometryInstanceNV(inst));
}
// Building the TLAS
nvvk::CommandPool genCmdBuf(m_device, m_queueIndex);
VkCommandBuffer cmdBuf = genCmdBuf.createCommandBuffer();
// Allocate the instance buffer and copy its contents from host to device memory
m_instBuffer = m_alloc->createBuffer(cmdBuf, geometryInstances, VK_BUFFER_USAGE_RAY_TRACING_BIT_NV);
m_debug.setObjectName(m_instBuffer.buffer, "TLASInstances");
// Make sure the instance buffer copy has completed before triggering the
// acceleration structure build
VkMemoryBarrier barrier{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_NV, 0,
1, &barrier, 0, nullptr, 0, nullptr);
// Build the TLAS
vkCmdBuildAccelerationStructureNV(cmdBuf, &m_tlas.asInfo, m_instBuffer.buffer, 0, VK_FALSE, m_tlas.as.accel, nullptr,
scratchBuffer.buffer, 0);
genCmdBuf.submitAndWait(cmdBuf);
m_alloc->finalizeAndReleaseStaging();
m_alloc->destroy(scratchBuffer);
}
void nvvk::RaytracingBuilderNV::updateTlasMatrices(const std::vector<nvvk::RaytracingBuilderNV::Instance>& instances)
{
VkDeviceSize bufferSize = instances.size() * sizeof(VkGeometryInstanceNV);
// Create a staging buffer on the host to upload the new instance data
nvvk::Buffer stagingBuffer = m_alloc->createBuffer(bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
#if defined(NVVK_ALLOC_VMA)
VmaMemoryUsage::VMA_MEMORY_USAGE_CPU_TO_GPU
#else
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
#endif
);
// Copy the instance data into the staging buffer
auto* gInst = reinterpret_cast<VkGeometryInstanceNV*>(m_alloc->map(stagingBuffer));
for(int i = 0; i < instances.size(); i++)
{
gInst[i] = instanceToVkGeometryInstanceNV(instances[i]);
}
m_alloc->unmap(stagingBuffer);
// Compute the amount of scratch memory required by the AS builder to update the TLAS
VkAccelerationStructureMemoryRequirementsInfoNV memoryRequirementsInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV};
memoryRequirementsInfo.type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_UPDATE_SCRATCH_NV;
memoryRequirementsInfo.accelerationStructure = m_tlas.as.accel;
VkMemoryRequirements2 reqMem{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
vkGetAccelerationStructureMemoryRequirementsNV(m_device, &memoryRequirementsInfo, &reqMem);
VkDeviceSize scratchSize = reqMem.memoryRequirements.size;
// Allocate the scratch buffer
nvvk::Buffer scratchBuffer = m_alloc->createBuffer(scratchSize, VK_BUFFER_USAGE_RAY_TRACING_BIT_NV);
// Update the instance buffer on the device side and build the TLAS
nvvk::CommandPool genCmdBuf(m_device, m_queueIndex);
VkCommandBuffer cmdBuf = genCmdBuf.createCommandBuffer();
VkBufferCopy region{0, 0, bufferSize};
vkCmdCopyBuffer(cmdBuf, stagingBuffer.buffer, m_instBuffer.buffer, 1, &region);
// Make sure the instance buffer copy has completed before triggering the
// acceleration structure build
VkMemoryBarrier barrier{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_NV, 0,
1, &barrier, 0, nullptr, 0, nullptr);
// Update the acceleration structure. Note the VK_TRUE parameter to trigger the update,
// and the existing TLAS being passed and updated in place
vkCmdBuildAccelerationStructureNV(cmdBuf, &m_tlas.asInfo, m_instBuffer.buffer, 0, VK_TRUE, m_tlas.as.accel,
m_tlas.as.accel, scratchBuffer.buffer, 0);
genCmdBuf.submitAndWait(cmdBuf);
m_alloc->destroy(scratchBuffer);
m_alloc->destroy(stagingBuffer);
}
void nvvk::RaytracingBuilderNV::updateBlas(uint32_t blasIdx)
{
Blas& blas = m_blas[blasIdx];
// Compute the amount of scratch memory required by the AS builder to update the BLAS
VkAccelerationStructureMemoryRequirementsInfoNV memoryRequirementsInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV};
memoryRequirementsInfo.type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_UPDATE_SCRATCH_NV;
memoryRequirementsInfo.accelerationStructure = blas.as.accel;
VkMemoryRequirements2 reqMem{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
vkGetAccelerationStructureMemoryRequirementsNV(m_device, &memoryRequirementsInfo, &reqMem);
VkDeviceSize scratchSize = reqMem.memoryRequirements.size;
// Allocate the scratch buffer
nvvk::Buffer scratchBuffer = m_alloc->createBuffer(scratchSize, VK_BUFFER_USAGE_RAY_TRACING_BIT_NV);
// Create a one-time command buffer to rebuild (refit) the BLAS on the device
nvvk::CommandPool genCmdBuf(m_device, m_queueIndex);
VkCommandBuffer cmdBuf = genCmdBuf.createCommandBuffer();
// Update the acceleration structure. Note the VK_TRUE parameter to trigger the update,
// and the existing BLAS being passed and updated in place
vkCmdBuildAccelerationStructureNV(cmdBuf, &blas.asInfo, nullptr, 0, VK_TRUE, blas.as.accel, blas.as.accel,
scratchBuffer.buffer, 0);
genCmdBuf.submitAndWait(cmdBuf);
m_alloc->destroy(scratchBuffer);
}


@@ -0,0 +1,176 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
/* @DOC_START
# class nvvk::RaytracingBuilderNV
> nvvk::RaytracingBuilderNV provides base functionality for building ray tracing acceleration structures
This class does not implement everything needed for ray tracing, but it
helps creating the BLAS and TLAS, which can then be used by different
ray tracing pipelines.
# Setup and Usage
```cpp
m_rtBuilder.setup(device, memoryAllocator, queueIndex);
// Create array of VkGeometryNV
m_rtBuilder.buildBlas(allBlas);
// Create array of RaytracingBuilderNV::Instance
m_rtBuilder.buildTlas(instances);
// Retrieve the acceleration structure
const VkAccelerationStructureNV tlas = m_rtBuilder.getAccelerationStructure();
```
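For reference, a minimal sketch of filling one VkGeometryNV for an indexed
triangle mesh; the buffers, counts and the stride are assumptions of this example:
```cpp
VkGeometryNV geometry{VK_STRUCTURE_TYPE_GEOMETRY_NV};
geometry.geometryType                    = VK_GEOMETRY_TYPE_TRIANGLES_NV;
geometry.geometry.triangles.sType        = VK_STRUCTURE_TYPE_GEOMETRY_TRIANGLES_NV;
geometry.geometry.triangles.vertexData   = vertexBuffer;       // assumed VkBuffer
geometry.geometry.triangles.vertexCount  = vertexCount;        // assumed vertex count
geometry.geometry.triangles.vertexStride = 3 * sizeof(float);
geometry.geometry.triangles.vertexFormat = VK_FORMAT_R32G32B32_SFLOAT;
geometry.geometry.triangles.indexData    = indexBuffer;        // assumed VkBuffer
geometry.geometry.triangles.indexCount   = indexCount;         // assumed index count
geometry.geometry.triangles.indexType    = VK_INDEX_TYPE_UINT32;
geometry.geometry.aabbs.sType            = VK_STRUCTURE_TYPE_GEOMETRY_AABB_NV;
geometry.flags                           = VK_GEOMETRY_OPAQUE_BIT_NV;

std::vector<std::vector<VkGeometryNV>> allBlas{{geometry}};    // one BLAS with one geometry
```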
@DOC_END */
#include <mutex>
#include <vulkan/vulkan_core.h>
#if VK_NV_ray_tracing
#include "resourceallocator_vk.hpp"
#include "commands_vk.hpp" // this is only needed here to satisfy some samples that rely on it
#include "debug_util_vk.hpp"
#include "nvh/nvprint.hpp" // this is only needed here to satisfy some samples that rely on it
#include <glm/glm.hpp>
// See https://www.khronos.org/registry/vulkan/specs/1.1-extensions/html/chap33.html#acceleration-structure
struct VkGeometryInstanceNV
{
/// Transform matrix, containing only the top 3 rows
float transform[12];
/// Instance index
uint32_t instanceId : 24;
/// Visibility mask
uint32_t mask : 8;
/// Index of the hit group which will be invoked when a ray hits the instance
uint32_t hitGroupId : 24;
/// Instance flags, such as culling
uint32_t flags : 8;
/// Opaque handle of the bottom-level acceleration structure
uint64_t accelerationStructureHandle;
};
namespace nvvk {
class RaytracingBuilderNV
{
public:
RaytracingBuilderNV(RaytracingBuilderNV const&) = delete;
RaytracingBuilderNV& operator=(RaytracingBuilderNV const&) = delete;
RaytracingBuilderNV() = default;
//--------------------------------------------------------------------------------------------------
// Initializing the allocator and querying the raytracing properties
//
void setup(VkDevice device, nvvk::ResourceAllocator* allocator, uint32_t queueIndex);
// This is an instance of a BLAS
struct Instance
{
uint32_t blasId{0}; // Index of the BLAS in m_blas
uint32_t instanceId{0}; // Instance Index (gl_InstanceID)
uint32_t hitGroupId{0}; // Hit group index in the SBT
uint32_t mask{0xFF}; // Visibility mask, will be AND-ed with ray mask
VkGeometryInstanceFlagsNV flags = VK_GEOMETRY_INSTANCE_TRIANGLE_CULL_DISABLE_BIT_NV;
glm::mat4 transform{glm::mat4(1)}; // Identity
};
//--------------------------------------------------------------------------------------------------
// Destroying all allocations
//
void destroy();
// Returning the constructed top-level acceleration structure
VkAccelerationStructureNV getAccelerationStructure() const;
//--------------------------------------------------------------------------------------------------
// Create all the BLAS from the vector of vectors of VkGeometryNV
// - There will be one BLAS per vector of VkGeometryNV, i.e. as many BLASs as there are items in the geoms vector
// - The resulting BLAS are stored in m_blas
//
void buildBlas(const std::vector<std::vector<VkGeometryNV>>& geoms,
VkBuildAccelerationStructureFlagsNV flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_NV);
//--------------------------------------------------------------------------------------------------
// Convert an Instance object into a VkGeometryInstanceNV
VkGeometryInstanceNV instanceToVkGeometryInstanceNV(const Instance& instance);
//--------------------------------------------------------------------------------------------------
// Creating the top-level acceleration structure from the vector of Instance
// - See struct of Instance
// - The resulting TLAS will be stored in m_tlas
//
void buildTlas(const std::vector<Instance>& instances,
VkBuildAccelerationStructureFlagsNV flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_NV);
//--------------------------------------------------------------------------------------------------
// Refit the TLAS using new instance matrices
//
void updateTlasMatrices(const std::vector<Instance>& instances);
//--------------------------------------------------------------------------------------------------
// Refit the BLAS from updated buffers
//
void updateBlas(uint32_t blasIdx);
private:
// Bottom-level acceleration structure
struct Blas
{
nvvk::AccelNV as;
VkAccelerationStructureInfoNV asInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV, nullptr,
VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV};
VkGeometryNV geometry;
};
// Top-level acceleration structure
struct Tlas
{
nvvk::AccelNV as;
VkAccelerationStructureInfoNV asInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV, nullptr,
VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV};
};
//--------------------------------------------------------------------------------------------------
// Vector containing all the BLASes built and referenced by the TLAS
std::vector<Blas> m_blas;
// Top-level acceleration structure
Tlas m_tlas;
// Instance buffer containing the matrices and BLAS ids
nvvk::Buffer m_instBuffer;
VkDevice m_device;
uint32_t m_queueIndex{0};
nvvk::ResourceAllocator* m_alloc = nullptr;
nvvk::DebugUtil m_debug;
};
} // namespace nvvk
#else
#error This include requires VK_NV_ray_tracing support in the Vulkan SDK.
#endif


@@ -0,0 +1,158 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "renderpasses_vk.hpp"
#include "error_vk.hpp"
#include <assert.h>
namespace nvvk {
VkFormat findSupportedFormat(VkPhysicalDevice physicalDevice, const std::vector<VkFormat>& candidates, VkImageTiling tiling, VkFormatFeatureFlags features)
{
for(VkFormat format : candidates)
{
VkFormatProperties props;
vkGetPhysicalDeviceFormatProperties(physicalDevice, format, &props);
if(tiling == VK_IMAGE_TILING_LINEAR && (props.linearTilingFeatures & features) == features)
{
return format;
}
if(tiling == VK_IMAGE_TILING_OPTIMAL && (props.optimalTilingFeatures & features) == features)
{
return format;
}
}
assert(0 && "failed to find supported format!");
return VK_FORMAT_UNDEFINED;
}
VkFormat findDepthFormat(VkPhysicalDevice physicalDevice)
{
return findSupportedFormat(physicalDevice,
{VK_FORMAT_X8_D24_UNORM_PACK32, VK_FORMAT_D24_UNORM_S8_UINT, VK_FORMAT_D32_SFLOAT,
VK_FORMAT_D32_SFLOAT_S8_UINT, VK_FORMAT_D16_UNORM, VK_FORMAT_D16_UNORM_S8_UINT},
VK_IMAGE_TILING_OPTIMAL, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT);
}
VkFormat findDepthStencilFormat(VkPhysicalDevice physicalDevice)
{
return findSupportedFormat(physicalDevice, {VK_FORMAT_D24_UNORM_S8_UINT, VK_FORMAT_D32_SFLOAT_S8_UINT, VK_FORMAT_D16_UNORM_S8_UINT},
VK_IMAGE_TILING_OPTIMAL, VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT);
}
//////////////////////////////////////////////////////////////////////////
VkRenderPass createRenderPass(VkDevice device,
const std::vector<VkFormat>& colorAttachmentFormats,
VkFormat depthAttachmentFormat,
uint32_t subpassCount /*= 1*/,
bool clearColor /*= true*/,
bool clearDepth /*= true*/,
VkImageLayout initialLayout /*= VK_IMAGE_LAYOUT_UNDEFINED*/,
VkImageLayout finalLayout /*= VK_IMAGE_LAYOUT_PRESENT_SRC_KHR*/)
{
std::vector<VkAttachmentDescription> allAttachments;
std::vector<VkAttachmentReference> colorAttachmentRefs;
bool hasDepth = (depthAttachmentFormat != VK_FORMAT_UNDEFINED);
for(const auto& format : colorAttachmentFormats)
{
VkAttachmentDescription colorAttachment = {};
colorAttachment.format = format;
colorAttachment.samples = VK_SAMPLE_COUNT_1_BIT;
colorAttachment.loadOp = clearColor ? VK_ATTACHMENT_LOAD_OP_CLEAR :
((initialLayout == VK_IMAGE_LAYOUT_UNDEFINED) ? VK_ATTACHMENT_LOAD_OP_DONT_CARE :
VK_ATTACHMENT_LOAD_OP_LOAD);
colorAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
colorAttachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
colorAttachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
colorAttachment.initialLayout = initialLayout;
colorAttachment.finalLayout = finalLayout;
VkAttachmentReference colorAttachmentRef = {};
colorAttachmentRef.attachment = static_cast<uint32_t>(allAttachments.size());
colorAttachmentRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
allAttachments.push_back(colorAttachment);
colorAttachmentRefs.push_back(colorAttachmentRef);
}
VkAttachmentReference depthAttachmentRef = {};
if(hasDepth)
{
VkAttachmentDescription depthAttachment = {};
depthAttachment.format = depthAttachmentFormat;
depthAttachment.samples = VK_SAMPLE_COUNT_1_BIT;
depthAttachment.loadOp = clearDepth ? VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_LOAD;
depthAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
depthAttachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
depthAttachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
depthAttachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
depthAttachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
depthAttachmentRef.attachment = static_cast<uint32_t>(allAttachments.size());
depthAttachmentRef.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
allAttachments.push_back(depthAttachment);
}
std::vector<VkSubpassDescription> subpasses;
std::vector<VkSubpassDependency> subpassDependencies;
for(uint32_t i = 0; i < subpassCount; i++)
{
VkSubpassDescription subpass = {};
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.colorAttachmentCount = static_cast<uint32_t>(colorAttachmentRefs.size());
subpass.pColorAttachments = colorAttachmentRefs.data();
subpass.pDepthStencilAttachment = hasDepth ? &depthAttachmentRef : VK_NULL_HANDLE;
VkSubpassDependency dependency = {};
dependency.srcSubpass = i == 0 ? (VK_SUBPASS_EXTERNAL) : (i - 1);
dependency.dstSubpass = i;
dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dependency.srcAccessMask = 0;
dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
subpasses.push_back(subpass);
subpassDependencies.push_back(dependency);
}
VkRenderPassCreateInfo renderPassInfo{VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO};
renderPassInfo.attachmentCount = static_cast<uint32_t>(allAttachments.size());
renderPassInfo.pAttachments = allAttachments.data();
renderPassInfo.subpassCount = static_cast<uint32_t>(subpasses.size());
renderPassInfo.pSubpasses = subpasses.data();
renderPassInfo.dependencyCount = static_cast<uint32_t>(subpassDependencies.size());
renderPassInfo.pDependencies = subpassDependencies.data();
VkRenderPass renderPass;
NVVK_CHECK(vkCreateRenderPass(device, &renderPassInfo, nullptr, &renderPass));
return renderPass;
}
} // namespace nvvk

View file

@ -0,0 +1,52 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <vector>
#include <vulkan/vulkan_core.h>
namespace nvvk {
/** @DOC_START
# functions in nvvk
- findSupportedFormat : returns supported VkFormat from a list of candidates (returns first match)
- findDepthFormat : returns supported depth format (24, 32, 16-bit)
- findDepthStencilFormat : returns supported depth-stencil format (24/8, 32/8, 16/8-bit)
- createRenderPass : wrapper for vkCreateRenderPass
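A minimal usage sketch (the `device`, `physicalDevice` and swapchain format names are placeholders, not part of this header):
```cpp
// Pick a depth format the device supports, then build a single-subpass render pass
// with one color attachment and one depth attachment.
VkFormat     depthFormat = nvvk::findDepthFormat(physicalDevice);
VkRenderPass renderPass  = nvvk::createRenderPass(device,
                                                  {VK_FORMAT_B8G8R8A8_UNORM},  // color attachment formats
                                                  depthFormat,                 // depth attachment format
                                                  1,                           // subpassCount
                                                  true,                        // clearColor
                                                  true);                       // clearDepth
// ... record and submit rendering work ...
vkDestroyRenderPass(device, renderPass, nullptr);
```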
@DOC_END */
VkFormat findSupportedFormat(VkPhysicalDevice physicalDevice, const std::vector<VkFormat>& candidates, VkImageTiling tiling, VkFormatFeatureFlags features);
VkFormat findDepthFormat(VkPhysicalDevice physicalDevice);
VkFormat findDepthStencilFormat(VkPhysicalDevice physicalDevice);
//////////////////////////////////////////////////////////////////////////
VkRenderPass createRenderPass(VkDevice device,
const std::vector<VkFormat>& colorAttachmentFormats,
VkFormat depthAttachmentFormat,
uint32_t subpassCount = 1,
bool clearColor = true,
bool clearDepth = true,
VkImageLayout initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
VkImageLayout finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR);
} // namespace nvvk

View file

@ -0,0 +1,739 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "resourceallocator_vk.hpp"
#include "memallocator_dma_vk.hpp"
#include "memallocator_dedicated_vk.hpp"
#include "error_vk.hpp"
#include "images_vk.hpp"
namespace nvvk {
ResourceAllocator::ResourceAllocator(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, VkDeviceSize stagingBlockSize)
{
init(device, physicalDevice, memAlloc, stagingBlockSize);
}
ResourceAllocator::~ResourceAllocator()
{
deinit();
}
void ResourceAllocator::init(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, VkDeviceSize stagingBlockSize)
{
m_device = device;
m_physicalDevice = physicalDevice;
m_memAlloc = memAlloc;
vkGetPhysicalDeviceMemoryProperties(physicalDevice, &m_memoryProperties);
m_samplerPool.init(device);
m_staging = std::make_unique<StagingMemoryManager>(memAlloc, stagingBlockSize);
}
void ResourceAllocator::deinit()
{
m_samplerPool.deinit();
m_staging.reset();
}
Buffer ResourceAllocator::createBuffer(const VkBufferCreateInfo& info_, const VkMemoryPropertyFlags memProperties_)
{
Buffer resultBuffer;
// Create Buffer (can be overloaded)
CreateBufferEx(info_, &resultBuffer.buffer);
// Find memory requirements
VkMemoryRequirements2 memReqs{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS};
VkBufferMemoryRequirementsInfo2 bufferReqs{VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2};
memReqs.pNext = &dedicatedRegs;
bufferReqs.buffer = resultBuffer.buffer;
vkGetBufferMemoryRequirements2(m_device, &bufferReqs, &memReqs);
// Build up allocation info
MemAllocateInfo allocInfo(memReqs.memoryRequirements, memProperties_, false);
if(info_.usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT)
{
allocInfo.setAllocationFlags(VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT);
}
if(dedicatedRegs.requiresDedicatedAllocation)
{
allocInfo.setDedicatedBuffer(resultBuffer.buffer);
}
// Allocate memory
resultBuffer.memHandle = AllocateMemory(allocInfo);
if(resultBuffer.memHandle)
{
const auto memInfo = m_memAlloc->getMemoryInfo(resultBuffer.memHandle);
// Bind memory to buffer
NVVK_CHECK(vkBindBufferMemory(m_device, resultBuffer.buffer, memInfo.memory, memInfo.offset));
}
else
{
destroy(resultBuffer);
}
return resultBuffer;
}
Buffer ResourceAllocator::createBuffer(VkDeviceSize size_, VkBufferUsageFlags usage_, const VkMemoryPropertyFlags memUsage_)
{
VkBufferCreateInfo info{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
info.size = size_;
info.usage = usage_ | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
return createBuffer(info, memUsage_);
}
Buffer ResourceAllocator::createBuffer(const VkCommandBuffer& cmdBuf,
const VkDeviceSize& size_,
const void* data_,
VkBufferUsageFlags usage_,
VkMemoryPropertyFlags memProps)
{
VkBufferCreateInfo createInfoR{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
createInfoR.size = size_;
createInfoR.usage = usage_ | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
Buffer resultBuffer = createBuffer(createInfoR, memProps);
if(data_)
{
m_staging->cmdToBuffer(cmdBuf, resultBuffer.buffer, 0, size_, data_);
}
return resultBuffer;
}
Image ResourceAllocator::createImage(const VkImageCreateInfo& info_, const VkMemoryPropertyFlags memUsage_)
{
Image resultImage;
// Create image
CreateImageEx(info_, &resultImage.image);
// Find memory requirements
VkMemoryRequirements2 memReqs{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
VkMemoryDedicatedRequirements dedicatedRegs = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS};
VkImageMemoryRequirementsInfo2 imageReqs{VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2};
imageReqs.image = resultImage.image;
memReqs.pNext = &dedicatedRegs;
vkGetImageMemoryRequirements2(m_device, &imageReqs, &memReqs);
// Build up allocation info
MemAllocateInfo allocInfo(memReqs.memoryRequirements, memUsage_, true);
if(dedicatedRegs.requiresDedicatedAllocation)
{
allocInfo.setDedicatedImage(resultImage.image);
}
// Allocate memory
resultImage.memHandle = AllocateMemory(allocInfo);
if(resultImage.memHandle)
{
const auto memInfo = m_memAlloc->getMemoryInfo(resultImage.memHandle);
// Bind memory to image
NVVK_CHECK(vkBindImageMemory(m_device, resultImage.image, memInfo.memory, memInfo.offset));
}
else
{
destroy(resultImage);
}
return resultImage;
}
Image ResourceAllocator::createImage(const VkCommandBuffer& cmdBuf,
size_t size_,
const void* data_,
const VkImageCreateInfo& info_,
const VkImageLayout& layout_)
{
Image resultImage = createImage(info_, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
  // Copy the data to the staging buffer, then to the image
if(data_ != nullptr)
{
// Copy buffer to image
VkImageSubresourceRange subresourceRange{};
subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
subresourceRange.baseArrayLayer = 0;
subresourceRange.baseMipLevel = 0;
subresourceRange.layerCount = 1;
subresourceRange.levelCount = info_.mipLevels;
// doing these transitions per copy is not efficient, should do in bulk for many images
nvvk::cmdBarrierImageLayout(cmdBuf, resultImage.image, VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresourceRange);
VkOffset3D offset = {0};
VkImageSubresourceLayers subresource = {0};
subresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
subresource.layerCount = 1;
m_staging->cmdToImage(cmdBuf, resultImage.image, offset, info_.extent, subresource, size_, data_);
// Setting final image layout
nvvk::cmdBarrierImageLayout(cmdBuf, resultImage.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, layout_);
}
else
{
// Setting final image layout
nvvk::cmdBarrierImageLayout(cmdBuf, resultImage.image, VK_IMAGE_LAYOUT_UNDEFINED, layout_);
}
return resultImage;
}
nvvk::Texture ResourceAllocator::createTexture(const Image& image,
const VkImageViewCreateInfo& imageViewCreateInfo,
const VkSamplerCreateInfo& samplerCreateInfo)
{
Texture resultTexture = createTexture(image, imageViewCreateInfo);
resultTexture.descriptor.sampler = m_samplerPool.acquireSampler(samplerCreateInfo);
return resultTexture;
}
Texture ResourceAllocator::createTexture(const Image& image, const VkImageViewCreateInfo& imageViewCreateInfo)
{
Texture resultTexture;
resultTexture.image = image.image;
resultTexture.memHandle = image.memHandle;
resultTexture.descriptor.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
assert(imageViewCreateInfo.image == image.image);
NVVK_CHECK(vkCreateImageView(m_device, &imageViewCreateInfo, nullptr, &resultTexture.descriptor.imageView));
return resultTexture;
}
Texture ResourceAllocator::createTexture(const VkCommandBuffer& cmdBuf,
size_t size_,
const void* data_,
const VkImageCreateInfo& info_,
const VkSamplerCreateInfo& samplerCreateInfo,
const VkImageLayout& layout_,
bool isCube)
{
Image image = createImage(cmdBuf, size_, data_, info_, layout_);
VkImageViewCreateInfo viewInfo{VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
viewInfo.pNext = nullptr;
viewInfo.image = image.image;
viewInfo.format = info_.format;
viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
viewInfo.subresourceRange.baseMipLevel = 0;
viewInfo.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS;
viewInfo.subresourceRange.baseArrayLayer = 0;
viewInfo.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS;
switch(info_.imageType)
{
case VK_IMAGE_TYPE_1D:
viewInfo.viewType = (info_.arrayLayers > 1 ? VK_IMAGE_VIEW_TYPE_1D_ARRAY : VK_IMAGE_VIEW_TYPE_1D);
break;
case VK_IMAGE_TYPE_2D:
viewInfo.viewType = isCube ? VK_IMAGE_VIEW_TYPE_CUBE :
(info_.arrayLayers > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D);
break;
case VK_IMAGE_TYPE_3D:
viewInfo.viewType = VK_IMAGE_VIEW_TYPE_3D;
break;
default:
assert(0);
}
Texture resultTexture = createTexture(image, viewInfo, samplerCreateInfo);
resultTexture.descriptor.imageLayout = layout_;
return resultTexture;
}
SparseImage ResourceAllocator::createSparseImage(VkImageCreateInfo info_, const VkMemoryPropertyFlags memUsage_ /*= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT*/)
{
SparseImage resultImage;
std::array<VkImage, SparseImage::s_sparseImageCount> images;
for(size_t i = 0; i < images.size(); i++)
{
if(NVVK_CHECK(vkCreateImage(m_device, &info_, nullptr, &images[i])))
{
LOGE("Could not create requested image\n");
return {};
}
}
std::vector<VkMemoryRequirements> mipTailMemRequirements =
resultImage.create(m_device, images, info_.mipLevels, info_.arrayLayers, info_.extent);
std::vector<std::pair<VkDeviceMemory, VkDeviceSize>> mipTailMemAndOffsets;
for(const auto& memReq : mipTailMemRequirements)
{
nvvk::MemAllocateInfo allocInfo(m_device, images[0], memUsage_);
allocInfo.setMemoryRequirements(memReq);
nvvk::MemHandle mipTailAllocationID = AllocateMemory(allocInfo);
nvvk::MemAllocator::MemInfo memInfo = m_memAlloc->getMemoryInfo(mipTailAllocationID);
resultImage.mipTailAllocations.push_back(mipTailAllocationID);
mipTailMemAndOffsets.push_back({memInfo.memory, memInfo.offset});
}
resultImage.bindMipTailMemory(mipTailMemAndOffsets);
resultImage.memoryProperties = memUsage_;
return resultImage;
}
void ResourceAllocator::flushSparseImage(SparseImage& sparseImage)
{
sparseImage.sparseImageMemoryBinds.clear();
sparseImage.sparseImageMemoryBinds.reserve(sparseImage.allocatedPages.size());
for(auto it : sparseImage.allocatedPages)
{
auto& page = it.second;
if(!page.hasBoundMemory())
continue;
m_memAlloc->freeMemory(page.allocation);
page.allocation = {};
page.imageMemoryBind.memory = {};
page.imageMemoryBind.memoryOffset = {};
sparseImage.sparseImageMemoryBinds.push_back(page.imageMemoryBind);
}
sparseImage.allocatedPages.clear();
sparseImage.updateSparseBindInfo();
}
// Returns true if the allocation was performed, false if it was already allocated
bool ResourceAllocator::createSparseImagePage(SparseImage& sparseImage, uint32_t pageIndex, uint32_t layer /*= 0u*/)
{
SparseImage::PageId id{layer, pageIndex};
auto it = sparseImage.allocatedPages.find(id);
// If already allocated, nothing to do
if(it != sparseImage.allocatedPages.end())
{
return false;
}
SparseImagePage page = sparseImage.createPageInfo(pageIndex, layer);
VkMemoryRequirements memReqs = sparseImage.memoryReqs;
memReqs.size = page.size;
nvvk::MemAllocateInfo allocInfo(m_device, sparseImage.images[0], sparseImage.memoryProperties);
allocInfo.setMemoryRequirements(memReqs);
  nvvk::MemHandle              allocationID = AllocateMemory(allocInfo);
nvvk::MemAllocator::MemInfo memInfo = m_memAlloc->getMemoryInfo(allocationID);
page.allocation = allocationID;
page.bindDeviceMemory(memInfo.memory, memInfo.offset);
sparseImage.allocatedPages[id] = page;
return true;
}
void ResourceAllocator::finalizeStaging(VkFence fence /*= VK_NULL_HANDLE*/)
{
m_staging->finalizeResources(fence);
}
void ResourceAllocator::releaseStaging()
{
m_staging->releaseResources();
}
void ResourceAllocator::finalizeAndReleaseStaging(VkFence fence /*= VK_NULL_HANDLE*/)
{
m_staging->finalizeResources(fence);
m_staging->releaseResources();
}
nvvk::StagingMemoryManager* ResourceAllocator::getStaging()
{
return m_staging.get();
}
const nvvk::StagingMemoryManager* ResourceAllocator::getStaging() const
{
return m_staging.get();
}
void ResourceAllocator::destroy(Buffer& b_)
{
vkDestroyBuffer(m_device, b_.buffer, nullptr);
m_memAlloc->freeMemory(b_.memHandle);
b_ = Buffer();
}
void ResourceAllocator::destroy(Image& i_)
{
vkDestroyImage(m_device, i_.image, nullptr);
m_memAlloc->freeMemory(i_.memHandle);
i_ = Image();
}
void ResourceAllocator::destroy(Texture& t_)
{
vkDestroyImageView(m_device, t_.descriptor.imageView, nullptr);
vkDestroyImage(m_device, t_.image, nullptr);
m_memAlloc->freeMemory(t_.memHandle);
if(t_.descriptor.sampler)
{
m_samplerPool.releaseSampler(t_.descriptor.sampler);
}
t_ = Texture();
}
void ResourceAllocator::destroy(nvvk::SparseImage& i_)
{
flushSparseImage(i_);
for(auto& mipTailAlloc : i_.mipTailAllocations)
{
m_memAlloc->freeMemory(mipTailAlloc);
}
i_.mipTailAllocations.clear();
i_.unbindMipTailMemory();
for(size_t i = 0; i < nvvk::SparseImage::s_sparseImageCount; i++)
vkDestroyImage(m_device, i_.images[i], nullptr);
}
bool ResourceAllocator::destroy(nvvk::SparseImage& i_, uint32_t pageIndex, uint32_t layer)
{
auto it = i_.allocatedPages.find({layer, pageIndex});
if(it == i_.allocatedPages.end())
{
return false;
}
SparseImagePage& page = it->second;
if(!page.hasBoundMemory())
return false;
m_memAlloc->freeMemory(page.allocation);
i_.allocatedPages.erase(it);
return true;
}
void* ResourceAllocator::map(const Buffer& buffer)
{
void* pData = m_memAlloc->map(buffer.memHandle);
return pData;
}
void ResourceAllocator::unmap(const Buffer& buffer)
{
m_memAlloc->unmap(buffer.memHandle);
}
void* ResourceAllocator::map(const Image& buffer)
{
void* pData = m_memAlloc->map(buffer.memHandle);
return pData;
}
void ResourceAllocator::unmap(const Image& image)
{
m_memAlloc->unmap(image.memHandle);
}
MemHandle ResourceAllocator::AllocateMemory(const MemAllocateInfo& allocateInfo)
{
return m_memAlloc->allocMemory(allocateInfo);
}
void ResourceAllocator::CreateBufferEx(const VkBufferCreateInfo& info_, VkBuffer* buffer)
{
NVVK_CHECK(vkCreateBuffer(m_device, &info_, nullptr, buffer));
}
void ResourceAllocator::CreateImageEx(const VkImageCreateInfo& info_, VkImage* image)
{
NVVK_CHECK(vkCreateImage(m_device, &info_, nullptr, image));
}
uint32_t ResourceAllocator::getMemoryType(uint32_t typeBits, const VkMemoryPropertyFlags& properties)
{
for(uint32_t i = 0; i < m_memoryProperties.memoryTypeCount; i++)
{
if(((typeBits & (1 << i)) > 0) && (m_memoryProperties.memoryTypes[i].propertyFlags & properties) == properties)
{
return i;
}
}
assert(0);
return ~0u;
}
AccelNV ResourceAllocator::createAcceleration(VkAccelerationStructureCreateInfoNV& accel_)
{
AccelNV resultAccel;
// Create the acceleration structure
NVVK_CHECK(vkCreateAccelerationStructureNV(m_device, &accel_, nullptr, &resultAccel.accel));
// Find memory requirements
VkAccelerationStructureMemoryRequirementsInfoNV accelMemInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV};
accelMemInfo.accelerationStructure = resultAccel.accel;
VkMemoryRequirements2 memReqs{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
vkGetAccelerationStructureMemoryRequirementsNV(m_device, &accelMemInfo, &memReqs);
// Allocate memory
MemAllocateInfo info(memReqs.memoryRequirements, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, false);
resultAccel.memHandle = AllocateMemory(info);
if(resultAccel.memHandle)
{
const auto memInfo = m_memAlloc->getMemoryInfo(resultAccel.memHandle);
// Bind memory with acceleration structure
VkBindAccelerationStructureMemoryInfoNV bind{VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV};
bind.accelerationStructure = resultAccel.accel;
bind.memory = memInfo.memory;
bind.memoryOffset = memInfo.offset;
NVVK_CHECK(vkBindAccelerationStructureMemoryNV(m_device, 1, &bind));
}
else
{
destroy(resultAccel);
}
return resultAccel;
}
void ResourceAllocator::destroy(AccelNV& a_)
{
vkDestroyAccelerationStructureNV(m_device, a_.accel, nullptr);
m_memAlloc->freeMemory(a_.memHandle);
a_ = AccelNV();
}
AccelKHR ResourceAllocator::createAcceleration(VkAccelerationStructureCreateInfoKHR& accel_)
{
AccelKHR resultAccel;
// Allocating the buffer to hold the acceleration structure
resultAccel.buffer = createBuffer(accel_.size, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR
| VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
// Setting the buffer
accel_.buffer = resultAccel.buffer.buffer;
// Create the acceleration structure
vkCreateAccelerationStructureKHR(m_device, &accel_, nullptr, &resultAccel.accel);
return resultAccel;
}
void ResourceAllocator::destroy(AccelKHR& a_)
{
vkDestroyAccelerationStructureKHR(m_device, a_.accel, nullptr);
destroy(a_.buffer);
a_ = AccelKHR();
}
VkSampler ResourceAllocator::acquireSampler(const VkSamplerCreateInfo& info)
{
return m_samplerPool.acquireSampler(info);
}
void ResourceAllocator::releaseSampler(VkSampler sampler)
{
m_samplerPool.releaseSampler(sampler);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ExportResourceAllocator::ExportResourceAllocator(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAllocator, VkDeviceSize stagingBlockSize)
: ResourceAllocator(device, physicalDevice, memAllocator, stagingBlockSize)
{
}
void ExportResourceAllocator::CreateBufferEx(const VkBufferCreateInfo& info_, VkBuffer* buffer)
{
VkBufferCreateInfo info = info_;
VkExternalMemoryBufferCreateInfo infoEx{VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO};
#ifdef WIN32
infoEx.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
#else
infoEx.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
#endif
info.pNext = &infoEx;
NVVK_CHECK(vkCreateBuffer(m_device, &info, nullptr, buffer));
}
void ExportResourceAllocator::CreateImageEx(const VkImageCreateInfo& info_, VkImage* image)
{
auto info = info_;
VkExternalMemoryImageCreateInfo infoEx{VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO};
#ifdef WIN32
infoEx.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
#else
infoEx.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
#endif
info.pNext = &infoEx;
NVVK_CHECK(vkCreateImage(m_device, &info, nullptr, image));
}
MemHandle ExportResourceAllocator::AllocateMemory(const MemAllocateInfo& allocateInfo)
{
MemAllocateInfo exportAllocateInfo(allocateInfo);
exportAllocateInfo.setExportable(true);
return ResourceAllocator::AllocateMemory(exportAllocateInfo);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ExportResourceAllocatorDedicated::ExportResourceAllocatorDedicated(VkDevice device,
VkPhysicalDevice physicalDevice,
VkDeviceSize stagingBlockSize /*= NVVK_DEFAULT_STAGING_BLOCKSIZE*/)
{
init(device, physicalDevice, stagingBlockSize);
}
ExportResourceAllocatorDedicated::~ExportResourceAllocatorDedicated()
{
deinit();
}
void ExportResourceAllocatorDedicated::init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize /*= NVVK_DEFAULT_STAGING_BLOCKSIZE*/)
{
m_memAlloc = std::make_unique<DedicatedMemoryAllocator>(device, physicalDevice);
ExportResourceAllocator::init(device, physicalDevice, m_memAlloc.get(), stagingBlockSize);
}
void ExportResourceAllocatorDedicated::deinit()
{
ExportResourceAllocator::deinit();
m_memAlloc.reset();
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ExplicitDeviceMaskResourceAllocator::ExplicitDeviceMaskResourceAllocator(VkDevice device,
VkPhysicalDevice physicalDevice,
MemAllocator* memAlloc,
uint32_t deviceMask)
{
init(device, physicalDevice, memAlloc, deviceMask);
}
void ExplicitDeviceMaskResourceAllocator::init(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, uint32_t deviceMask)
{
ResourceAllocator::init(device, physicalDevice, memAlloc);
m_deviceMask = deviceMask;
}
MemHandle ExplicitDeviceMaskResourceAllocator::AllocateMemory(const MemAllocateInfo& allocateInfo)
{
MemAllocateInfo deviceMaskAllocateInfo(allocateInfo);
deviceMaskAllocateInfo.setDeviceMask(m_deviceMask);
return ResourceAllocator::AllocateMemory(deviceMaskAllocateInfo);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ResourceAllocatorDma::ResourceAllocatorDma(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize, VkDeviceSize memBlockSize)
{
init(device, physicalDevice, stagingBlockSize, memBlockSize);
}
ResourceAllocatorDma::~ResourceAllocatorDma()
{
deinit();
}
void ResourceAllocatorDma::init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize, VkDeviceSize memBlockSize)
{
m_dma = std::make_unique<DeviceMemoryAllocator>(device, physicalDevice, memBlockSize);
ResourceAllocator::init(device, physicalDevice, m_dma.get(), stagingBlockSize);
}
void ResourceAllocatorDma::init(VkInstance, VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize, VkDeviceSize memBlockSize)
{
init(device, physicalDevice, stagingBlockSize, memBlockSize);
}
void ResourceAllocatorDma::deinit()
{
ResourceAllocator::deinit();
m_dma.reset();
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ResourceAllocatorDedicated::ResourceAllocatorDedicated(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize)
{
init(device, physicalDevice, stagingBlockSize);
}
ResourceAllocatorDedicated::~ResourceAllocatorDedicated()
{
deinit();
}
void ResourceAllocatorDedicated::init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize)
{
m_memAlloc = std::make_unique<DedicatedMemoryAllocator>(device, physicalDevice);
ResourceAllocator::init(device, physicalDevice, m_memAlloc.get(), stagingBlockSize);
}
void ResourceAllocatorDedicated::init(VkInstance, // unused
VkDevice device,
VkPhysicalDevice physicalDevice,
VkDeviceSize stagingBlockSize /*= NVVK_DEFAULT_STAGING_BLOCKSIZE*/)
{
init(device, physicalDevice, stagingBlockSize);
}
void ResourceAllocatorDedicated::deinit()
{
ResourceAllocator::deinit();
m_memAlloc.reset();
}
} // namespace nvvk

View file

@ -0,0 +1,467 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <vulkan/vulkan_core.h>
#include <memory>
#include <vector>
#include "memallocator_vk.hpp"
#include "samplers_vk.hpp"
#include "stagingmemorymanager_vk.hpp"
#include "sparse_image_vk.hpp"
/** @DOC_START
# class nvvk::ResourceAllocator
The goal of nvvk::ResourceAllocator is to aid creation of typical Vulkan
resources (VkBuffer, VkImage and VkAccelerationStructure).
All memory is allocated using the provided [nvvk::MemAllocator](#class-nvvkmemallocator)
and bound to the appropriate resources. The allocator contains a
[nvvk::StagingMemoryManager](#class-nvvkstagingmemorymanager) and
[nvvk::SamplerPool](#class-nvvksamplerpool) to aid this process.
ResourceAllocator separates object creation and memory allocation by delegating allocation
of memory to an object of interface type 'nvvk::MemAllocator'.
This way the ResourceAllocator can be used with different memory allocation strategies, depending on needs.
nvvk provides three implementations of MemAllocator:
* nvvk::DedicatedMemoryAllocator uses a very simple allocation scheme: one VkDeviceMemory object per allocation.
This strategy is only useful for very simple applications due to the overhead of vkAllocateMemory and
the implementation-dependent limit on the number of VkDeviceMemory allocations.
* nvvk::DMAMemoryAllocator delegates memory requests to a 'nvvk::DeviceMemoryAllocator',
as an example implementation of a suballocator
* nvvk::VMAMemoryAllocator delegates memory requests to a [Vulkan Memory Allocator](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator)
Utility wrapper structs contain the appropriate Vulkan resource and the
appropriate nvvk::MemHandle :
- nvvk::Buffer
- nvvk::Image
- nvvk::Texture contains VkImage and VkImageView as well as an
optional VkSampler stored within VkDescriptorImageInfo
- nvvk::AccelNV
- nvvk::AccelKHR
nvvk::Buffer, nvvk::Image, nvvk::Texture, nvvk::AccelKHR and nvvk::AccelNV objects can be copied
by value. They do not track lifetime of the underlying Vulkan objects and memory allocations.
The corresponding destroy() functions of nvvk::ResourceAllocator destroy created objects and
free up their memory. ResourceAllocator does not track usage of objects either. Thus, one has to
make sure that objects are no longer in use by the GPU when they get destroyed.
> Note: These classes primarily showcase the principal components that
> a Vulkan engine would most likely have.
> They are geared towards ease of use in this sample framework, and
> not optimized nor meant for production code.
```cpp
nvvk::DeviceMemoryAllocator memAllocator;
nvvk::ResourceAllocator resAllocator;
memAllocator.init(device, physicalDevice);
resAllocator.init(device, physicalDevice, &memAllocator);
...
VkCommandBuffer cmd = ... transfer queue command buffer
// creates new resources and
// implicitly triggers staging transfer copy operations into cmd
nvvk::Buffer vbo = resAllocator.createBuffer(cmd, vboSize, vboData, vboUsage);
nvvk::Buffer ibo = resAllocator.createBuffer(cmd, iboSize, iboData, iboUsage);
// use functions from staging memory manager
// here we associate the temporary staging resources with a fence
resAllocator.finalizeStaging( fence );
// submit cmd buffer with staging copy operations
vkQueueSubmit(... cmd ... fence ...)
...
// if you do async uploads you would
// trigger garbage collection somewhere per frame
resAllocator.releaseStaging();
```
Separation of memory allocation and resource creation is very flexible, but it
can be tedious to set up for simple use cases. nvvk offers three helper ResourceAllocator
derived classes which internally contain the MemAllocator object and manage its lifetime:
* [ResourceAllocatorDedicated](#class nvvk::ResourceAllocatorDedicated)
* [ResourceAllocatorDma](#class nvvk::ResourceAllocatorDma)
* [ResourceAllocatorVma](#class nvvk::ResourceAllocatorVma)
In these cases, only one object needs to be created and initialized.
ResourceAllocator can also be subclassed to specialize some of its functionality.
Examples are [ExportResourceAllocator](#class ExportResourceAllocator) and [ExplicitDeviceMaskResourceAllocator](#class ExplicitDeviceMaskResourceAllocator).
ExportResourceAllocator injects itself into the object allocation process such that
the resulting allocations can be exported or created objects may be bound to exported
memory.
ExplicitDeviceMaskResourceAllocator overrides the device mask of allocations such that
objects can be created on a specific device in a device group.
@DOC_END */
namespace nvvk {
// Objects
struct Buffer
{
VkBuffer buffer = VK_NULL_HANDLE;
MemHandle memHandle{nullptr};
};
struct Image
{
VkImage image = VK_NULL_HANDLE;
MemHandle memHandle{nullptr};
};
struct Texture
{
VkImage image = VK_NULL_HANDLE;
MemHandle memHandle{nullptr};
VkDescriptorImageInfo descriptor{};
};
struct AccelNV
{
VkAccelerationStructureNV accel = VK_NULL_HANDLE;
MemHandle memHandle{nullptr};
};
struct AccelKHR
{
VkAccelerationStructureKHR accel = VK_NULL_HANDLE;
nvvk::Buffer buffer;
};
//--------------------------------------------------------------------------------------------------
// Allocator for buffers, images and acceleration structures
//
class StagingMemoryManager;
class ResourceAllocator
{
public:
ResourceAllocator(ResourceAllocator const&) = delete;
ResourceAllocator& operator=(ResourceAllocator const&) = delete;
ResourceAllocator() = default;
ResourceAllocator(VkDevice device,
VkPhysicalDevice physicalDevice,
MemAllocator* memAllocator,
VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
  // All staging buffers must be cleared before the allocator is destroyed
virtual ~ResourceAllocator();
//--------------------------------------------------------------------------------------------------
// Initialization of the allocator
void init(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
void deinit();
MemAllocator* getMemoryAllocator() { return m_memAlloc; }
//--------------------------------------------------------------------------------------------------
// Basic buffer creation
nvvk::Buffer createBuffer(const VkBufferCreateInfo& info_, const VkMemoryPropertyFlags memUsage_ = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
//--------------------------------------------------------------------------------------------------
// Simple buffer creation
// implicitly sets VK_BUFFER_USAGE_TRANSFER_DST_BIT
nvvk::Buffer createBuffer(VkDeviceSize size_ = 0,
VkBufferUsageFlags usage_ = VkBufferUsageFlags(),
const VkMemoryPropertyFlags memUsage_ = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
//--------------------------------------------------------------------------------------------------
// Simple buffer creation with data uploaded through staging manager
// implicitly sets VK_BUFFER_USAGE_TRANSFER_DST_BIT
nvvk::Buffer createBuffer(const VkCommandBuffer& cmdBuf,
const VkDeviceSize& size_,
const void* data_,
VkBufferUsageFlags usage_,
VkMemoryPropertyFlags memProps = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
//--------------------------------------------------------------------------------------------------
// Simple buffer creation with data uploaded through staging manager
// implicitly sets VK_BUFFER_USAGE_TRANSFER_DST_BIT
template <typename T>
nvvk::Buffer createBuffer(const VkCommandBuffer& cmdBuf,
const std::vector<T>& data_,
VkBufferUsageFlags usage_,
VkMemoryPropertyFlags memProps_ = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
{
return createBuffer(cmdBuf, sizeof(T) * data_.size(), data_.data(), usage_, memProps_);
}
//--------------------------------------------------------------------------------------------------
// Basic image creation
nvvk::Image createImage(const VkImageCreateInfo& info_, const VkMemoryPropertyFlags memUsage_ = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
//--------------------------------------------------------------------------------------------------
// Create an image with data uploaded through staging manager
nvvk::Image createImage(const VkCommandBuffer& cmdBuf,
size_t size_,
const void* data_,
const VkImageCreateInfo& info_,
const VkImageLayout& layout_ = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
//--------------------------------------------------------------------------------------------------
  // Other variants with more defaults could exist, but we already have nvvk::makeImage2DViewCreateInfo();
  // the caller can always override viewCreateInfo.image.
nvvk::Texture createTexture(const Image& image, const VkImageViewCreateInfo& imageViewCreateInfo);
nvvk::Texture createTexture(const Image& image, const VkImageViewCreateInfo& imageViewCreateInfo, const VkSamplerCreateInfo& samplerCreateInfo);
//--------------------------------------------------------------------------------------------------
// shortcut that creates the image for the texture
// - creates the image
// - creates the texture part by associating image and sampler
//
nvvk::Texture createTexture(const VkCommandBuffer& cmdBuf,
size_t size_,
const void* data_,
const VkImageCreateInfo& info_,
const VkSamplerCreateInfo& samplerCreateInfo,
const VkImageLayout& layout_ = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
bool isCube = false);
nvvk::SparseImage createSparseImage(VkImageCreateInfo info_,
const VkMemoryPropertyFlags memUsage_ = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
void flushSparseImage(SparseImage& sparseImage);
bool createSparseImagePage(SparseImage& sparseImage, uint32_t pageIndex, uint32_t layer = 0u);
//--------------------------------------------------------------------------------------------------
// Create the acceleration structure
//
nvvk::AccelNV createAcceleration(VkAccelerationStructureCreateInfoNV& accel_);
//--------------------------------------------------------------------------------------------------
// Create the acceleration structure
//
nvvk::AccelKHR createAcceleration(VkAccelerationStructureCreateInfoKHR& accel_);
//--------------------------------------------------------------------------------------------------
// Acquire a sampler with the provided information (see nvvk::SamplerPool for details).
// Every acquire must have an appropriate release for appropriate internal reference counting
VkSampler acquireSampler(const VkSamplerCreateInfo& info);
void releaseSampler(VkSampler sampler);
//--------------------------------------------------------------------------------------------------
// implicit staging operations triggered by create are managed here
void finalizeStaging(VkFence fence = VK_NULL_HANDLE);
void finalizeAndReleaseStaging(VkFence fence = VK_NULL_HANDLE);
void releaseStaging();
StagingMemoryManager* getStaging();
const StagingMemoryManager* getStaging() const;
//--------------------------------------------------------------------------------------------------
// Destroy
//
void destroy(nvvk::Buffer& b_);
void destroy(nvvk::Image& i_);
void destroy(nvvk::AccelNV& a_);
void destroy(nvvk::AccelKHR& a_);
void destroy(nvvk::Texture& t_);
void destroy(nvvk::SparseImage& i_);
// Destroy a sparse image page. Returns true if that page actually was present in memory
bool destroy(nvvk::SparseImage& i_, uint32_t pageIndex, uint32_t layer = 0);
//--------------------------------------------------------------------------------------------------
// Other
//
void* map(const nvvk::Buffer& buffer);
void unmap(const nvvk::Buffer& buffer);
void* map(const nvvk::Image& image);
void unmap(const nvvk::Image& image);
VkDevice getDevice() const { return m_device; }
VkPhysicalDevice getPhysicalDevice() const { return m_physicalDevice; }
protected:
  // If necessary, these can be overridden to specialize the allocation, for instance to
  // enforce allocation of exportable memory.
virtual MemHandle AllocateMemory(const MemAllocateInfo& allocateInfo);
virtual void CreateBufferEx(const VkBufferCreateInfo& info_, VkBuffer* buffer);
virtual void CreateImageEx(const VkImageCreateInfo& info_, VkImage* image);
//--------------------------------------------------------------------------------------------------
// Finding the memory type for memory allocation
//
uint32_t getMemoryType(uint32_t typeBits, const VkMemoryPropertyFlags& properties);
VkDevice m_device{VK_NULL_HANDLE};
VkPhysicalDevice m_physicalDevice{VK_NULL_HANDLE};
VkPhysicalDeviceMemoryProperties m_memoryProperties{};
MemAllocator* m_memAlloc{nullptr};
std::unique_ptr<StagingMemoryManager> m_staging;
SamplerPool m_samplerPool;
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class DeviceMemoryAllocator;
/** @DOC_START
# class nvvk::ResourceAllocatorDma
nvvk::ResourceAllocatorDma is a convenience class owning an nvvk::DMAMemoryAllocator and nvvk::DeviceMemoryAllocator object
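A minimal sketch, assuming a valid `device` and `physicalDevice`; the allocator creates and owns its memory allocator internally:
```cpp
nvvk::ResourceAllocatorDma alloc;
alloc.init(device, physicalDevice);  // optional: stagingBlockSize, memBlockSize
nvvk::Buffer buf = alloc.createBuffer(1024, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
// ... use buf.buffer ...
alloc.destroy(buf);
alloc.deinit();
```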
@DOC_END */
class ResourceAllocatorDma : public ResourceAllocator
{
public:
ResourceAllocatorDma() = default;
ResourceAllocatorDma(VkDevice device,
VkPhysicalDevice physicalDevice,
VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE,
VkDeviceSize memBlockSize = 0);
virtual ~ResourceAllocatorDma();
void init(VkDevice device,
VkPhysicalDevice physicalDevice,
VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE,
VkDeviceSize memBlockSize = 0);
// Provided such that ResourceAllocatorDedicated, ResourceAllocatorDma and ResourceAllocatorVma all have the same interface
void init(VkInstance,
VkDevice device,
VkPhysicalDevice physicalDevice,
VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE,
VkDeviceSize memBlockSize = 0);
void deinit();
nvvk::DeviceMemoryAllocator* getDMA() { return m_dma.get(); }
const nvvk::DeviceMemoryAllocator* getDMA() const { return m_dma.get(); }
protected:
std::unique_ptr<nvvk::DeviceMemoryAllocator> m_dma;
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::ResourceAllocatorDedicated
> nvvk::ResourceAllocatorDedicated is a convenience class automatically creating and owning a DedicatedMemoryAllocator object
@DOC_END */
class ResourceAllocatorDedicated : public ResourceAllocator
{
public:
ResourceAllocatorDedicated() = default;
ResourceAllocatorDedicated(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
virtual ~ResourceAllocatorDedicated();
void init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
// Provided such that ResourceAllocatorDedicated, ResourceAllocatorDma and ResourceAllocatorVma all have the same interface
void init(VkInstance, VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
void deinit();
protected:
std::unique_ptr<MemAllocator> m_memAlloc;
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/** @DOC_START
#class nvvk::ExportResourceAllocator
ExportResourceAllocator specializes the object allocation process such that resulting memory allocations are
exportable and buffers and images can be bound to external memory.
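A short sketch; `memAllocator` stands for any already-initialized nvvk::MemAllocator (e.g. an nvvk::DeviceMemoryAllocator) and is not defined here:
```cpp
nvvk::ExportResourceAllocator exportAlloc(device, physicalDevice, &memAllocator);
// Memory backing this buffer is allocated as exportable (opaque win32 handle / fd).
nvvk::Buffer shared = exportAlloc.createBuffer(4096, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
// ... export the underlying memory through the chosen MemAllocator, then ...
exportAlloc.destroy(shared);
```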
@DOC_END */
class ExportResourceAllocator : public ResourceAllocator
{
public:
ExportResourceAllocator() = default;
ExportResourceAllocator(VkDevice device,
VkPhysicalDevice physicalDevice,
MemAllocator* memAlloc,
VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
protected:
virtual MemHandle AllocateMemory(const MemAllocateInfo& allocateInfo) override;
virtual void CreateBufferEx(const VkBufferCreateInfo& info_, VkBuffer* buffer) override;
virtual void CreateImageEx(const VkImageCreateInfo& info_, VkImage* image) override;
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::ExportResourceAllocatorDedicated
nvvk::ExportResourceAllocatorDedicated is a resource allocator that uses a DedicatedMemoryAllocator to allocate memory
and at the same time makes all allocations exportable.
@DOC_END */
class ExportResourceAllocatorDedicated : public ExportResourceAllocator
{
public:
ExportResourceAllocatorDedicated() = default;
ExportResourceAllocatorDedicated(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
virtual ~ExportResourceAllocatorDedicated() override;
void init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
void deinit();
protected:
std::unique_ptr<MemAllocator> m_memAlloc;
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::ExplicitDeviceMaskResourceAllocator
nvvk::ExplicitDeviceMaskResourceAllocator is a resource allocator that injects a specific device mask into each
allocation, making the created allocations and objects available only to the devices in the mask.
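A short sketch, assuming a device group and an existing `memAllocator`; a deviceMask of 1 restricts allocations to the first physical device:
```cpp
nvvk::ExplicitDeviceMaskResourceAllocator maskAlloc(device, physicalDevice, &memAllocator, 1u /*deviceMask*/);
nvvk::Buffer buf = maskAlloc.createBuffer(1024, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
// buf's memory is only valid on the device(s) selected by the mask
maskAlloc.destroy(buf);
```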
@DOC_END */
class ExplicitDeviceMaskResourceAllocator : public ResourceAllocator
{
public:
ExplicitDeviceMaskResourceAllocator() = default;
ExplicitDeviceMaskResourceAllocator(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, uint32_t deviceMask);
void init(VkDevice device, VkPhysicalDevice physicalDevice, MemAllocator* memAlloc, uint32_t deviceMask);
protected:
virtual MemHandle AllocateMemory(const MemAllocateInfo& allocateInfo) override;
uint32_t m_deviceMask;
};
} // namespace nvvk

View file

@ -0,0 +1,166 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2020-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "samplers_vk.hpp"
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
void SamplerPool::deinit()
{
if(!m_device)
return;
for(auto it : m_entries)
{
if(it.sampler)
{
vkDestroySampler(m_device, it.sampler, nullptr);
}
}
m_freeIndex = ~0;
m_entries.clear();
m_samplerMap.clear();
m_stateMap.clear();
m_device = nullptr;
}
VkSampler SamplerPool::acquireSampler(const VkSamplerCreateInfo& createInfo)
{
SamplerState state;
state.createInfo = createInfo;
const Chain* ext = (const Chain*)createInfo.pNext;
while(ext)
{
switch(ext->sType)
{
case VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO:
state.reduction = *(const VkSamplerReductionModeCreateInfo*)ext;
break;
case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO:
state.ycbr = *(const VkSamplerYcbcrConversionCreateInfo*)ext;
break;
default:
assert(0 && "unsupported sampler create");
}
ext = ext->pNext;
}
// always remove pointers for comparison lookup
state.createInfo.pNext = nullptr;
state.reduction.pNext = nullptr;
state.ycbr.pNext = nullptr;
auto it = m_stateMap.find(state);
if(it == m_stateMap.end())
{
uint32_t index = 0;
if(m_freeIndex != ~0)
{
index = m_freeIndex;
m_freeIndex = m_entries[index].nextFreeIndex;
}
else
{
index = (uint32_t)m_entries.size();
m_entries.resize(m_entries.size() + 1);
}
VkSampler sampler;
VkResult result = vkCreateSampler(m_device, &createInfo, nullptr, &sampler);
assert(result == VK_SUCCESS);
m_entries[index].refCount = 1;
m_entries[index].sampler = sampler;
m_entries[index].state = state;
m_stateMap.insert({state, index});
m_samplerMap.insert({sampler, index});
return sampler;
}
else
{
m_entries[it->second].refCount++;
return m_entries[it->second].sampler;
}
}
void SamplerPool::releaseSampler(VkSampler sampler)
{
auto it = m_samplerMap.find(sampler);
assert(it != m_samplerMap.end());
uint32_t index = it->second;
Entry& entry = m_entries[index];
assert(entry.sampler == sampler);
assert(entry.refCount);
entry.refCount--;
if(!entry.refCount)
{
vkDestroySampler(m_device, sampler, nullptr);
entry.sampler = nullptr;
entry.nextFreeIndex = m_freeIndex;
m_freeIndex = index;
m_stateMap.erase(entry.state);
m_samplerMap.erase(sampler);
}
}
VkSamplerCreateInfo makeSamplerCreateInfo(VkFilter magFilter,
VkFilter minFilter,
VkSamplerAddressMode addressModeU,
VkSamplerAddressMode addressModeV,
VkSamplerAddressMode addressModeW,
VkBool32 anisotropyEnable,
float maxAnisotropy,
VkSamplerMipmapMode mipmapMode,
float minLod,
float maxLod,
float mipLodBias,
VkBool32 compareEnable,
VkCompareOp compareOp,
VkBorderColor borderColor,
VkBool32 unnormalizedCoordinates)
{
VkSamplerCreateInfo samplerInfo = {VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO};
samplerInfo.flags = 0;
samplerInfo.pNext = nullptr;
samplerInfo.magFilter = magFilter;
samplerInfo.minFilter = minFilter;
samplerInfo.mipmapMode = mipmapMode;
samplerInfo.addressModeU = addressModeU;
samplerInfo.addressModeV = addressModeV;
samplerInfo.addressModeW = addressModeW;
samplerInfo.anisotropyEnable = anisotropyEnable;
samplerInfo.maxAnisotropy = maxAnisotropy;
samplerInfo.borderColor = borderColor;
samplerInfo.unnormalizedCoordinates = unnormalizedCoordinates;
samplerInfo.compareEnable = compareEnable;
samplerInfo.compareOp = compareOp;
return samplerInfo;
}
} // namespace nvvk

View file

@ -0,0 +1,135 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2020-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <vulkan/vulkan_core.h>
#include <assert.h>
#include <float.h>
#include <functional>
#include <string.h> //memcmp
#include <unordered_map>
#include <vector>
#include "nvh/container_utils.hpp"
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::SamplerPool
This nvvk::SamplerPool class manages unique VkSampler objects. To minimize the total
number of sampler objects, this class ensures that identical configurations
return the same sampler.
Example:
```cpp
nvvk::SamplerPool pool(device);
for (auto it : textures) {
VkSamplerCreateInfo info = {...};
// acquire ensures we create the minimal subset of samplers
it.sampler = pool.acquireSampler(info);
}
// you can manage releases individually, or just use deinit/destructor of pool
for (auto it : textures) {
pool.releaseSampler(it.sampler);
}
```
- makeSamplerCreateInfo : aids for sampler creation
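A short sketch of the helper, reusing the `pool` from the example above; all parameters have defaults, only the ones shown differ from them:
```cpp
// Nearest-neighbor sampler with repeat addressing; the remaining parameters keep their defaults.
VkSamplerCreateInfo info = nvvk::makeSamplerCreateInfo(VK_FILTER_NEAREST, VK_FILTER_NEAREST,
                                                       VK_SAMPLER_ADDRESS_MODE_REPEAT,
                                                       VK_SAMPLER_ADDRESS_MODE_REPEAT,
                                                       VK_SAMPLER_ADDRESS_MODE_REPEAT);
VkSampler sampler = pool.acquireSampler(info);
// ...
pool.releaseSampler(sampler);
```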
@DOC_END */
class SamplerPool
{
public:
SamplerPool(SamplerPool const&) = delete;
SamplerPool& operator=(SamplerPool const&) = delete;
SamplerPool() {}
SamplerPool(VkDevice device) { init(device); }
~SamplerPool() { deinit(); }
void init(VkDevice device) { m_device = device; }
void deinit();
// creates a new sampler or re-uses an existing one with ref-count
// createInfo may contain VkSamplerReductionModeCreateInfo and VkSamplerYcbcrConversionCreateInfo
VkSampler acquireSampler(const VkSamplerCreateInfo& createInfo);
// decrements ref-count and destroys sampler if possible
void releaseSampler(VkSampler sampler);
private:
struct SamplerState
{
VkSamplerCreateInfo createInfo;
VkSamplerReductionModeCreateInfo reduction;
VkSamplerYcbcrConversionCreateInfo ycbr;
SamplerState() { memset(this, 0, sizeof(SamplerState)); }
bool operator==(const SamplerState& other) const { return memcmp(this, &other, sizeof(SamplerState)) == 0; }
};
struct Chain
{
VkStructureType sType;
const Chain* pNext;
};
struct Entry
{
VkSampler sampler = nullptr;
uint32_t nextFreeIndex = ~0;
uint32_t refCount = 0;
SamplerState state;
};
VkDevice m_device = nullptr;
uint32_t m_freeIndex = ~0;
std::vector<Entry> m_entries;
std::unordered_map<SamplerState, uint32_t, nvh::HashAligned32<SamplerState>> m_stateMap;
std::unordered_map<VkSampler, uint32_t> m_samplerMap;
};
VkSamplerCreateInfo makeSamplerCreateInfo(VkFilter magFilter = VK_FILTER_LINEAR,
VkFilter minFilter = VK_FILTER_LINEAR,
VkSamplerAddressMode addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
VkSamplerAddressMode addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
VkSamplerAddressMode addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
VkBool32 anisotropyEnable = VK_FALSE,
float maxAnisotropy = 16,
VkSamplerMipmapMode mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
float minLod = 0.0f,
float maxLod = FLT_MAX,
float mipLodBias = 0.0f,
VkBool32 compareEnable = VK_FALSE,
VkCompareOp compareOp = VK_COMPARE_OP_ALWAYS,
VkBorderColor borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK,
VkBool32 unnormalizedCoordinates = VK_FALSE);
} // namespace nvvk

View file

@ -0,0 +1,246 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "sbtwrapper_vk.hpp"
#include "nvvk/commands_vk.hpp"
#include "nvvk/debug_util_vk.hpp"
#include "nvvk/error_vk.hpp"
#include "nvh/nvprint.hpp"
#include "nvh/alignment.hpp"
using namespace nvvk;
//--------------------------------------------------------------------------------------------------
// Default setup
//
void nvvk::SBTWrapper::setup(VkDevice device,
uint32_t familyIndex,
nvvk::ResourceAllocator* allocator,
const VkPhysicalDeviceRayTracingPipelinePropertiesKHR& rtProperties)
{
m_device = device;
m_queueIndex = familyIndex;
m_pAlloc = allocator;
m_debug.setup(device);
m_handleSize = rtProperties.shaderGroupHandleSize; // Size of a program identifier
m_handleAlignment = rtProperties.shaderGroupHandleAlignment; // Alignment in bytes for each SBT entry
m_shaderGroupBaseAlignment = rtProperties.shaderGroupBaseAlignment;
}
//--------------------------------------------------------------------------------------------------
// Destroying the allocated buffers and clearing all vectors
//
void SBTWrapper::destroy()
{
if(m_pAlloc)
{
for(auto& b : m_buffer)
m_pAlloc->destroy(b);
}
for(auto& i : m_index)
i = {};
}
//--------------------------------------------------------------------------------------------------
// Finding the handle index position of each group type in the pipeline creation info.
// If the pipeline was created like: raygen, miss, hit, miss, hit, hit
// The result will be: raygen[0], miss[1, 3], hit[2, 4, 5], callable[]
//
void SBTWrapper::addIndices(VkRayTracingPipelineCreateInfoKHR rayPipelineInfo,
const std::vector<VkRayTracingPipelineCreateInfoKHR>& libraries)
{
for(auto& i : m_index)
i = {};
// Libraries contain stages referencing their internal groups. When those groups
// are used in the final pipeline we need to offset them to ensure each group has
// a unique index
uint32_t groupOffset = 0;
for(size_t i = 0; i < libraries.size() + 1; i++)
{
// When using libraries, their groups and stages are appended after the groups and
// stages defined in the main VkRayTracingPipelineCreateInfoKHR
const auto& info = (i == 0) ? rayPipelineInfo : libraries[i - 1];
// Finding the handle position of each group, splitting by raygen, miss and hit group
for(uint32_t g = 0; g < info.groupCount; g++)
{
if(info.pGroups[g].type == VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR)
{
uint32_t genShader = info.pGroups[g].generalShader;
assert(genShader < info.stageCount);
if(info.pStages[genShader].stage == VK_SHADER_STAGE_RAYGEN_BIT_KHR)
{
m_index[eRaygen].push_back(g + groupOffset);
}
else if(info.pStages[genShader].stage == VK_SHADER_STAGE_MISS_BIT_KHR)
{
m_index[eMiss].push_back(g + groupOffset);
}
else if(info.pStages[genShader].stage == VK_SHADER_STAGE_CALLABLE_BIT_KHR)
{
m_index[eCallable].push_back(g + groupOffset);
}
}
else
{
m_index[eHit].push_back(g + groupOffset);
}
}
groupOffset += info.groupCount;
}
}
//--------------------------------------------------------------------------------------------------
// This function creates 4 buffers, one each for the raygen, miss, hit and callable shaders.
// Each buffer holds the handle + 'data (if any)' for every entry of that type in the pipeline.
//
void SBTWrapper::create(VkPipeline rtPipeline,
VkRayTracingPipelineCreateInfoKHR rayPipelineInfo /*= {}*/,
const std::vector<VkRayTracingPipelineCreateInfoKHR>& librariesInfo /*= {}*/)
{
for(auto& b : m_buffer)
m_pAlloc->destroy(b);
// Get the total number of groups and handle index position
uint32_t totalGroupCount{0};
std::vector<uint32_t> groupCountPerInput;
// A pipeline is defined by at least its main VkRayTracingPipelineCreateInfoKHR, plus a number of external libraries
groupCountPerInput.reserve(1 + librariesInfo.size());
if(rayPipelineInfo.sType == VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR)
{
addIndices(rayPipelineInfo, librariesInfo);
groupCountPerInput.push_back(rayPipelineInfo.groupCount);
totalGroupCount += rayPipelineInfo.groupCount;
for(const auto& lib : librariesInfo)
{
groupCountPerInput.push_back(lib.groupCount);
totalGroupCount += lib.groupCount;
}
}
else
{
    // Find how many groups there are when entries were added manually, by finding the largest index and adding 1
// See also addIndex for manual entries
for(auto& i : m_index)
{
if(!i.empty())
totalGroupCount = std::max(totalGroupCount, *std::max_element(std::begin(i), std::end(i)));
}
totalGroupCount++;
groupCountPerInput.push_back(totalGroupCount);
}
// Fetch all the shader handles used in the pipeline, so that they can be written in the SBT
uint32_t sbtSize = totalGroupCount * m_handleSize;
std::vector<uint8_t> shaderHandleStorage(sbtSize);
NVVK_CHECK(vkGetRayTracingShaderGroupHandlesKHR(m_device, rtPipeline, 0, totalGroupCount, sbtSize, shaderHandleStorage.data()));
  // Find the max stride; the minimum is the handle size + size of 'data (if any)', aligned to the handle alignment
auto findStride = [&](auto entry, auto& stride) {
stride = nvh::align_up(m_handleSize, m_handleAlignment); // minimum stride
for(auto& e : entry)
{
// Find the largest data + handle size, all aligned
uint32_t dataHandleSize =
nvh::align_up(static_cast<uint32_t>(m_handleSize + e.second.size() * sizeof(uint8_t)), m_handleAlignment);
stride = std::max(stride, dataHandleSize);
}
};
findStride(m_data[eRaygen], m_stride[eRaygen]);
findStride(m_data[eMiss], m_stride[eMiss]);
findStride(m_data[eHit], m_stride[eHit]);
findStride(m_data[eCallable], m_stride[eCallable]);
// Special case, all Raygen must start aligned on GroupBase
m_stride[eRaygen] = nvh::align_up(m_stride[eRaygen], m_shaderGroupBaseAlignment);
// Buffer holding the staging information
std::array<std::vector<uint8_t>, 4> stage;
stage[eRaygen] = std::vector<uint8_t>(m_stride[eRaygen] * indexCount(eRaygen));
stage[eMiss] = std::vector<uint8_t>(m_stride[eMiss] * indexCount(eMiss));
stage[eHit] = std::vector<uint8_t>(m_stride[eHit] * indexCount(eHit));
stage[eCallable] = std::vector<uint8_t>(m_stride[eCallable] * indexCount(eCallable));
// Write the handles in the SBT buffer + data info (if any)
auto copyHandles = [&](std::vector<uint8_t>& buffer, std::vector<uint32_t>& indices, uint32_t stride, auto& data) {
auto* pBuffer = buffer.data();
for(uint32_t index = 0; index < static_cast<uint32_t>(indices.size()); index++)
{
auto* pStart = pBuffer;
// Copy the handle
memcpy(pBuffer, shaderHandleStorage.data() + (indices[index] * m_handleSize), m_handleSize);
// If there is data for this group index, copy it too
auto it = data.find(index);
if(it != std::end(data))
{
pBuffer += m_handleSize;
memcpy(pBuffer, it->second.data(), it->second.size() * sizeof(uint8_t));
}
pBuffer = pStart + stride; // Jumping to next group
}
};
// Copy the handles/data to each staging buffer
copyHandles(stage[eRaygen], m_index[eRaygen], m_stride[eRaygen], m_data[eRaygen]);
copyHandles(stage[eMiss], m_index[eMiss], m_stride[eMiss], m_data[eMiss]);
copyHandles(stage[eHit], m_index[eHit], m_stride[eHit], m_data[eHit]);
copyHandles(stage[eCallable], m_index[eCallable], m_stride[eCallable], m_data[eCallable]);
// Creating device local buffers where handles will be stored
auto usage_flags = VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR;
auto mem_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
nvvk::CommandPool genCmdBuf(m_device, m_queueIndex);
VkCommandBuffer cmdBuf = genCmdBuf.createCommandBuffer();
for(uint32_t i = 0; i < 4; i++)
{
if(!stage[i].empty())
{
m_buffer[i] = m_pAlloc->createBuffer(cmdBuf, stage[i], usage_flags, mem_flags);
NAME_IDX_VK(m_buffer[i].buffer, i);
}
}
genCmdBuf.submitAndWait(cmdBuf);
m_pAlloc->finalizeAndReleaseStaging();
}
VkDeviceAddress SBTWrapper::getAddress(GroupType t)
{
if(m_buffer[t].buffer == VK_NULL_HANDLE)
return 0;
VkBufferDeviceAddressInfo i{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, nullptr, m_buffer[t].buffer};
return vkGetBufferDeviceAddress(m_device, &i); // Aligned on VkMemoryRequirements::alignment which includes shaderGroupBaseAlignment
}
const VkStridedDeviceAddressRegionKHR SBTWrapper::getRegion(GroupType t, uint32_t indexOffset)
{
return VkStridedDeviceAddressRegionKHR{getAddress(t) + indexOffset * getStride(t), getStride(t), getSize(t)};
}
const std::array<VkStridedDeviceAddressRegionKHR, 4> SBTWrapper::getRegions(uint32_t rayGenIndexOffset)
{
std::array<VkStridedDeviceAddressRegionKHR, 4> regions{getRegion(eRaygen, rayGenIndexOffset), getRegion(eMiss),
getRegion(eHit), getRegion(eCallable)};
return regions;
}


@ -0,0 +1,179 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
/** @DOC_START
# class nvvk::SBTWrapper
nvvk::SBTWrapper is a generic shader binding table (SBT) builder for a ray tracing pipeline.
The builder iterates through the pipeline create info `VkRayTracingPipelineCreateInfoKHR`
to find how many raygen, miss, hit and callable shader groups were created.
The handles for those groups are retrieved from the pipeline and written in the right order into
separate buffers.
Convenience functions exist to retrieve all the information needed by vkCmdTraceRaysKHR.
## Usage
- Setup the builder (`setup()`)
- After the pipeline creation, call `create()` with the same info used for the creation of the pipeline.
- Use `getRegions()` to get all the `VkStridedDeviceAddressRegionKHR` needed by `vkCmdTraceRaysKHR()`
### Example
```cpp
m_sbtWrapper.setup(m_device, m_graphicsQueueIndex, &m_alloc, m_rtProperties);
// ...
m_sbtWrapper.create(m_rtPipeline, rayPipelineInfo);
// ...
auto regions = m_sbtWrapper.getRegions();
vkCmdTraceRaysKHR(cmdBuf, &regions[0], &regions[1], &regions[2], &regions[3], size.width, size.height, 1);
```
## Extra
If data is attached to a shader group (see shader record), it needs to be provided independently.
In this case, the user must know the group index for the group type.
In the example below, hit groups 1 and 2 have data, but group 0 does not.
These calls must be made before create().
```cpp
m_sbtWrapper.addData(SBTWrapper::eHit, 1, m_hitShaderRecord[0]);
m_sbtWrapper.addData(SBTWrapper::eHit, 2, m_hitShaderRecord[1]);
```
## Special case
It is also possible to create a pipeline with only a few groups while the SBT represents many more groups.
The following example shows a more complex setup.
There are: 1 x raygen, 2 x miss, 2 x hit.
BUT the SBT will have 3 hit groups, by duplicating the second hit in its table.
This way the same hit shader defined in the pipeline can be called with different data.
In this case, the user must provide the information to the SBT manually.
All extra groups must be explicitly added.
The following shows how to pull the handle indices from the pipeline, then adds another hit group that re-uses the 4th pipeline entry.
Note: the pipeline create info is not passed to create(), because the indices are defined manually.
```cpp
// Manually defining group indices
m_sbtWrapper.addIndices(rayPipelineInfo); // Add raygen(0), miss(1), miss(2), hit(3), hit(4) from the pipeline info
m_sbtWrapper.addIndex(SBTWrapper::eHit, 4);                       // Adding a 3rd hit, a duplicate of hit:1, which makes hit:2 available.
m_sbtWrapper.addData(SBTWrapper::eHit, 2, m_hitShaderRecord[1]);  // Adding data to this hit shader
m_sbtWrapper.create(m_rtPipeline);
```
@DOC_END */
#include <array>
#include "nvvk/resourceallocator_vk.hpp"
#include "nvvk/debug_util_vk.hpp"
namespace nvvk {
class SBTWrapper
{
public:
enum GroupType
{
eRaygen,
eMiss,
eHit,
eCallable
};
void setup(VkDevice device,
uint32_t familyIndex,
nvvk::ResourceAllocator* allocator,
const VkPhysicalDeviceRayTracingPipelinePropertiesKHR& rtProperties);
void destroy();
// To call after the ray tracer pipeline creation
// The rayPipelineInfo parameter is the structure used to define the pipeline,
  // while librariesInfo describes the potential input pipeline libraries
void create(VkPipeline rtPipeline,
VkRayTracingPipelineCreateInfoKHR rayPipelineInfo = {},
const std::vector<VkRayTracingPipelineCreateInfoKHR>& librariesInfo = {});
  // Optional, to be used in combination with addIndex. Leave create() `rayPipelineInfo`
  // and `librariesInfo` empty. The rayPipelineInfo parameter is the structure used to
  // define the pipeline, while librariesInfo describes the potential input pipeline libraries
void addIndices(VkRayTracingPipelineCreateInfoKHR rayPipelineInfo,
const std::vector<VkRayTracingPipelineCreateInfoKHR>& libraries = {});
// Pushing back a GroupType and the handle pipeline index to use
  // e.g. addIndex(eHit, 3) pushes a hit shader group using the 3rd entry in the pipeline
void addIndex(GroupType t, uint32_t index) { m_index[t].push_back(index); }
// Adding 'Shader Record' data to the group index.
// i.e. addData(eHit, 0, myValue) is adding 'myValue' to the HIT group 0.
template <typename T>
void addData(GroupType t, uint32_t groupIndex, T& data)
{
addData(t, groupIndex, (uint8_t*)&data, sizeof(T));
}
void addData(GroupType t, uint32_t groupIndex, uint8_t* data, size_t dataSize)
{
std::vector<uint8_t> dst(data, data + dataSize);
m_data[t][groupIndex] = dst;
}
// Getters
uint32_t indexCount(GroupType t) { return static_cast<uint32_t>(m_index[t].size()); }
uint32_t getStride(GroupType t) { return m_stride[t]; }
VkDeviceAddress getAddress(GroupType t);
// returns the entire size of a group. Raygen Stride and Size must be equal, even if the buffer contains many of them.
uint32_t getSize(GroupType t) { return t == eRaygen ? getStride(eRaygen) : getStride(t) * indexCount(t); }
  // Return the address region of a group. indexOffset allows offsetting the starting shader of the group.
const VkStridedDeviceAddressRegionKHR getRegion(GroupType t, uint32_t indexOffset = 0);
  // Return the address regions of all groups. The offset allows selecting which raygen to use.
const std::array<VkStridedDeviceAddressRegionKHR, 4> getRegions(uint32_t rayGenIndexOffset = 0);
private:
using entry = std::unordered_map<uint32_t, std::vector<uint8_t>>;
std::array<std::vector<uint32_t>, 4> m_index; // Offset index in pipeline
std::array<nvvk::Buffer, 4> m_buffer; // Buffer of handles + data
std::array<uint32_t, 4> m_stride{0, 0, 0, 0}; // Stride of each group
std::array<entry, 4> m_data; // Local data to groups (Shader Record)
uint32_t m_handleSize{0};
uint32_t m_handleAlignment{0};
uint32_t m_shaderGroupBaseAlignment{0};
VkDevice m_device{VK_NULL_HANDLE};
nvvk::ResourceAllocator* m_pAlloc{nullptr}; // Allocator for buffer, images, acceleration structures
nvvk::DebugUtil m_debug; // Utility to name objects
uint32_t m_queueIndex{0};
};
} // namespace nvvk


@ -0,0 +1,582 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "shadermodulemanager_vk.hpp"
#include <algorithm>
#include <assert.h>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdarg.h>
#include <stdio.h>
#include <nvh/fileoperations.hpp>
#include <nvh/nvprint.hpp>
#if NVP_SUPPORTS_SHADERC
#include <shaderc/shaderc.hpp>
#endif
#define NV_LINE_MARKERS 1
namespace nvvk {
const VkShaderModule ShaderModuleManager::PREPROCESS_ONLY_MODULE = (VkShaderModule)~0;
#if NVP_SUPPORTS_SHADERC
// Shared shaderc compiler, and reference count + mutex protecting it.
shaderc_compiler_t ShaderModuleManager::s_shadercCompiler = nullptr;
uint32_t ShaderModuleManager::s_shadercCompilerUsers{0};
std::mutex ShaderModuleManager::s_shadercCompilerMutex;
// Adapts the include file loader of nvh::ShaderFileManager to what shaderc expects.
class ShadercIncludeBridge : public shaderc::CompileOptions::IncluderInterface
{
// Borrowed pointer to our include file loader.
nvvk::ShaderModuleManager* m_pShaderFileManager;
// Inputs/outputs reused for manualInclude.
std::string m_filenameFound;
const std::string m_emptyString;
// Subtype of shaderc_include_result that holds the include data
// we found; MUST be static_cast to this type before delete-ing as
// shaderc_include_result lacks virtual destructor.
class Result : public shaderc_include_result
{
// Containers for actual data; shaderc_include_result pointers
// point to data held within.
const std::string m_content;
const std::string m_filenameFound;
public:
Result(std::string content, std::string filenameFound)
: m_content(std::move(content))
, m_filenameFound(std::move(filenameFound))
{
this->source_name = m_filenameFound.data();
this->source_name_length = m_filenameFound.size();
this->content = m_content.data();
this->content_length = m_content.size();
this->user_data = nullptr;
}
};
public:
ShadercIncludeBridge(nvvk::ShaderModuleManager* pShaderFileManager) { m_pShaderFileManager = pShaderFileManager; }
// Handles shaderc_include_resolver_fn callbacks.
virtual shaderc_include_result* GetInclude(const char* requested_source,
shaderc_include_type type,
const char* requesting_source,
size_t /*include_depth*/) override
{
std::string filename = requested_source;
std::string includeFileText;
bool versionFound = false; // Trying to match glslc behavior: it doesn't allow #version directives in include files.
if(type == shaderc_include_type_relative) // "header.h"
{
includeFileText = m_pShaderFileManager->getContentWithRequestingSourceDirectory(filename, m_filenameFound, requesting_source);
}
else // shaderc_include_type_standard <header.h>
{
includeFileText = m_pShaderFileManager->getContent(filename, m_filenameFound);
}
std::string content = m_pShaderFileManager->manualIncludeText(includeFileText, m_filenameFound, m_emptyString, versionFound);
return new Result(std::move(content), std::move(m_filenameFound));
}
// Handles shaderc_include_result_release_fn callbacks.
virtual void ReleaseInclude(shaderc_include_result* data) override { delete static_cast<Result*>(data); }
// Set as the includer for the given shaderc_compile_options_t.
// This ShadercIncludeBridge MUST not be destroyed while in-use by a
// shaderc compiler using these options.
void setAsIncluder(shaderc_compile_options_t options)
{
shaderc_compile_options_set_include_callbacks(
options,
[](void* pvShadercIncludeBridge, const char* requestedSource, int type, const char* requestingSource, size_t includeDepth) {
return static_cast<ShadercIncludeBridge*>(pvShadercIncludeBridge)
->GetInclude(requestedSource, (shaderc_include_type)type, requestingSource, includeDepth);
},
[](void* pvShadercIncludeBridge, shaderc_include_result* includeResult) {
return static_cast<ShadercIncludeBridge*>(pvShadercIncludeBridge)->ReleaseInclude(includeResult);
},
this);
}
};
#endif /* NVP_SUPPORTS_SHADERC */
std::string ShaderModuleManager::DefaultInterface::getTypeDefine(uint32_t type) const
{
switch(type)
{
case VK_SHADER_STAGE_VERTEX_BIT:
return "#define _VERTEX_SHADER_ 1\n";
case VK_SHADER_STAGE_FRAGMENT_BIT:
return "#define _FRAGMENT_SHADER_ 1\n";
case VK_SHADER_STAGE_COMPUTE_BIT:
return "#define _COMPUTE_SHADER_ 1\n";
case VK_SHADER_STAGE_GEOMETRY_BIT:
return "#define _GEOMETRY_SHADER_ 1\n";
case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
return "#define _TESS_CONTROL_SHADER_ 1\n";
case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
return "#define _TESS_EVALUATION_SHADER_ 1\n";
#if VK_NV_mesh_shader
case VK_SHADER_STAGE_MESH_BIT_NV:
return "#define _MESH_SHADER_ 1\n";
case VK_SHADER_STAGE_TASK_BIT_NV:
return "#define _TASK_SHADER_ 1\n";
#endif
#if VK_NV_ray_tracing
case VK_SHADER_STAGE_RAYGEN_BIT_NV:
return "#define _RAY_GENERATION_SHADER_ 1\n";
case VK_SHADER_STAGE_ANY_HIT_BIT_NV:
return "#define _RAY_ANY_HIT_SHADER_ 1\n";
case VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV:
return "#define _RAY_CLOSEST_HIT_SHADER_ 1\n";
case VK_SHADER_STAGE_MISS_BIT_NV:
return "#define _RAY_MISS_SHADER_ 1\n";
case VK_SHADER_STAGE_INTERSECTION_BIT_NV:
return "#define _RAY_INTERSECTION_SHADER_ 1\n";
case VK_SHADER_STAGE_CALLABLE_BIT_NV:
return "#define _RAY_CALLABLE_BIT_SHADER_ 1\n";
#endif
}
return std::string();
}
uint32_t ShaderModuleManager::DefaultInterface::getTypeShadercKind(uint32_t type) const
{
#if NVP_SUPPORTS_SHADERC
switch(type)
{
case VK_SHADER_STAGE_VERTEX_BIT:
return shaderc_glsl_vertex_shader;
case VK_SHADER_STAGE_FRAGMENT_BIT:
return shaderc_glsl_fragment_shader;
case VK_SHADER_STAGE_COMPUTE_BIT:
return shaderc_glsl_compute_shader;
case VK_SHADER_STAGE_GEOMETRY_BIT:
return shaderc_glsl_geometry_shader;
case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
return shaderc_glsl_tess_control_shader;
case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
return shaderc_glsl_tess_evaluation_shader;
#if VK_NV_mesh_shader
case VK_SHADER_STAGE_MESH_BIT_NV:
return shaderc_glsl_mesh_shader;
case VK_SHADER_STAGE_TASK_BIT_NV:
return shaderc_glsl_task_shader;
#endif
#if VK_NV_ray_tracing
case VK_SHADER_STAGE_RAYGEN_BIT_NV:
return shaderc_glsl_raygen_shader;
case VK_SHADER_STAGE_ANY_HIT_BIT_NV:
return shaderc_glsl_anyhit_shader;
case VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV:
return shaderc_glsl_closesthit_shader;
case VK_SHADER_STAGE_MISS_BIT_NV:
return shaderc_glsl_miss_shader;
case VK_SHADER_STAGE_INTERSECTION_BIT_NV:
return shaderc_glsl_intersection_shader;
case VK_SHADER_STAGE_CALLABLE_BIT_NV:
return shaderc_glsl_callable_shader;
#endif
}
return shaderc_glsl_infer_from_source;
#else
return 0;
#endif
}
bool ShaderModuleManager::setupShaderModule(ShaderModule& module)
{
Definition& definition = module.definition;
module.module = VK_NULL_HANDLE;
if(definition.filetype == FILETYPE_DEFAULT)
{
definition.filetype = m_filetype;
}
std::string combinedPrepend = m_prepend;
std::string combinedFilenames;
combinedPrepend += definition.prepend;
combinedFilenames += definition.filename;
if(definition.filetype == FILETYPE_SPIRV)
{
std::string filenameFound;
definition.content = nvh::loadFile(definition.filename, true, m_directories, filenameFound);
}
else
{
std::string prepend = m_usedSetupIF->getTypeDefine(definition.type);
definition.content =
manualInclude(definition.filename, definition.filenameFound, prepend + m_prepend + definition.prepend, false);
}
if(definition.content.empty())
{
return false;
}
if(m_preprocessOnly)
{
module.module = PREPROCESS_ONLY_MODULE;
return true;
}
else
{
VkResult vkresult = VK_ERROR_INVALID_SHADER_NV;
VkShaderModuleCreateInfo shaderModuleInfo = {VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO};
#if NVP_SUPPORTS_SHADERC
shaderc_compilation_result_t result = nullptr;
if(definition.filetype == FILETYPE_GLSL)
{
std::lock_guard<std::mutex> guard(s_shadercCompilerMutex);
shaderc_shader_kind shaderkind = (shaderc_shader_kind)m_usedSetupIF->getTypeShadercKind(definition.type);
shaderc_compile_options_t options = (shaderc_compile_options_t)m_usedSetupIF->getShadercCompileOption(s_shadercCompiler);
if(!options)
{
if(m_apiMajor == 1 && m_apiMinor == 0)
{
shaderc_compile_options_set_target_env(m_shadercOptions, shaderc_target_env_vulkan, shaderc_env_version_vulkan_1_0);
}
else if(m_apiMajor == 1 && m_apiMinor == 1)
{
shaderc_compile_options_set_target_env(m_shadercOptions, shaderc_target_env_vulkan, shaderc_env_version_vulkan_1_1);
}
else if(m_apiMajor == 1 && m_apiMinor == 2)
{
shaderc_compile_options_set_target_env(m_shadercOptions, shaderc_target_env_vulkan, shaderc_env_version_vulkan_1_2);
}
else if(m_apiMajor == 1 && m_apiMinor == 3)
{
shaderc_compile_options_set_target_env(m_shadercOptions, shaderc_target_env_vulkan, shaderc_env_version_vulkan_1_3);
}
else
{
LOGE("nvvk::ShaderModuleManager: Unsupported Vulkan version: %i.%i\n", int(m_apiMajor), int(m_apiMinor));
assert(0);
}
shaderc_compile_options_set_optimization_level(m_shadercOptions, m_shadercOptimizationLevel);
        // Keep debug info: it doesn't cost shader execution performance, only compile time and memory size.
        // It improves usage of debugging tools; not recommended for a shipping application,
        // but good for development builds.
shaderc_compile_options_set_generate_debug_info(m_shadercOptions);
options = m_shadercOptions;
}
// Tell shaderc to use this class (really our base class, nvh::ShaderFileManager) to load include files.
ShadercIncludeBridge shadercIncludeBridge(this);
shadercIncludeBridge.setAsIncluder(options);
// Note: need filenameFound, not filename, so that relative includes work.
result = shaderc_compile_into_spv(s_shadercCompiler, definition.content.c_str(), definition.content.size(),
shaderkind, definition.filenameFound.c_str(), "main", options);
if(!result)
{
return false;
}
if(shaderc_result_get_compilation_status(result) != shaderc_compilation_status_success)
{
bool failedToOptimize = strstr(shaderc_result_get_error_message(result), "failed to optimize");
int level = failedToOptimize ? LOGLEVEL_WARNING : LOGLEVEL_ERROR;
nvprintfLevel(level, "%s: optimization_level_performance\n", definition.filename.c_str());
nvprintfLevel(level, " %s\n", definition.prepend.c_str());
nvprintfLevel(level, " %s\n", shaderc_result_get_error_message(result));
shaderc_result_release(result);
if(!failedToOptimize || options != m_shadercOptions)
{
return false;
}
// try again without optimization
shaderc_compile_options_set_optimization_level(m_shadercOptions, shaderc_optimization_level_zero);
result = shaderc_compile_into_spv(s_shadercCompiler, definition.content.c_str(), definition.content.size(),
shaderkind, definition.filename.c_str(), "main", options);
}
if(shaderc_result_get_compilation_status(result) != shaderc_compilation_status_success)
{
LOGE("%s: optimization_level_zero\n", definition.filename.c_str());
LOGE(" %s\n", definition.prepend.c_str());
LOGE(" %s\n", shaderc_result_get_error_message(result));
shaderc_result_release(result);
return false;
}
shaderModuleInfo.codeSize = shaderc_result_get_length(result);
shaderModuleInfo.pCode = (const uint32_t*)shaderc_result_get_bytes(result);
}
else
#else
if(definition.filetype == FILETYPE_GLSL)
{
LOGW("No direct GLSL support\n");
return false;
}
else
#endif
{
shaderModuleInfo.codeSize = definition.content.size();
shaderModuleInfo.pCode = (const uint32_t*)definition.content.c_str();
}
vkresult = ::vkCreateShaderModule(m_device, &shaderModuleInfo, nullptr, &module.module);
if(vkresult == VK_SUCCESS && m_keepModuleSPIRV)
{
module.moduleSPIRV = std::string((const char*)shaderModuleInfo.pCode, shaderModuleInfo.codeSize);
}
#if NVP_SUPPORTS_SHADERC
if(result)
{
shaderc_result_release(result);
}
#endif
return vkresult == VK_SUCCESS;
}
}
void ShaderModuleManager::init(VkDevice device, int apiMajor, int apiMinor)
{
assert(!m_device);
m_device = device;
m_apiMajor = apiMajor;
m_apiMinor = apiMinor;
#if NVP_SUPPORTS_SHADERC
// First user initializes compiler.
std::lock_guard<std::mutex> lock(s_shadercCompilerMutex);
s_shadercCompilerUsers++;
if(!s_shadercCompiler)
{
s_shadercCompiler = shaderc_compiler_initialize();
}
m_shadercOptions = shaderc_compile_options_initialize();
#endif
}
void ShaderModuleManager::deinit()
{
if(m_device)
{
#if NVP_SUPPORTS_SHADERC
// Last user de-inits compiler.
std::lock_guard<std::mutex> lock(s_shadercCompilerMutex);
s_shadercCompilerUsers--;
if(s_shadercCompiler && s_shadercCompilerUsers == 0)
{
shaderc_compiler_release(s_shadercCompiler);
s_shadercCompiler = nullptr;
}
if(m_shadercOptions)
{
shaderc_compile_options_release(m_shadercOptions);
}
#endif
}
deleteShaderModules();
m_device = nullptr;
}
ShaderModuleID ShaderModuleManager::createShaderModule(const Definition& definition)
{
ShaderModule module;
module.definition = definition;
setupShaderModule(module);
// find unused
for(size_t i = 0; i < m_shadermodules.size(); i++)
{
if(m_shadermodules[i].definition.type == 0)
{
m_shadermodules[i] = module;
return i;
}
}
m_shadermodules.push_back(module);
return m_shadermodules.size() - 1;
}
ShaderModuleID ShaderModuleManager::createShaderModule(uint32_t type,
std::string const& filename,
std::string const& prepend,
FileType fileType /*= FILETYPE_DEFAULT*/,
std::string const& entryname /*= "main"*/)
{
Definition def;
def.type = type;
def.filename = filename;
def.prepend = prepend;
def.filetype = fileType;
def.entry = entryname;
return createShaderModule(def);
}
bool ShaderModuleManager::areShaderModulesValid()
{
bool valid = true;
for(size_t i = 0; i < m_shadermodules.size(); i++)
{
valid = valid && isValid(i);
}
return valid;
}
void ShaderModuleManager::deleteShaderModules()
{
for(size_t i = 0; i < m_shadermodules.size(); i++)
{
destroyShaderModule((ShaderModuleID)i);
}
m_shadermodules.clear();
}
void ShaderModuleManager::reloadModule(ShaderModuleID idx)
{
if(!isValid(idx))
return;
ShaderModule& module = getShaderModule(idx);
bool old = m_preprocessOnly;
m_preprocessOnly = module.module == PREPROCESS_ONLY_MODULE;
if(module.module && module.module != PREPROCESS_ONLY_MODULE)
{
vkDestroyShaderModule(m_device, module.module, nullptr);
module.module = nullptr;
}
if(module.definition.type != 0)
{
setupShaderModule(module);
}
m_preprocessOnly = old;
}
void ShaderModuleManager::reloadShaderModules()
{
LOGI("Reloading programs...\n");
for(size_t i = 0; i < m_shadermodules.size(); i++)
{
reloadModule((ShaderModuleID)i);
}
LOGI("done\n");
}
bool ShaderModuleManager::isValid(ShaderModuleID idx) const
{
return idx.isValid()
&& ((m_shadermodules[idx].definition.type && m_shadermodules[idx].module != 0)
|| !m_shadermodules[idx].definition.type);
}
VkShaderModule ShaderModuleManager::get(ShaderModuleID idx) const
{
return m_shadermodules[idx].module;
}
ShaderModuleManager::ShaderModule& ShaderModuleManager::getShaderModule(ShaderModuleID idx)
{
return m_shadermodules[idx];
}
const ShaderModuleManager::ShaderModule& ShaderModuleManager::getShaderModule(ShaderModuleID idx) const
{
return m_shadermodules[idx];
}
void ShaderModuleManager::destroyShaderModule(ShaderModuleID idx)
{
if(!isValid(idx))
return;
ShaderModule& module = getShaderModule(idx);
if(module.module && module.module != PREPROCESS_ONLY_MODULE)
{
vkDestroyShaderModule(m_device, module.module, nullptr);
module.module = 0;
}
module.definition = Definition();
}
const char* ShaderModuleManager::getCode(ShaderModuleID idx, size_t* len) const
{
return m_shadermodules[idx].definition.content.c_str();
}
const size_t ShaderModuleManager::getCodeLen(ShaderModuleID idx) const
{
return m_shadermodules[idx].definition.content.size();
}
bool ShaderModuleManager::dumpSPIRV(ShaderModuleID idx, const char* filename) const
{
if(m_shadermodules[idx].moduleSPIRV.empty())
return false;
FILE* f = fopen(filename, "wb");
if(f)
{
fwrite(m_shadermodules[idx].moduleSPIRV.data(), m_shadermodules[idx].moduleSPIRV.size(), 1, f);
fclose(f);
return true;
}
return false;
}
bool ShaderModuleManager::getSPIRV(ShaderModuleID idx, size_t* pLen, const uint32_t** pCode) const
{
if(m_shadermodules[idx].moduleSPIRV.empty())
return false;
*pLen = m_shadermodules[idx].moduleSPIRV.size();
*pCode = reinterpret_cast<const uint32_t*>(m_shadermodules[idx].moduleSPIRV.data());
return true;
}
} // namespace nvvk


@ -0,0 +1,243 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef NV_SHADERMODULEMANAGER_INCLUDED
#define NV_SHADERMODULEMANAGER_INCLUDED
#include <mutex>
#include <stdio.h>
#include <string>
#include <vector>
#include <vulkan/vulkan_core.h>
#if NVP_SUPPORTS_SHADERC
#define NV_EXTENSIONS
#include <shaderc/shaderc.h>
#undef NV_EXTENSIONS
#endif
#include <nvh/shaderfilemanager.hpp>
namespace nvvk {
//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::ShaderModuleManager
The nvvk::ShaderModuleManager manages VkShaderModules stored in files (SPIR-V or GLSL).
Using ShaderFileManager it will find the files and resolve #include for GLSL.
You must add include directories to the base class for this.
It also comes with some convenience functions to reload shaders etc.
That is why it hands out a ShaderModuleID rather than a VkShaderModule directly.
To change the compilation behavior, manipulate the public member variables
prior to calling createShaderModule.
m_filetype is crucial for this. You can pass raw SPIR-V files or GLSL.
If GLSL is used, shaderc must be used as well (which must be added via
_add_package_ShaderC() in the project's CMake).
Example:
```cpp
ShaderModuleManager mgr(myDevice);
// derived from ShaderFileManager
mgr.addDirectory("spv/");
// all shaders get this injected after #version statement
mgr.m_prepend = "#define USE_NOISE 1\n";
vid = mgr.createShaderModule(VK_SHADER_STAGE_VERTEX_BIT, "object.vert.glsl");
fid = mgr.createShaderModule(VK_SHADER_STAGE_FRAGMENT_BIT, "object.frag.glsl");
// ... later use module
info.module = mgr.get(vid);
```
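A small sketch of the reload path; `mgr`, `vid` and `fid` are from the example above, and the pipelines using the modules are application-side:
```cpp
// e.g. triggered by a hotkey after editing the GLSL files on disk
mgr.reloadShaderModules();
if(mgr.areShaderModulesValid())
{
  // re-create the pipelines that referenced mgr.get(vid) / mgr.get(fid)
}
```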
@DOC_END */
class ShaderModuleID
{
public:
size_t m_value;
ShaderModuleID()
: m_value(size_t(~0))
{
}
ShaderModuleID(size_t b)
: m_value(b)
{
}
ShaderModuleID& operator=(size_t b)
{
m_value = b;
return *this;
}
bool isValid() const { return m_value != size_t(~0); }
operator bool() const { return isValid(); }
operator size_t() const { return m_value; }
friend bool operator==(const ShaderModuleID& lhs, const ShaderModuleID& rhs) { return rhs.m_value == lhs.m_value; }
};
class ShaderModuleManager : public nvh::ShaderFileManager
{
public:
struct ShaderModule
{
ShaderModule()
: module(0)
{
}
VkShaderModule module;
std::string moduleSPIRV;
Definition definition;
};
void init(VkDevice device, int apiMajor = 1, int apiMinor = 1);
// also calls deleteShaderModules
void deinit();
ShaderModuleID createShaderModule(uint32_t type,
std::string const& filename,
std::string const& prepend = "",
FileType fileType = FILETYPE_DEFAULT,
std::string const& entryname = "main");
void destroyShaderModule(ShaderModuleID idx);
void reloadModule(ShaderModuleID idx);
void reloadShaderModules();
void deleteShaderModules();
bool areShaderModulesValid();
#if NVP_SUPPORTS_SHADERC
void setOptimizationLevel(shaderc_optimization_level level) { m_shadercOptimizationLevel = level; }
#endif
bool isValid(ShaderModuleID idx) const;
VkShaderModule get(ShaderModuleID idx) const;
ShaderModule& getShaderModule(ShaderModuleID idx);
const ShaderModule& getShaderModule(ShaderModuleID idx) const;
const char* getCode(ShaderModuleID idx, size_t* len = NULL) const;
const size_t getCodeLen(ShaderModuleID idx) const;
bool dumpSPIRV(ShaderModuleID idx, const char* filename) const;
bool getSPIRV(ShaderModuleID idx, size_t* pLen, const uint32_t** pCode) const;
// state will affect the next created shader module
// also keep m_filetype in mind!
bool m_preprocessOnly = false;
bool m_keepModuleSPIRV = false;
//////////////////////////////////////////////////////////////////////////
//
// for internal development, useful when we have new shader types that
// are not covered by public VulkanSDK
struct SetupInterface
{
// This class is to aid using a shaderc library version that is not
// provided by the Vulkan SDK, but custom. Therefore it allows custom settings etc.
// Useful for driver development of new shader stages, otherwise can be pretty much ignored.
virtual std::string getTypeDefine(uint32_t type) const = 0;
virtual uint32_t getTypeShadercKind(uint32_t type) const = 0;
virtual void* getShadercCompileOption(void* shadercCompiler) { return nullptr; }
};
void setSetupIF(SetupInterface* setupIF);
ShaderModuleManager(ShaderModuleManager const&) = delete;
ShaderModuleManager& operator=(ShaderModuleManager const&) = delete;
// Constructors reference-count the shared shaderc compiler, and
// disable ShaderFileManager's homemade #include mechanism iff we're
// using shaderc.
#if NVP_SUPPORTS_SHADERC
static constexpr bool s_handleIncludePasting = false;
#else
static constexpr bool s_handleIncludePasting = true;
#endif
ShaderModuleManager(VkDevice device = nullptr)
: ShaderFileManager(s_handleIncludePasting)
{
m_usedSetupIF = &m_defaultSetupIF;
m_supportsExtendedInclude = true;
if(device)
init(device);
}
~ShaderModuleManager() { deinit(); }
// Shaderc has its own interface for handling include files that I
// have to subclass; this needs access to protected
// ShaderFileManager functions.
friend class ShadercIncludeBridge;
private:
ShaderModuleID createShaderModule(const Definition& def);
bool setupShaderModule(ShaderModule& prog);
struct DefaultInterface : public SetupInterface
{
std::string getTypeDefine(uint32_t type) const override;
uint32_t getTypeShadercKind(uint32_t type) const override;
};
static const VkShaderModule PREPROCESS_ONLY_MODULE;
VkDevice m_device = nullptr;
DefaultInterface m_defaultSetupIF;
SetupInterface* m_usedSetupIF = nullptr;
int m_apiMajor = 1;
int m_apiMinor = 1;
#if NVP_SUPPORTS_SHADERC
static uint32_t s_shadercCompilerUsers;
static shaderc_compiler_t s_shadercCompiler; // Lock mutex below while using.
static std::mutex s_shadercCompilerMutex;
shaderc_compile_options_t m_shadercOptions = nullptr;
shaderc_optimization_level m_shadercOptimizationLevel = shaderc_optimization_level_performance;
#endif
std::vector<ShaderModule> m_shadermodules;
};
} // namespace nvvk
#endif  //NV_SHADERMODULEMANAGER_INCLUDED


@ -0,0 +1,101 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <assert.h>
#include <string>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace nvvk {
/** @DOC_START
# functions in nvvk
- createShaderModule : create the shader module from various binary code inputs
- createShaderStageInfo: create the shader module and set up the stage from the incoming binary code (see the sketch below)
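A minimal usage sketch, assuming `device` is a valid VkDevice and `loadSpirvWords` is a hypothetical application-side loader returning the SPIR-V words of a compute shader:
```cpp
std::vector<uint32_t> spirv = loadSpirvWords("comp.spv");  // hypothetical loader, not part of this header
VkPipelineShaderStageCreateInfo stage =
    nvvk::createShaderStageInfo(device, spirv, VK_SHADER_STAGE_COMPUTE_BIT);
// ... create the pipeline using 'stage' ...
vkDestroyShaderModule(device, stage.module, nullptr);  // the caller owns the created module
```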
@DOC_END */
inline VkShaderModule createShaderModule(VkDevice device, const uint32_t* binarycode, size_t sizeInBytes)
{
VkShaderModuleCreateInfo createInfo = {};
createInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
createInfo.codeSize = sizeInBytes;
createInfo.pCode = binarycode;
VkShaderModule shaderModule = VK_NULL_HANDLE;
if(vkCreateShaderModule(device, &createInfo, nullptr, &shaderModule) != VK_SUCCESS)
{
assert(0 && "failed to create shader module!");
}
return shaderModule;
}
inline VkShaderModule createShaderModule(VkDevice device, const char* binarycode, size_t numInt32)
{
return createShaderModule(device, (const uint32_t*)binarycode, numInt32 * 4);
}
inline VkShaderModule createShaderModule(VkDevice device, const std::vector<char>& code)
{
return createShaderModule(device, (const uint32_t*)code.data(), code.size());
}
inline VkShaderModule createShaderModule(VkDevice device, const std::vector<uint8_t>& code)
{
return createShaderModule(device, (const uint32_t*)code.data(), code.size());
}
inline VkShaderModule createShaderModule(VkDevice device, const std::vector<uint32_t>& code)
{
return createShaderModule(device, code.data(), 4 * code.size());
}
inline VkShaderModule createShaderModule(VkDevice device, const std::string& code)
{
return createShaderModule(device, (const uint32_t*)code.data(), code.size());
}
template <typename T>
inline VkPipelineShaderStageCreateInfo createShaderStageInfo(VkDevice device,
const std::vector<T>& code,
VkShaderStageFlagBits stage,
const char* entryPoint = "main")
{
VkPipelineShaderStageCreateInfo shaderStage{VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO};
shaderStage.stage = stage;
shaderStage.module = createShaderModule(device, code);
shaderStage.pName = entryPoint;
return shaderStage;
}
inline VkPipelineShaderStageCreateInfo createShaderStageInfo(VkDevice device,
const std::string& code,
VkShaderStageFlagBits stage,
const char* entryPoint = "main")
{
VkPipelineShaderStageCreateInfo shaderStage{VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO};
shaderStage.stage = stage;
shaderStage.module = createShaderModule(device, code);
shaderStage.pName = entryPoint;
return shaderStage;
}
} // namespace nvvk


@ -0,0 +1,455 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "sparse_image_vk.hpp"
// Compute the number of pages of size `granularity` that would be required to represent a texture of size `extent`
static inline glm::uvec3 alignedDivision(const VkExtent3D& extent, const VkExtent3D& granularity)
{
glm::uvec3 res;
if(granularity.width == 0 || granularity.height == 0 || granularity.depth == 0)
{
LOGE("alignedDivision: invalid granularity\n");
assert(false);
return glm::uvec3(0u);
}
res.x = (extent.width + granularity.width - 1) / granularity.width;
res.y = (extent.height + granularity.height - 1) / granularity.height;
res.z = (extent.depth + granularity.depth - 1) / granularity.depth;
return res;
}
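// Worked example for alignedDivision (illustrative values, not from any caller): a 1000x600x1
// extent with a 128x128x1 granularity yields ceil(1000/128) x ceil(600/128) x ceil(1/1)
// = 8 x 5 x 1 pages.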
// Set the pointers for the VkBindSparseInfo stored in `image` prior to
// calling vkQueueBindSparse
void applySparseMemoryBinds(nvvk::SparseImage& image)
{
image.bindSparseInfo = VkBindSparseInfo{VK_STRUCTURE_TYPE_BIND_SPARSE_INFO};
// Sparse Image memory binds
image.imageMemoryBindInfo.image = image.getWorkImage();
image.imageMemoryBindInfo.bindCount = static_cast<uint32_t>(image.sparseImageMemoryBinds.size());
image.imageMemoryBindInfo.pBinds = image.sparseImageMemoryBinds.data();
image.bindSparseInfo.imageBindCount = ((image.imageMemoryBindInfo.bindCount > 0) ? 1 : 0);
image.bindSparseInfo.pImageBinds = &image.imageMemoryBindInfo;
// Opaque image memory binds (mip tail)
image.opaqueMemoryBindInfo.image = image.getWorkImage();
image.opaqueMemoryBindInfo.bindCount = static_cast<uint32_t>(image.opaqueMemoryBinds.size());
image.opaqueMemoryBindInfo.pBinds = image.opaqueMemoryBinds.data();
image.bindSparseInfo.imageOpaqueBindCount = ((image.opaqueMemoryBindInfo.bindCount > 0) ? 1 : 0);
image.bindSparseInfo.pImageOpaqueBinds = &image.opaqueMemoryBindInfo;
}
// Add mip tail information to the image, return the requested memory requirements for the mip tail
VkMemoryRequirements nvvk::SparseImage::addMipTail(VkMemoryRequirements generalMemoryReqs,
VkSparseImageMemoryRequirements& sparseMemoryReq,
uint32_t layer /*= 0*/)
{
// Compute the size of the required mip tail allocation
VkMemoryRequirements memReqs = generalMemoryReqs;
memReqs.size = sparseMemoryReq.imageMipTailSize;
// Add an `opaque` memory bind representing the mip tail
VkSparseMemoryBind sparseMemoryBind{sparseMemoryReq.imageMipTailOffset + layer * sparseMemoryReq.imageMipTailStride,
sparseMemoryReq.imageMipTailSize, VK_NULL_HANDLE};
opaqueMemoryBinds.push_back(sparseMemoryBind);
// Return the memory requirements for that mip tail
return memReqs;
}
// Compute and store the number of pages contained in each sparse mip level
void nvvk::SparseImage::computeMipPageCounts()
{
uint32_t dimensionCount = 1;
if(size.height > 1)
{
dimensionCount++;
}
if(size.depth > 1)
{
dimensionCount++;
}
// Since the finest mip level has index 0, the number
// of sparse levels is equal to the index of the beginning of the
// mip tail
uint32_t sparseMipLevels = mipTailStart;
sparseMipStartIndices.resize(sparseMipLevels);
sparseMipPageCounts.resize(sparseMipLevels);
// Compute the page count at the coarsest sparse level (just above the mip tail)
// For each dimension we compare the resolution of the mip level with the page granularity and
  // keep the highest ratio. This is particularly necessary for 3D textures, where the depth
// granularity is typically lower than the width and height granularities
uint32_t pageCountAtCoarsestLevel = (size.width >> (sparseMipLevels - 1)) / imageGranularity.width;
pageCountAtCoarsestLevel =
std::max(pageCountAtCoarsestLevel,
pageCountAtCoarsestLevel * ((size.height >> (sparseMipLevels - 1)) / imageGranularity.height));
pageCountAtCoarsestLevel =
std::max(pageCountAtCoarsestLevel,
pageCountAtCoarsestLevel * (size.depth >> (sparseMipLevels - 1)) / imageGranularity.depth);
// When going from level n+1 to level n each dimension will
// be divided by 2, hence each page at level n+1 will be represented
// by 2^dimensionCount children at level n
uint32_t childCount = 1 << dimensionCount;
// The indices of the pages start from the coarsest level, so the
// first page of that level will have index 0, and the pages of the
// finest level will have the highest indices
uint32_t finalIndex = 0;
uint32_t currentPagesInLevel = pageCountAtCoarsestLevel;
uint32_t currentMipLevel = sparseMipLevels - 1;
sparseMipTotalPageCount = 0u;
// Iterate from coarsest to finest level, accumulating the
// page counts for each level
for(uint32_t i = 0; i < sparseMipLevels; i++, currentMipLevel--)
{
sparseMipStartIndices[currentMipLevel] = finalIndex;
sparseMipPageCounts[currentMipLevel] = currentPagesInLevel;
finalIndex += currentPagesInLevel;
currentPagesInLevel *= childCount;
}
sparseMipTotalPageCount = finalIndex;
}
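// Worked example for computeMipPageCounts (illustrative, assuming a 512x512 texture with a
// 128x128 granularity and the mip tail starting at level 3): the sparse levels are 0..2,
// the coarsest sparse level (2) has 1 page, level 1 has 4 pages and level 0 has 16, giving
// sparseMipStartIndices = {5, 1, 0} for levels 0, 1, 2 and a total of 21 pages.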
// Create the sparse image, return the memory requirements for the mip tail(s)
std::vector<VkMemoryRequirements> nvvk::SparseImage::create(VkDevice device,
const std::array<VkImage, s_sparseImageCount> imageDesc,
uint32_t mipLevels,
uint32_t arrayLayers,
const VkExtent3D& extent)
{
if(mipLevels > NVVK_SPARSE_IMAGE_MAX_MIP_LEVELS)
{
LOGE("SparseImage::create: invalid mip level count\n");
assert(false);
return {};
}
std::vector<VkMemoryRequirements> mipTailRequirements;
// Create the image descriptor
size.width = extent.width;
size.height = extent.height;
size.depth = extent.depth;
images = imageDesc;
mipLevelCount = mipLevels;
layerCount = arrayLayers;
// Get memory requirements for later allocations
vkGetImageMemoryRequirements(device, images[0], &memoryReqs);
// Get sparse memory requirements
std::vector<VkSparseImageMemoryRequirements> sparseMemoryReqs;
uint32_t reqCount = 0u;
vkGetImageSparseMemoryRequirements(device, images[0], &reqCount, nullptr);
if(reqCount == 0u)
{
LOGE("No sparse image memory requirements available\n");
return {};
}
sparseMemoryReqs.resize(reqCount);
vkGetImageSparseMemoryRequirements(device, images[0], &reqCount, sparseMemoryReqs.data());
// Select the memory requirements with the smallest granularity to avoid wasting memory
uint32_t minGranularity = NVVK_SPARSE_IMAGE_INVALID_INDEX;
VkSparseImageMemoryRequirements sparseReqs = {};
for(const auto& reqs : sparseMemoryReqs)
{
uint32_t granularity = reqs.formatProperties.imageGranularity.width * reqs.formatProperties.imageGranularity.height
* reqs.formatProperties.imageGranularity.depth;
if(granularity < minGranularity)
{
minGranularity = granularity;
sparseReqs = reqs;
}
}
// sparseMemoryReq.imageMipTailFirstLod is the first mip level stored inside the mip tail
mipTailStart = sparseReqs.imageMipTailFirstLod;
// Get sparse image memory requirements for the color aspect
VkSparseImageMemoryRequirements sparseMemoryReq;
bool colorAspectFound = false;
for(const auto& reqs : sparseMemoryReqs)
{
if((reqs.formatProperties.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0)
{
sparseMemoryReq = reqs;
colorAspectFound = true;
break;
}
}
if(!colorAspectFound)
{
LOGE("Could not find sparse image memory requirements with color aspect bit");
return {};
}
// Check whether a mip tail is necessary
bool hasMipTail = (sparseMemoryReq.imageMipTailFirstLod < mipLevels);
// Check if the format has a single mip tail for all layers or one mip tail for each layer
// The mip tail contains all mip levels >= sparseMemoryReq.imageMipTailFirstLod
bool singleMipTail = ((sparseMemoryReq.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) != 0);
imageGranularity = sparseMemoryReq.formatProperties.imageGranularity;
// Prepare the data structure holding all the virtual pages for the sparse texture
  // Sparse bindings for each mip level of each layer, except the mip levels of the mip tail
for(uint32_t layer = 0; layer < arrayLayers; layer++)
{
// If the format has one mip tail per layer, allocate each of them on the device
if((!singleMipTail) && hasMipTail)
{
mipTailRequirements.push_back(addMipTail(memoryReqs, sparseMemoryReq, layer));
}
}
// If the format has a single mip tail for all layers, allocate it on the device
if(singleMipTail && hasMipTail)
{
mipTailRequirements.push_back(addMipTail(memoryReqs, sparseMemoryReq));
}
// Compute the page indices for each mip level
computeMipPageCounts();
return mipTailRequirements;
}
// Bind device memory to the mip tail(s)
void nvvk::SparseImage::bindMipTailMemory(std::vector<std::pair<VkDeviceMemory, VkDeviceSize>> mipTailMemory)
{
if(mipTailMemory.size() != opaqueMemoryBinds.size())
{
LOGE("Mip tail allocations count must match the number of mip tails in the sparse texture\n");
return;
}
for(size_t i = 0; i < mipTailMemory.size(); i++)
{
opaqueMemoryBinds[i].memory = mipTailMemory[i].first;
opaqueMemoryBinds[i].memoryOffset = mipTailMemory[i].second;
}
}
// Unbind device memory from the mip tail(s)
void nvvk::SparseImage::unbindMipTailMemory()
{
for(size_t i = 0; i < opaqueMemoryBinds.size(); i++)
{
opaqueMemoryBinds[i].resourceOffset = {};
opaqueMemoryBinds[i].memory = {};
opaqueMemoryBinds[i].memoryOffset = {};
}
}
// Update the contents of sparseImageMemoryBinds based on the vector of updated page indices and
// set the pointers in the VkBindSparseInfo
// Call before sparse binding to update memory bind list etc.
// No synchronization is added to the VkBindSparseInfo object, the application
// is responsible for adding the proper semaphore before calling vkQueueBindSparse
void nvvk::SparseImage::updateSparseBindInfo(const std::vector<uint32_t>& updatedPageIndices, uint32_t layer /*= 0*/)
{
// Build the list of added/removed sparse image memory binds
sparseImageMemoryBinds.resize(updatedPageIndices.size());
uint32_t index = 0;
for(auto pageIndex : updatedPageIndices)
{
PageId id = {layer, pageIndex};
auto it = allocatedPages.find(id);
// If the page actually exists in the image and is not flagged for deletion,
// add it to the bindings
if(it != allocatedPages.end() && (it->second.allocationFlags & SparseImagePage::eMarkedForDeletion) == 0)
{
const auto& page = it->second;
sparseImageMemoryBinds[index] = page.imageMemoryBind;
index++;
}
else
{
// Otherwise the page has been deleted, and the sparse texture bindings
// are updated by binding VK_NULL_HANDLE memory to the page
SparseImagePage page = createPageInfo(pageIndex, layer);
sparseImageMemoryBinds[index] = page.imageMemoryBind;
index++;
}
}
// Set the pointers before calling vkQueueBindSparse
applySparseMemoryBinds(*this);
}
// Set the pointers in the VkBindSparseInfo using the contents of sparseImageMemoryBinds
// No synchronization is added to the VkBindSparseInfo object, the application
// is responsible for adding the proper semaphore before calling vkQueueBindSparse
void nvvk::SparseImage::updateSparseBindInfo()
{
// Set the pointers before calling vkQueueBindSparse
applySparseMemoryBinds(*this);
}
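// Usage sketch for the updateSparseBindInfo overloads (the queue, semaphore and page list below
// are application-side assumptions, not part of this file): after updating the page list, attach
// whatever synchronization is required and submit the binds on a queue that supports sparse binding:
//
//   image.updateSparseBindInfo(changedPageIndices, layer);
//   image.bindSparseInfo.signalSemaphoreCount = 1;
//   image.bindSparseInfo.pSignalSemaphores    = &bindCompletedSemaphore;
//   vkQueueBindSparse(sparseBindingQueue, 1, &image.bindSparseInfo, VK_NULL_HANDLE);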
// Get the index of the beginning of a mip level in the page list
uint32_t nvvk::SparseImage::mipStartIndex(uint32_t mipLevel)
{
return sparseMipStartIndices[mipLevel];
}
// Compute the indices of the children of a page, representing the same area of the image at a finer mip level
std::vector<uint32_t> nvvk::SparseImage::pageChildIndices(const SparseImagePage& p)
{
std::vector<uint32_t> res(p.extent.depth <= 1 ? 4 : 8, NVVK_SPARSE_IMAGE_INVALID_INDEX);
if(p.mipLevel == 0)
{
return res;
}
if(p.extent.width == 0u || p.extent.height == 0u || p.extent.depth == 0u)
{
LOGE("pageChildIndices: Invalid page extent");
assert(false);
return res;
}
// Get the index from which the pages of the next mip level
// are defined, and sanity check the result
uint32_t mipStart = mipStartIndex(p.mipLevel - 1);
if(mipStart == NVVK_SPARSE_IMAGE_INVALID_INDEX)
{
LOGE("pageChildIndices: Invalid mip start index");
assert(false);
return res;
}
// Compute the size of the child mip level in texels, defined by originalSize/(2^level)
glm::uvec3 mipSize(std::max(size.width >> (p.mipLevel - 1), 1u), std::max(size.height >> (p.mipLevel - 1), 1u),
std::max(size.depth >> (p.mipLevel - 1), 1u));
// Compute the location of the beginning of the child list in the next mip level, where each dimension contains
// twice as many pages as the parent level
glm::uvec3 location(2 * p.offset.x / p.extent.width, 2 * p.offset.y / p.extent.height, 2 * p.offset.z / p.extent.depth);
uint32_t pageWidth = p.extent.width;
uint32_t pageHeight = p.extent.height;
uint32_t pageDepth = std::max(1u, p.extent.depth);
// Number of pages along one row (X) of the texture, and within one slice (X*Y) of the texture
uint32_t pagesPerRow = (mipSize.x / pageWidth);
uint32_t pagesPerSlice = (mipSize.x * mipSize.y) / (pageWidth * pageHeight);
// Build and return the child list
for(uint32_t z = 0; z < (pageDepth > 1 ? 2u : 1u); z++)
{
for(uint32_t y = 0; y < (pageHeight > 1 ? 2u : 1u); y++)
{
for(uint32_t x = 0; x < 2; x++)
{
res[x + 2 * (y + 2 * z)] = (location.z + z) * pagesPerSlice + mipStart + location.x + x + (location.y + y) * pagesPerRow;
}
}
}
return res;
}
// Create the page information from its page index and layer
nvvk::SparseImagePage nvvk::SparseImage::createPageInfo(uint32_t pageIndex, uint32_t layer)
{
uint32_t dimensionCount = 1;
if(size.height != 0)
dimensionCount++;
if(size.depth != 0)
dimensionCount++;
std::vector<uint32_t>& startIndices = sparseMipStartIndices;
// Find the mip level from the global page index by comparing the
// start indices of the mip levels with the page index
// There are at most 32 levels (including the mip tail)
// so a linear search is fast enough
uint32_t mipLevel = 0;
for(size_t i = 0; i < startIndices.size(); i++)
{
size_t currentMipLevel = startIndices.size() - i - 1;
if(pageIndex >= startIndices[currentMipLevel] && (currentMipLevel == 0 || pageIndex < startIndices[currentMipLevel - 1]))
{
mipLevel = uint32_t(currentMipLevel);
break;
}
}
// Get the local index of the page within its mip level
uint32_t indexInMip = pageIndex - startIndices[mipLevel];
// Resolution of the mip level, defined by the sparse image total size / 2^level
VkExtent3D mipResolution{std::max(size.width >> mipLevel, 1u), std::max(size.height >> mipLevel, 1u),
std::max(size.depth >> mipLevel, 1u)};
// Compute the number of pages required in each dimension for the mip level
glm::uvec3 sparseBindCounts = alignedDivision(mipResolution, imageGranularity);
// Compute the page index in each dimension and deduce the offset of the page
// in texels based on the page granularity
uint32_t x = indexInMip % sparseBindCounts.x;
uint32_t y = (indexInMip / sparseBindCounts.x) % sparseBindCounts.y;
uint32_t z = indexInMip / (sparseBindCounts.x * sparseBindCounts.y);
VkOffset3D offset{int32_t(x * imageGranularity.width), int32_t(y * imageGranularity.height),
int32_t(z * imageGranularity.depth)};
  // Compute the size of the last page in each dimension, in case the image has non-power-of-two dimensions
glm::uvec3 lastBlockExtent;
lastBlockExtent.x = (mipResolution.width % imageGranularity.width) ? mipResolution.width % imageGranularity.width :
imageGranularity.width;
lastBlockExtent.y = (mipResolution.height % imageGranularity.height) ? mipResolution.height % imageGranularity.height :
imageGranularity.height;
lastBlockExtent.z = (mipResolution.depth % imageGranularity.depth) ? mipResolution.depth % imageGranularity.depth :
imageGranularity.depth;
// Size of the page, including the nonuniform size on the edges of the image
VkExtent3D pageSize{(x == sparseBindCounts.x - 1) ? lastBlockExtent.x : imageGranularity.width,
(y == sparseBindCounts.y - 1) ? lastBlockExtent.y : imageGranularity.height,
(z == sparseBindCounts.z - 1) ? lastBlockExtent.z : imageGranularity.depth};
// Set and return the page information, with empty memory allocation
VkImageSubresource subresource{VK_IMAGE_ASPECT_COLOR_BIT, mipLevel, layer};
SparseImagePage newPage{};
newPage.offset = offset;
newPage.extent = pageSize;
newPage.size = memoryReqs.alignment;
newPage.mipLevel = mipLevel;
newPage.layer = layer;
newPage.imageMemoryBind.offset = offset;
newPage.imageMemoryBind.extent = pageSize;
newPage.imageMemoryBind.subresource = subresource;
newPage.index = pageIndex;
newPage.allocationFlags = SparseImagePage::eNone;
newPage.timeStamp = ~0u;
return newPage;
}

View file

@ -0,0 +1,287 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <array>
#include <algorithm>
#include <vector>
#include <unordered_map>
#include <vulkan/vulkan_core.h>
#include <glm/glm.hpp>
#include "nvvk/memorymanagement_vk.hpp"
#include "nvh/nvprint.hpp"
#include "nvh/container_utils.hpp"
// Mip level indexing relies on 32-bit unsigned integers
#define NVVK_SPARSE_IMAGE_MAX_MIP_LEVELS 32u
// Special error value used to catch indexing issues
#define NVVK_SPARSE_IMAGE_INVALID_INDEX (~0u)
namespace nvvk {
/** @DOC_START
# struct nvvk::SparseImagePage
> Virtual texture page as a part of the partially resident texture. Contains memory bindings, offsets and status information.
The virtual texture page is a part of the sparse texture; it stores the page's memory binding, its location within the texture, and its status.
@DOC_END */
struct SparseImagePage
{
// Allocation flags to keep track of the next action
// to take on the page memory
enum AllocationFlagBits
{
// No action, keep the page in memory
eNone = 0,
// The page will have to be discarded once
// no image references it
eMarkedForDeletion = 1
};
typedef uint32_t AllocationFlags;
// Offset in the mip level of the sparse texture, in texels
VkOffset3D offset{};
// Page extent, in texels
VkExtent3D extent{};
// Sparse image memory bind for this page
VkSparseImageMemoryBind imageMemoryBind{};
// Size in bytes of the page
VkDeviceSize size{};
// Mip level of the page
uint32_t mipLevel{NVVK_SPARSE_IMAGE_INVALID_INDEX};
// Layer the page belongs to
uint32_t layer{NVVK_SPARSE_IMAGE_INVALID_INDEX};
nvvk::MemHandle allocation;
// Index of the page based on its location in the sparse texture
// index = mipStartIndex + location.x + pageCount.x*(location.y + pageCount.y*location.z)
// where mipStartIndex is the index of the first page of the mip level,
// location is the 3D index of the page in the mip, and pageCount is the number of pages
// of the mip in each dimension
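// For example (hypothetical values), with mipStartIndex = 10, pageCount = (4, 4, 1)
// and location = (2, 3, 0): index = 10 + 2 + 4 * (3 + 4 * 0) = 24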
uint32_t index{0};
// Application-managed timestamp, typically used for cache management
uint32_t timeStamp{~0u};
// Allocation flags for the page, either eNone for a page that is currently in use,
// or eMarkedForDeletion, for pages that will be destroyed as soon as the sparse image
// binding stops referencing them
AllocationFlags allocationFlags{eNone};
// Bind the page to a block of device memory at the given offset
inline void bindDeviceMemory(VkDeviceMemory mem, VkDeviceSize memOffset)
{
imageMemoryBind.memoryOffset = memOffset;
imageMemoryBind.memory = mem;
}
inline bool hasBoundMemory() const { return imageMemoryBind.memory != VkDeviceMemory(); }
};
/** @DOC_START
# struct nvvk::SparseImage
> Sparse image object, containing the virtual texture pages and memory bindings.
The sparse image object contains the virtual texture pages and memory bindings, as well as the memory requirements for the mip tail and the image granularity.
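Example (minimal sketch; `device`, `imageA`, `imageB`, `tailMemory`, `mipLevels`, `arrayLayers` and `extent` are hypothetical handles/values assumed to be created elsewhere, and allocation and synchronization details are omitted):
```cpp
nvvk::SparseImage sparse;
std::array<VkImage, nvvk::SparseImage::s_sparseImageCount> images = {imageA, imageB};
// Set up the page bookkeeping and query the mip tail memory requirements
std::vector<VkMemoryRequirements> tailReqs = sparse.create(device, images, mipLevels, arrayLayers, extent);
// Allocate tail memory based on tailReqs (not shown), then bind it:
// one (memory, offset) pair per mip tail requirement
sparse.bindMipTailMemory({{tailMemory, 0}});
// As pages become resident, addPage() them and refresh the bind info,
// then submit sparse.bindSparseInfo with vkQueueBindSparse
sparse.updateSparseBindInfo();
```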
@DOC_END */
struct SparseImage
{
// Number of VkImages referencing the sparse memory bindings
// This allows updating the bindings of one image while
// rendering with the other in another thread
static const size_t s_sparseImageCount{2};
// Texture image handles (see above)
std::array<VkImage, s_sparseImageCount> images;
// Index of the image that can be used for rendering
uint32_t currentImage{0u};
// Opaque memory bindings for the mip tail
std::vector<VkSparseMemoryBind> opaqueMemoryBinds;
// Memory allocation for the mip tail. This memory is allocated
// upon creating the sparse image, and will remain allocated
// even after a flush call
std::vector<nvvk::MemHandle> mipTailAllocations;
// Memory properties for the sparse texture allocations
VkMemoryPropertyFlags memoryProperties{};
// Sparse queue binding information
VkBindSparseInfo bindSparseInfo{};
// Memory bindings for virtual addressing
std::vector<VkSparseImageMemoryBind> sparseImageMemoryBinds;
// Page identifier, defined by its layer and its page index, which
// is defined as mipStartIndex + location.x + pageCount.x*(location.y + pageCount.y*location.z)
// where mipStartIndex is the index of the first page of the mip level,
// location is the 3D index of the page in the mip, and pageCount is the number of pages
// of the mip in each dimension
struct PageId
{
uint32_t layer{};
uint32_t page{};
};
// Storage for the currently allocated pages
std::unordered_map<PageId, SparseImagePage, nvh::HashAligned32<PageId>, nvh::EqualMem<PageId>> allocatedPages;
// Binding information for sparse texture pages
VkSparseImageMemoryBindInfo imageMemoryBindInfo{};
// Binding information for the mip tail
VkSparseImageOpaqueMemoryBindInfo opaqueMemoryBindInfo{};
// First mip level in mip tail
uint32_t mipTailStart{NVVK_SPARSE_IMAGE_INVALID_INDEX};
// Total sparse texture resolution
VkExtent3D size{};
// Number of possible mip levels of the image
uint32_t mipLevelCount{NVVK_SPARSE_IMAGE_INVALID_INDEX};
// Number of layers
uint32_t layerCount{NVVK_SPARSE_IMAGE_INVALID_INDEX};
// Memory requirements for page and mip tail allocations
VkMemoryRequirements memoryReqs{};
// Granularity of the image, representing the extent of the pages
VkExtent3D imageGranularity{0u, 0u, 0u};
// Get the number of pages currently allocated on the device
size_t getAllocatedPageCount() const { return allocatedPages.size(); }
// Get the image handle for rendering
VkImage getCurrentImage() { return images[currentImage]; }
// Get the image handle for update work
VkImage getWorkImage() { return images[(currentImage + 1) % s_sparseImageCount]; }
// Swap the current and work images
void nextImage()
{
currentImage = (currentImage + 1) % s_sparseImageCount;
imageMemoryBindInfo.image = getWorkImage();
}
// Add mip tail information to the image, return the requested memory requirements for the mip tail
VkMemoryRequirements addMipTail(VkMemoryRequirements generalMemoryReqs,
VkSparseImageMemoryRequirements& sparseMemoryReq,
uint32_t layer = 0u);
// Compute and store the number of pages contained in each mip level
void computeMipPageCounts();
// Create the sparse image, return the memory requirements for the mip tail(s)
std::vector<VkMemoryRequirements> create(VkDevice device,
const std::array<VkImage, s_sparseImageCount> imageDesc,
uint32_t mipLevels,
uint32_t arrayLayers,
const VkExtent3D& extent);
// Bind device memory to the mip tail(s)
void bindMipTailMemory(std::vector<std::pair<VkDeviceMemory, VkDeviceSize>> mipTailMemory);
// Unbind device memory from the mip tail(s)
void unbindMipTailMemory();
// Add a page to the sparse image
void addPage(VkImageSubresource subresource, VkOffset3D offset, VkExtent3D extent, const VkDeviceSize size, const uint32_t mipLevel, uint32_t layer);
// Update the contents of sparseImageMemoryBinds based on the vector of updated page indices and
// set the pointers in the VkBindSparseInfo
// Call before sparse binding to update memory bind list etc.
// No synchronization is added to the VkBindSparseInfo object, the application
// is responsible for adding the proper semaphore before calling vkQueueBindSparse
void updateSparseBindInfo(const std::vector<uint32_t>& updatedPageIndices, uint32_t layer = 0);
// Set the pointers in the VkBindSparseInfo using the contents of sparseImageMemoryBinds
// No synchronization is added to the VkBindSparseInfo object, the application
// is responsible for adding the proper semaphore before calling vkQueueBindSparse
void updateSparseBindInfo();
// Get the index of the beginning of a mip level in the page list
uint32_t mipStartIndex(uint32_t mipLevel);
// Compute the index of a page within a mip level in the page list
inline uint32_t indexInMip(const SparseImagePage& p)
{
glm::uvec3 mipSize(std::max(size.width >> p.mipLevel, 1u), std::max(size.height >> p.mipLevel, 1u),
std::max(size.depth >> p.mipLevel, 1u));
uint32_t pageWidth  = p.extent.width;
uint32_t pageHeight = std::max(1u, p.extent.height);
uint32_t pageDepth  = std::max(1u, p.extent.depth);
if(pageWidth == 0 || pageHeight == 0)
{
LOGE("indexInMip: Invalid page dimensions");
assert(false);
return NVVK_SPARSE_IMAGE_INVALID_INDEX;
}
// Location of the page within the mip level, in page units (texel offset divided by page extent)
glm::uvec3 location(p.offset.x / pageWidth, p.offset.y / pageHeight, p.offset.z / pageDepth);
uint32_t index = location.x + (mipSize.x / pageWidth) * (location.y + location.z * (mipSize.y / pageHeight));
return index;
}
// Compute the index of a page in the page list
inline uint32_t pageIndex(const SparseImagePage& p)
{
uint32_t index = indexInMip(p);
return pageIndex(p.mipLevel, index);
}
// Compute the index of a page in the page list based on its mip level and index within
// that mip level
inline uint32_t pageIndex(uint32_t mipLevel, uint32_t indexInMip)
{
uint32_t mipStart = mipStartIndex(mipLevel);
if(mipStart == NVVK_SPARSE_IMAGE_INVALID_INDEX)
{
LOGE("pageIndex: invalid mip level");
assert(false);
return NVVK_SPARSE_IMAGE_INVALID_INDEX;
}
if(indexInMip == NVVK_SPARSE_IMAGE_INVALID_INDEX)
{
LOGE("pageIndex: cannot find page index in mip level");
assert(false);
return NVVK_SPARSE_IMAGE_INVALID_INDEX;
}
return mipStart + indexInMip;
}
// Compute the indices of the children of a page, representing the same area of the image at a finer mip level
std::vector<uint32_t> pageChildIndices(const SparseImagePage& p);
// Create the page information from its page index and layer
SparseImagePage createPageInfo(uint32_t pageIndex, uint32_t layer);
private:
// Start index of each mip level
std::vector<uint32_t> sparseMipStartIndices;
// Number of pages in each mip level
std::vector<uint32_t> sparseMipPageCounts;
// Total page count for the sparse image
uint32_t sparseMipTotalPageCount{};
};
} // namespace nvvk

View file

@ -0,0 +1,87 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <vector>
#include "vulkan/vulkan_core.h"
namespace nvvk {
/** @DOC_START
# class nvvk::Specialization
> Helper to generate specialization info
Examples:
```cpp
nvvk::Specialization specialization;
specialization.add(0, 5); // Adding value 5 to constant_id=0
VkPipelineShaderStageCreateInfo info;
...
info.pSpecializationInfo = specialization.getSpecialization();
createPipeline();
```
Note: the values are stored in a vector, therefore add all values
before calling getSpecialization(). Construct the pipeline before
the specialization object goes out of scope, and before the returned
pointer is invalidated by adding new values or clearing the data.
@DOC_END */
class Specialization
{
public:
void add(uint32_t constantID, int32_t value)
{
m_specValues.push_back(value);
VkSpecializationMapEntry entry;
entry.constantID = constantID;
entry.size = sizeof(int32_t);
entry.offset = static_cast<uint32_t>(m_specEntries.size() * sizeof(int32_t));
m_specEntries.emplace_back(entry);
}
void add(const std::vector<std::pair<uint32_t, int32_t>>& const_values)
{
for(const auto& v : const_values)
{
add(v.first, v.second);
}
}
VkSpecializationInfo* getSpecialization()
{
m_specInfo.dataSize = static_cast<uint32_t>(m_specValues.size() * sizeof(int32_t));
m_specInfo.pData = m_specValues.data();
m_specInfo.mapEntryCount = static_cast<uint32_t>(m_specEntries.size());
m_specInfo.pMapEntries = m_specEntries.data();
return &m_specInfo;
}
void clear()
{
m_specValues.clear();
m_specEntries.clear();
m_specInfo = {};
}
private:
std::vector<int32_t> m_specValues;
std::vector<VkSpecializationMapEntry> m_specEntries;
VkSpecializationInfo m_specInfo{};
};
} // namespace nvvk

View file

@ -0,0 +1,304 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include <nvvk/stagingmemorymanager_vk.hpp>
#include <nvh/nvprint.hpp>
#include <nvvk/debug_util_vk.hpp>
#include <nvvk/error_vk.hpp>
namespace nvvk {
void StagingMemoryManager::init(MemAllocator* memAllocator, VkDeviceSize stagingBlockSize /*= 64 * 1024 * 1024*/)
{
assert(!m_device);
m_device = memAllocator->getDevice();
m_subToDevice.init(memAllocator, stagingBlockSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, true);
m_subFromDevice.init(memAllocator, stagingBlockSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
true);
m_freeStagingIndex = INVALID_ID_INDEX;
m_stagingIndex = newStagingIndex();
setFreeUnusedOnRelease(true);
}
void StagingMemoryManager::deinit()
{
if(!m_device)
return;
free(false);
m_subFromDevice.deinit();
m_subToDevice.deinit();
m_sets.clear();
m_device = VK_NULL_HANDLE;
}
bool StagingMemoryManager::fitsInAllocated(VkDeviceSize size, bool toDevice /*= true*/) const
{
return toDevice ? m_subToDevice.fitsInAllocated(size) : m_subFromDevice.fitsInAllocated(size);
}
void* StagingMemoryManager::cmdToImage(VkCommandBuffer cmd,
VkImage image,
const VkOffset3D& offset,
const VkExtent3D& extent,
const VkImageSubresourceLayers& subresource,
VkDeviceSize size,
const void* data,
VkImageLayout layout)
{
if(!image)
return nullptr;
VkBuffer srcBuffer;
VkDeviceSize srcOffset;
void* mapping = getStagingSpace(size, srcBuffer, srcOffset, true);
assert(mapping);
if(data)
{
memcpy(mapping, data, size);
}
VkBufferImageCopy cpy;
cpy.bufferOffset = srcOffset;
cpy.bufferRowLength = 0;
cpy.bufferImageHeight = 0;
cpy.imageSubresource = subresource;
cpy.imageOffset = offset;
cpy.imageExtent = extent;
vkCmdCopyBufferToImage(cmd, srcBuffer, image, layout, 1, &cpy);
return data ? nullptr : mapping;
}
void* StagingMemoryManager::cmdToBuffer(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, const void* data)
{
if(!size || !buffer)
{
return nullptr;
}
VkBuffer srcBuffer;
VkDeviceSize srcOffset;
void* mapping = getStagingSpace(size, srcBuffer, srcOffset, true);
assert(mapping);
if(data)
{
memcpy(mapping, data, size);
}
VkBufferCopy cpy;
cpy.size = size;
cpy.srcOffset = srcOffset;
cpy.dstOffset = offset;
vkCmdCopyBuffer(cmd, srcBuffer, buffer, 1, &cpy);
return data ? nullptr : (void*)mapping;
}
const void* StagingMemoryManager::cmdFromBuffer(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size)
{
VkBuffer dstBuffer;
VkDeviceSize dstOffset;
void* mapping = getStagingSpace(size, dstBuffer, dstOffset, false);
VkBufferCopy cpy;
cpy.size = size;
cpy.srcOffset = offset;
cpy.dstOffset = dstOffset;
vkCmdCopyBuffer(cmd, buffer, dstBuffer, 1, &cpy);
return mapping;
}
const void* StagingMemoryManager::cmdFromImage(VkCommandBuffer cmd,
VkImage image,
const VkOffset3D& offset,
const VkExtent3D& extent,
const VkImageSubresourceLayers& subresource,
VkDeviceSize size,
VkImageLayout layout)
{
VkBuffer dstBuffer;
VkDeviceSize dstOffset;
void* mapping = getStagingSpace(size, dstBuffer, dstOffset, false);
VkBufferImageCopy cpy;
cpy.bufferOffset = dstOffset;
cpy.bufferRowLength = 0;
cpy.bufferImageHeight = 0;
cpy.imageSubresource = subresource;
cpy.imageOffset = offset;
cpy.imageExtent = extent;
vkCmdCopyImageToBuffer(cmd, image, layout, dstBuffer, 1, &cpy);
return mapping;
}
void StagingMemoryManager::finalizeResources(VkFence fence)
{
if(m_sets[m_stagingIndex].entries.empty())
return;
m_sets[m_stagingIndex].fence = fence;
m_sets[m_stagingIndex].manualSet = false;
m_stagingIndex = newStagingIndex();
}
StagingMemoryManager::SetID StagingMemoryManager::finalizeResourceSet()
{
SetID setID;
if(m_sets[m_stagingIndex].entries.empty())
return setID;
setID.index = m_stagingIndex;
m_sets[m_stagingIndex].fence = nullptr;
m_sets[m_stagingIndex].manualSet = true;
m_stagingIndex = newStagingIndex();
return setID;
}
void* StagingMemoryManager::getStagingSpace(VkDeviceSize size, VkBuffer& buffer, VkDeviceSize& offset, bool toDevice)
{
assert(m_sets[m_stagingIndex].index == m_stagingIndex && "illegal index, did you forget finalizeResources");
BufferSubAllocator::Handle handle = toDevice ? m_subToDevice.subAllocate(size) : m_subFromDevice.subAllocate(size);
assert(handle);
BufferSubAllocator::Binding info = toDevice ? m_subToDevice.getSubBinding(handle) : m_subFromDevice.getSubBinding(handle);
buffer = info.buffer;
offset = info.offset;
// append used space to current staging set list
m_sets[m_stagingIndex].entries.push_back({handle, toDevice});
return toDevice ? m_subToDevice.getSubMapping(handle) : m_subFromDevice.getSubMapping(handle);
}
void StagingMemoryManager::releaseResources(uint32_t stagingID)
{
if(stagingID == INVALID_ID_INDEX)
return;
StagingSet& set = m_sets[stagingID];
assert(set.index == stagingID);
// free used allocation ranges
for(auto& itentry : set.entries)
{
if(itentry.toDevice)
{
m_subToDevice.subFree(itentry.handle);
}
else
{
m_subFromDevice.subFree(itentry.handle);
}
}
set.entries.clear();
// update the set.index with the current head of the free list
// pop its old value
m_freeStagingIndex = setIndexValue(set.index, m_freeStagingIndex);
}
void StagingMemoryManager::releaseResources()
{
for(auto& itset : m_sets)
{
if(!itset.entries.empty() && !itset.manualSet && (!itset.fence || vkGetFenceStatus(m_device, itset.fence) == VK_SUCCESS))
{
releaseResources(itset.index);
itset.fence = NULL;
itset.manualSet = false;
}
}
// special case for ease of use if there is only one
if(m_stagingIndex == 0 && m_freeStagingIndex == 0)
{
m_freeStagingIndex = setIndexValue(m_sets[0].index, 0);
}
}
float StagingMemoryManager::getUtilization(VkDeviceSize& allocatedSize, VkDeviceSize& usedSize) const
{
VkDeviceSize aSize = 0;
VkDeviceSize uSize = 0;
m_subFromDevice.getUtilization(aSize, uSize);
allocatedSize = aSize;
usedSize = uSize;
m_subToDevice.getUtilization(aSize, uSize);
allocatedSize += aSize;
usedSize += uSize;
return float(double(usedSize) / double(allocatedSize));
}
void StagingMemoryManager::free(bool unusedOnly)
{
m_subToDevice.free(unusedOnly);
m_subFromDevice.free(unusedOnly);
}
uint32_t StagingMemoryManager::newStagingIndex()
{
// find free slot
if(m_freeStagingIndex != INVALID_ID_INDEX)
{
uint32_t newIndex = m_freeStagingIndex;
// this updates the free link-list
m_freeStagingIndex = setIndexValue(m_sets[newIndex].index, newIndex);
assert(m_sets[newIndex].index == newIndex);
return m_sets[newIndex].index;
}
// otherwise push to end
uint32_t newIndex = (uint32_t)m_sets.size();
StagingSet info;
info.index = newIndex;
m_sets.push_back(info);
assert(m_sets[newIndex].index == newIndex);
return newIndex;
}
} // namespace nvvk

View file

@ -0,0 +1,289 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2019-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <string>
#include <vector>
#include <vulkan/vulkan_core.h>
#include "buffersuballocator_vk.hpp"
namespace nvvk {
#define NVVK_DEFAULT_STAGING_BLOCKSIZE (VkDeviceSize(64) * 1024 * 1024)
//////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::StagingMemoryManager
nvvk::StagingMemoryManager class is a utility that manages host visible
buffers and their allocations in an opaque fashion to assist
asynchronous transfers between device and host.
The memory for this is allocated using the provided
[nvvk::MemAllocator](#class-nvvkmemallocator).
The collection of the transfer resources is represented by nvvk::StagingID.
The necessary buffer space is sub-allocated and recycled by using one
[nvvk::BufferSubAllocator](#class-nvvkbuffersuballocator) per transfer direction (to or from device).
> **WARNING:**
> - cannot manage a copy > 4 GB
Usage:
- Enqueue transfers into your VkCommandBuffer and then finalize the copy operations.
- Associate the copy operations with a VkFence or retrieve a SetID
- Releasing the resources allows the buffer space to be safely recycled for future transfers.
> We use fences as a way to garbage collect here, however a more robust solution
> may be implementing some sort of ticketing/timeline system.
> If a fence is recycled, then this class may not be aware that the fence represents a different
> submission, likewise if the fence is deleted elsewhere problems can occur.
> You may want to use the manual "SetID" system in that case.
Example :
```cpp
StagingMemoryManager staging;
staging.init(memAllocator);
// Enqueue copy operations of data to target buffer.
// This internally manages the required staging resources
staging.cmdToBuffer(cmd, targetBuffer, 0, targetSize, targetData);
// you can also get access to a temporary mapped pointer and fill
// the staging buffer directly
vertices = staging.cmdToBufferT<Vertex>(cmd, targetBuffer, 0, targetSize);
// OPTION A:
// associate all previous copy operations with a fence (or not)
staging.finalizeResources( fence );
..
// every once in a while call
staging.releaseResources();
// this will release all those without fence, or those
// who had a fence that completed (but never manual SetIDs, see next).
// OPTION B
// alternatively manage the resource release yourself.
// The SetID represents the staging resources
// since any last finalize.
sid = staging.finalizeResourceSet();
...
// You need to ensure these transfers and their staging
// data access completed yourself prior releasing the set.
//
// This is particularly useful for managing downloads from
// device. The "from" functions return a pointer where the
// data will be copied to. You want to use this pointer
// after the device-side transfer completed, and then
// release its resources once you are done using it.
staging.releaseResourceSet(sid);
```
@DOC_END */
class StagingMemoryManager
{
public:
static const uint32_t INVALID_ID_INDEX = ~0;
//////////////////////////////////////////////////////////////////////////
class SetID
{
friend StagingMemoryManager;
private:
uint32_t index = INVALID_ID_INDEX;
};
StagingMemoryManager(StagingMemoryManager const&) = delete;
StagingMemoryManager& operator=(StagingMemoryManager const&) = delete;
StagingMemoryManager() { m_debugName = "nvvk::StagingMemManager:" + std::to_string((uint64_t)this); }
StagingMemoryManager(MemAllocator* memAllocator, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE)
{
init(memAllocator, stagingBlockSize);
}
virtual ~StagingMemoryManager() { deinit(); }
void init(MemAllocator* memAllocator, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
void deinit();
void setDebugName(const std::string& name) { m_debugName = name; }
// if true (default) we free the memory completely when released
// otherwise we keep blocks around for re-use until freeUnused() is called
void setFreeUnusedOnRelease(bool state)
{
m_subToDevice.setKeepLastBlockOnFree(!state);
m_subFromDevice.setKeepLastBlockOnFree(!state);
}
// test if there is enough space in current allocations
bool fitsInAllocated(VkDeviceSize size, bool toDevice = true) const;
// if data != nullptr memcpies to mapping and returns nullptr
// otherwise returns temporary mapping (valid until "complete" functions)
void* cmdToImage(VkCommandBuffer cmd,
VkImage image,
const VkOffset3D& offset,
const VkExtent3D& extent,
const VkImageSubresourceLayers& subresource,
VkDeviceSize size,
const void* data,
VkImageLayout layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
template <class T>
T* cmdToImageT(VkCommandBuffer cmd,
VkImage image,
const VkOffset3D& offset,
const VkExtent3D& extent,
const VkImageSubresourceLayers& subresource,
VkDeviceSize size,
const void* data,
VkImageLayout layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
{
return (T*)cmdToImage(cmd, image, offset, extent, subresource, size, data, layout);
}
// pointer can be used after cmd execution, but is only valid until the associated resources have been released
const void* cmdFromImage(VkCommandBuffer cmd,
VkImage image,
const VkOffset3D& offset,
const VkExtent3D& extent,
const VkImageSubresourceLayers& subresource,
VkDeviceSize size,
VkImageLayout layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
template <class T>
const T* cmdFromImageT(VkCommandBuffer cmd,
VkImage image,
const VkOffset3D& offset,
const VkExtent3D& extent,
const VkImageSubresourceLayers& subresource,
VkDeviceSize size,
VkImageLayout layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
{
return (const T*)cmdFromImage(cmd, image, offset, extent, subresource, size, layout);
}
// if data != nullptr memcpies to mapping and returns nullptr
// otherwise returns temporary mapping (valid until appropriate release)
void* cmdToBuffer(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, const void* data);
template <class T>
T* cmdToBufferT(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size)
{
return (T*)cmdToBuffer(cmd, buffer, offset, size, nullptr);
}
// pointer can be used after cmd execution, but is only valid until the associated resources have been released
const void* cmdFromBuffer(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size);
template <class T>
const T* cmdFromBufferT(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size)
{
return (const T*)cmdFromBuffer(cmd, buffer, offset, size);
}
// closes the batch of staging resources since last finalize call
// and associates it with a fence for later release.
void finalizeResources(VkFence fence = VK_NULL_HANDLE);
// releases the staging resources whose fences have completed
// and those who had no fence at all, skips resourceSets.
void releaseResources();
// closes the batch of staging resources since last finalize call
// and returns a resource set handle that can be used to release them
SetID finalizeResourceSet();
// releases the staging resources from this particular
// resource set.
void releaseResourceSet(SetID setid) { releaseResources(setid.index); }
// frees staging memory no longer in use
void freeUnused() { free(true); }
float getUtilization(VkDeviceSize& allocatedSize, VkDeviceSize& usedSize) const;
protected:
// The implementation uses two major arrays:
// - Block stores VkBuffers that we sub-allocate the staging space from
// - StagingSet stores all such sub-allocations that were used
// in one batch of operations. Each batch is closed with
// finalizeResources, and typically associated with a fence.
// As such the resources are given back for recycling once the fence has completed.
// To recycle StagingSet structures within the arrays
// we use a linked list of array indices. The "index" element
// in the struct refers to the next free list item, or itself
// when in use.
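// For example (hypothetical state): with three sets {0, 1, 2} and m_freeStagingIndex == 2,
// releasing set 1 stores the old free-list head in m_sets[1].index (now 2) and sets
// m_freeStagingIndex = 1, making set 1 the new head of the free list.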
struct Entry
{
BufferSubAllocator::Handle handle;
bool toDevice;
};
struct StagingSet
{
uint32_t index = INVALID_ID_INDEX;
VkFence fence = VK_NULL_HANDLE;
bool manualSet = false;
std::vector<Entry> entries;
};
VkDevice m_device = VK_NULL_HANDLE;
BufferSubAllocator m_subToDevice;
BufferSubAllocator m_subFromDevice;
std::vector<StagingSet> m_sets;
// active staging index, must be valid at all times
uint32_t m_stagingIndex;
// linked-list to next free staging set
uint32_t m_freeStagingIndex;
std::string m_debugName;
uint32_t setIndexValue(uint32_t& index, uint32_t newValue)
{
uint32_t oldValue = index;
index = newValue;
return oldValue;
}
void free(bool unusedOnly);
uint32_t newStagingIndex();
void* getStagingSpace(VkDeviceSize size, VkBuffer& buffer, VkDeviceSize& offset, bool toDevice);
void releaseResources(uint32_t stagingID);
};
} // namespace nvvk

View file

@ -0,0 +1,2 @@
..\..\nvpro_internal\luajit\win_x64\luajit.exe structs_vk.lua
pause

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,350 @@
local header =
[[
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
//////////////////////////////////////////////////////////////////////////
/**
# function nvvk::make, nvvk::clear
Contains templated `nvvk::make<T>` and `nvvk::clear<T>` functions that are
auto-generated by `structs_vk.lua`. The functions provide default
structs for the Vulkan C api by initializing the `VkStructureType sType`
field (also for nested structs) and clearing the rest to zero.
``` c++
auto compCreateInfo = nvvk::make<VkComputePipelineCreateInfo>();
```
*/
#pragma once
]]
-- HOW TO USE
--
-- 1. Setup environment variable NVVK_VULKAN_XML pointing to vk.xml
-- or use VULKAN_SDK >= 1.2.135.0
--
-- 2. Modify the extension subset (allowlist)
--
-- 3. Check out this and the other structs_vk files for write access
--
-- 4. Run with a lua5.1 compatible lua runtime and the xml2lua project
-- https://github.com/manoelcampos/xml2lua
-- (shared_internal has all the files).
--
-- lua structs_vk.lua
--
-- within this directory.
local VULKAN_XML = os.getenv("NVVK_VULKAN_XML") or os.getenv("VULKAN_SDK").."/share/vulkan/registry/vk.xml"
local extensionSubset = [[
VK_KHR_acceleration_structure
VK_KHR_ray_query
VK_KHR_ray_tracing_pipeline
VK_KHR_push_descriptor
VK_KHR_8bit_storage
VK_KHR_create_renderpass2
VK_KHR_depth_stencil_resolve
VK_KHR_draw_indirect_count
VK_KHR_driver_properties
VK_KHR_pipeline_executable_properties
VK_NV_compute_shader_derivatives
VK_NV_cooperative_matrix
VK_NV_corner_sampled_image
VK_NV_coverage_reduction_mode
VK_NV_dedicated_allocation_image_aliasing
VK_NV_mesh_shader
VK_NV_ray_tracing
VK_NV_representative_fragment_test
VK_NV_shading_rate_image
VK_NV_viewport_array2
VK_NV_viewport_swizzle
VK_NV_scissor_exclusive
VK_NV_device_generated_commands
VK_EXT_buffer_device_address
VK_EXT_debug_marker
VK_EXT_calibrated_timestamps
VK_EXT_conservative_rasterization
VK_EXT_descriptor_indexing
VK_EXT_depth_clip_enable
VK_EXT_memory_budget
VK_EXT_memory_priority
VK_EXT_pci_bus_info
VK_EXT_sample_locations
VK_EXT_sampler_filter_minmax
VK_EXT_texel_buffer_alignment
VK_EXT_debug_utils
VK_EXT_host_query_reset
VK_KHR_external_memory_win32
VK_KHR_external_semaphore_win32
VK_KHR_external_fence_win32
VK_KHR_external_memory_fd
VK_KHR_external_semaphore_fd
VK_EXT_validation_features
VK_KHR_swapchain
]]
local function generate(outfilename, header, whitelist)
local override = {
VkRayTracingShaderGroupCreateInfoNV =
[[
template<> inline VkRayTracingShaderGroupCreateInfoNV make<VkRayTracingShaderGroupCreateInfoNV>(){
VkRayTracingShaderGroupCreateInfoNV ret = {VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV};
ret.generalShader = VK_SHADER_UNUSED_NV;
ret.closestHitShader = VK_SHADER_UNUSED_NV;
ret.anyHitShader = VK_SHADER_UNUSED_NV;
ret.intersectionShader = VK_SHADER_UNUSED_NV;
return ret;
}
]],
VkRayTracingShaderGroupCreateInfoKHR =
[[
template<> inline VkRayTracingShaderGroupCreateInfoKHR make<VkRayTracingShaderGroupCreateInfoKHR>(){
VkRayTracingShaderGroupCreateInfoKHR ret = {VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR};
ret.generalShader = VK_SHADER_UNUSED_KHR;
ret.closestHitShader = VK_SHADER_UNUSED_KHR;
ret.anyHitShader = VK_SHADER_UNUSED_KHR;
ret.intersectionShader = VK_SHADER_UNUSED_KHR;
return ret;
}
]],
}
local function toTab(str)
local tab = {}
for name in str:gmatch("[%w_]+") do
tab[name] = true
end
return tab
end
local whitelist = whitelist and toTab(whitelist)
local xml2lua = require("xml2lua")
local handler = require("xmlhandler.tree")
local filename = VULKAN_XML
local f = io.open(filename,"rt")
assert(f, filename.." not found")
local xml = f:read("*a")
f:close()
-- Bug workaround https://github.com/manoelcampos/xml2lua/issues/35
xml = xml:gsub("(<member>)(<type>[%w_]+</type>)%* ", function(p,typ)
-- add _ dummy symbol
return "<member>_"..typ.."* "
end)
local parser = xml2lua.parser(handler)
parser:parse(xml)
local version = xml:match("VK_HEADER_VERSION</name> (%d+)")
assert(version)
local structenums = {}
local structextensions = {}
local function enumID(name)
name = name:lower()
name = name:gsub("_","")
return name
end
for name in xml:gmatch('"VK_STRUCTURE_TYPE_([%w_]-)"') do
structenums[enumID(name)] = "VK_STRUCTURE_TYPE_"..name
end
xml = nil
local types = handler.root.registry.types
local commands = handler.root.registry.commands
local extensions = handler.root.registry.extensions.extension
-- debugging
if (false) then
local serpent = require "serpent"
local f = io.open(filename..".types.lua", "wt")
f:write(serpent.block(types))
local f = io.open(filename..".exts.lua", "wt")
f:write(serpent.block(extensions))
end
-- build list struct types with structure type init
local lktypes = {}
local lkall = {}
local lkcore = {}
for _,v in ipairs(types.type) do
if (v._attr.category == "struct") then
local alias = v._attr.alias
local name = v._attr.name
if (alias) then
lktypes[name] = lktypes[alias]
else
local members = type(v.member[1]) == "table" and v.member or {v.member}
local tab = {name=name, members=members}
if (members[1].type == "VkStructureType") then
lktypes[name] = tab
lkcore[name] = true
end
lkall[name] = tab
end
end
end
local platforms = {
ggp = "VK_USE_PLATFORM_GGP",
win32 = "VK_USE_PLATFORM_WIN32_KHR",
vi = "VK_USE_PLATFORM_VI_NN",
ios = "VK_USE_PLATFORM_IOS_MVK",
macos = "VK_USE_PLATFORM_MACOS_MVK",
android = "VK_USE_PLATFORM_ANDROID_KHR",
fuchsia = "VK_USE_PLATFORM_FUCHSIA",
metal = "VK_USE_PLATFORM_METAL_EXT",
xlib = "VK_USE_PLATFORM_XLIB_KHR",
xcb = "VK_USE_PLATFORM_XCB_KHR",
wayland = "VK_USE_PLATFORM_WAYLAND_KHR",
xlib_xrandr = "VK_USE_PLATFORM_XLIB_XRANDR_EXT",
}
-- fill extension list
local extLists = {}
for _,v in ipairs(extensions) do
if (v.require) then
local reqs = v.require[1] and v.require or {v.require}
local list = {}
local valid = false
for _,r in ipairs(reqs) do
if (r.type) then
local types = r.type[1] and r.type or {r.type}
for _,t in ipairs(types) do
local tname = t._attr.name
if (lktypes[tname]) then
lkcore[tname] = false
table.insert(list, tname)
valid = true
end
end
end
end
if (valid and ((whitelist and whitelist[v._attr.name]) or not whitelist)) then
table.insert(extLists, {list=list, ext=v._attr.name, platform=platforms[v._attr.platform or "_"] })
end
end
end
-- fill core list
local coreList = {}
for _,v in ipairs(types.type) do
if (v._attr.category == "struct" and lkcore[v._attr.name]) then
table.insert(coreList, v._attr.name)
end
end
local out = ""
out = out.." template <class T> T make(){ return T(); }\n"
out = out.." template <class T> void clear(T& ref){ ref = make<T>(); }\n"
local function process(t)
local ext = nil
for _,sname in ipairs(t.list) do
local enum = structenums[enumID(sname:match("Vk(.*)"))]
local struct = lktypes[sname]
if (enum and struct and not struct.exported) then
if ((not ext) and t.ext) then
out = out.."#if "..t.ext.."\n"
ext = t.ext
end
local complex = ""
local function addComplex(prefix, members)
for _,m in ipairs(members) do
local mvar = m.name
local mtype = m.type
local mstruct = lkall[mtype]
-- skip pointers
if (mstruct and not m[1]) then
local mexp = mstruct.exported
local mmembers = mstruct.members
if (mexp == true) then
complex = complex..prefix..mvar.." = make<"..mtype..">();\n"
elseif (mexp) then
complex = complex..prefix..mvar.." = {"..mexp.."};\n"
elseif (mmembers) then
addComplex(prefix..mvar..".", mmembers)
end
end
end
end
addComplex(" ret.", struct.members)
if (override[sname]) then
out = out..override[sname]
print("override", sname)
struct.exported = true
elseif (complex ~= "") then
out = out.." template<> inline "..sname.." make<"..sname..">(){\n "..sname.." ret = {"..enum.."};\n"..complex.." return ret;\n }\n"
print("complex", sname)
struct.exported = true
else
out = out.." template<> inline "..sname.." make<"..sname..">(){\n return "..sname.."{"..enum.."};\n }\n"
struct.exported = enum
end
end
end
if (ext) then
out = out.."#endif\n"
end
end
-- process core
process({list=coreList})
-- process whitelisted extensions
for _,ext in ipairs(extLists) do
process(ext)
end
local outfile = io.open(outfilename, "wt")
assert(outfile, "could not open "..outfilename.." for writing")
outfile:write("/* based on VK_HEADER_VERSION "..version.." */\n")
outfile:write(header)
outfile:write("namespace nvvk {\n")
outfile:write(out)
outfile:write("}\n")
outfile:flush()
outfile:close()
end
generate("structs_vk.hpp", header, extensionSubset)

View file

@ -0,0 +1,468 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#include "swapchain_vk.hpp"
#include "error_vk.hpp"
#include <assert.h>
#include <nvvk/debug_util_vk.hpp>
namespace nvvk {
bool SwapChain::init(VkDevice device,
VkPhysicalDevice physicalDevice,
VkQueue queue,
uint32_t queueFamilyIndex,
VkSurfaceKHR surface,
VkFormat format,
VkImageUsageFlags imageUsage)
{
assert(!m_device);
m_device = device;
m_physicalDevice = physicalDevice;
m_swapchain = VK_NULL_HANDLE;
m_queue = queue;
m_queueFamilyIndex = queueFamilyIndex;
m_changeID = 0;
m_currentSemaphore = 0;
m_surface = surface;
m_imageUsage = imageUsage;
VkResult result;
// Get the list of VkFormat's that are supported:
uint32_t formatCount;
result = vkGetPhysicalDeviceSurfaceFormatsKHR(m_physicalDevice, m_surface, &formatCount, nullptr);
assert(!result);
std::vector<VkSurfaceFormatKHR> surfFormats(formatCount);
result = vkGetPhysicalDeviceSurfaceFormatsKHR(m_physicalDevice, m_surface, &formatCount, surfFormats.data());
assert(!result);
// If the format list includes just one entry of VK_FORMAT_UNDEFINED,
// the surface has no preferred format. Otherwise, at least one
// supported format will be returned.
m_surfaceFormat = VK_FORMAT_B8G8R8A8_UNORM;
m_surfaceColor = surfFormats[0].colorSpace;
for(uint32_t i = 0; i < formatCount; i++)
{
if(surfFormats[i].format == format)
{
m_surfaceFormat = format;
m_surfaceColor = surfFormats[i].colorSpace;
return true;
}
}
return false;
}
VkExtent2D SwapChain::update(int width, int height, bool vsync)
{
m_changeID++;
VkResult err;
VkSwapchainKHR oldSwapchain = m_swapchain;
err = waitIdle();
if(nvvk::checkResult(err, __FILE__, __LINE__))
{
exit(-1);
}
// Check the surface capabilities and formats
VkSurfaceCapabilitiesKHR surfCapabilities;
err = vkGetPhysicalDeviceSurfaceCapabilitiesKHR(m_physicalDevice, m_surface, &surfCapabilities);
assert(!err);
uint32_t presentModeCount;
err = vkGetPhysicalDeviceSurfacePresentModesKHR(m_physicalDevice, m_surface, &presentModeCount, nullptr);
assert(!err);
std::vector<VkPresentModeKHR> presentModes(presentModeCount);
err = vkGetPhysicalDeviceSurfacePresentModesKHR(m_physicalDevice, m_surface, &presentModeCount, presentModes.data());
assert(!err);
VkExtent2D swapchainExtent;
// width and height are either both -1, or both not -1.
if(surfCapabilities.currentExtent.width == (uint32_t)-1)
{
// If the surface size is undefined, the size is set to
// the size of the images requested.
swapchainExtent.width = width;
swapchainExtent.height = height;
}
else
{
// If the surface size is defined, the swap chain size must match
swapchainExtent = surfCapabilities.currentExtent;
}
// test against valid size, typically hit when windows are minimized, the app must
// prevent triggering this code accordingly
assert(swapchainExtent.width && swapchainExtent.height);
// everyone must support FIFO mode
VkPresentModeKHR swapchainPresentMode = VK_PRESENT_MODE_FIFO_KHR;
// vsync is off: try to find a faster alternative to FIFO
if(!vsync)
{
for(uint32_t i = 0; i < presentModeCount; i++)
{
if(presentModes[i] == VK_PRESENT_MODE_MAILBOX_KHR)
{
swapchainPresentMode = VK_PRESENT_MODE_MAILBOX_KHR;
}
if(presentModes[i] == VK_PRESENT_MODE_IMMEDIATE_KHR)
{
swapchainPresentMode = VK_PRESENT_MODE_IMMEDIATE_KHR;
}
if(swapchainPresentMode == m_preferredVsyncOffMode)
{
break;
}
}
}
// Determine the number of VkImage's to use in the swap chain (we desire to
// own only 1 image at a time, besides the images being displayed and
// queued for display):
uint32_t desiredNumberOfSwapchainImages = surfCapabilities.minImageCount + 1;
if((surfCapabilities.maxImageCount > 0) && (desiredNumberOfSwapchainImages > surfCapabilities.maxImageCount))
{
// Application must settle for fewer images than desired:
desiredNumberOfSwapchainImages = surfCapabilities.maxImageCount;
}
VkSurfaceTransformFlagBitsKHR preTransform;
if(surfCapabilities.supportedTransforms & VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR)
{
preTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
}
else
{
preTransform = surfCapabilities.currentTransform;
}
VkSwapchainCreateInfoKHR swapchain = {VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR};
swapchain.surface = m_surface;
swapchain.minImageCount = desiredNumberOfSwapchainImages;
swapchain.imageFormat = m_surfaceFormat;
swapchain.imageColorSpace = m_surfaceColor;
swapchain.imageExtent = swapchainExtent;
swapchain.imageUsage = m_imageUsage;
swapchain.preTransform = preTransform;
swapchain.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
swapchain.imageArrayLayers = 1;
swapchain.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
swapchain.queueFamilyIndexCount = 1;
swapchain.pQueueFamilyIndices = &m_queueFamilyIndex;
swapchain.presentMode = swapchainPresentMode;
swapchain.oldSwapchain = oldSwapchain;
swapchain.clipped = true;
err = vkCreateSwapchainKHR(m_device, &swapchain, nullptr, &m_swapchain);
assert(!err);
nvvk::DebugUtil debugUtil(m_device);
debugUtil.setObjectName(m_swapchain, "SwapChain::m_swapchain");
// If we just re-created an existing swapchain, we should destroy the old
// swapchain at this point.
// Note: destroying the swapchain also cleans up all its associated
// presentable images once the platform is done with them.
if(oldSwapchain != VK_NULL_HANDLE)
{
for(auto it : m_entries)
{
vkDestroyImageView(m_device, it.imageView, nullptr);
}
for(auto it : m_semaphores)
{
vkDestroySemaphore(m_device, it.readSemaphore, nullptr);
vkDestroySemaphore(m_device, it.writtenSemaphore, nullptr);
}
vkDestroySwapchainKHR(m_device, oldSwapchain, nullptr);
}
err = vkGetSwapchainImagesKHR(m_device, m_swapchain, &m_imageCount, nullptr);
assert(!err);
m_entries.resize(m_imageCount);
m_barriers.resize(m_imageCount);
std::vector<VkImage> images(m_imageCount);
err = vkGetSwapchainImagesKHR(m_device, m_swapchain, &m_imageCount, images.data());
assert(!err);
//
// Image views
//
for(uint32_t i = 0; i < m_imageCount; i++)
{
Entry& entry = m_entries[i];
// image
entry.image = images[i];
// imageview
VkImageViewCreateInfo viewCreateInfo = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
nullptr,
0,
entry.image,
VK_IMAGE_VIEW_TYPE_2D,
m_surfaceFormat,
{VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A},
{VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}};
err = vkCreateImageView(m_device, &viewCreateInfo, nullptr, &entry.imageView);
assert(!err);
// initial barriers
VkImageSubresourceRange range = {0};
range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
range.baseMipLevel = 0;
range.levelCount = VK_REMAINING_MIP_LEVELS;
range.baseArrayLayer = 0;
range.layerCount = VK_REMAINING_ARRAY_LAYERS;
VkImageMemoryBarrier memBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
memBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
memBarrier.dstAccessMask = 0;
memBarrier.srcAccessMask = 0;
memBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
memBarrier.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
memBarrier.image = entry.image;
memBarrier.subresourceRange = range;
m_barriers[i] = memBarrier;
debugUtil.setObjectName(entry.image, "swapchainImage:" + std::to_string(i));
debugUtil.setObjectName(entry.imageView, "swapchainImageView:" + std::to_string(i));
}
m_semaphores.resize(getSemaphoreCycleCount());
for(uint32_t i = 0; i < getSemaphoreCycleCount(); i++)
{
SemaphoreEntry& entry = m_semaphores[i];
// semaphore
VkSemaphoreCreateInfo semCreateInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO};
err = vkCreateSemaphore(m_device, &semCreateInfo, nullptr, &entry.readSemaphore);
assert(!err);
err = vkCreateSemaphore(m_device, &semCreateInfo, nullptr, &entry.writtenSemaphore);
assert(!err);
debugUtil.setObjectName(entry.readSemaphore, "swapchainReadSemaphore:" + std::to_string(i));
debugUtil.setObjectName(entry.writtenSemaphore, "swapchainWrittenSemaphore:" + std::to_string(i));
}
m_updateWidth = width;
m_updateHeight = height;
m_vsync = vsync;
m_extent = swapchainExtent;
m_currentSemaphore = 0;
m_currentImage = 0;
return swapchainExtent;
}
void SwapChain::deinitResources()
{
if(!m_device)
return;
VkResult result = waitIdle();
if(nvvk::checkResult(result, __FILE__, __LINE__))
{
exit(-1);
}
for(auto it : m_entries)
{
vkDestroyImageView(m_device, it.imageView, nullptr);
}
for(auto it : m_semaphores)
{
vkDestroySemaphore(m_device, it.readSemaphore, nullptr);
vkDestroySemaphore(m_device, it.writtenSemaphore, nullptr);
}
if(m_swapchain)
{
vkDestroySwapchainKHR(m_device, m_swapchain, nullptr);
m_swapchain = VK_NULL_HANDLE;
}
m_entries.clear();
m_barriers.clear();
}
void SwapChain::deinit()
{
deinitResources();
m_physicalDevice = VK_NULL_HANDLE;
m_device = VK_NULL_HANDLE;
m_surface = VK_NULL_HANDLE;
m_changeID = 0;
}
bool SwapChain::acquire(bool* pRecreated, SwapChainAcquireState* pOut)
{
return acquireCustom(VK_NULL_HANDLE, m_updateWidth, m_updateHeight, pRecreated, pOut);
}
bool SwapChain::acquireAutoResize(int width, int height, bool* pRecreated, SwapChainAcquireState* pOut)
{
return acquireCustom(VK_NULL_HANDLE, width, height, pRecreated, pOut);
}
bool SwapChain::acquireCustom(VkSemaphore argSemaphore, bool* pRecreated, SwapChainAcquireState* pOut)
{
return acquireCustom(argSemaphore, m_updateWidth, m_updateHeight, pRecreated, pOut);
}
bool SwapChain::acquireCustom(VkSemaphore argSemaphore, int width, int height, bool* pRecreated, SwapChainAcquireState* pOut)
{
bool didRecreate = false;
if(width != m_updateWidth || height != m_updateHeight)
{
deinitResources();
update(width, height);
m_updateWidth = width;
m_updateHeight = height;
didRecreate = true;
}
if(pRecreated != nullptr)
{
*pRecreated = didRecreate;
}
// try recreation a few times
for(int i = 0; i < 2; i++)
{
VkSemaphore semaphore = argSemaphore ? argSemaphore : getActiveReadSemaphore();
VkResult result;
result = vkAcquireNextImageKHR(m_device, m_swapchain, UINT64_MAX, semaphore, (VkFence)VK_NULL_HANDLE, &m_currentImage);
if(result == VK_SUCCESS)
{
if(pOut != nullptr)
{
pOut->image = getActiveImage();
pOut->view = getActiveImageView();
pOut->index = getActiveImageIndex();
pOut->waitSem = getActiveReadSemaphore();
pOut->signalSem = getActiveWrittenSemaphore();
}
return true;
}
else if(result == VK_ERROR_OUT_OF_DATE_KHR || result == VK_SUBOPTIMAL_KHR)
{
deinitResources();
update(width, height, m_vsync);
}
else
{
return false;
}
}
return false;
}
VkSemaphore SwapChain::getActiveWrittenSemaphore() const
{
return m_semaphores[(m_currentSemaphore % getSemaphoreCycleCount())].writtenSemaphore;
}
VkSemaphore SwapChain::getActiveReadSemaphore() const
{
return m_semaphores[(m_currentSemaphore % getSemaphoreCycleCount())].readSemaphore;
}
VkImage SwapChain::getActiveImage() const
{
return m_entries[m_currentImage].image;
}
VkImageView SwapChain::getActiveImageView() const
{
return m_entries[m_currentImage].imageView;
}
VkImage SwapChain::getImage(uint32_t i) const
{
if(i >= m_imageCount)
return nullptr;
return m_entries[i].image;
}
void SwapChain::present(VkQueue queue)
{
VkResult result;
VkPresentInfoKHR presentInfo;
presentCustom(presentInfo);
result = vkQueuePresentKHR(queue, &presentInfo);
//assert(result == VK_SUCCESS); // can fail on application exit
}
void SwapChain::presentCustom(VkPresentInfoKHR& presentInfo)
{
VkSemaphore& written = m_semaphores[(m_currentSemaphore % getSemaphoreCycleCount())].writtenSemaphore;
presentInfo = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR};
presentInfo.swapchainCount = 1;
presentInfo.waitSemaphoreCount = 1;
presentInfo.pWaitSemaphores = &written;
presentInfo.pSwapchains = &m_swapchain;
presentInfo.pImageIndices = &m_currentImage;
m_currentSemaphore++;
}
void SwapChain::cmdUpdateBarriers(VkCommandBuffer cmd) const
{
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
nullptr, m_imageCount, m_barriers.data());
}
uint32_t SwapChain::getChangeID() const
{
return m_changeID;
}
VkImageView SwapChain::getImageView(uint32_t i) const
{
if(i >= m_imageCount)
return nullptr;
return m_entries[i].imageView;
}
} // namespace nvvk

View file

@ -0,0 +1,385 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2021 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef NV_VK_SWAPCHAIN_INCLUDED
#define NV_VK_SWAPCHAIN_INCLUDED
#include <stdio.h>
#include <string>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace nvvk {
/** @DOC_START
# class nvvk::SwapChain
> nvvk::SwapChain is a helper to handle swapchain setup and use
In Vulkan, we have to use `VkSwapchainKHR` to request a swap chain
(front and back buffers) from the operating system and manually
synchronize our and OS's access to the images within the swap chain.
This helper abstracts that process.
For each swap chain image there is an ImageView, and one read and write
semaphore synchronizing it (see `SwapChainAcquireState`).
To start, you need to call `init`, then `update` with the window's
initial framebuffer size (for example, use `glfwGetFramebufferSize`).
Then, in your render loop, you need to call `acquire()` to get the
swap chain image to draw to, draw your frame (waiting and signalling
the appropriate semaphores), and call `present()`.
Sometimes, the swap chain needs to be re-created (usually due to
window resizes). `nvvk::SwapChain` detects this automatically and
re-creates the swap chain for you. Every new swap chain is assigned a
unique ID (`getChangeID()`), allowing you to detect swap chain
re-creations. This usually triggers a `VkDeviceWaitIdle`; however, if
this is not appropriate, see `setWaitQueue()`.
Finally, there is a utility function to setup the image transitions
from VK_IMAGE_LAYOUT_UNDEFINED to VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
which is the format an image must be in before it is presented.
Example in combination with nvvk::Context :
* get the window handle
* create its related surface
* make sure the Queue is the one we need to render in this surface
```cpp
// could be arguments of a function/method:
nvvk::Context ctx;
NVPWindow win;
...
// get the surface of the window in which to render
VkWin32SurfaceCreateInfoKHR createInfo = {};
... populate the fields of createInfo ...
createInfo.hwnd = glfwGetWin32Window(win.m_internal);
result = vkCreateWin32SurfaceKHR(ctx.m_instance, &createInfo, nullptr, &m_surface);
...
// make sure we assign the proper Queue to m_queueGCT, from what the surface tells us
ctx.setGCTQueueWithPresent(m_surface);
```
The initialization can happen now :
```cpp
m_swapChain.init(ctx.m_device, ctx.m_physicalDevice, ctx.m_queueGCT, ctx.m_queueGCT.familyIndex,
m_surface, VK_FORMAT_B8G8R8A8_UNORM);
...
// after init or update you also have to setup the image layouts at some point
VkCommandBuffer cmd = ...
m_swapChain.cmdUpdateBarriers(cmd);
```
During a resizing of a window, you can update the swapchain as well :
```cpp
bool WindowSurface::resize(int w, int h)
{
...
m_swapChain.update(w, h);
// be cautious to also transition the image layouts
...
}
```
A typical renderloop would look as follows:
```cpp
// handles vkAcquireNextImageKHR and setting the active image
// w,h only needed if update(w,h) not called reliably.
int w, h;
bool recreated;
glfwGetFramebufferSize(window, &w, &h);
if(!m_swapChain.acquireAutoResize(w, h, &recreated /*, optional SwapChainAcquireState ptr */))
{
... handle acquire error (shouldn't happen)
}
VkCommandBuffer cmd = ...
// acquire might have recreated the swap chain: respond if needed here.
// NOTE: you can also check the recreated variable above, but this
// only works if the swap chain was recreated this frame.
if (m_swapChain.getChangeID() != lastChangeID){
// after init or resize you have to setup the image layouts
m_swapChain.cmdUpdateBarriers(cmd);
lastChangeID = m_swapChain.getChangeID();
}
// do render operations either directly using the imageview
VkImageView swapImageView = m_swapChain.getActiveImageView();
// or you may always render offline into your own framebuffer
// and then simply blit into the backbuffer. NOTE: use
// m_swapChain.getWidth() / getHeight() to get blit dimensions,
// actual swap chain image size may differ from requested width/height.
VkImage swapImage = m_swapChain.getActiveImage();
vkCmdBlitImage(cmd, ... swapImage ...);
// setup submit
VkSubmitInfo submitInfo = {VK_STRUCTURE_TYPE_SUBMIT_INFO};
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &cmd;
// we need to wait for the swapchain image to have been read already
// so we can safely blit into it
VkSemaphore swapchainReadSemaphore = m_swapChain.getActiveReadSemaphore();
VkPipelineStageFlags swapchainReadFlags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
submitInfo.waitSemaphoreCount = 1;
submitInfo.pWaitSemaphores = &swapchainReadSemaphore;
submitInfo.pWaitDstStageMask = &swapchainReadFlags;
// once this submit has completed, we have written the swapchain image
VkSemaphore swapchainWrittenSemaphore = m_swapChain.getActiveWrittenSemaphore();
submitInfo.signalSemaphoreCount = 1;
submitInfo.pSignalSemaphores = &swapchainWrittenSemaphore;
// submit it
vkQueueSubmit(m_queue, 1, &submitInfo, fence);
// present via a queue that supports it
// this will also setup the dependency for the appropriate written semaphore
// and bump the semaphore cycle
m_swapChain.present(m_queue);
```
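If you prefer explicit handles over the getActive* calls, `acquire` can optionally
fill a `SwapChainAcquireState`. A minimal sketch (assuming `update(w, h)` is called
reliably on window resize; variable names are illustrative):
```cpp
SwapChainAcquireState acq;
bool recreated = false;
if(!m_swapChain.acquire(&recreated, &acq))
{
  ... handle acquire error
}
// wait on acq.waitSem before writing to acq.image / acq.view,
// signal acq.signalSem when the frame is written, then call present()
```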
@DOC_END */
// What SwapChain::acquire produces: a swap chain image plus
// semaphores protecting it.
struct SwapChainAcquireState
{
// The image and its view and index in the swap chain.
VkImage image;
VkImageView view;
uint32_t index;
// MUST wait on this semaphore before writing to the image. (The
// system signals this semaphore when it is done presenting the
// image and the image can safely be reused.)
VkSemaphore waitSem;
// MUST signal this semaphore when done writing to the image, and
// before presenting it. (The system waits for this before presenting).
VkSemaphore signalSem;
};
class SwapChain
{
private:
struct Entry
{
VkImage image{};
VkImageView imageView{};
};
struct SemaphoreEntry
{
// be aware semaphore index may not match active image index
VkSemaphore readSemaphore{};
VkSemaphore writtenSemaphore{};
};
VkDevice m_device = VK_NULL_HANDLE;
VkPhysicalDevice m_physicalDevice = VK_NULL_HANDLE;
VkQueue m_queue{};
VkQueue m_waitQueue{}; // See waitIdle and setWaitQueue.
uint32_t m_queueFamilyIndex{0};
VkSurfaceKHR m_surface{};
VkFormat m_surfaceFormat{};
VkColorSpaceKHR m_surfaceColor{};
uint32_t m_imageCount{0};
VkSwapchainKHR m_swapchain{};
std::vector<Entry> m_entries;
std::vector<SemaphoreEntry> m_semaphores;
std::vector<VkImageMemoryBarrier> m_barriers;
// index for current image, returned by vkAcquireNextImageKHR
// vk spec: The order in which images are acquired is implementation-dependent,
// and may be different than the order the images were presented
uint32_t m_currentImage{0};
// index for current semaphore, incremented by `SwapChain::present`
uint32_t m_currentSemaphore{0};
// incremented by `SwapChain::update`, use to update other resources or track changes
uint32_t m_changeID{0};
// surface
VkExtent2D m_extent{0, 0};
// requested on update
uint32_t m_updateWidth{0};
uint32_t m_updateHeight{0};
// if the swap operation is sync'ed with monitor
bool m_vsync = false;
// if vsync is off which mode to prefer
VkPresentModeKHR m_preferredVsyncOffMode = VK_PRESENT_MODE_MAILBOX_KHR;
// usage flags for swapchain images
VkImageUsageFlags m_imageUsage{};
VkResult waitIdle()
{
if(m_waitQueue)
return vkQueueWaitIdle(m_waitQueue);
else
return vkDeviceWaitIdle(m_device);
}
// triggers device/queue wait idle
void deinitResources();
public:
SwapChain(SwapChain const&) = delete;
SwapChain& operator=(SwapChain const&) = delete;
SwapChain() {}
static constexpr VkFormat s_defaultImageFormat = VK_FORMAT_B8G8R8A8_UNORM;
static constexpr VkImageUsageFlags s_defaultImageUsage =
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
SwapChain(VkDevice device,
VkPhysicalDevice physicalDevice,
VkQueue queue,
uint32_t queueFamilyIndex,
VkSurfaceKHR surface,
VkFormat format = s_defaultImageFormat,
VkImageUsageFlags imageUsage = s_defaultImageUsage)
{
init(device, physicalDevice, queue, queueFamilyIndex, surface, format, imageUsage);
}
~SwapChain() { deinit(); }
bool init(VkDevice device,
VkPhysicalDevice physicalDevice,
VkQueue queue,
uint32_t queueFamilyIndex,
VkSurfaceKHR surface,
VkFormat format = s_defaultImageFormat,
VkImageUsageFlags imageUsage = s_defaultImageUsage);
// triggers queue/device wait idle
void deinit();
// update the swapchain configuration
// (must be called at least once after init)
// triggers queue/device wait idle
// returns actual swapchain dimensions, which may differ from requested
VkExtent2D update(int width, int height, bool vsync);
VkExtent2D update(int width, int height) { return update(width, height, m_vsync); }
// Returns true on success.
//
// Sets active index to the next swap chain image to draw to.
// The handles and semaphores for this image are optionally written to *pOut.
//
// `acquire` and `acquireAutoResize` use getActiveReadSemaphore();
// `acquireCustom` allows you to provide your own semaphore.
//
// If the swap chain was invalidated (window resized, etc.), the
// swap chain will be recreated, which triggers queue/device wait
// idle. If you are not calling `update` manually on window resize,
// you must pass the new swap image size explicitly.
//
// WARNING: The actual swap image size might not match what is
// requested; use getWidth/getHeight to check actual swap image
// size.
//
// If the swap chain was recreated, *pRecreated is set to true (if
// pRecreated != nullptr); otherwise, set to false.
//
// WARNING: the swap chain could be spontaneously recreated, even if
// you are calling `update` whenever the window is resized.
bool acquire(bool* pRecreated = nullptr, SwapChainAcquireState* pOut = nullptr);
bool acquireAutoResize(int width, int height, bool* pRecreated, SwapChainAcquireState* pOut = nullptr);
// Can be made public if this functionality is needed again.
private:
bool acquireCustom(VkSemaphore semaphore, bool* pRecreated = nullptr, SwapChainAcquireState* pOut = nullptr);
bool acquireCustom(VkSemaphore semaphore, int width, int height, bool* pRecreated, SwapChainAcquireState* pOut = nullptr);
// add one to avoid accidentally missing a proper fence wait prior to acquire
uint32_t getSemaphoreCycleCount() const { return m_imageCount + 1; }
public:
// all present functions bump semaphore cycle
// present on provided queue
void present(VkQueue queue);
// present using a default queue from init time
void present() { present(m_queue); }
// present via a custom function
// (e.g. when extending via VkDeviceGroupPresentInfoKHR)
// fills in defaults for provided presentInfo
// with getActiveImageIndex()
// and getActiveWrittenSemaphore()
void presentCustom(VkPresentInfoKHR& outPresentInfo);
VkSemaphore getActiveReadSemaphore() const;
VkSemaphore getActiveWrittenSemaphore() const;
VkImage getActiveImage() const;
VkImageView getActiveImageView() const;
uint32_t getActiveImageIndex() const { return m_currentImage; }
uint32_t getImageCount() const { return m_imageCount; }
VkImage getImage(uint32_t i) const;
VkImageView getImageView(uint32_t i) const;
VkFormat getFormat() const { return m_surfaceFormat; }
// Get the actual size of the swap chain images.
uint32_t getWidth() const { return m_extent.width; }
uint32_t getHeight() const { return m_extent.height; }
VkExtent2D getExtent() const { return m_extent; }
// Get the requested size of the swap chain images. THIS IS RARELY USEFUL.
uint32_t getUpdateWidth() const { return m_updateWidth; }
uint32_t getUpdateHeight() const { return m_updateHeight; }
bool getVsync() const { return m_vsync; }
VkSwapchainKHR getSwapchain() const { return m_swapchain; }
// does a vkCmdPipelineBarrier for VK_IMAGE_LAYOUT_UNDEFINED to VK_IMAGE_LAYOUT_PRESENT_SRC_KHR
// must be called after update calls to apply the resource transitions
void cmdUpdateBarriers(VkCommandBuffer cmd) const;
uint32_t getChangeID() const;
// Ordinarily, `SwapChain` calls vkDeviceWaitIdle before recreating
// the swap chain. However, if setWaitQueue is called with a
// non-null queue, we only wait for that queue instead of the whole
// device. This may be needed if you are using queues in other CPU
// threads that are not synchronized to the render loop.
void setWaitQueue(VkQueue waitQueue = VK_NULL_HANDLE) { m_waitQueue = waitQueue; }
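// Example (illustrative): m_swapChain.setWaitQueue(queueGCT); where queueGCT is the
// queue the swap chain images are used on, so recreation waits only on that queue.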
// typically either VK_PRESENT_MODE_MAILBOX_KHR or VK_PRESENT_MODE_IMMEDIATE_KHR
void setPreferredVsyncOffMode(VkPresentModeKHR mode) { m_preferredVsyncOffMode = mode; }
};
} // namespace nvvk
#endif


@ -0,0 +1,201 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "vulkanhppsupport.hpp"
#include "memallocator_dedicated_vk.hpp"
#include "memorymanagement_vk.hpp"
namespace nvvk {
bool checkResult(vk::Result result, const char* message)
{
return nvvk::checkResult(VkResult(result), message);
}
bool checkResult(vk::Result result, const char* file, int32_t line)
{
return nvvk::checkResult((VkResult)result, file, line);
}
} // namespace nvvk
namespace nvvkpp {
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ExportResourceAllocator::ExportResourceAllocator(VkDevice device, VkPhysicalDevice physicalDevice, nvvk::MemAllocator* memAllocator, VkDeviceSize stagingBlockSize)
: ResourceAllocator(device, physicalDevice, memAllocator, stagingBlockSize)
{
}
void ExportResourceAllocator::CreateBufferEx(const VkBufferCreateInfo& info_, VkBuffer* buffer)
{
VkBufferCreateInfo info = info_;
VkExternalMemoryBufferCreateInfo infoEx{VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO};
#ifdef WIN32
infoEx.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
#else
infoEx.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
#endif
info.pNext = &infoEx;
NVVK_CHECK(vkCreateBuffer(m_device, &info, nullptr, buffer));
}
void ExportResourceAllocator::CreateImageEx(const VkImageCreateInfo& info_, VkImage* image)
{
auto info = info_;
VkExternalMemoryImageCreateInfo infoEx{VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO};
#ifdef WIN32
infoEx.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
#else
infoEx.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
#endif
info.pNext = &infoEx;
NVVK_CHECK(vkCreateImage(m_device, &info, nullptr, image));
}
nvvk::MemHandle ExportResourceAllocator::AllocateMemory(const nvvk::MemAllocateInfo& allocateInfo)
{
nvvk::MemAllocateInfo exportAllocateInfo(allocateInfo);
exportAllocateInfo.setExportable(true);
return ResourceAllocator::AllocateMemory(exportAllocateInfo);
}
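// Note (illustrative sketch, not used in this file): because allocations from this
// allocator are marked exportable, the underlying VkDeviceMemory can later be
// exported to an OS handle for interop, e.g. on Linux with VK_KHR_external_memory_fd:
//
//   VkMemoryGetFdInfoKHR getFd{VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR};
//   getFd.memory     = memInfo.memory;  // e.g. from MemAllocator::getMemoryInfo(memHandle)
//   getFd.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
//   int fd = -1;
//   vkGetMemoryFdKHR(m_device, &getFd, &fd);
// (On Windows, vkGetMemoryWin32HandleKHR with the OPAQUE_WIN32 handle type is the analogue.)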
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ExportResourceAllocatorDedicated::ExportResourceAllocatorDedicated(VkDevice device,
VkPhysicalDevice physicalDevice,
VkDeviceSize stagingBlockSize /*= NVVK_DEFAULT_STAGING_BLOCKSIZE*/)
{
init(device, physicalDevice, stagingBlockSize);
}
ExportResourceAllocatorDedicated::~ExportResourceAllocatorDedicated()
{
deinit();
}
void ExportResourceAllocatorDedicated::init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize /*= NVVK_DEFAULT_STAGING_BLOCKSIZE*/)
{
m_memAlloc = std::make_unique<nvvk::DedicatedMemoryAllocator>(device, physicalDevice);
ExportResourceAllocator::init(device, physicalDevice, m_memAlloc.get(), stagingBlockSize);
}
void ExportResourceAllocatorDedicated::deinit()
{
ExportResourceAllocator::deinit();
m_memAlloc.reset();
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ExplicitDeviceMaskResourceAllocator::ExplicitDeviceMaskResourceAllocator(VkDevice device,
VkPhysicalDevice physicalDevice,
nvvk::MemAllocator* memAlloc,
uint32_t deviceMask)
{
init(device, physicalDevice, memAlloc, deviceMask);
}
void ExplicitDeviceMaskResourceAllocator::init(VkDevice device, VkPhysicalDevice physicalDevice, nvvk::MemAllocator* memAlloc, uint32_t deviceMask)
{
ResourceAllocator::init(device, physicalDevice, memAlloc);
m_deviceMask = deviceMask;
}
nvvk::MemHandle ExplicitDeviceMaskResourceAllocator::AllocateMemory(const nvvk::MemAllocateInfo& allocateInfo)
{
nvvk::MemAllocateInfo deviceMaskAllocateInfo(allocateInfo);
deviceMaskAllocateInfo.setDeviceMask(m_deviceMask);
return ResourceAllocator::AllocateMemory(deviceMaskAllocateInfo);
}
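// Note (illustrative): within a Vulkan device group, bit i of the device mask selects
// physical device i, so e.g. a deviceMask of 0x1 allocates only on the first GPU.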
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ResourceAllocatorDma::ResourceAllocatorDma(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize, VkDeviceSize memBlockSize)
{
init(device, physicalDevice, stagingBlockSize, memBlockSize);
}
ResourceAllocatorDma::~ResourceAllocatorDma()
{
deinit();
}
void ResourceAllocatorDma::init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize, VkDeviceSize memBlockSize)
{
m_dma = std::make_unique<nvvk::DeviceMemoryAllocator>(device, physicalDevice, memBlockSize);
ResourceAllocator::init(device, physicalDevice, m_dma.get(), stagingBlockSize);
}
void ResourceAllocatorDma::init(VkInstance, VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize, VkDeviceSize memBlockSize)
{
init(device, physicalDevice, stagingBlockSize, memBlockSize);
}
void ResourceAllocatorDma::deinit()
{
ResourceAllocator::deinit();
m_dma.reset();
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
ResourceAllocatorDedicated::ResourceAllocatorDedicated(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize)
{
init(device, physicalDevice, stagingBlockSize);
}
ResourceAllocatorDedicated::~ResourceAllocatorDedicated()
{
deinit();
}
void ResourceAllocatorDedicated::init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize)
{
m_memAlloc = std::make_unique<nvvk::DedicatedMemoryAllocator>(device, physicalDevice);
ResourceAllocator::init(device, physicalDevice, m_memAlloc.get(), stagingBlockSize);
}
void ResourceAllocatorDedicated::init(VkInstance, // unused
VkDevice device,
VkPhysicalDevice physicalDevice,
VkDeviceSize stagingBlockSize /*= NVVK_DEFAULT_STAGING_BLOCKSIZE*/)
{
init(device, physicalDevice, stagingBlockSize);
}
void ResourceAllocatorDedicated::deinit()
{
ResourceAllocator::deinit();
m_memAlloc.reset();
}
} // namespace nvvkpp

File diff suppressed because it is too large


@ -0,0 +1,58 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#if NVP_SUPPORTS_OPENGL
#include "vulkanhppsupport_vkgl.hpp"
namespace nvvkpp {
ResourceAllocatorGLInterop::ResourceAllocatorGLInterop(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize)
{
init(device, physicalDevice, stagingBlockSize);
}
ResourceAllocatorGLInterop::~ResourceAllocatorGLInterop()
{
deinit();
}
void ResourceAllocatorGLInterop::init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize)
{
m_dmaGL = std::make_unique<nvvk::DeviceMemoryAllocatorGL>(device, physicalDevice);
nvvkpp::ExportResourceAllocator::init(device, physicalDevice, m_dmaGL.get(), stagingBlockSize);
// The staging will only use DMA, without export functionality.
m_dma = std::make_unique<nvvk::DeviceMemoryAllocator>(device, physicalDevice);
m_staging = std::make_unique<nvvk::StagingMemoryManager>(dynamic_cast<nvvk::MemAllocator*>(m_dma.get()), stagingBlockSize);
}
void ResourceAllocatorGLInterop::deinit()
{
nvvkpp::ExportResourceAllocator::deinit();
m_dmaGL.reset();
m_dma.reset();
}
nvvk::AllocationGL ResourceAllocatorGLInterop::getAllocationGL(nvvk::MemHandle memHandle) const
{
return m_dmaGL->getAllocationGL(m_dmaGL->getAllocationID(memHandle));
}
} // namespace nvvkpp
#endif


@ -0,0 +1,56 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#if NVP_SUPPORTS_OPENGL
#pragma once
#include "memorymanagement_vkgl.hpp" // This needs to be first to not break the build
#include "nvvk/vulkanhppsupport.hpp"
namespace nvvkpp {
/** @DOC_START
# class nvvkpp::ResourceAllocatorGLInterop
> ResourceAllocatorGLInterop is a helper class to manage Vulkan and OpenGL memory allocation and interop.
This class is a wrapper around the `nvvk::DeviceMemoryAllocatorGL` and `nvvk::DeviceMemoryAllocator` classes, which are used to allocate memory for Vulkan and OpenGL resources.
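A minimal usage sketch (assuming the usual `ResourceAllocator` image-creation API and
`nvvk::Image::memHandle`; names are illustrative):
```cpp
nvvkpp::ResourceAllocatorGLInterop allocGL(device, physicalDevice);
// memory for resources created through this allocator carries export handles
// suitable for OpenGL interop
nvvk::Image image = allocGL.createImage(imageCreateInfo);
// query the OpenGL-side allocation (GL memory object, offset, size) for this memory
nvvk::AllocationGL glAlloc = allocGL.getAllocationGL(image.memHandle);
```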
@DOC_END */
class ResourceAllocatorGLInterop : public ExportResourceAllocator
{
public:
ResourceAllocatorGLInterop() = default;
ResourceAllocatorGLInterop(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
~ResourceAllocatorGLInterop();
void init(VkDevice device, VkPhysicalDevice physicalDevice, VkDeviceSize stagingBlockSize = NVVK_DEFAULT_STAGING_BLOCKSIZE);
void deinit();
nvvk::DeviceMemoryAllocatorGL& getDmaGL() const { return *m_dmaGL; }
nvvk::AllocationGL getAllocationGL(nvvk::MemHandle memHandle) const;
protected:
std::unique_ptr<nvvk::DeviceMemoryAllocatorGL> m_dmaGL;
std::unique_ptr<nvvk::DeviceMemoryAllocator> m_dma;
};
} // namespace nvvkpp
#endif