framework update 4/29/2020

2020-04-29 13:59:03 +02:00 · 2020-04-29 13:59:03 +02:00 · 60103dd1ce
commit 60103dd1ce
parent 21fc655237
62 changed files with 2931 additions and 2743 deletions
--- a/docs/vkrt_tuto_animation.md.htm
+++ b/docs/vkrt_tuto_animation.md.htm
@ -76,12 +76,15 @@ Next, we update the buffer that describes the scene, which is used by the raster
  memcpy(gInst, m_objInstance.data(), bufferSize);
  m_alloc.unmap(stagingBuffer);
  // Copy staging buffer to the Scene Description buffer
-  nvvkpp::SingleCommandBuffer genCmdBuf(m_device, m_graphicsQueueIndex);
-  vk::CommandBuffer           cmdBuf = genCmdBuf.createCommandBuffer();
+  nvvk::CommandPool genCmdBuf(m_device, m_graphicsQueueIndex);
+  vk::CommandBuffer cmdBuf = genCmdBuf.createCommandBuffer();
  cmdBuf.copyBuffer(stagingBuffer.buffer, m_sceneDesc.buffer, vk::BufferCopy(0, 0, bufferSize));
  m_debug.endLabel(cmdBuf);
-  genCmdBuf.flushCommandBuffer(cmdBuf);
+  genCmdBuf.submitAndWait(cmdBuf);
  m_alloc.destroy(stagingBuffer);
+
+  m_rtBuilder.updateTlasMatrices(m_tlas);
+  m_rtBuilder.updateBlas(2);
 }
 ~~~~
 <script type="preformatted">
@ -114,10 +117,10 @@ they will still be at their original positions in the ray traced version. We wil

 Since we want to update the transformation matrices in the TLAS, we need to keep some of the objects used to create it.

-First, move the vector of `nvvkpp::RaytracingBuilder::Instance` objects from `HelloVulkan::createTopLevelAS()` to the 
+First, move the vector of `nvvk::RaytracingBuilder::Instance` objects from `HelloVulkan::createTopLevelAS()` to the 
 `HelloVulkan` class. 
 ~~~~ C++
-std::vector<nvvkpp::RaytracingBuilder::Instance> m_tlas;
+std::vector<nvvk::RaytracingBuilder::Instance> m_tlas;
 ~~~~ 

 Make sure to rename it to `m_tlas`, instead of `tlas`. 
@ -131,12 +134,12 @@ void HelloVulkan::createTopLevelAS()
  m_tlas.reserve(m_objInstance.size());
  for(int i = 0; i < static_cast<int>(m_objInstance.size()); i++)
  {
-    nvvkpp::RaytracingBuilder::Instance rayInst;
+    nvvk::RaytracingBuilder::Instance rayInst;
    rayInst.transform  = m_objInstance[i].transform;  // Position of the instance
    rayInst.instanceId = i;                           // gl_InstanceID
    rayInst.blasId     = m_objInstance[i].objIndex;
    rayInst.hitGroupId = m_objInstance[i].hitgroup;
-    rayInst.flags      = vk::GeometryInstanceFlagBitsKHR::eTriangleCullDisable;
+    rayInst.flags      = VK_GEOMETRY_INSTANCE_TRIANGLE_CULL_DISABLE_BIT_NV;
    m_tlas.emplace_back(rayInst);
  }
  m_rtBuilder.buildTlas(m_tlas, vk::BuildAccelerationStructureFlagBitsKHR::ePreferFastTrace
@ -145,12 +148,12 @@ void HelloVulkan::createTopLevelAS()
 ~~~~ 

 Back in `HelloVulkan::animationInstances()`, we need to copy the new computed transformation 
-matrices to the vector of `nvvkpp::RaytracingBuilder::Instance` objects.
+matrices to the vector of `nvvk::RaytracingBuilder::Instance` objects.

 In the `for` loop, add at the end

 ~~~~ C++
-   nvvkpp::RaytracingBuilder::Instance& tinst = m_tlas[wusonIdx];
+   nvvk::RaytracingBuilder::Instance& tinst = m_tlas[wusonIdx];
   tinst.transform                            = inst.transform;
 ~~~~

@ -162,12 +165,12 @@ m_rtBuilder.updateTlasMatrices(m_tlas);

 ![](Images/animation1.gif)

-## nvvkpp::RaytracingBuilder::updateTlasMatrices (Implementation)
+## nvvk::RaytracingBuilder::updateTlasMatrices (Implementation)

-We currently use `nvvkpp::RaytracingBuilder` to update the matrices for convenience, but
+We currently use `nvvk::RaytracingBuilder` to update the matrices for convenience, but
 this could be done more efficiently if one kept some of the buffer and memory references. Using a 
 memory allocator, such as the one described in the [Many Objects Tutorial](vkrt_tuto_instances.md.htm),
-could also be an alternative for avoiding multiple reallocations. Here's the implementation of `nvvkpp::RaytracingBuilder::updateTlasMatrices`.
+could also be an alternative for avoiding multiple reallocations. Here's the implementation of `nvvk::RaytracingBuilder::updateTlasMatrices`.

 ### Staging Buffer

@ -177,18 +180,18 @@ building the TLAS.
 ~~~~ C++
  void updateTlasMatrices(const std::vector<Instance>& instances)
  {
-    VkDeviceSize bufferSize = instances.size() * sizeof(vk::AccelerationStructureInstanceKHR);
+    VkDeviceSize bufferSize = instances.size() * sizeof(VkAccelerationStructureInstanceKHR);
    // Create a staging buffer on the host to upload the new instance data
-    nvvkBuffer stagingBuffer = m_alloc.createBuffer(bufferSize, vk::BufferUsageFlagBits::eTransferSrc,
+    nvvkBuffer stagingBuffer = m_alloc.createBuffer(bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
 #if defined(ALLOC_VMA)
                                                    VmaMemoryUsage::VMA_MEMORY_USAGE_CPU_TO_GPU
 #else
-                                                    vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent
+                                                    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
 #endif
    );

    // Copy the instance data into the staging buffer
-    auto* gInst = reinterpret_cast<vk::AccelerationStructureInstanceKHR*>(m_alloc.map(stagingBuffer));
+    auto* gInst = reinterpret_cast<VkAccelerationStructureInstanceKHR*>(m_alloc.map(stagingBuffer));
    for(int i = 0; i < instances.size(); i++)
    {
      gInst[i] = instanceToVkGeometryInstanceKHR(instances[i]);
@ -201,16 +204,23 @@ Building the TLAS always needs scratch memory, and so we need to request it. If
 we hadn't set the `eAllowUpdate` flag, the returned size would be zero and the rest of the code
 would fail.
 ~~~~ C++
-// Compute the amount of scratch memory required by the AS builder to update the TLAS
-vk::AccelerationStructureMemoryRequirementsInfoKHR memoryRequirementsInfo{
-    vk::AccelerationStructureMemoryRequirementsTypeKHR::eUpdateScratch,
-    vk::AccelerationStructureBuildTypeKHR::eDevice, m_tlas.as.accel};
-vk::DeviceSize scratchSize =
-    m_device.getAccelerationStructureMemoryRequirementsKHR(memoryRequirementsInfo).memoryRequirements.size;
-// Allocate the scratch buffer
-nvvkBuffer        scratchBuffer  = m_alloc.createBuffer(scratchSize, vk::BufferUsageFlagBits::eRayTracingKHR
-                                                                 | vk::BufferUsageFlagBits::eShaderDeviceAddress);
-vk::DeviceAddress scratchAddress = m_device.getBufferAddress({scratchBuffer.buffer});
+    // Compute the amount of scratch memory required by the AS builder to update the TLAS
+    VkAccelerationStructureMemoryRequirementsInfoKHR memoryRequirementsInfo{
+        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_KHR};
+    memoryRequirementsInfo.type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_UPDATE_SCRATCH_KHR;
+    memoryRequirementsInfo.accelerationStructure = m_tlas.as.accel;
+    memoryRequirementsInfo.buildType             = VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR;
+
+    VkMemoryRequirements2 reqMem{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
+    vkGetAccelerationStructureMemoryRequirementsKHR(m_device, &memoryRequirementsInfo, &reqMem);
+    VkDeviceSize scratchSize = reqMem.memoryRequirements.size;
+
+    // Allocate the scratch buffer
+    nvvkBuffer scratchBuffer =
+        m_alloc.createBuffer(scratchSize, VK_BUFFER_USAGE_RAY_TRACING_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
+    VkBufferDeviceAddressInfo bufferInfo{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO};
+    bufferInfo.buffer              = scratchBuffer.buffer;
+    VkDeviceAddress scratchAddress = vkGetBufferDeviceAddress(m_device, &bufferInfo);
 ~~~~

 ### Update the Buffer
@ -218,19 +228,25 @@ In a new command buffer, we copy the staging buffer to the device buffer and
 add a barrier to make sure the memory finishes copying before updating the TLAS.

 ~~~~ C++
-// Update the instance buffer on the device side and build the TLAS
-nvvkpp::SingleCommandBuffer genCmdBuf(m_device, m_queueIndex);
-vk::CommandBuffer           cmdBuf = genCmdBuf.createCommandBuffer();
+    // Update the instance buffer on the device side and build the TLAS
+    nvvk::CommandPool genCmdBuf(m_device, m_queueIndex);
+    VkCommandBuffer   cmdBuf = genCmdBuf.createCommandBuffer();

-cmdBuf.copyBuffer(stagingBuffer.buffer, m_instBuffer.buffer, vk::BufferCopy(0, 0, bufferSize));
+    VkBufferCopy region{0, 0, bufferSize};
+    vkCmdCopyBuffer(cmdBuf, stagingBuffer.buffer, m_instBuffer.buffer, 1, &region);

-vk::DeviceAddress instanceAddress = m_device.getBufferAddress(m_instBuffer.buffer);
+    //VkBufferDeviceAddressInfo bufferInfo{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO};
+    bufferInfo.buffer               = m_instBuffer.buffer;
+    VkDeviceAddress instanceAddress = vkGetBufferDeviceAddress(m_device, &bufferInfo);

-// Make sure the copy of the instance buffer are copied before triggering the
-// acceleration structure build
-vk::MemoryBarrier barrier(vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eAccelerationStructureWriteKHR);
-cmdBuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAccelerationStructureBuildKHR,
-                       vk::DependencyFlags(), {barrier}, {}, {});
+
+    // Make sure the copy to the instance buffer has completed before triggering the
+    // acceleration structure build
+    VkMemoryBarrier barrier{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
+    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+    barrier.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
+    vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
+                         0, 1, &barrier, 0, nullptr, 0, nullptr);
 ~~~~

 ### Update Acceleration Structure
@ -239,30 +255,36 @@ We update the TLAS using the same acceleration structure for source and
 destination to update it in place, and using the VK_TRUE parameter to trigger the update.

 ~~~~ C++
-vk::AccelerationStructureGeometryKHR topASGeometry{vk::GeometryTypeKHR::eInstances};
-topASGeometry.geometry.instances.arrayOfPointers      = VK_FALSE;
-topASGeometry.geometry.instances.data                 = instanceAddress;
-const vk::AccelerationStructureGeometryKHR* pGeometry = &topASGeometry;
+    VkAccelerationStructureGeometryDataKHR geometry{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR};
+    geometry.instances.arrayOfPointers    = VK_FALSE;
+    geometry.instances.data.deviceAddress = instanceAddress;
+    VkAccelerationStructureGeometryKHR topASGeometry{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR};
+    topASGeometry.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR;
+    topASGeometry.geometry     = geometry;

+    const VkAccelerationStructureGeometryKHR* pGeometry = &topASGeometry;

-vk::AccelerationStructureBuildGeometryInfoKHR topASInfo;
-topASInfo.setFlags(m_tlas.flags);
-topASInfo.setUpdate(VK_TRUE);
-topASInfo.setSrcAccelerationStructure(m_tlas.as.accel);
-topASInfo.setDstAccelerationStructure(m_tlas.as.accel);
-topASInfo.setGeometryArrayOfPointers(VK_FALSE);
-topASInfo.setGeometryCount(1);
-topASInfo.setPpGeometries(&pGeometry);
-topASInfo.setScratchData(scratchAddress);
+    VkAccelerationStructureBuildGeometryInfoKHR topASInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR};
+    topASInfo.flags                     = m_tlas.flags;
+    topASInfo.update                    = VK_TRUE;
+    topASInfo.srcAccelerationStructure  = m_tlas.as.accel;
+    topASInfo.dstAccelerationStructure  = m_tlas.as.accel;
+    topASInfo.geometryArrayOfPointers   = VK_FALSE;
+    topASInfo.geometryCount             = 1;
+    topASInfo.ppGeometries              = &pGeometry;
+    topASInfo.scratchData.deviceAddress = scratchAddress;

-uint32_t                                           nbInstances      = (uint32_t)instances.size();
-vk::AccelerationStructureBuildOffsetInfoKHR        buildOffsetInfo  = {nbInstances, 0, 0, 0};
-const vk::AccelerationStructureBuildOffsetInfoKHR* pBuildOffsetInfo = &buildOffsetInfo;
+    uint32_t                                         nbInstances      = (uint32_t)instances.size();
+    VkAccelerationStructureBuildOffsetInfoKHR        buildOffsetInfo  = {nbInstances, 0, 0, 0};
+    const VkAccelerationStructureBuildOffsetInfoKHR* pBuildOffsetInfo = &buildOffsetInfo;

-// Update the acceleration structure. Note the VK_TRUE parameter to trigger the update,
-// and the existing TLAS being passed and updated in place
-cmdBuf.buildAccelerationStructureKHR(1, &topASInfo, &pBuildOffsetInfo);
-genCmdBuf.flushCommandBuffer(cmdBuf);
+    // Build the TLAS
+
+    // Update the acceleration structure. Note the VK_TRUE parameter to trigger the update,
+    // and the existing TLAS being passed and updated in place
+    vkCmdBuildAccelerationStructureKHR(cmdBuf, 1, &topASInfo, &pBuildOffsetInfo);
+
+    genCmdBuf.submitAndWait(cmdBuf);
 ~~~~

 ### Cleanup
@ -310,12 +332,12 @@ Add all of the following members to the `HelloVulkan` class:
  void updateCompDescriptors(nvvkBuffer& vertex);
  void createCompPipelines();

-  std::vector<vk::DescriptorSetLayoutBinding> m_compDescSetLayoutBind;
-  vk::DescriptorPool                          m_compDescPool;
-  vk::DescriptorSetLayout                     m_compDescSetLayout;
-  vk::DescriptorSet                           m_compDescSet;
-  vk::Pipeline                                m_compPipeline;
-  vk::PipelineLayout                          m_compPipelineLayout;
+  nvvk::DescriptorSetBindings m_compDescSetLayoutBind;
+  vk::DescriptorPool          m_compDescPool;
+  vk::DescriptorSetLayout     m_compDescSetLayout;
+  vk::DescriptorSet           m_compDescSet;
+  vk::Pipeline                m_compPipeline;
+  vk::PipelineLayout          m_compPipelineLayout;
 ~~~~

 The compute shader will work on a single `VertexObj` buffer.
@ -323,12 +345,12 @@ The compute shader will work on a single `VertexObj` buffer.
 ~~~~ C++
 void HelloVulkan::createCompDescriptors()
 {
-  m_compDescSetLayoutBind.emplace_back(vk::DescriptorSetLayoutBinding(
+  m_compDescSetLayoutBind.addBinding(vk::DescriptorSetLayoutBinding(
      0, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eCompute));

-  m_compDescSetLayout = nvvkpp::util::createDescriptorSetLayout(m_device, m_compDescSetLayoutBind);
-  m_compDescPool      = nvvkpp::util::createDescriptorPool(m_device, m_compDescSetLayoutBind, 1);
-  m_compDescSet = nvvkpp::util::createDescriptorSet(m_device, m_compDescPool, m_compDescSetLayout);
+  m_compDescSetLayout = m_compDescSetLayoutBind.createLayout(m_device);
+  m_compDescPool      = m_compDescSetLayoutBind.createPool(m_device, 1);
+  m_compDescSet       = nvvk::allocateDescriptorSet(m_device, m_compDescPool, m_compDescSetLayout);
 }
 ~~~~

@ -339,8 +361,7 @@ void HelloVulkan::updateCompDescriptors(nvvkBuffer& vertex)
 {
  std::vector<vk::WriteDescriptorSet> writes;
  vk::DescriptorBufferInfo            dbiUnif{vertex.buffer, 0, VK_WHOLE_SIZE};
-  writes.emplace_back(
-      nvvkpp::util::createWrite(m_compDescSet, m_compDescSetLayoutBind[0], &dbiUnif));
+  writes.emplace_back(m_compDescSetLayoutBind.makeWrite(m_compDescSet, 0, dbiUnif));
  m_device.updateDescriptorSets(static_cast<uint32_t>(writes.size()), writes.data(), 0, nullptr);
 }
 ~~~~
@ -358,10 +379,10 @@ void HelloVulkan::createCompPipelines()
  vk::ComputePipelineCreateInfo computePipelineCreateInfo{{}, {}, m_compPipelineLayout};

  computePipelineCreateInfo.stage =
-      nvvkpp::util::loadShader(m_device,
-                               nvh::loadFile("shaders/anim.comp.spv", true, defaultSearchPaths),
-                               vk::ShaderStageFlagBits::eCompute);
-  m_compPipeline = m_device.createComputePipelines({}, computePipelineCreateInfo, nullptr)[0];
+      nvvk::createShaderStageInfo(m_device,
+                                  nvh::loadFile("shaders/anim.comp.spv", true, defaultSearchPaths),
+                                  VK_SHADER_STAGE_COMPUTE_BIT);
+  m_compPipeline = m_device.createComputePipeline({}, computePipelineCreateInfo, nullptr);
  m_device.destroy(computePipelineCreateInfo.stage.module);
 }
 ~~~~
@ -445,8 +466,8 @@ void HelloVulkan::animationObject(float time)

  updateCompDescriptors(model.vertexBuffer);

-  nvvkpp::SingleCommandBuffer genCmdBuf(m_device, m_graphicsQueueIndex);
-  vk::CommandBuffer           cmdBuf = genCmdBuf.createCommandBuffer();
+  nvvk::CommandPool genCmdBuf(m_device, m_graphicsQueueIndex);
+  vk::CommandBuffer cmdBuf = genCmdBuf.createCommandBuffer();

  cmdBuf.bindPipeline(vk::PipelineBindPoint::eCompute, m_compPipeline);
  cmdBuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, m_compPipelineLayout, 0,
@ -454,7 +475,7 @@ void HelloVulkan::animationObject(float time)
  cmdBuf.pushConstants(m_compPipelineLayout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(float),
                       &time);
  cmdBuf.dispatch(model.nbVertices, 1, 1);
-  genCmdBuf.flushCommandBuffer(cmdBuf);
+  genCmdBuf.submitAndWait(cmdBuf);
 }
 ~~~~

@ -478,7 +499,7 @@ In the rendering loop, after the call to `animationInstances`, call the object a

 ## Update BLAS

-In `nvvkpp::RaytracingBuilder` in `raytrace_vkpp.hpp`, we can add a function to update a BLAS whose vertex buffer was previously updated. This function is very similar to the one used for instances, but in this case, there is no buffer transfer to do.
+In `nvvk::RaytracingBuilder` in `raytrace_vkpp.hpp`, we can add a function to update a BLAS whose vertex buffer was previously updated. This function is very similar to the one used for instances, but in this case, there is no buffer transfer to do.

 ~~~~ C++
  //--------------------------------------------------------------------------------------------------
@ -488,57 +509,63 @@ In `nvvkpp::RaytracingBuilder` in `raytrace_vkpp.hpp`, we can add a function to
  {
    Blas& blas = m_blas[blasIdx];

-    // Compute the amount of scratch memory required by the AS builder to update the TLAS
-    vk::AccelerationStructureMemoryRequirementsInfoKHR memoryRequirementsInfo{
-        vk::AccelerationStructureMemoryRequirementsTypeKHR::eUpdateScratch,
-        vk::AccelerationStructureBuildTypeKHR::eDevice, blas.as.accel};
-    vk::DeviceSize scratchSize =
-        m_device.getAccelerationStructureMemoryRequirementsKHR(memoryRequirementsInfo).memoryRequirements.size;
+    // Compute the amount of scratch memory required by the AS builder to update the BLAS
+    VkAccelerationStructureMemoryRequirementsInfoKHR memoryRequirementsInfo{
+        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_KHR};
+    memoryRequirementsInfo.type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_UPDATE_SCRATCH_KHR;
+    memoryRequirementsInfo.accelerationStructure = blas.as.accel;
+    memoryRequirementsInfo.buildType             = VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR;
+
+    VkMemoryRequirements2 reqMem{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
+    vkGetAccelerationStructureMemoryRequirementsKHR(m_device, &memoryRequirementsInfo, &reqMem);
+    VkDeviceSize scratchSize = reqMem.memoryRequirements.size;
+
    // Allocate the scratch buffer
-    nvvkBuffer        scratchBuffer  = m_alloc.createBuffer(scratchSize, vk::BufferUsageFlagBits::eRayTracingKHR
-                                                                     | vk::BufferUsageFlagBits::eShaderDeviceAddress);
-    vk::DeviceAddress scratchAddress = m_device.getBufferAddress({scratchBuffer.buffer});
+    nvvkBuffer scratchBuffer =
+        m_alloc.createBuffer(scratchSize, VK_BUFFER_USAGE_RAY_TRACING_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
+    VkBufferDeviceAddressInfo bufferInfo{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO};
+    bufferInfo.buffer              = scratchBuffer.buffer;
+    VkDeviceAddress scratchAddress = vkGetBufferDeviceAddress(m_device, &bufferInfo);


-    const vk::AccelerationStructureGeometryKHR*   pGeometry = blas.asGeometry.data();
-    vk::AccelerationStructureBuildGeometryInfoKHR asInfo{vk::AccelerationStructureTypeKHR::eBottomLevel};
-    asInfo.setFlags(blas.flags);
-    asInfo.setUpdate(VK_TRUE);
-    asInfo.setSrcAccelerationStructure(blas.as.accel);
-    asInfo.setDstAccelerationStructure(blas.as.accel);
-    asInfo.setGeometryArrayOfPointers(VK_FALSE);
-    asInfo.setGeometryCount((uint32_t)blas.asGeometry.size());
-    asInfo.setPpGeometries(&pGeometry);
-    asInfo.setScratchData(scratchAddress);
+    const VkAccelerationStructureGeometryKHR*   pGeometry = blas.asGeometry.data();
+    VkAccelerationStructureBuildGeometryInfoKHR asInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR};
+    asInfo.type                      = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
+    asInfo.flags                     = blas.flags;
+    asInfo.update                    = VK_TRUE;
+    asInfo.srcAccelerationStructure  = blas.as.accel;
+    asInfo.dstAccelerationStructure  = blas.as.accel;
+    asInfo.geometryArrayOfPointers   = VK_FALSE;
+    asInfo.geometryCount             = (uint32_t)blas.asGeometry.size();
+    asInfo.ppGeometries              = &pGeometry;
+    asInfo.scratchData.deviceAddress = scratchAddress;

-
-    std::vector<const vk::AccelerationStructureBuildOffsetInfoKHR*> pBuildOffset(blas.asBuildOffsetInfo.size());
+    std::vector<const VkAccelerationStructureBuildOffsetInfoKHR*> pBuildOffset(blas.asBuildOffsetInfo.size());
    for(size_t i = 0; i < blas.asBuildOffsetInfo.size(); i++)
      pBuildOffset[i] = &blas.asBuildOffsetInfo[i];

-
    // Update the instance buffer on the device side and build the TLAS
-    nvvkpp::SingleCommandBuffer genCmdBuf(m_device, m_queueIndex);
-    vk::CommandBuffer           cmdBuf = genCmdBuf.createCommandBuffer();
+    nvvk::CommandPool genCmdBuf(m_device, m_queueIndex);
+    VkCommandBuffer   cmdBuf = genCmdBuf.createCommandBuffer();
+

    // Update the acceleration structure. Note the VK_TRUE parameter to trigger the update,
    // and the existing BLAS being passed and updated in place
-    cmdBuf.buildAccelerationStructureKHR(asInfo, pBuildOffset);
+    vkCmdBuildAccelerationStructureKHR(cmdBuf, 1, &asInfo, pBuildOffset.data());

-
-    genCmdBuf.flushCommandBuffer(cmdBuf);
+    genCmdBuf.submitAndWait(cmdBuf);
    m_alloc.destroy(scratchBuffer);
  }
 ~~~~

 The previous function (`updateBlas`) uses geometry information stored in `m_blas`. 
-To be able to re-use this information, we need to keep the structure of `nvvkpp::RaytracingBuilderKHR::Blas` objects
+To be able to re-use this information, we need to keep the structure of `nvvk::RaytracingBuilderKHR::Blas` objects
 used for its creation.

-Move the `nvvkpp::RaytracingBuilderKHR::Blas` vector from `HelloVulkan::createBottomLevelAS()` to the `HelloVulkan` class, renaming it to `m_blas`.
+Move the `nvvk::RaytracingBuilderKHR::Blas` vector from `HelloVulkan::createBottomLevelAS()` to the `HelloVulkan` class, renaming it to `m_blas`.

 ~~~~ C++
-  std::vector<nvvkpp::RaytracingBuilderKHR::Blas>         m_blas;
+  std::vector<nvvk::RaytracingBuilderKHR::Blas>         m_blas;
 ~~~~

 As with the TLAS, the BLAS needs to allow updates. We will also enable the 
--- a/docs/vkrt_tuto_anyhit.md.htm
+++ b/docs/vkrt_tuto_anyhit.md.htm
@ -111,8 +111,8 @@ In `createRtPipeline()`, after loading `raytrace.rchit.spv`, load `raytrace.rahi

 ~~~~ C++
  vk::ShaderModule ahitSM =
-      nvvkpp::util::createShaderModule(m_device,  //
-                                       nvh::loadFile("shaders/raytrace.rahit.spv", true, paths));
+      nvvk::createShaderModule(m_device,  //
+                               nvh::loadFile("shaders/raytrace.rahit.spv", true, paths));
 ~~~~

 add the any hit shader to the hit group
--- a/docs/vkrt_tuto_callable.md.html
+++ b/docs/vkrt_tuto_callable.md.html
@ -65,30 +65,29 @@ In `HelloVulkan::createRtPipeline()`, immediately after adding the closest-hit s
 3 callable shaders, for each type of light. 

 ~~~~ C++
-// Callable shaders
-vk::RayTracingShaderGroupCreateInfoKHR callGroup{vk::RayTracingShaderGroupTypeKHR::eGeneral,
-                                                VK_SHADER_UNUSED_KHR, VK_SHADER_UNUSED_KHR,
-                                                VK_SHADER_UNUSED_KHR, VK_SHADER_UNUSED_KHR};
+  // Callable shaders
+  vk::RayTracingShaderGroupCreateInfoKHR callGroup{vk::RayTracingShaderGroupTypeKHR::eGeneral,
+                                                   VK_SHADER_UNUSED_KHR, VK_SHADER_UNUSED_KHR,
+                                                   VK_SHADER_UNUSED_KHR, VK_SHADER_UNUSED_KHR};

-vk::ShaderModule call0 =
-    nvvkpp::util::createShaderModule(m_device,
-                                     nvh::loadFile("shaders/light_point.rcall.spv", true, paths));
-vk::ShaderModule call1 =
-    nvvkpp::util::createShaderModule(m_device,
-                                     nvh::loadFile("shaders/light_spot.rcall.spv", true, paths));
-vk::ShaderModule call2 =
-    nvvkpp::util::createShaderModule(m_device,
-                                     nvh::loadFile("shaders/light_inf.rcall.spv", true, paths));
+  vk::ShaderModule call0 =
+      nvvk::createShaderModule(m_device,
+                               nvh::loadFile("shaders/light_point.rcall.spv", true, paths));
+  vk::ShaderModule call1 =
+      nvvk::createShaderModule(m_device,
+                               nvh::loadFile("shaders/light_spot.rcall.spv", true, paths));
+  vk::ShaderModule call2 =
+      nvvk::createShaderModule(m_device, nvh::loadFile("shaders/light_inf.rcall.spv", true, paths));

-stages.push_back({{}, vk::ShaderStageFlagBits::eCallableKHR, call0, "main"});
-callGroup.setGeneralShader(static_cast<uint32_t>(stages.size() - 1));
-m_rtShaderGroups.push_back(callGroup);
-stages.push_back({{}, vk::ShaderStageFlagBits::eCallableKHR, call1, "main"});
-callGroup.setGeneralShader(static_cast<uint32_t>(stages.size() - 1));
-m_rtShaderGroups.push_back(callGroup);
-stages.push_back({{}, vk::ShaderStageFlagBits::eCallableKHR, call2, "main"});
-callGroup.setGeneralShader(static_cast<uint32_t>(stages.size() - 1));
-m_rtShaderGroups.push_back(callGroup);
+  stages.push_back({{}, vk::ShaderStageFlagBits::eCallableKHR, call0, "main"});
+  callGroup.setGeneralShader(static_cast<uint32_t>(stages.size() - 1));
+  m_rtShaderGroups.push_back(callGroup);
+  stages.push_back({{}, vk::ShaderStageFlagBits::eCallableKHR, call1, "main"});
+  callGroup.setGeneralShader(static_cast<uint32_t>(stages.size() - 1));
+  m_rtShaderGroups.push_back(callGroup);
+  stages.push_back({{}, vk::ShaderStageFlagBits::eCallableKHR, call2, "main"});
+  callGroup.setGeneralShader(static_cast<uint32_t>(stages.size() - 1));
+  m_rtShaderGroups.push_back(callGroup);
 ~~~~

 And at the end of the function, delete the shaders.
--- a/docs/vkrt_tuto_instances.md.htm
+++ b/docs/vkrt_tuto_instances.md.htm
@ -117,13 +117,13 @@ Replace the definition of buffers and textures and include the right allocator.

 ~~~~ C++
 #if defined(ALLOC_DEDICATED)
-#include "nvvkpp/allocator_dedicated_vkpp.hpp"
-using nvvkBuffer  = nvvkpp::BufferDedicated;
-using nvvkTexture = nvvkpp::TextureDedicated;
+#include "nvvk/allocator_dedicated_vk.hpp"
+using nvvkBuffer  = nvvk::BufferDedicated;
+using nvvkTexture = nvvk::TextureDedicated;
 #elif defined(ALLOC_DMA)
-#include "nvvkpp/allocator_dma_vkpp.hpp"
-using nvvkBuffer  = nvvkpp::BufferDma;
-using nvvkTexture = nvvkpp::TextureDma;
+#include "nvvk/allocator_dma_vk.hpp"
+using nvvkBuffer  = nvvk::BufferDma;
+using nvvkTexture = nvvk::TextureDma;
 #endif
 ~~~~

@ -131,10 +131,11 @@ And do the same for the allocator

 ~~~~ C++
 #if defined(ALLOC_DEDICATED)
-  nvvkpp::AllocatorDedicated m_alloc;  // Allocator for buffer, images, acceleration structures
+  nvvk::AllocatorDedicated m_alloc;  // Allocator for buffer, images, acceleration structures
 #elif defined(ALLOC_DMA)
-  nvvkpp::AllocatorDma        m_alloc;  // Allocator for buffer, images, acceleration structures
-  nvvk::DeviceMemoryAllocator m_dmaAllocator;
+  nvvk::AllocatorDma            m_alloc;  // Allocator for buffer, images, acceleration structures
+  nvvk::DeviceMemoryAllocator   m_memAllocator;
+  nvvk::StagingMemoryManagerDma m_staging;
 #endif
 ~~~~

@ -148,34 +149,24 @@ DMA needs to be initialized, which will be done in the `setup()` function:
 #if defined(ALLOC_DEDICATED)
  m_alloc.init(device, physicalDevice);
 #elif defined(ALLOC_DMA)
-  m_dmaAllocator.init(device, physicalDevice);
-  m_alloc.init(device, &m_dmaAllocator);
+  m_memAllocator.init(device, physicalDevice);
+  m_memAllocator.setAllocateFlags(VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR, true);
+  m_staging.init(m_memAllocator);
+  m_alloc.init(device, m_memAllocator, m_staging);
 #endif
 ~~~~

-When using DMA, memory buffer mapping is done through the DMA interface (instead of the VKDevice). Therefore, change the lines at the end of `updateUniformBuffer()` to
+When using DMA, memory buffer mapping is done through the DMA interface (instead of the VKDevice). 
+Therefore, change the lines at the end of `updateUniformBuffer()` to use the common allocator interface.

 ~~~~ C++
-#if defined(ALLOC_DEDICATED)
-  void* data = m_device.mapMemory(m_cameraMat.allocation, 0, sizeof(CameraMatrices));
-  memcpy(data, &ubo, sizeof(ubo));
-  m_device.unmapMemory(m_cameraMat.allocation);
-#elif defined(ALLOC_DMA)
-  void* data = m_dmaAllocator.map(m_cameraMat.allocation);
-  memcpy(data, &ubo, sizeof(ubo));
-  m_dmaAllocator.unmap(m_cameraMat.allocation);
-#endif
+void* data = m_alloc.map(m_cameraMat);
+memcpy(data, &ubo, sizeof(ubo));
+m_alloc.unmap(m_cameraMat);
 ~~~~

-The RaytracerBuilder was made to allow various allocators, but we still need to pass the right one in its setup function. Change the last line of `initRayTracing()` to
+The `RaytracingBuilder` was made to allow various allocators, so nothing needs to change in the call to `m_rtBuilder.setup()`.

-~~~~ C++
-#if defined(ALLOC_DEDICATED)
-  m_rtBuilder.setup(m_device, m_physicalDevice, m_graphicsQueueIndex);
-#elif defined(ALLOC_DMA)
-  m_rtBuilder.setup(m_device, m_dmaAllocator, m_graphicsQueueIndex);
-#endif
-~~~~

 ## Destruction

@ -204,7 +195,7 @@ We can also modify the code to use the [Vulkan Memory Allocator](https://github.

 Download [vk_mem_alloc.h](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator/blob/master/src/vk_mem_alloc.h) from GitHub and add this to the `shared_sources` folder.

-There is already a variation of the allocator for VMA, which is located under [nvpro-samples](https://github.com/nvpro-samples/shared_sources/tree/master/nvvkpp). This allocator has the same simple interface as the `AllocatorDedicated` class in `allocator_dedicated_vkpp.hpp`, but will use VMA for memory management.
+There is already a variation of the allocator for VMA, which is located under [nvpro-samples](https://github.com/nvpro-samples/shared_sources/tree/master/nvvk). This allocator has the same simple interface as the `AllocatorDedicated` class in `allocator_dedicated_vk.hpp`, but will use VMA for memory management.

 VMA might use dedicated memory, which we do, so you need to add the following extension to the 
 creation of the context in `main.cpp`.
@ -223,15 +214,16 @@ Follow the changes done before and add the following

 ~~~~ C++
 #elif defined(ALLOC_VMA)
-#include "nvvkpp/allocator_vma_vkpp.hpp"
-using nvvkBuffer  = nvvkpp::BufferVma;
-using nvvkTexture = nvvkpp::TextureVma;
+#include "nvvk/allocator_vma_vk.hpp"
+using nvvkBuffer  = nvvk::BufferVma;
+using nvvkTexture = nvvk::TextureVma;
 ~~~~

 ~~~~ C++ 
 #elif defined(ALLOC_VMA)
-  nvvkpp::AllocatorVma m_alloc;  // Allocator for buffer, images, acceleration structures
-  VmaAllocator         m_vmaAllocator;
+  nvvk::AllocatorVma            m_alloc;  // Allocator for buffer, images, acceleration structures
+  nvvk::StagingMemoryManagerVma m_staging;
+  VmaAllocator                  m_memAllocator;
 ~~~~


@ -247,21 +239,13 @@ In `setup()`
 ~~~~ C++
 #elif defined(ALLOC_VMA)
  VmaAllocatorCreateInfo allocatorInfo = {};
+  allocatorInfo.instance               = instance;
  allocatorInfo.physicalDevice         = physicalDevice;
  allocatorInfo.device                 = device;
  allocatorInfo.flags                  = VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT;
-  vmaCreateAllocator(&allocatorInfo, &m_vmaAllocator);
-  m_alloc.init(device, m_vmaAllocator);
-~~~~
-
-In `updateUniformBuffer()`
-
-~~~~ C++
-#elif defined(ALLOC_VMA)
-  void* data;
-  vmaMapMemory(m_vmaAllocator, m_cameraMat.allocation, &data);
-  memcpy(data, &ubo, sizeof(ubo));
-  vmaUnmapMemory(m_vmaAllocator, m_cameraMat.allocation);
+  vmaCreateAllocator(&allocatorInfo, &m_memAllocator);
+  m_staging.init(device, physicalDevice, m_memAllocator);
+  m_alloc.init(device, m_memAllocator, m_staging);
 ~~~~

 In `destroyResources()`
@ -271,25 +255,6 @@ In `destroyResources()`
  vmaDestroyAllocator(m_vmaAllocator);
 ~~~~

-In `initRayTracing()`
-
-~~~~ C++
-#elif defined(ALLOC_VMA)
-  m_rtBuilder.setup(m_device, m_vmaAllocator, m_graphicsQueueIndex);
-~~~~
-
-Additionally, VMA has its own usage flags, so since `VMA_MEMORY_USAGE_CPU_TO_GPU` maps to `vkMP::eHostVisible` and `vkMP::eHostCoherent`, change the call to `m_alloc.createBuffer` in `HelloVulkan::createUniformBuffer()` to
-
-~~~~ C++
-  m_cameraMat = m_alloc.createBuffer(sizeof(CameraMatrices), vkBU::eUniformBuffer,
-#if defined(ALLOC_DEDICATED) || defined(ALLOC_DMA)
-                                     vkMP::eHostVisible | vkMP::eHostCoherent
-#elif defined(ALLOC_VMA)
-                                     VMA_MEMORY_USAGE_CPU_TO_GPU
-#endif
-  );
-~~~~
-

 # Final Code

--- a/docs/vkrt_tuto_intersection.md.html
+++ b/docs/vkrt_tuto_intersection.md.html
@ -65,8 +65,8 @@ All the information will need to be hold in buffers, which will be available to
 Finally, there are two functions, one to create the spheres, and one that will create the intermediate structure for the BLAS.

 ~~~~ C++
-  void                                createSpheres();
-  nvvkpp::RaytracingBuilderKHR::Blas  sphereToVkGeometryKHR();
+  void                              createSpheres();
+  nvvk::RaytracingBuilderKHR::Blas  sphereToVkGeometryKHR();
 ~~~~

 The following implementation will create 2.000.000 spheres at random positions and with random radii. It will create the Aabb from each sphere definition, and two materials which will be assigned alternately to the objects. All the created information will be moved to Vulkan buffers to be accessed by the intersection and closest-hit shaders.
@ -120,13 +120,13 @@ void HelloVulkan::createSpheres()

  // Creating all buffers
  using vkBU = vk::BufferUsageFlagBits;
-  nvvkpp::SingleCommandBuffer genCmdBuf(m_device, m_graphicsQueueIndex);
-  auto                        cmdBuf = genCmdBuf.createCommandBuffer();
-  m_spheresBuffer         = m_alloc.createBuffer(cmdBuf, m_spheres, vkBU::eStorageBuffer);
-  m_spheresAabbBuffer     = m_alloc.createBuffer(cmdBuf, aabbs);
-  m_spheresMatIndexBuffer = m_alloc.createBuffer(cmdBuf, matIdx, vkBU::eStorageBuffer);
-  m_spheresMatColorBuffer = m_alloc.createBuffer(cmdBuf, materials, vkBU::eStorageBuffer);
-  genCmdBuf.flushCommandBuffer(cmdBuf);
+  nvvk::CommandPool genCmdBuf(m_device, m_graphicsQueueIndex);
+  auto              cmdBuf = genCmdBuf.createCommandBuffer();
+  m_spheresBuffer          = m_alloc.createBuffer(cmdBuf, m_spheres, vkBU::eStorageBuffer);
+  m_spheresAabbBuffer      = m_alloc.createBuffer(cmdBuf, aabbs, vkBU::eShaderDeviceAddress);
+  m_spheresMatIndexBuffer  = m_alloc.createBuffer(cmdBuf, matIdx, vkBU::eStorageBuffer);
+  m_spheresMatColorBuffer  = m_alloc.createBuffer(cmdBuf, materials, vkBU::eStorageBuffer);
+  genCmdBuf.submitAndWait(cmdBuf);

  // Debug information
  m_debug.setObjectName(m_spheresBuffer.buffer, "spheres");
@ -153,7 +153,7 @@ What is changing compare to triangle primitive is the Aabb data (see Aabb struct
 //--------------------------------------------------------------------------------------------------
 // Returning the ray tracing geometry used for the BLAS, containing all spheres
 //
-nvvkpp::RaytracingBuilderKHR::Blas HelloVulkan::sphereToVkGeometryKHR()
+nvvk::RaytracingBuilderKHR::Blas HelloVulkan::sphereToVkGeometryKHR()
 {
  vk::AccelerationStructureCreateGeometryTypeInfoKHR asCreate;
  asCreate.setGeometryType(vk::GeometryTypeKHR::eAabbs);
@ -181,7 +181,7 @@ nvvkpp::RaytracingBuilderKHR::Blas HelloVulkan::sphereToVkGeometryKHR()
  offset.setPrimitiveOffset(0);
  offset.setTransformOffset(0);

-  nvvkpp::RaytracingBuilderKHR::Blas blas;
+  nvvk::RaytracingBuilderKHR::Blas blas;
  blas.asGeometry.emplace_back(asGeom);
  blas.asCreateGeometryInfo.emplace_back(asCreate);
  blas.asBuildOffsetInfo.emplace_back(offset);
@ -217,7 +217,7 @@ The function `createBottomLevelAS()` is creating a BLAS per OBJ, the following m
 void HelloVulkan::createBottomLevelAS()
 {
  // BLAS - Storing each primitive in a geometry
-  std::vector<nvvkpp::RaytracingBuilderKHR::Blas> allBlas;
+  std::vector<nvvk::RaytracingBuilderKHR::Blas> allBlas;
  allBlas.reserve(m_objModel.size());
  for(const auto& obj : m_objModel)
  {
@ -248,7 +248,7 @@ Just before building the TLAS, we need to add the following
 ~~~~ C++
  // Add the blas containing all spheres
  {
-    nvvkpp::RaytracingBuilder::Instance rayInst;
+    nvvk::RaytracingBuilder::Instance rayInst;
    rayInst.transform  = m_objInstance[0].transform;          // Position of the instance
    rayInst.instanceId = static_cast<uint32_t>(tlas.size());  // gl_InstanceID
    rayInst.blasId     = static_cast<uint32_t>(m_objModel.size());
@ -301,7 +301,7 @@ Then write the buffer for the spheres

 ~~~~ C++
  vk::DescriptorBufferInfo dbiSpheres{m_spheresBuffer.buffer, 0, VK_WHOLE_SIZE};
-  writes.emplace_back(nvvkpp::util::createWrite(m_descSet, m_descSetLayoutBind[7], &dbiSpheres));
+  writes.emplace_back(m_descSetLayoutBind.makeWrite(m_descSet, 7, dbiSpheres));
 ~~~~

 ## Intersection Shader
@ -313,13 +313,13 @@ Here is how the two hit group looks like:
 ~~~~ C++
  // Hit Group0 - Closest Hit
  vk::ShaderModule chitSM =
-      nvvkpp::util::createShaderModule(m_device,  //
-                                       nvh::loadFile("shaders/raytrace.rchit.spv", true, paths));
+      nvvk::createShaderModule(m_device,  //
+                               nvh::loadFile("shaders/raytrace.rchit.spv", true, paths));

  {
    vk::RayTracingShaderGroupCreateInfoKHR hg{vk::RayTracingShaderGroupTypeKHR::eTrianglesHitGroup,
-                                             VK_SHADER_UNUSED_KHR, VK_SHADER_UNUSED_KHR,
-                                             VK_SHADER_UNUSED_KHR, VK_SHADER_UNUSED_KHR};
+                                              VK_SHADER_UNUSED_KHR, VK_SHADER_UNUSED_KHR,
+                                              VK_SHADER_UNUSED_KHR, VK_SHADER_UNUSED_KHR};
    stages.push_back({{}, vk::ShaderStageFlagBits::eClosestHitKHR, chitSM, "main"});
    hg.setClosestHitShader(static_cast<uint32_t>(stages.size() - 1));
    m_rtShaderGroups.push_back(hg);
@ -327,15 +327,15 @@ Here is how the two hit group looks like:

  // Hit Group1 - Closest Hit + Intersection (procedural)
  vk::ShaderModule chit2SM =
-      nvvkpp::util::createShaderModule(m_device,  //
-                                       nvh::loadFile("shaders/raytrace2.rchit.spv", true, paths));
+      nvvk::createShaderModule(m_device,  //
+                               nvh::loadFile("shaders/raytrace2.rchit.spv", true, paths));
  vk::ShaderModule rintSM =
-      nvvkpp::util::createShaderModule(m_device,  //
-                                       nvh::loadFile("shaders/raytrace.rint.spv", true, paths));
+      nvvk::createShaderModule(m_device,  //
+                               nvh::loadFile("shaders/raytrace.rint.spv", true, paths));
  {
    vk::RayTracingShaderGroupCreateInfoKHR hg{vk::RayTracingShaderGroupTypeKHR::eProceduralHitGroup,
-                                             VK_SHADER_UNUSED_KHR, VK_SHADER_UNUSED_KHR,
-                                             VK_SHADER_UNUSED_KHR, VK_SHADER_UNUSED_KHR};
+                                              VK_SHADER_UNUSED_KHR, VK_SHADER_UNUSED_KHR,
+                                              VK_SHADER_UNUSED_KHR, VK_SHADER_UNUSED_KHR};
    stages.push_back({{}, vk::ShaderStageFlagBits::eClosestHitKHR, chit2SM, "main"});
    hg.setClosestHitShader(static_cast<uint32_t>(stages.size() - 1));
    stages.push_back({{}, vk::ShaderStageFlagBits::eIntersectionKHR, rintSM, "main"});
--- a/docs/vkrt_tuto_manyhits.md.htm
+++ b/docs/vkrt_tuto_manyhits.md.htm
@ -60,8 +60,8 @@ This new shader needs to be added to the raytracing pipeline. So, in `createRtPi

 ~~~~ C++
  vk::ShaderModule chit2SM =
-      nvvkpp::util::createShaderModule(m_device,  //
-                                       nvh::loadFile("shaders/raytrace2.rchit.spv", true, paths));
+      nvvk::createShaderModule(m_device,  //
+                               nvh::loadFile("shaders/raytrace2.rchit.spv", true, paths));
 ~~~~

 Then add a new hit group immediately after adding the first hit group:
--- a/docs/vkrt_tuto_rayquery.md.htm
+++ b/docs/vkrt_tuto_rayquery.md.htm
@ -30,13 +30,13 @@ Remove most functions and members to keep only what is need to create the accele

 ~~~~ C++
 // #VKRay
-void                               initRayTracing();
-nvvkpp::RaytracingBuilderKHR::Blas objectToVkGeometryKHR(const ObjModel& model);
-void                               createBottomLevelAS();
-void                               createTopLevelAS();
+void                             initRayTracing();
+nvvk::RaytracingBuilderKHR::Blas objectToVkGeometryKHR(const ObjModel& model);
+void                             createBottomLevelAS();
+void                             createTopLevelAS();

 vk::PhysicalDeviceRayTracingPropertiesKHR m_rtProperties;
-nvvkpp::RaytracingBuilderKHR              m_rtBuilder;
+nvvk::RaytracingBuilderKHR                m_rtBuilder;
 ~~~~ 

 ## hello_vulkan (source)
@ -61,10 +61,11 @@ m_descSetLayoutBind.emplace_back(  //
 In `HelloVulkan::updateDescriptorSet`, write the value to the descriptor set.

 ~~~~ C++
-vk::WriteDescriptorSetAccelerationStructureKHR descASInfo;
-descASInfo.setAccelerationStructureCount(1);
-descASInfo.setPAccelerationStructures(&m_rtBuilder.getAccelerationStructure());
-writes.emplace_back(nvvkpp::util::createWrite(m_descSet, m_descSetLayoutBind[7], &descASInfo));
+  vk::AccelerationStructureKHR                   tlas = m_rtBuilder.getAccelerationStructure();
+  vk::WriteDescriptorSetAccelerationStructureKHR descASInfo;
+  descASInfo.setAccelerationStructureCount(1);
+  descASInfo.setPAccelerationStructures(&tlas);
+  writes.emplace_back(m_descSetLayoutBind.makeWrite(m_descSet, 7, descASInfo));
 ~~~~ 


--- a/docs/vkrt_tutorial.md.htm
+++ b/docs/vkrt_tutorial.md.htm
@ -14,11 +14,14 @@ methods and functions. The sections are organized by components, with subsection

 ![Final Result](Images/resultRaytraceShadowMedieval.png width="350px")

+!!! Note GitHub repository 
+    https://github.com/nvpro-samples/vk_raytracing_tutorial_KHR
+
 # Introduction
 <script type="preformatted">
 This tutorial highlights the steps to add ray tracing to an existing Vulkan application, and assumes a working knowledge
 of Vulkan in general. The code verbosity of classical components such as swapchain management, render passes etc. is
-reduced using [C++ API helpers](https://github.com/nvpro-samples/shared_sources/tree/master/nvvkpp) and
+reduced using [C++ API helpers](https://github.com/nvpro-samples/shared_sources/tree/master/nvvk) and
 NVIDIA's [nvpro-samples](https://github.com/nvpro-samples/build_all) framework. This framework contains many advanced
 examples and best practices for Vulkan and OpenGL. We also use a helper for the creation of the ray tracing acceleration
 structures, but we will document its contents extensively in this tutorial. The code is further simplified by using the
@ -36,17 +39,15 @@ verbosity and its potential for errors.

 ## Beta Installation

-If you are in the Beta period, install and compile all of the following
+The Vulkan SDK 1.2.135 and later, which can be found at https://vulkan.lunarg.com/sdk/home, will work with this project.
+
+Nevertheless, if you are in the Beta period, it is suggested to install and compile all of the following and use them
+in place of the corresponding components of your current environment.

 * Latest driver: https://developer.nvidia.com/vulkan-driver
 * Latest Vulkan headers: https://github.com/KhronosGroup/Vulkan-Headers
 * Latest glslangValidator: https://github.com/KhronosGroup/glslang 

-!!! Warning Beta
-    Copy/replace `glslangValidator.exe` in the VulkanSDK bin directory.<br>
-    Ex: `C:\VulkanSDK\1.2.131.1\Bin`
-
-
 ## Structure

 This tutorial is a modification of [`ray_tracing__before`](https://github.com/nvpro-samples/vk_raytracing_tutorial_KHR/tree/master/ray_tracing__before), which loads and renders OBJ scenes with the Vulkan rasterizer. 
@ -83,6 +84,10 @@ The directory structure should be looking like:
 !!! Warning Beta
    Modify `VULKAN > VULKAN_HEADERS_OVERRIDE_INCLUDE_DIR` to the path to beta vulkan headers.

+!!! Tip Visual Assist
+    To get auto-completion, edit vulkan.hpp and change two places from:<br>
+    `namespace VULKAN_HPP_NAMESPACE` to `namespace vk` 
+
 The starting project is a simple framework allowing us to load OBJ files and rasterize them
 using Vulkan.

@ -93,7 +98,7 @@ using Vulkan.
 # Ray Tracing Setup

 Go to the `main` function of the `main.cpp` file, and find where we request Vulkan extensions with
-`nvvkpp::ContextCreateInfo`.
+`nvvk::ContextCreateInfo`.
 To request ray tracing capabilities, we need to explicitly
 add the
 [VK_KHR_ray_tracing](https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/vkspec.html#VK_KHR_ray_tracing)
@ -183,32 +188,32 @@ In the header file, include the`raytrace_vkpp` helper
 ```` C
 // #VKRay
 #define ALLOC_DEDICATED
-#include "nvvkpp/raytrace_vkpp.hpp"
+#include "nvvk/raytrace_vk.hpp"
 ````

 so that we can add that helper as a member in the `HelloVulkan` class,

 ```` C
-nvvkpp::RaytracingBuilder m_rtBuilder;
+nvvk::RaytracingBuilder m_rtBuilder;
 ````

 and initialize it at the end of `initRaytracing()`:

 ```` C
-m_rtBuilder.setup(m_device, m_physicalDevice, m_graphicsQueueIndex);
+m_rtBuilder.setup(m_device, m_alloc, m_graphicsQueueIndex);
 ````

 ## Bottom-Level Acceleration Structure

 The first step of building a BLAS object consists in converting the geometry data of an `ObjModel` into
 multiple structures that can be used by the AS builder. We are holding all those structures under 
-`nvvkpp::RaytracingBuilderKHR::Blas`
+`nvvk::RaytracingBuilderKHR::Blas`

 Add a new method to the `HelloVulkan`
 class:

 ```` C
-nvvkpp::RaytracingBuilderKHR::Blas objectToVkGeometryKHR(const ObjModel& model);
+nvvk::RaytracingBuilderKHR::Blas objectToVkGeometryKHR(const ObjModel& model);
 ````

 Its implementation will fill three structures
@ -226,7 +231,7 @@ potential optimization.
 //--------------------------------------------------------------------------------------------------
 // Converting a OBJ primitive to the ray tracing geometry used for the BLAS
 //
-nvvkpp::RaytracingBuilderKHR::Blas HelloVulkan::objectToVkGeometryKHR(const ObjModel& model)
+nvvk::RaytracingBuilderKHR::Blas HelloVulkan::objectToVkGeometryKHR(const ObjModel& model)
 {
  // Setting up the creation info of acceleration structure
  vk::AccelerationStructureCreateGeometryTypeInfoKHR asCreate;
@ -263,7 +268,7 @@ nvvkpp::RaytracingBuilderKHR::Blas HelloVulkan::objectToVkGeometryKHR(const ObjM
  offset.setTransformOffset(0);

  // Our blas is only one geometry, but could be made of many geometries
-  nvvkpp::RaytracingBuilderKHR::Blas blas;
+  nvvk::RaytracingBuilderKHR::Blas blas;
  blas.asGeometry.emplace_back(asGeom);
  blas.asCreateGeometryInfo.emplace_back(asCreate);
  blas.asBuildOffsetInfo.emplace_back(offset);
@ -273,13 +278,13 @@ nvvkpp::RaytracingBuilderKHR::Blas HelloVulkan::objectToVkGeometryKHR(const ObjM
 ````

 In the `HelloVulkan` class declaration, we can now add the `createBottomLevelAS()` method that will generate a
-`nvvkpp::RaytracingBuilderKHR::Blas` for each object, and trigger a BLAS build:
+`nvvk::RaytracingBuilderKHR::Blas` for each object, and trigger a BLAS build:

 ```` C
 void createBottomLevelAS();
 ````

-The implementation loops over all the loaded models and fills in an array of `nvvkpp::RaytracingBuilderKHR::Blas` before
+The implementation loops over all the loaded models and fills in an array of `nvvk::RaytracingBuilderKHR::Blas` before
 triggering a build of all BLAS's in a batch. The resulting acceleration structures will be stored
 within the helper in the order of construction, so that they can be directly referenced by index later.

@ -287,7 +292,7 @@ within the helper in the order of construction, so that they can be directly ref
 void HelloVulkan::createBottomLevelAS()
 {
  // BLAS - Storing each primitive in a geometry
-  std::vector<nvvkpp::RaytracingBuilderKHR::Blas> allBlas;
+  std::vector<nvvk::RaytracingBuilderKHR::Blas> allBlas;
  allBlas.reserve(m_objModel.size());
  for(const auto& obj : m_objModel)
  {
@ -308,27 +313,34 @@ part of the set of helpers provided by the [nvpro-samples](https://github.com/nv
 will generate one BLAS for each `RaytracingBuilderKHR::Blas`:

 ```` C
-void buildBlas(const std::vector<RaytracingBuilderKHR::Blas>& blas_,
-  vk::BuildAccelerationStructureFlagsKHR flags = vk::BuildAccelerationStructureFlagBitsKHR::ePreferFastTrace)
-{
-  m_blas = blas_;  // Keeping a copy
-
-  vk::DeviceSize maxScratch{0};
-
-  // Iterate over the groups of geometries, creating one BLAS for each group
-  int idx{0};
-  for(auto& blas : m_blas)
+  void buildBlas(const std::vector<RaytracingBuilderKHR::Blas>& blas_,
+                 VkBuildAccelerationStructureFlagsKHR flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR)
  {
+    m_blas = blas_;  // Keeping a copy
+
+    VkDeviceSize maxScratch{0};  // Largest scratch buffer for our BLAS
+
+    // Is compaction requested?
+    bool doCompaction = (flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR)
+                        == VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR;
+    std::vector<VkDeviceSize> originalSizes;
+    originalSizes.resize(m_blas.size());
+
+    // Iterate over the groups of geometries, creating one BLAS for each group
+    int idx{0};
+    for(auto& blas : m_blas)
+    {
 ````

 The creation of the acceleration structure needs all the `vk::AccelerationStructureCreateGeometryTypeInfoKHR` previously filled in, 
 which are then set into `vk::AccelerationStructureCreateInfoKHR`.

 ```` C
-vk::AccelerationStructureCreateInfoKHR asCreateInfo{{}, vk::AccelerationStructureTypeKHR::eBottomLevel};
-asCreateInfo.setFlags(flags);
-asCreateInfo.setMaxGeometryCount((uint32_t)blas.asCreateGeometryInfo.size());
-asCreateInfo.setPGeometryInfos(blas.asCreateGeometryInfo.data());
+VkAccelerationStructureCreateInfoKHR asCreateInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR};
+asCreateInfo.type             = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
+asCreateInfo.flags            = flags;
+asCreateInfo.maxGeometryCount = (uint32_t)blas.asCreateGeometryInfo.size();
+asCreateInfo.pGeometryInfos   = blas.asCreateGeometryInfo.data();
 ````

 The creation information is then passed to the allocator, that will internally create an acceleration structure handle.
@ -336,88 +348,186 @@ It will also query `vk::Device::getAccelerationStructureMemoryRequirementsKHR` t
 and allocate memory accordingly.

 ```` C
-    // Create an acceleration structure identifier and allocate memory to store the
-    // resulting structure data
-    blas.as = m_alloc.createAcceleration(createinfo);
-    m_debug.setObjectName(blas.as.accel, (std::string("Blas" + std::to_string(idx)).c_str()));
+// Create an acceleration structure identifier and allocate memory to
+// store the resulting structure data
+blas.as = m_alloc.createAcceleration(asCreateInfo);
+m_debug.setObjectName(blas.as.accel, (std::string("Blas" + std::to_string(idx)).c_str()));
 ````

 The acceleration structure builder requires some scratch memory to generate the BLAS. Since we generate all the
 BLAS's in a batch, we query the scratch memory requirements for each BLAS, and find the maximum such requirement.

 ```` C
-    // Estimate the amount of scratch memory required to build the BLAS, and update the
-    // size of the scratch buffer that will be allocated to sequentially build all BLASes
-    vk::AccelerationStructureMemoryRequirementsInfoKHR memoryRequirementsInfo{
-        vk::AccelerationStructureMemoryRequirementsTypeKHR::eBuildScratch,
-        vk::AccelerationStructureBuildTypeKHR::eDevice, blas.as.accel};
-    vk::DeviceSize scratchSize =
-        m_device.getAccelerationStructureMemoryRequirementsKHR(memoryRequirementsInfo).memoryRequirements.size;
-    
-    blas.flags = flags;
-    maxScratch = std::max(maxScratch, scratchSize);
-    idx++;
-  }
+// Estimate the amount of scratch memory required to build the BLAS, and
+// update the size of the scratch buffer that will be allocated to
+// sequentially build all BLASes
+VkAccelerationStructureMemoryRequirementsInfoKHR memoryRequirementsInfo{
+    VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_KHR};
+memoryRequirementsInfo.type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_BUILD_SCRATCH_KHR;
+memoryRequirementsInfo.accelerationStructure = blas.as.accel;
+memoryRequirementsInfo.buildType             = VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR;
+
+VkMemoryRequirements2 reqMem{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
+vkGetAccelerationStructureMemoryRequirementsKHR(m_device, &memoryRequirementsInfo, &reqMem);
+VkDeviceSize scratchSize = reqMem.memoryRequirements.size;
+
+
+blas.flags = flags;
+maxScratch = std::max(maxScratch, scratchSize);
+
+// Original size
+memoryRequirementsInfo.type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_KHR;
+vkGetAccelerationStructureMemoryRequirementsKHR(m_device, &memoryRequirementsInfo, &reqMem);
+originalSizes[idx] = reqMem.memoryRequirements.size;
+
+idx++;
+}
 ````

 Once that maximum has been found, we allocate a scratch buffer.

 ```` C
 // Allocate the scratch buffers holding the temporary data of the acceleration structure builder
-nvvkBuffer        scratchBuffer  = m_alloc.createBuffer(maxScratch, vk::BufferUsageFlagBits::eRayTracingKHR
-                                                                | vk::BufferUsageFlagBits::eShaderDeviceAddress);
-vk::DeviceAddress scratchAddress = m_device.getBufferAddress({scratchBuffer.buffer});
+nvvkBuffer scratchBuffer =
+    m_alloc.createBuffer(maxScratch, VK_BUFFER_USAGE_RAY_TRACING_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
+VkBufferDeviceAddressInfo bufferInfo{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO};
+bufferInfo.buffer              = scratchBuffer.buffer;
+VkDeviceAddress scratchAddress = vkGetBufferDeviceAddress(m_device, &bufferInfo);
 ````

+To find out how much memory each BLAS actually uses, we use queries.
+
+```` C
+// Query size of compact BLAS
+VkQueryPoolCreateInfo qpci{VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO};
+qpci.queryCount = (uint32_t)m_blas.size();
+qpci.queryType  = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR;
+VkQueryPool queryPool;
+vkCreateQueryPool(m_device, &qpci, nullptr, &queryPool);
+```` 
+
 We then use a one-time command buffer to launch all the BLAS builds. Note the barrier after each
 build call: this is required as we reuse the scratch space across builds, and hence need to ensure
 the previous build has completed before starting the next.

 ```` C
-// Create a command buffer containing all the BLAS builds
-nvvkpp::SingleCommandBuffer genCmdBuf(m_device, m_queueIndex);
-vk::CommandBuffer           cmdBuf = genCmdBuf.createCommandBuffer();
+// Query size of compact BLAS
+VkQueryPoolCreateInfo qpci{VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO};
+qpci.queryCount = (uint32_t)m_blas.size();
+qpci.queryType  = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR;
+VkQueryPool queryPool;
+vkCreateQueryPool(m_device, &qpci, nullptr, &queryPool);

+
+// Create a command buffer containing all the BLAS builds
+nvvk::CommandPool genCmdBuf(m_device, m_queueIndex);
+VkCommandBuffer   cmdBuf = genCmdBuf.createCommandBuffer();
+int               ctr{0};
 for(auto& blas : m_blas)
 {
-  const vk::AccelerationStructureGeometryKHR*   pGeometry = blas.asGeometry.data();
-  vk::AccelerationStructureBuildGeometryInfoKHR bottomASInfo{vk::AccelerationStructureTypeKHR::eBottomLevel};
-  bottomASInfo.setFlags(flags);
-  bottomASInfo.setUpdate(VK_FALSE);
-  bottomASInfo.setSrcAccelerationStructure({});
-  bottomASInfo.setDstAccelerationStructure(blas.as.accel);
-  bottomASInfo.setGeometryArrayOfPointers(VK_FALSE);
-  bottomASInfo.setGeometryCount((uint32_t)blas.asGeometry.size());
-  bottomASInfo.setPpGeometries(&pGeometry);
-  bottomASInfo.setScratchData(scratchAddress);
+  const VkAccelerationStructureGeometryKHR* pGeometry = blas.asGeometry.data();
+  VkAccelerationStructureBuildGeometryInfoKHR bottomASInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR};
+  bottomASInfo.type                      = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
+  bottomASInfo.flags                     = flags;
+  bottomASInfo.update                    = VK_FALSE;
+  bottomASInfo.srcAccelerationStructure  = VK_NULL_HANDLE;
+  bottomASInfo.dstAccelerationStructure  = blas.as.accel;
+  bottomASInfo.geometryArrayOfPointers   = VK_FALSE;
+  bottomASInfo.geometryCount             = (uint32_t)blas.asGeometry.size();
+  bottomASInfo.ppGeometries              = &pGeometry;
+  bottomASInfo.scratchData.deviceAddress = scratchAddress;

  // Pointers of offset
-  std::vector<const vk::AccelerationStructureBuildOffsetInfoKHR*> pBuildOffset(blas.asBuildOffsetInfo.size());
+  std::vector<const VkAccelerationStructureBuildOffsetInfoKHR*> pBuildOffset(blas.asBuildOffsetInfo.size());
  for(size_t i = 0; i < blas.asBuildOffsetInfo.size(); i++)
    pBuildOffset[i] = &blas.asBuildOffsetInfo[i];

  // Building the AS
-  cmdBuf.buildAccelerationStructureKHR(bottomASInfo, pBuildOffset);
+  vkCmdBuildAccelerationStructureKHR(cmdBuf, 1, &bottomASInfo, pBuildOffset.data());

  // Since the scratch buffer is reused across builds, we need a barrier to ensure one build
  // is finished before starting the next one
-  vk::MemoryBarrier barrier(vk::AccessFlagBits::eAccelerationStructureWriteKHR, vk::AccessFlagBits::eAccelerationStructureWriteKHR);
-  cmdBuf.pipelineBarrier(vk::PipelineStageFlagBits::eAccelerationStructureBuildKHR,
-                         vk::PipelineStageFlagBits::eAccelerationStructureBuildKHR, {}, {barrier}, {}, {});
+  VkMemoryBarrier barrier{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
+  barrier.srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
+  barrier.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
+  vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
+                       VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, 0, 1, &barrier, 0, nullptr, 0, nullptr);
+
+  // Query the compact size
+  if(doCompaction)
+  {
+    vkCmdWriteAccelerationStructuresPropertiesKHR(cmdBuf, 1, &blas.as.accel,
+                                                  VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR, queryPool, ctr++);
+  }
+}
+genCmdBuf.submitAndWait(cmdBuf);
+````
+
+While this approach has the advantage of keeping all BLAS's independent, building many BLAS's efficiently would
+require allocating a larger scratch buffer, and launch several builds simultaneously. This tutorial also
+does not use compaction, which could reduce significantly the memory footprint of the acceleration structures. Both
+of those aspects will be part of a future advanced tutorial.
+We finally execute the command buffer and clean up the allocator's scratch memory and staging buffer:
+
+This part, which is optional, will compact each BLAS into the memory that it is really using. It needs to wait until all BLASes
+are constructed, and then makes a copy of each into a more tightly fitted memory space.
+
+```` C
+
+// Compacting all BLAS
+if(doCompaction)
+{
+  cmdBuf = genCmdBuf.createCommandBuffer();
+
+  // Get the size result back
+  std::vector<VkDeviceSize> compactSizes(m_blas.size());
+  vkGetQueryPoolResults(m_device, queryPool, 0, (uint32_t)compactSizes.size(), compactSizes.size() * sizeof(VkDeviceSize),
+                        compactSizes.data(), sizeof(VkDeviceSize), VK_QUERY_RESULT_WAIT_BIT);
+
+
+  // Compacting
+  std::vector<nvvkAccel> cleanupAS(m_blas.size());
+  uint32_t               totOriginalSize{0}, totCompactSize{0};
+  for(int i = 0; i < m_blas.size(); i++)
+  {
+    LOGI("Reducing %i, from %d to %d \n", i, originalSizes[i], compactSizes[i]);
+    totOriginalSize += (uint32_t)originalSizes[i];
+    totCompactSize += (uint32_t)compactSizes[i];
+
+    // Creating a compact version of the AS
+    VkAccelerationStructureCreateInfoKHR asCreateInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR};
+    asCreateInfo.compactedSize = compactSizes[i];
+    asCreateInfo.type          = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
+    asCreateInfo.flags         = flags;
+    auto as                    = m_alloc.createAcceleration(asCreateInfo);
+
+    // Copy the original BLAS to a compact version
+    VkCopyAccelerationStructureInfoKHR copyInfo{VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR};
+    copyInfo.src  = m_blas[i].as.accel;
+    copyInfo.dst  = as.accel;
+    copyInfo.mode = VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR;
+    vkCmdCopyAccelerationStructureKHR(cmdBuf, &copyInfo);
+    cleanupAS[i] = m_blas[i].as;
+    m_blas[i].as = as;
+  }
+  genCmdBuf.submitAndWait(cmdBuf);
+
+  // Destroying the previous version
+  for(auto as : cleanupAS)
+    m_alloc.destroy(as);
+
+  LOGI("------------------\n");
+  LOGI("Total: %d -> %d = %d (%2.2f%s smaller) \n", totOriginalSize, totCompactSize,
+       totOriginalSize - totCompactSize, (totOriginalSize - totCompactSize) / float(totOriginalSize) * 100.f, "%%");
 }
 ````

- While this approach has the advantage of keeping all BLAS's independent, building many BLAS's efficiently would
- require allocating a larger scratch buffer, and launch several builds simultaneously. This tutorial also
- does not use compaction, which could reduce significantly the memory footprint of the acceleration structures. Both
- of those aspects will be part of a future advanced tutorial.
-
-We finally execute the command buffer and clean up the allocator's scratch memory and staging buffer:
+Finally, we destroy what was allocated.

 ```` C
-  genCmdBuf.flushCommandBuffer(cmdBuf);
+  vkDestroyQueryPool(m_device, queryPool, nullptr);
  m_alloc.destroy(scratchBuffer);
-  m_alloc.flushStaging();
+  m_alloc.finalizeAndReleaseStaging();
 }
 ````

@ -430,7 +540,7 @@ to the `HelloVulkan` class:
 void createTopLevelAS();
 ````

-An instance is represented by a `nvvkpp::RaytracingBuilder::Instance`, which stores its transform matrix (`transform`)
+An instance is represented by a `nvvk::RaytracingBuilder::Instance`, which stores its transform matrix (`transform`)
 and the identifier of its corresponding BLAS (`blasId`). It also contains an instance identifier that will be available
 during shading as `gl_InstanceCustomIndex`, as well as the index of the hit group that represents the shaders that will be
 invoked upon hitting the object (`hitGroupId`).
@ -447,16 +557,16 @@ optimized for tracing performance (rather than AS size, for example).
 ```` C
 void HelloVulkan::createTopLevelAS()
 {
-  std::vector<nvvkpp::RaytracingBuilderKHR::Instance> tlas;
+  std::vector<nvvk::RaytracingBuilderKHR::Instance> tlas;
  tlas.reserve(m_objInstance.size());
  for(int i = 0; i < static_cast<int>(m_objInstance.size()); i++)
  {
-    nvvkpp::RaytracingBuilderKHR::Instance rayInst;
+    nvvk::RaytracingBuilderKHR::Instance rayInst;
    rayInst.transform  = m_objInstance[i].transform;  // Position of the instance
    rayInst.instanceId = i;                           // gl_InstanceID
    rayInst.blasId     = m_objInstance[i].objIndex;
    rayInst.hitGroupId = 0;  // We will use the same hit group for all objects
-    rayInst.flags      = vk::GeometryInstanceFlagBitsKHR::eTriangleCullDisable;
+    rayInst.flags      = VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR;
    tlas.emplace_back(rayInst);
  }
  m_rtBuilder.buildTlas(tlas, vk::BuildAccelerationStructureFlagBitsKHR::ePreferFastTrace);
@ -480,21 +590,21 @@ the number of instances it will hold, and flags indicating preferences for the b
 builds or better performance.

 ```` C
-void buildTlas(const std::vector<Instance>&          instances,
-                 vk::BuildAccelerationStructureFlagsKHR flags =
-                     vk::BuildAccelerationStructureFlagBitsKHR::ePreferFastTrace)
-{
-  m_tlas.flags = flags;
-
+  void buildTlas(const std::vector<Instance>&         instances,
+                 VkBuildAccelerationStructureFlagsKHR flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR)
  {
-    vk::AccelerationStructureCreateGeometryTypeInfoKHR geometryCreate{vk::GeometryTypeKHR::eInstances};
-    geometryCreate.setMaxPrimitiveCount(static_cast<uint32_t>(instances.size()));
-    geometryCreate.setAllowsTransforms(VK_TRUE);
-  
-    vk::AccelerationStructureCreateInfoKHR asCreateInfo{{}, vk::AccelerationStructureTypeKHR::eTopLevel};
-    asCreateInfo.setFlags(flags);
-    asCreateInfo.setMaxGeometryCount(1);
-    asCreateInfo.setPGeometryInfos(&geometryCreate);
+    m_tlas.flags = flags;
+
+    VkAccelerationStructureCreateGeometryTypeInfoKHR geometryCreate{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_GEOMETRY_TYPE_INFO_KHR};
+    geometryCreate.geometryType      = VK_GEOMETRY_TYPE_INSTANCES_KHR;
+    geometryCreate.maxPrimitiveCount = (static_cast<uint32_t>(instances.size()));
+    geometryCreate.allowsTransforms  = (VK_TRUE);
+
+    VkAccelerationStructureCreateInfoKHR asCreateInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR};
+    asCreateInfo.type             = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
+    asCreateInfo.flags            = flags;
+    asCreateInfo.maxGeometryCount = 1;
+    asCreateInfo.pGeometryInfos   = &geometryCreate;
 ````

 We then call the allocator, which will create an acceleration structure handle for the TLAS. It will also query the
@ -502,10 +612,10 @@ resulting size of the TLAS using `vk::Device::getAccelerationStructureMemoryRequ
 amount of memory:

 ```` C
-      // Create the acceleration structure object and allocate the memory required to hold the TLAS data
-      m_tlas.as = m_alloc.createAcceleration(asCreateInfo);
-      m_debug.setObjectName(m_tlas.as.accel, "Tlas");
-    }
+    // Create the acceleration structure object and allocate the memory
+    // required to hold the TLAS data
+    m_tlas.as = m_alloc.createAcceleration(asCreateInfo);
+    m_debug.setObjectName(m_tlas.as.accel, "Tlas");
 ````

 As with the BLAS, we also query the amount of scratch memory required by the builder to generate the TLAS,
@ -514,18 +624,23 @@ one buffer and thus saved an allocation. However, for the purpose of this tutori
 independent.

 ```` C
-  // Compute the amount of scratch memory required by the acceleration structure builder
-  vk::AccelerationStructureMemoryRequirementsInfoKHR memoryRequirementsInfo{
-      vk::AccelerationStructureMemoryRequirementsTypeKHR::eBuildScratch,
-      vk::AccelerationStructureBuildTypeKHR::eDevice, m_tlas.as.accel};
-  vk::DeviceSize scratchSize =
-      m_device.getAccelerationStructureMemoryRequirementsKHR(memoryRequirementsInfo).memoryRequirements.size;
-  
-  // Allocate the scratch memory
-  nvvkBuffer        scratchBuffer  = m_alloc.createBuffer(scratchSize, vk::BufferUsageFlagBits::eRayTracingKHR
-                                                                   | vk::BufferUsageFlagBits::eShaderDeviceAddress);
-  vk::DeviceAddress scratchAddress = m_device.getBufferAddress({scratchBuffer.buffer});
-  
+    // Compute the amount of scratch memory required by the acceleration structure builder
+    VkAccelerationStructureMemoryRequirementsInfoKHR memoryRequirementsInfo{
+        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_KHR};
+    memoryRequirementsInfo.type                  = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_BUILD_SCRATCH_KHR;
+    memoryRequirementsInfo.accelerationStructure = m_tlas.as.accel;
+    memoryRequirementsInfo.buildType             = VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR;
+
+    VkMemoryRequirements2 reqMem{VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2};
+    vkGetAccelerationStructureMemoryRequirementsKHR(m_device, &memoryRequirementsInfo, &reqMem);
+    VkDeviceSize scratchSize = reqMem.memoryRequirements.size;
+
+    // Allocate the scratch memory
+    nvvkBuffer scratchBuffer =
+        m_alloc.createBuffer(scratchSize, VK_BUFFER_USAGE_RAY_TRACING_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
+    VkBufferDeviceAddressInfo bufferInfo{VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO};
+    bufferInfo.buffer              = scratchBuffer.buffer;
+    VkDeviceAddress scratchAddress = vkGetBufferDeviceAddress(m_device, &bufferInfo);
 ````

 An `Instance` object is nearly identical to a `VkGeometryInstanceKHR` object: the only difference is the transform
@ -535,13 +650,13 @@ application side allows us to use the more intuitive $4\times4$ matrices, making
 TLAS we then convert all the `Instance` objects to `VkGeometryInstanceKHR`:

 ```` C
-  // For each instance, build the corresponding instance descriptor
-  std::vector<vk::AccelerationStructureInstanceKHR> geometryInstances;
-  geometryInstances.reserve(instances.size());
-  for(const auto& inst : instances)
-  {
-    geometryInstances.push_back(instanceToVkGeometryInstanceKHR(inst));
-  }
+    // For each instance, build the corresponding instance descriptor
+    std::vector<VkAccelerationStructureInstanceKHR> geometryInstances;
+    geometryInstances.reserve(instances.size());
+    for(const auto& inst : instances)
+    {
+      geometryInstances.push_back(instanceToVkGeometryInstanceKHR(inst));
+    }
 ````

 We then upload the instance descriptions to the device using a one-time command buffer. This command buffer will also be
@ -549,54 +664,65 @@ used to generate the TLAS itself, and so we add a barrier after the copy to ensu
 TLAS build.

 ```` C
-// Building the TLAS
-nvvkpp::SingleCommandBuffer genCmdBuf(m_device, m_queueIndex);
-vk::CommandBuffer           cmdBuf = genCmdBuf.createCommandBuffer();
+    // Building the TLAS
+    nvvk::CommandPool genCmdBuf(m_device, m_queueIndex);
+    VkCommandBuffer   cmdBuf = genCmdBuf.createCommandBuffer();

-// Create a buffer holding the actual instance data for use by the AS builder
-VkDeviceSize instanceDescsSizeInBytes = instances.size() * sizeof(vk::AccelerationStructureInstanceKHR);
+    // Create a buffer holding the actual instance data for use by the AS
+    // builder
+    VkDeviceSize instanceDescsSizeInBytes = instances.size() * sizeof(VkAccelerationStructureInstanceKHR);

-// Allocate the instance buffer and copy its contents from host to device memory
-m_instBuffer = m_alloc.createBuffer(cmdBuf, geometryInstances,
-                                    vk::BufferUsageFlagBits::eRayTracingKHR | vk::BufferUsageFlagBits::eShaderDeviceAddress);
-m_debug.setObjectName(m_instBuffer.buffer, "TLASInstances");
-vk::DeviceAddress instanceAddress = m_device.getBufferAddress(m_instBuffer.buffer);
+    // Allocate the instance buffer and copy its contents from host to device
+    // memory
+    m_instBuffer = m_alloc.createBuffer(cmdBuf, geometryInstances,
+                                        VK_BUFFER_USAGE_RAY_TRACING_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
+    m_debug.setObjectName(m_instBuffer.buffer, "TLASInstances");
+    // Reuse the VkBufferDeviceAddressInfo `bufferInfo` declared earlier for the scratch buffer
+    bufferInfo.buffer               = m_instBuffer.buffer;
+    VkDeviceAddress instanceAddress = vkGetBufferDeviceAddress(m_device, &bufferInfo);

-// Make sure the copy of the instance buffer are copied before triggering the
-// acceleration structure build
-vk::MemoryBarrier barrier(vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eAccelerationStructureWriteKHR);
-cmdBuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAccelerationStructureBuildKHR,
-                       vk::DependencyFlags(), {barrier}, {}, {});
+    // Make sure the instance buffer has been fully copied before triggering the
+    // acceleration structure build
+    VkMemoryBarrier barrier{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
+    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+    barrier.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
+    vkCmdPipelineBarrier(cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
+                         0, 1, &barrier, 0, nullptr, 0, nullptr);
 ````

 The build is then triggered, and we execute the command buffer before destroying the temporary buffers.

 ```` C
    // Build the TLAS
-    vk::AccelerationStructureGeometryKHR topASGeometry{vk::GeometryTypeKHR::eInstances};
-    topASGeometry.geometry.instances.setArrayOfPointers(VK_FALSE);
-    topASGeometry.geometry.instances.setData(instanceAddress);
+    VkAccelerationStructureGeometryDataKHR geometry{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR};
+    geometry.instances.arrayOfPointers    = VK_FALSE;
+    geometry.instances.data.deviceAddress = instanceAddress;
+    VkAccelerationStructureGeometryKHR topASGeometry{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR};
+    topASGeometry.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR;
+    topASGeometry.geometry     = geometry;

-    const vk::AccelerationStructureGeometryKHR*   pGeometry = &topASGeometry;
-    vk::AccelerationStructureBuildGeometryInfoKHR topASInfo;
-    topASInfo.setFlags(flags);
-    topASInfo.setUpdate(VK_FALSE);
-    topASInfo.setSrcAccelerationStructure({});
-    topASInfo.setDstAccelerationStructure(m_tlas.as.accel);
-    topASInfo.setGeometryArrayOfPointers(VK_FALSE);
-    topASInfo.setGeometryCount(1);
-    topASInfo.setPpGeometries(&pGeometry);
-    topASInfo.setScratchData(scratchAddress);
+
+    const VkAccelerationStructureGeometryKHR* pGeometry = &topASGeometry;
+    VkAccelerationStructureBuildGeometryInfoKHR topASInfo{VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR};
+    topASInfo.flags                     = flags;
+    topASInfo.update                    = VK_FALSE;
+    topASInfo.srcAccelerationStructure  = VK_NULL_HANDLE;
+    topASInfo.dstAccelerationStructure  = m_tlas.as.accel;
+    topASInfo.geometryArrayOfPointers   = VK_FALSE;
+    topASInfo.geometryCount             = 1;
+    topASInfo.ppGeometries              = &pGeometry;
+    topASInfo.scratchData.deviceAddress = scratchAddress;

    // Build Offsets info: n instances
-    vk::AccelerationStructureBuildOffsetInfoKHR buildOffsetInfo{static_cast<uint32_t>(instances.size()), 0, 0, 0};
-    const vk::AccelerationStructureBuildOffsetInfoKHR* pBuildOffsetInfo = &buildOffsetInfo;
+    VkAccelerationStructureBuildOffsetInfoKHR        buildOffsetInfo{static_cast<uint32_t>(instances.size()), 0, 0, 0};
+    const VkAccelerationStructureBuildOffsetInfoKHR* pBuildOffsetInfo = &buildOffsetInfo;

    // Build the TLAS
-    cmdBuf.buildAccelerationStructureKHR(1, &topASInfo, &pBuildOffsetInfo);
+    vkCmdBuildAccelerationStructureKHR(cmdBuf, 1, &topASInfo, &pBuildOffsetInfo);

-    genCmdBuf.flushCommandBuffer(cmdBuf);
-    m_alloc.flushStaging();
+
+    genCmdBuf.submitAndWait(cmdBuf);
+    m_alloc.finalizeAndReleaseStaging();
    m_alloc.destroy(scratchBuffer);
  }
 ````
@ -633,7 +759,7 @@ In the header, we declare the objects related to this additional descriptor set:
 ```` C
  void           createRtDescriptorSet();

-  std::vector<vk::DescriptorSetLayoutBinding>        m_rtDescSetLayoutBind;
+  nvvk::DescriptorSetBindings                        m_rtDescSetLayoutBind;
  vk::DescriptorPool                                 m_rtDescPool;
  vk::DescriptorSetLayout                            m_rtDescSetLayout;
  vk::DescriptorSet                                  m_rtDescSet;
@ -654,25 +780,25 @@ void HelloVulkan::createRtDescriptorSet()
  using vkSS   = vk::ShaderStageFlagBits;
  using vkDSLB = vk::DescriptorSetLayoutBinding;

-  m_rtDescSetLayoutBind.emplace_back(
-      vkDSLB(0, vkDT::eAccelerationStructureKHR, 1, vkSS::eRaygenKHR ));  // TLAS
-  m_rtDescSetLayoutBind.emplace_back(
+  m_rtDescSetLayoutBind.addBinding(vkDSLB(0, vkDT::eAccelerationStructureKHR, 1,
+                                          vkSS::eRaygenKHR | vkSS::eClosestHitKHR));  // TLAS
+  m_rtDescSetLayoutBind.addBinding(
      vkDSLB(1, vkDT::eStorageImage, 1, vkSS::eRaygenKHR));  // Output image

-  m_rtDescPool      = nvvkpp::util::createDescriptorPool(m_device, m_rtDescSetLayoutBind);
-  m_rtDescSetLayout = nvvkpp::util::createDescriptorSetLayout(m_device, m_rtDescSetLayoutBind);
+  m_rtDescPool      = m_rtDescSetLayoutBind.createPool(m_device);
+  m_rtDescSetLayout = m_rtDescSetLayoutBind.createLayout(m_device);
  m_rtDescSet       = m_device.allocateDescriptorSets({m_rtDescPool, 1, &m_rtDescSetLayout})[0];

+  vk::AccelerationStructureKHR                    tlas = m_rtBuilder.getAccelerationStructure();
  vk::WriteDescriptorSetAccelerationStructureKHR descASInfo;
  descASInfo.setAccelerationStructureCount(1);
-  descASInfo.setPAccelerationStructures(&m_rtBuilder.getAccelerationStructure());
+  descASInfo.setPAccelerationStructures(&tlas);
  vk::DescriptorImageInfo imageInfo{
      {}, m_offscreenColor.descriptor.imageView, vk::ImageLayout::eGeneral};

  std::vector<vk::WriteDescriptorSet> writes;
-  writes.emplace_back(
-      nvvkpp::util::createWrite(m_rtDescSet, m_rtDescSetLayoutBind[0], &descASInfo));
-  writes.emplace_back(nvvkpp::util::createWrite(m_rtDescSet, m_rtDescSetLayoutBind[1], &imageInfo));
+  writes.emplace_back(m_rtDescSetLayoutBind.makeWrite(m_rtDescSet, 0, descASInfo));
+  writes.emplace_back(m_rtDescSetLayoutBind.makeWrite(m_rtDescSet, 1, imageInfo));
  m_device.updateDescriptorSets(static_cast<uint32_t>(writes.size()), writes.data(), 0, nullptr);
 }
 ````
@ -687,25 +813,25 @@ descriptor set as they semantically fit the Scene descriptor set.

 ```` C
  // Camera matrices (binding = 0)
-  m_descSetLayoutBind.emplace_back(
+  m_descSetLayoutBind.addBinding(
      vkDS(0, vkDT::eUniformBuffer, 1, vkSS::eVertex | vkSS::eRaygenKHR));
  // Materials (binding = 1)
-  m_descSetLayoutBind.emplace_back(
+  m_descSetLayoutBind.addBinding(
      vkDS(1, vkDT::eStorageBuffer, nbObj, vkSS::eVertex | vkSS::eFragment | vkSS::eClosestHitKHR));
  // Scene description (binding = 2)
-  m_descSetLayoutBind.emplace_back(  //
+  m_descSetLayoutBind.addBinding(  //
      vkDS(2, vkDT::eStorageBuffer, 1, vkSS::eVertex | vkSS::eFragment | vkSS::eClosestHitKHR));
  // Textures (binding = 3)
-  m_descSetLayoutBind.emplace_back(
+  m_descSetLayoutBind.addBinding(
      vkDS(3, vkDT::eCombinedImageSampler, nbTxt, vkSS::eFragment | vkSS::eClosestHitKHR));
  // Materials (binding = 4)
-  m_descSetLayoutBind.emplace_back(
+  m_descSetLayoutBind.addBinding(
      vkDS(4, vkDT::eStorageBuffer, nbObj, vkSS::eFragment | vkSS::eClosestHitKHR));
  // Storing vertices (binding = 5)
-  m_descSetLayoutBind.emplace_back(  //
+  m_descSetLayoutBind.addBinding(  //
      vkDS(5, vkDT::eStorageBuffer, nbObj, vkSS::eClosestHitKHR));
  // Storing indices (binding = 6)
-  m_descSetLayoutBind.emplace_back(  //
+  m_descSetLayoutBind.addBinding(  //
      vkDS(6, vkDT::eStorageBuffer, nbObj, vkSS::eClosestHitKHR));
 ````

@ -724,11 +850,10 @@ We set the actual contents of the descriptor set by adding those buffers in `upd
    dbiVert.push_back({m_objModel[i].vertexBuffer.buffer, 0, VK_WHOLE_SIZE});
    dbiIdx.push_back({m_objModel[i].indexBuffer.buffer, 0, VK_WHOLE_SIZE});
  }
-  writes.emplace_back(nvvkpp::util::createWrite(m_descSet, m_descSetLayoutBind[1], dbiMat.data()));
-  writes.emplace_back(
-      nvvkpp::util::createWrite(m_descSet, m_descSetLayoutBind[4], dbiMatIdx.data()));
-  writes.emplace_back(nvvkpp::util::createWrite(m_descSet, m_descSetLayoutBind[5], dbiVert.data()));
-  writes.emplace_back(nvvkpp::util::createWrite(m_descSet, m_descSetLayoutBind[6], dbiIdx.data()));
+  writes.emplace_back(m_descSetLayoutBind.makeWriteArray(m_descSet, 1, dbiMat.data()));
+  writes.emplace_back(m_descSetLayoutBind.makeWriteArray(m_descSet, 4, dbiMatIdx.data()));
+  writes.emplace_back(m_descSetLayoutBind.makeWriteArray(m_descSet, 5, dbiVert.data()));
+  writes.emplace_back(m_descSetLayoutBind.makeWriteArray(m_descSet, 6, dbiIdx.data()));
 ````

 Originally the buffers containing the vertices and indices were only used by the rasterization pipeline. The ray tracing
@ -922,11 +1047,11 @@ void HelloVulkan::createRtPipeline()
  std::vector<std::string> paths = defaultSearchPaths;

  vk::ShaderModule raygenSM =
-      nvvkpp::util::createShaderModule(m_device,  //
-                                       nvh::loadFile("shaders/raytrace.rgen.spv", true, paths));
+    nvvk::createShaderModule(m_device,  //
+                             nvh::loadFile("shaders/raytrace.rgen.spv", true, paths));
  vk::ShaderModule missSM =
-      nvvkpp::util::createShaderModule(m_device,  //
-                                       nvh::loadFile("shaders/raytrace.rmiss.spv", true, paths));
+    nvvk::createShaderModule(m_device,  //
+                             nvh::loadFile("shaders/raytrace.rmiss.spv", true, paths));

  std::vector<vk::PipelineShaderStageCreateInfo> stages;

@ -960,8 +1085,8 @@ shaders.
 ```` C
  // Hit Group - Closest Hit + AnyHit
  vk::ShaderModule chitSM =
-      nvvkpp::util::createShaderModule(m_device,  //
-                                       nvh::loadFile("shaders/raytrace.rchit.spv", true, paths));
+      nvvk::createShaderModule(m_device,  //
+                               nvh::loadFile("shaders/raytrace.rchit.spv", true, paths));

  vk::RayTracingShaderGroupCreateInfoKHR hg{vk::RayTracingShaderGroupTypeKHR::eTrianglesHitGroup,
                                           VK_SHADER_UNUSED_KHR, VK_SHADER_UNUSED_KHR,
@ -1137,19 +1262,19 @@ copy the handles into the SBT:
 ```` C
  std::vector<uint8_t> shaderHandleStorage(sbtSize);
  m_device.getRayTracingShaderGroupHandlesKHR(m_rtPipeline, 0, groupCount, sbtSize,
-                                             shaderHandleStorage.data());
+                                              shaderHandleStorage.data());
  // Write the handles in the SBT
-  nvvkpp::SingleCommandBuffer genCmdBuf(m_device, m_graphicsQueueIndex);
-  vk::CommandBuffer           cmdBuf = genCmdBuf.createCommandBuffer();
+  nvvk::CommandPool genCmdBuf(m_device, m_graphicsQueueIndex);
+  vk::CommandBuffer cmdBuf = genCmdBuf.createCommandBuffer();

  m_rtSBTBuffer =
      m_alloc.createBuffer(cmdBuf, shaderHandleStorage, vk::BufferUsageFlagBits::eRayTracingKHR);
  m_debug.setObjectName(m_rtSBTBuffer.buffer, "SBT");


-  genCmdBuf.flushCommandBuffer(cmdBuf);
+  genCmdBuf.submitAndWait(cmdBuf);

-  m_alloc.flushStaging();
+  m_alloc.finalizeAndReleaseStaging();
 }
 ````

@ -1696,8 +1821,9 @@ In the body of `createRtPipeline`, we need to define the new miss shader right a
 ```` C
  // The second miss shader is invoked when a shadow ray misses the geometry. It
  // simply indicates that no occlusion has been found
-  vk::ShaderModule shadowmissSM = nvvkpp::util::createShaderModule(
-      m_device, nvh::loadFile("shaders/raytraceShadow.rmiss.spv", true, paths));
+  vk::ShaderModule shadowmissSM =
+      nvvk::createShaderModule(m_device,
+                               nvh::loadFile("shaders/raytraceShadow.rmiss.spv", true, paths));

 ````