cleanup and refactoring

This commit is contained in:
parent 2302158928
commit 76f6bf62a4
1285 changed files with 757994 additions and 8 deletions

raytracer/nvpro_core/nvvk/compute_vk.hpp (new file, 400 lines)
@@ -0,0 +1,400 @@
/*
 * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION
 * SPDX-License-Identifier: Apache-2.0
 */

#pragma once

#include <array>
#include <memory>
#include <unordered_map>
#include <vector>

#include <glm/glm.hpp>  // glm::uvec3, used by the dispatchBlocks() overload below

#include "vulkan/vulkan_core.h"
#include "descriptorsets_vk.hpp"

#define NVVK_COMPUTE_DEFAULT_BLOCK_SIZE 256

namespace nvvk {

//////////////////////////////////////////////////////////////////////////
/** @DOC_START
# class nvvk::PushComputeDispatcher

nvvk::PushComputeDispatcher is a convenience structure for easily creating
compute-only pipelines by defining the bindings and providing SPIR-V code.
The descriptor set updates are carried out using the VK_KHR_push_descriptor
extension.


Example:

```cpp

enum BindingLocation
{
  eMyBindingLocation = 0
};

struct PushConstant
{
  ...
} pushConstant;

nvvk::PushComputeDispatcher<PushConstant, BindingLocation> myCompute;
VkBuffer       myFirstBuffer  = createMyFirstBuffer(...);
VkBuffer       mySecondBuffer = createMySecondBuffer(...);
VkDevice       device         = getMyVkDevice(...);
const uint8_t* spvCode        = getMyComputeShaderCode(...);
size_t         spvCodeSize    = getMyComputeShaderCodeSize(...);
myCompute.addBufferBinding(BindingLocation::eMyBindingLocation);
myCompute.updateBufferBinding(BindingLocation::eMyBindingLocation, myFirstBuffer);
myCompute.setCode(device, spvCode, spvCodeSize);
myCompute.finalizePipeline(device);

...
VkCommandBuffer cmd = getMyCommandBuffer(...);
myCompute.dispatchThreads(cmd, targetThreadCount, &pushConstant);
...
myCompute.updateBufferBinding(BindingLocation::eMyBindingLocation, mySecondBuffer);
myCompute.dispatchThreads(cmd, targetThreadCount, &pushConstant);
...
```
@DOC_END */
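
/// The descriptor updates rely on vkCmdPushDescriptorSetKHR, so the VkDevice must be
/// created with the VK_KHR_push_descriptor extension enabled. A minimal sketch of the
/// relevant device-creation fields (the rest of the VkDeviceCreateInfo setup is assumed
/// to come from the application):
///
/// ```cpp
/// std::vector<const char*> extensions{VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME};
/// VkDeviceCreateInfo deviceInfo{VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO};
/// deviceInfo.enabledExtensionCount   = static_cast<uint32_t>(extensions.size());
/// deviceInfo.ppEnabledExtensionNames = extensions.data();
/// ```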

/// Barrier types usable before and after the shader dispatch.
/// These barriers apply to SHADER_READ/SHADER_WRITE accesses, plus
/// TRANSFER_READ/TRANSFER_WRITE when eTransfer is requested.
enum DispatcherBarrier
{
  eNone       = 0,
  eCompute    = 1,
  eTransfer   = 2,
  eGraphics   = 4,
  eRaytracing = 8
};
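
/// The flags can be OR-ed together; a minimal sketch reusing the names from the example
/// above, assuming the dispatch results are consumed by both graphics and transfer work:
///
/// ```cpp
/// myCompute.dispatchThreads(cmd, targetThreadCount, &pushConstant,
///                           DispatcherBarrier::eGraphics | DispatcherBarrier::eTransfer,  // postBarrier
///                           DispatcherBarrier::eCompute);                                 // preBarrier
/// ```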

template <typename TPushConstants, typename TBindingEnum, uint32_t pipelineCount = 1u>
struct PushComputeDispatcher
{
  VkPipelineLayout                      layout{};
  std::array<VkPipeline, pipelineCount> pipelines{};
  VkDescriptorSetLayout                 dsetLayout{};
  nvvk::DescriptorSetBindings           bindings;

  std::unordered_map<TBindingEnum, std::unique_ptr<VkDescriptorBufferInfo>>                       bufferInfos;
  std::unordered_map<TBindingEnum, std::unique_ptr<VkWriteDescriptorSetAccelerationStructureKHR>> accelInfos;
  std::unordered_map<TBindingEnum, std::unique_ptr<VkAccelerationStructureKHR>>                   accel;
  std::unordered_map<TBindingEnum, std::unique_ptr<VkDescriptorImageInfo>>                        sampledImageInfos;

  TPushConstants pushConstants{};

  struct ShaderModule
  {
    VkShaderModule module{VK_NULL_HANDLE};
    bool           isLocal{false};
  };

  std::vector<VkWriteDescriptorSet>       writes;
  std::array<ShaderModule, pipelineCount> shaderModules;

  bool addBufferBinding(TBindingEnum index)
  {
    if(bufferInfos.find(index) == bufferInfos.end())
    {
      bindings.addBinding(VkDescriptorSetLayoutBinding{uint32_t(index), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT});

      bufferInfos[index] = std::make_unique<VkDescriptorBufferInfo>();
      auto* info         = bufferInfos[index].get();
      *(info)            = {VK_NULL_HANDLE, 0, VK_WHOLE_SIZE};
      writes.emplace_back(bindings.makeWrite(0, index, info));
      return true;
    }
    return false;
  }

  bool addAccelerationStructureBinding(TBindingEnum index)
  {
    if(accelInfos.find(index) == accelInfos.end())
    {
      bindings.addBinding(VkDescriptorSetLayoutBinding{uint32_t(index), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR,
                                                       1, VK_SHADER_STAGE_COMPUTE_BIT});

      accelInfos[index] = std::make_unique<VkWriteDescriptorSetAccelerationStructureKHR>();
      auto* info        = accelInfos[index].get();

      accel[index] = std::make_unique<VkAccelerationStructureKHR>();
      auto* acc    = accel[index].get();

      info->sType                      = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR;
      info->pNext                      = nullptr;
      info->accelerationStructureCount = 1;
      info->pAccelerationStructures    = acc;

      writes.emplace_back(bindings.makeWrite(0, index, info));
      return true;
    }
    return false;
  }

  bool addSampledImageBinding(TBindingEnum index)
  {
    if(sampledImageInfos.find(index) == sampledImageInfos.end())
    {
      bindings.addBinding(VkDescriptorSetLayoutBinding{uint32_t(index), VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1,
                                                       VK_SHADER_STAGE_COMPUTE_BIT});
      sampledImageInfos[index] = std::make_unique<VkDescriptorImageInfo>();
      auto* info               = sampledImageInfos[index].get();
      writes.emplace_back(bindings.makeWrite(0, index, info));
      return true;
    }
    return false;
  }

  bool updateBufferBinding(TBindingEnum index, VkBuffer buffer)
  {
    auto it = bufferInfos.find(index);
    if(it != bufferInfos.end())
    {
      it->second->buffer = buffer;
      return true;
    }
    return false;
  }

  bool updateAccelerationStructureBinding(TBindingEnum index, VkAccelerationStructureKHR acc)
  {
    auto it = accel.find(index);
    if(it != accel.end())
    {
      *(it->second.get()) = acc;
      return true;
    }
    return false;
  }

  bool updateSampledImageBinding(TBindingEnum index,
                                 VkSampler     sampler = VK_NULL_HANDLE,
                                 VkImageView   view    = VK_NULL_HANDLE,
                                 VkImageLayout layout  = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)
  {
    auto it = sampledImageInfos.find(index);
    if(it != sampledImageInfos.end())
    {
      it->second->sampler     = sampler;
      it->second->imageView   = view;
      it->second->imageLayout = layout;
      return true;
    }
    return false;
  }

  bool setCode(VkDevice device, const void* shaderCode, size_t codeSize, uint32_t pipelineIndex = 0u)
  {
    VkShaderModuleCreateInfo moduleCreateInfo{VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO};
    moduleCreateInfo.codeSize = codeSize;
    moduleCreateInfo.pCode    = reinterpret_cast<const uint32_t*>(shaderCode);

    VkResult r = vkCreateShaderModule(device, &moduleCreateInfo, nullptr, &(shaderModules[pipelineIndex].module));
    if(r != VK_SUCCESS || shaderModules[pipelineIndex].module == VK_NULL_HANDLE)
    {
      return false;
    }
    shaderModules[pipelineIndex].isLocal = true;
    return true;
  }

  bool setCode(VkShaderModule m, uint32_t pipelineIndex = 0u)
  {
    shaderModules[pipelineIndex].module  = m;
    shaderModules[pipelineIndex].isLocal = false;
    return m != VK_NULL_HANDLE;
  }

  bool finalizePipeline(VkDevice device)
  {
    dsetLayout = bindings.createLayout(device, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);

    VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
    pipelineLayoutCreateInfo.pSetLayouts    = &dsetLayout;
    pipelineLayoutCreateInfo.setLayoutCount = 1;

    VkPushConstantRange pushConstantRange{VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(TPushConstants)};
    pipelineLayoutCreateInfo.pushConstantRangeCount = 1;
    pipelineLayoutCreateInfo.pPushConstantRanges    = &pushConstantRange;

    VkResult r = vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, nullptr, &layout);

    if(r != VK_SUCCESS || layout == VK_NULL_HANDLE)
    {
      return false;
    }

    VkPipelineShaderStageCreateInfo stageCreateInfo = {VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO};
    stageCreateInfo.stage = VK_SHADER_STAGE_COMPUTE_BIT;
    stageCreateInfo.pName = "main";

    for(uint32_t i = 0; i < pipelineCount; i++)
    {
      stageCreateInfo.module = shaderModules[i].module;

      VkComputePipelineCreateInfo createInfo{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
      createInfo.stage  = stageCreateInfo;
      createInfo.layout = layout;
      r = vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &createInfo, nullptr, &pipelines[i]);
      if(r != VK_SUCCESS || pipelines[i] == VK_NULL_HANDLE)
      {
        return false;
      }
      if(shaderModules[i].isLocal)
      {
        vkDestroyShaderModule(device, shaderModules[i].module, nullptr);
      }
    }
    return true;
  }

  // Number of workgroups needed to cover targetThreadCount threads, rounded up:
  // e.g. getBlockCount(1000, 256) == 4.
  uint32_t getBlockCount(uint32_t targetThreadCount, uint32_t blockSize)
  {
    return (targetThreadCount + blockSize - 1) / blockSize;
  }

  // Bind the pipeline and push its resources. Used internally, or by the app when issuing
  // a direct call to vkCmdDispatch instead of using dispatchThreads()/dispatchBlocks()
  // (see the sketch after this method).
  void bind(VkCommandBuffer cmd, const TPushConstants* constants = nullptr, uint32_t pipelineIndex = 0u)
  {
    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipelines[pipelineIndex]);
    if(constants != nullptr)
    {
      vkCmdPushConstants(cmd, layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(TPushConstants), constants);
    }
    if(writes.size() > 0)
    {
      vkCmdPushDescriptorSetKHR(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, static_cast<uint32_t>(writes.size()),
                                writes.data());
    }
  }
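
  /// A minimal sketch of using bind() together with a direct vkCmdDispatch call instead of
  /// dispatchThreads()/dispatchBlocks(); cmd, pushConstant and the block counts are assumed
  /// to be provided by the application:
  ///
  /// ```cpp
  /// myCompute.bind(cmd, &pushConstant);
  /// vkCmdDispatch(cmd, blockCountX, blockCountY, blockCountZ);
  /// ```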

  void dispatchThreads(VkCommandBuffer       cmd,
                       uint32_t              threadCount,
                       const TPushConstants* constants   = nullptr,
                       uint32_t              postBarrier = DispatcherBarrier::eCompute,
                       uint32_t              preBarrier  = DispatcherBarrier::eNone,
                       uint32_t              blockSize   = NVVK_COMPUTE_DEFAULT_BLOCK_SIZE,
                       // If pipelineIndex == ~0u, all pipelines are executed sequentially;
                       // otherwise only the requested pipeline is dispatched
                       uint32_t pipelineIndex = ~0u)
  {
    uint32_t blockCount = getBlockCount(threadCount, blockSize);
    dispatchBlocks(cmd, blockCount, constants, postBarrier, preBarrier, pipelineIndex);
  }
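
  /// A minimal sketch of dispatching only one of several pipelines; the two-pipeline
  /// dispatcher and the names below are assumptions for illustration:
  ///
  /// ```cpp
  /// nvvk::PushComputeDispatcher<PushConstant, BindingLocation, 2> myPasses;
  /// // ... add bindings, setCode(...) once per pipeline index, finalizePipeline(...) ...
  /// // Run only the second pipeline (index 1) with the default barriers and block size:
  /// myPasses.dispatchThreads(cmd, targetThreadCount, &pushConstant,
  ///                          nvvk::eCompute, nvvk::eNone,
  ///                          NVVK_COMPUTE_DEFAULT_BLOCK_SIZE, 1u);
  /// ```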

  void dispatchBlocks(VkCommandBuffer       cmd,
                      uint32_t              blockCount,
                      const TPushConstants* constants   = nullptr,
                      uint32_t              postBarrier = DispatcherBarrier::eCompute,
                      uint32_t              preBarrier  = DispatcherBarrier::eNone,
                      // If pipelineIndex == ~0u, all pipelines are executed sequentially;
                      // otherwise only the requested pipeline is dispatched
                      uint32_t pipelineIndex = ~0u)
  {
    dispatchBlocks(cmd, {blockCount, 1, 1}, constants, postBarrier, preBarrier, pipelineIndex);
  }

  void dispatchBlocks(VkCommandBuffer       cmd,
                      glm::uvec3            blockCount,
                      const TPushConstants* constants   = nullptr,
                      uint32_t              postBarrier = DispatcherBarrier::eCompute,
                      uint32_t              preBarrier  = DispatcherBarrier::eNone,
                      // If pipelineIndex == ~0u, all pipelines are executed sequentially;
                      // otherwise only the requested pipeline is dispatched
                      uint32_t pipelineIndex = ~0u)
  {
    if(preBarrier != eNone)
    {
      VkMemoryBarrier mb{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
      mb.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
      VkPipelineStageFlags srcStage{};
      if((preBarrier & eCompute) || (preBarrier & eGraphics) || (preBarrier & eRaytracing))
      {
        mb.srcAccessMask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
        if(preBarrier & eCompute)
          srcStage |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
        if(preBarrier & eGraphics)
          srcStage |= VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT;
        if(preBarrier & eRaytracing)
          srcStage |= VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR;
      }
      if(preBarrier & eTransfer)
      {
        mb.srcAccessMask |= VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
        srcStage |= VK_PIPELINE_STAGE_TRANSFER_BIT;
      }

      vkCmdPipelineBarrier(cmd, srcStage, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &mb, 0, nullptr, 0, nullptr);
    }

    uint32_t currentPipeline = (pipelineIndex == ~0u) ? 0 : pipelineIndex;
    uint32_t count           = (pipelineIndex == ~0u) ? pipelineCount : 1;

    for(uint32_t i = 0; i < count; i++)
    {
      bind(cmd, constants, currentPipeline + i);
      vkCmdDispatch(cmd, blockCount.x, blockCount.y, blockCount.z);

      if(postBarrier != eNone)
      {
        VkMemoryBarrier mb{VK_STRUCTURE_TYPE_MEMORY_BARRIER};
        mb.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
        VkPipelineStageFlags dstStage{};
        if((postBarrier & eCompute) || (postBarrier & eGraphics) || (postBarrier & eRaytracing))
        {
          mb.dstAccessMask |= VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
          if(postBarrier & eCompute)
            dstStage |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
          if(postBarrier & eGraphics)
            dstStage |= VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT;
          if(postBarrier & eRaytracing)
            dstStage |= VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR;
        }
        if(postBarrier & eTransfer)
        {
          mb.dstAccessMask |= VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
          dstStage |= VK_PIPELINE_STAGE_TRANSFER_BIT;
        }

        vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, dstStage, 0, 1, &mb, 0, nullptr, 0, nullptr);
      }
    }
  }

  void destroy(VkDevice device)
  {
    vkDestroyPipelineLayout(device, layout, nullptr);
    for(uint32_t i = 0; i < pipelineCount; i++)
    {
      vkDestroyPipeline(device, pipelines[i], nullptr);
    }
    vkDestroyDescriptorSetLayout(device, dsetLayout, nullptr);

    bufferInfos.clear();
    accelInfos.clear();
    accel.clear();
    sampledImageInfos.clear();
    writes.clear();
    bindings.clear();
  }
};
}  // namespace nvvk