From 4b4fc0b00de4347c31e7b000e9d18f3de57de1ae Mon Sep 17 00:00:00 2001 From: redstrate <54911369+redstrate@users.noreply.github.com> Date: Tue, 22 Sep 2020 17:27:10 -0400 Subject: [PATCH] Add buffer/image binding for compute, and add histogram construction --- engine/gfx/metal/src/gfx_metal.mm | 51 ++++++++++----------- engine/gfx/metal/src/gfx_metal_pipeline.hpp | 2 + engine/gfx/public/gfx.hpp | 3 ++ engine/renderer/include/renderer.hpp | 1 + engine/renderer/src/renderer.cpp | 16 +++++-- shaders/common.nocompile.glsl | 3 +- shaders/histogram.comp.glsl | 43 +++++++++++++++-- 7 files changed, 85 insertions(+), 34 deletions(-) diff --git a/engine/gfx/metal/src/gfx_metal.mm b/engine/gfx/metal/src/gfx_metal.mm index fc6848e..b453e1f 100755 --- a/engine/gfx/metal/src/gfx_metal.mm +++ b/engine/gfx/metal/src/gfx_metal.mm @@ -673,6 +673,8 @@ GFXPipeline* GFXMetal::create_compute_pipeline(const GFXComputePipelineCreateInf MTLComputePipelineDescriptor* pipelineDescriptor = [MTLComputePipelineDescriptor new]; pipelineDescriptor.computeFunction = computeFunc; + pipeline->threadGroupSize = MTLSizeMake(info.workgroup_size_x, info.workgroup_size_y, info.workgroup_size_z); + if(debug_enabled) { pipelineDescriptor.label = [NSString stringWithFormat:@"%s", info.label.data()]; pipeline->label = info.label; @@ -881,29 +883,33 @@ void GFXMetal::submit(GFXCommandBuffer* command_buffer, const int window) { break; case GFXCommandType::BindShaderBuffer: { - needEncoder(CurrentEncoder::Render); - - [renderEncoder setVertexBuffer:((GFXMetalBuffer*)command.data.bind_shader_buffer.buffer)->get(currentFrameIndex) offset:(NSUInteger)command.data.bind_shader_buffer.offset atIndex:(NSUInteger)command.data.bind_shader_buffer.index ]; - - [renderEncoder setFragmentBuffer:((GFXMetalBuffer*)command.data.bind_shader_buffer.buffer)->get(currentFrameIndex) offset:(NSUInteger)command.data.bind_shader_buffer.offset atIndex:(NSUInteger)command.data.bind_shader_buffer.index ]; + if(current_encoder == CurrentEncoder::Render) { + [renderEncoder setVertexBuffer:((GFXMetalBuffer*)command.data.bind_shader_buffer.buffer)->get(currentFrameIndex) offset:(NSUInteger)command.data.bind_shader_buffer.offset atIndex:(NSUInteger)command.data.bind_shader_buffer.index ]; + + [renderEncoder setFragmentBuffer:((GFXMetalBuffer*)command.data.bind_shader_buffer.buffer)->get(currentFrameIndex) offset:(NSUInteger)command.data.bind_shader_buffer.offset atIndex:(NSUInteger)command.data.bind_shader_buffer.index ]; + } else if(current_encoder == CurrentEncoder::Compute) { + [computeEncoder setBuffer:((GFXMetalBuffer*)command.data.bind_shader_buffer.buffer)->get(currentFrameIndex) offset:(NSUInteger)command.data.bind_shader_buffer.offset atIndex:(NSUInteger)command.data.bind_shader_buffer.index ]; + } } break; case GFXCommandType::BindTexture: { - needEncoder(CurrentEncoder::Render); + if(current_encoder == CurrentEncoder::Render) { + if(command.data.bind_texture.texture != nullptr) { + [renderEncoder setVertexSamplerState:((GFXMetalTexture*)command.data.bind_texture.texture)->sampler atIndex:(NSUInteger)command.data.bind_texture.index]; + + [renderEncoder setVertexTexture:((GFXMetalTexture*)command.data.bind_texture.texture)->handle atIndex:(NSUInteger)command.data.bind_texture.index]; + + [renderEncoder setFragmentSamplerState:((GFXMetalTexture*)command.data.bind_texture.texture)->sampler atIndex:(NSUInteger)command.data.bind_texture.index]; - if(command.data.bind_texture.texture != nullptr) { - [renderEncoder setVertexSamplerState:((GFXMetalTexture*)command.data.bind_texture.texture)->sampler atIndex:(NSUInteger)command.data.bind_texture.index]; - - [renderEncoder setVertexTexture:((GFXMetalTexture*)command.data.bind_texture.texture)->handle atIndex:(NSUInteger)command.data.bind_texture.index]; - - [renderEncoder setFragmentSamplerState:((GFXMetalTexture*)command.data.bind_texture.texture)->sampler atIndex:(NSUInteger)command.data.bind_texture.index]; - - [renderEncoder setFragmentTexture:((GFXMetalTexture*)command.data.bind_texture.texture)->handle atIndex:(NSUInteger)command.data.bind_texture.index]; - } else { - [renderEncoder setVertexTexture:nil atIndex:(NSUInteger)command.data.bind_texture.index]; - - [renderEncoder setFragmentTexture:nil atIndex:(NSUInteger)command.data.bind_texture.index]; + [renderEncoder setFragmentTexture:((GFXMetalTexture*)command.data.bind_texture.texture)->handle atIndex:(NSUInteger)command.data.bind_texture.index]; + } else { + [renderEncoder setVertexTexture:nil atIndex:(NSUInteger)command.data.bind_texture.index]; + + [renderEncoder setFragmentTexture:nil atIndex:(NSUInteger)command.data.bind_texture.index]; + } + } else if(current_encoder == CurrentEncoder::Compute) { + [computeEncoder setTexture:((GFXMetalTexture*)command.data.bind_texture.texture)->handle atIndex:(NSUInteger)command.data.bind_texture.index]; } } break; @@ -1066,15 +1072,8 @@ void GFXMetal::submit(GFXCommandBuffer* command_buffer, const int window) { break; case GFXCommandType::Dispatch: { needEncoder(CurrentEncoder::Compute); - - NSUInteger threadGroupSize = currentPipeline->compute_handle.maxTotalThreadsPerThreadgroup; - MTLSize threadgroupSize = MTLSizeMake(std::min(threadGroupSize, - (NSUInteger)command.data.dispatch.group_count_x), - std::min(threadGroupSize, (NSUInteger)command.data.dispatch.group_count_y), - std::min(threadGroupSize, (NSUInteger)command.data.dispatch.group_count_z)); - - [computeEncoder dispatchThreads:MTLSizeMake(command.data.dispatch.group_count_x, command.data.dispatch.group_count_y, command.data.dispatch.group_count_z) threadsPerThreadgroup:threadgroupSize]; + [computeEncoder dispatchThreads:MTLSizeMake(command.data.dispatch.group_count_x, command.data.dispatch.group_count_y, command.data.dispatch.group_count_z) threadsPerThreadgroup:currentPipeline->threadGroupSize]; } break; } diff --git a/engine/gfx/metal/src/gfx_metal_pipeline.hpp b/engine/gfx/metal/src/gfx_metal_pipeline.hpp index a09e040..8c94c96 100755 --- a/engine/gfx/metal/src/gfx_metal_pipeline.hpp +++ b/engine/gfx/metal/src/gfx_metal_pipeline.hpp @@ -10,6 +10,8 @@ public: id handle = nil; id compute_handle = nil; + + MTLSize threadGroupSize; id depthStencil = nil; MTLPrimitiveType primitiveType; diff --git a/engine/gfx/public/gfx.hpp b/engine/gfx/public/gfx.hpp index b7f6be6..db5359c 100755 --- a/engine/gfx/public/gfx.hpp +++ b/engine/gfx/public/gfx.hpp @@ -201,6 +201,9 @@ struct GFXComputePipelineCreateInfo { std::string_view compute_path; ShaderSource compute_src; } shaders; + + // TODO: extract this from the shader instead of hardcoding it twice (once in GLSL, and now here) + int workgroup_size_x = 1, workgroup_size_y = 1, workgroup_size_z = 1; }; struct GFXFramebufferCreateInfo { diff --git a/engine/renderer/include/renderer.hpp b/engine/renderer/include/renderer.hpp index 5f92ed6..29a3684 100755 --- a/engine/renderer/include/renderer.hpp +++ b/engine/renderer/include/renderer.hpp @@ -180,6 +180,7 @@ private: // histogram compute GFXPipeline* histogram_pipeline = nullptr; + GFXBuffer* histogram_buffer = nullptr; std::unique_ptr smaaPass; std::unique_ptr gHelper; diff --git a/engine/renderer/src/renderer.cpp b/engine/renderer/src/renderer.cpp index 30f1eea..fd376f9 100755 --- a/engine/renderer/src/renderer.cpp +++ b/engine/renderer/src/renderer.cpp @@ -205,10 +205,6 @@ void Renderer::stopSceneBlur() { void Renderer::render(Scene* scene, int index) { GFXCommandBuffer* commandbuffer = engine->get_gfx()->acquire_command_buffer(); - - commandbuffer->set_compute_pipeline(histogram_pipeline); - - commandbuffer->dispatch(1, 1, 1); const auto extent = get_extent(); const auto render_extent = get_render_extent(); @@ -289,6 +285,14 @@ void Renderer::render(Scene* scene, int index) { commandbuffer->set_viewport(viewport); commandbuffer->push_group("Post Processing"); + + commandbuffer->set_compute_pipeline(histogram_pipeline); + + commandbuffer->bind_texture(offscreenColorTexture, 0); + commandbuffer->bind_shader_buffer(histogram_buffer, 0, 1, sizeof(uint) * 256); + + commandbuffer->dispatch(static_cast(std::ceil(render_extent.width / 16.0f)), + static_cast(std::ceil(render_extent.height / 16.0f)), 1); commandbuffer->set_graphics_pipeline(viewport_mode ? renderToViewportPipeline : postPipeline); commandbuffer->bind_texture(offscreenColorTexture, 1); @@ -1003,6 +1007,10 @@ void Renderer::createBRDF() { void Renderer::create_histogram_resources() { GFXComputePipelineCreateInfo create_info = {}; create_info.shaders.compute_path = "histogram.comp"; + create_info.workgroup_size_x = 16; + create_info.workgroup_size_y = 16; histogram_pipeline = gfx->create_compute_pipeline(create_info); + + histogram_buffer = gfx->create_buffer(nullptr, sizeof(uint) * 256, false, GFXBufferUsage::Storage); } diff --git a/shaders/common.nocompile.glsl b/shaders/common.nocompile.glsl index 6d5ea8f..68129c5 100755 --- a/shaders/common.nocompile.glsl +++ b/shaders/common.nocompile.glsl @@ -1,4 +1,5 @@ const float PI = 3.14159265359; +const float EPSILON = 0.005; const vec2 PoissonOffsets[64] = { vec2(0.0617981, 0.07294159), @@ -157,4 +158,4 @@ vec3 from_srgb_to_linear(const vec3 sRGB) { vec3 lower = sRGB/vec3(12.92); return mix(higher, lower, cutoff); -} \ No newline at end of file +} diff --git a/shaders/histogram.comp.glsl b/shaders/histogram.comp.glsl index a773306..6ec2010 100755 --- a/shaders/histogram.comp.glsl +++ b/shaders/histogram.comp.glsl @@ -1,10 +1,47 @@ +#include "common.nocompile.glsl" + layout(local_size_x = 16, local_size_y = 16) in; layout(rgba32f, binding = 0) uniform image2D hdr_image; +// adapated from https://bruop.github.io/exposure/ and http://www.alextardif.com/HistogramLuminance.html + +// Taken from RTR vol 4 pg. 278 +#define RGB_TO_LUM vec3(0.2125, 0.7154, 0.0721) + shared uint histogram_shared[256]; -void main() { - histogram_shared[gl_LocalInvocationIndex] = 1; - groupMemoryBarrier(); +layout(std430, binding = 1) buffer HistogramBuffer { + uint histogram[]; +}; + +const vec4 params = vec4(-10.0, 1.0 / 12.0, 0.0, 0.0); + +uint color_to_bin(const vec3 hdr_color, const float min_log_lum, const float inverse_log_lum_range) { + const float lum = dot(hdr_color, RGB_TO_LUM); + + if (lum < EPSILON) { + return 0; + } + + const float log_lum = clamp((log2(lum) - min_log_lum) * inverse_log_lum_range, 0.0, 1.0); + + return uint(log_lum * 254.0 + 1.0); +} + +void main() { + histogram_shared[gl_LocalInvocationIndex] = 0; + groupMemoryBarrier(); + + uvec2 dim = imageSize(hdr_image).xy; + if(gl_GlobalInvocationID.x < dim.x && gl_GlobalInvocationID.y < dim.y) { + vec3 hdr_color = imageLoad(hdr_image, ivec2(gl_GlobalInvocationID.xy)).xyz; + uint bin_index = color_to_bin(hdr_color, params.x, params.y); + + atomicAdd(histogram_shared[bin_index], 1); + } + + groupMemoryBarrier(); + + atomicAdd(histogram[gl_LocalInvocationIndex], histogram_shared[gl_LocalInvocationIndex]); }