Archived
1
Fork 0

Add buffer/image binding for compute, and add histogram construction

This commit is contained in:
redstrate 2020-09-22 17:27:10 -04:00
parent 267f6221bf
commit 4b4fc0b00d
7 changed files with 85 additions and 34 deletions

View file

@ -673,6 +673,8 @@ GFXPipeline* GFXMetal::create_compute_pipeline(const GFXComputePipelineCreateInf
MTLComputePipelineDescriptor* pipelineDescriptor = [MTLComputePipelineDescriptor new]; MTLComputePipelineDescriptor* pipelineDescriptor = [MTLComputePipelineDescriptor new];
pipelineDescriptor.computeFunction = computeFunc; pipelineDescriptor.computeFunction = computeFunc;
pipeline->threadGroupSize = MTLSizeMake(info.workgroup_size_x, info.workgroup_size_y, info.workgroup_size_z);
if(debug_enabled) { if(debug_enabled) {
pipelineDescriptor.label = [NSString stringWithFormat:@"%s", info.label.data()]; pipelineDescriptor.label = [NSString stringWithFormat:@"%s", info.label.data()];
pipeline->label = info.label; pipeline->label = info.label;
@ -881,17 +883,18 @@ void GFXMetal::submit(GFXCommandBuffer* command_buffer, const int window) {
break; break;
case GFXCommandType::BindShaderBuffer: case GFXCommandType::BindShaderBuffer:
{ {
needEncoder(CurrentEncoder::Render); if(current_encoder == CurrentEncoder::Render) {
[renderEncoder setVertexBuffer:((GFXMetalBuffer*)command.data.bind_shader_buffer.buffer)->get(currentFrameIndex) offset:(NSUInteger)command.data.bind_shader_buffer.offset atIndex:(NSUInteger)command.data.bind_shader_buffer.index ]; [renderEncoder setVertexBuffer:((GFXMetalBuffer*)command.data.bind_shader_buffer.buffer)->get(currentFrameIndex) offset:(NSUInteger)command.data.bind_shader_buffer.offset atIndex:(NSUInteger)command.data.bind_shader_buffer.index ];
[renderEncoder setFragmentBuffer:((GFXMetalBuffer*)command.data.bind_shader_buffer.buffer)->get(currentFrameIndex) offset:(NSUInteger)command.data.bind_shader_buffer.offset atIndex:(NSUInteger)command.data.bind_shader_buffer.index ]; [renderEncoder setFragmentBuffer:((GFXMetalBuffer*)command.data.bind_shader_buffer.buffer)->get(currentFrameIndex) offset:(NSUInteger)command.data.bind_shader_buffer.offset atIndex:(NSUInteger)command.data.bind_shader_buffer.index ];
} else if(current_encoder == CurrentEncoder::Compute) {
[computeEncoder setBuffer:((GFXMetalBuffer*)command.data.bind_shader_buffer.buffer)->get(currentFrameIndex) offset:(NSUInteger)command.data.bind_shader_buffer.offset atIndex:(NSUInteger)command.data.bind_shader_buffer.index ];
}
} }
break; break;
case GFXCommandType::BindTexture: case GFXCommandType::BindTexture:
{ {
needEncoder(CurrentEncoder::Render); if(current_encoder == CurrentEncoder::Render) {
if(command.data.bind_texture.texture != nullptr) { if(command.data.bind_texture.texture != nullptr) {
[renderEncoder setVertexSamplerState:((GFXMetalTexture*)command.data.bind_texture.texture)->sampler atIndex:(NSUInteger)command.data.bind_texture.index]; [renderEncoder setVertexSamplerState:((GFXMetalTexture*)command.data.bind_texture.texture)->sampler atIndex:(NSUInteger)command.data.bind_texture.index];
@ -905,6 +908,9 @@ void GFXMetal::submit(GFXCommandBuffer* command_buffer, const int window) {
[renderEncoder setFragmentTexture:nil atIndex:(NSUInteger)command.data.bind_texture.index]; [renderEncoder setFragmentTexture:nil atIndex:(NSUInteger)command.data.bind_texture.index];
} }
} else if(current_encoder == CurrentEncoder::Compute) {
[computeEncoder setTexture:((GFXMetalTexture*)command.data.bind_texture.texture)->handle atIndex:(NSUInteger)command.data.bind_texture.index];
}
} }
break; break;
case GFXCommandType::BindSampler: case GFXCommandType::BindSampler:
@ -1067,14 +1073,7 @@ void GFXMetal::submit(GFXCommandBuffer* command_buffer, const int window) {
case GFXCommandType::Dispatch: { case GFXCommandType::Dispatch: {
needEncoder(CurrentEncoder::Compute); needEncoder(CurrentEncoder::Compute);
NSUInteger threadGroupSize = currentPipeline->compute_handle.maxTotalThreadsPerThreadgroup; [computeEncoder dispatchThreads:MTLSizeMake(command.data.dispatch.group_count_x, command.data.dispatch.group_count_y, command.data.dispatch.group_count_z) threadsPerThreadgroup:currentPipeline->threadGroupSize];
MTLSize threadgroupSize = MTLSizeMake(std::min(threadGroupSize,
(NSUInteger)command.data.dispatch.group_count_x),
std::min(threadGroupSize, (NSUInteger)command.data.dispatch.group_count_y),
std::min(threadGroupSize, (NSUInteger)command.data.dispatch.group_count_z));
[computeEncoder dispatchThreads:MTLSizeMake(command.data.dispatch.group_count_x, command.data.dispatch.group_count_y, command.data.dispatch.group_count_z) threadsPerThreadgroup:threadgroupSize];
} }
break; break;
} }

View file

@ -11,6 +11,8 @@ public:
id<MTLRenderPipelineState> handle = nil; id<MTLRenderPipelineState> handle = nil;
id<MTLComputePipelineState> compute_handle = nil; id<MTLComputePipelineState> compute_handle = nil;
MTLSize threadGroupSize;
id<MTLDepthStencilState> depthStencil = nil; id<MTLDepthStencilState> depthStencil = nil;
MTLPrimitiveType primitiveType; MTLPrimitiveType primitiveType;

View file

@ -201,6 +201,9 @@ struct GFXComputePipelineCreateInfo {
std::string_view compute_path; std::string_view compute_path;
ShaderSource compute_src; ShaderSource compute_src;
} shaders; } shaders;
// TODO: extract this from the shader instead of hardcoding it twice (once in GLSL, and now here)
int workgroup_size_x = 1, workgroup_size_y = 1, workgroup_size_z = 1;
}; };
struct GFXFramebufferCreateInfo { struct GFXFramebufferCreateInfo {

View file

@ -180,6 +180,7 @@ private:
// histogram compute // histogram compute
GFXPipeline* histogram_pipeline = nullptr; GFXPipeline* histogram_pipeline = nullptr;
GFXBuffer* histogram_buffer = nullptr;
std::unique_ptr<SMAAPass> smaaPass; std::unique_ptr<SMAAPass> smaaPass;
std::unique_ptr<GaussianHelper> gHelper; std::unique_ptr<GaussianHelper> gHelper;

View file

@ -206,10 +206,6 @@ void Renderer::stopSceneBlur() {
void Renderer::render(Scene* scene, int index) { void Renderer::render(Scene* scene, int index) {
GFXCommandBuffer* commandbuffer = engine->get_gfx()->acquire_command_buffer(); GFXCommandBuffer* commandbuffer = engine->get_gfx()->acquire_command_buffer();
commandbuffer->set_compute_pipeline(histogram_pipeline);
commandbuffer->dispatch(1, 1, 1);
const auto extent = get_extent(); const auto extent = get_extent();
const auto render_extent = get_render_extent(); const auto render_extent = get_render_extent();
@ -290,6 +286,14 @@ void Renderer::render(Scene* scene, int index) {
commandbuffer->push_group("Post Processing"); commandbuffer->push_group("Post Processing");
commandbuffer->set_compute_pipeline(histogram_pipeline);
commandbuffer->bind_texture(offscreenColorTexture, 0);
commandbuffer->bind_shader_buffer(histogram_buffer, 0, 1, sizeof(uint) * 256);
commandbuffer->dispatch(static_cast<uint32_t>(std::ceil(render_extent.width / 16.0f)),
static_cast<uint32_t>(std::ceil(render_extent.height / 16.0f)), 1);
commandbuffer->set_graphics_pipeline(viewport_mode ? renderToViewportPipeline : postPipeline); commandbuffer->set_graphics_pipeline(viewport_mode ? renderToViewportPipeline : postPipeline);
commandbuffer->bind_texture(offscreenColorTexture, 1); commandbuffer->bind_texture(offscreenColorTexture, 1);
commandbuffer->bind_texture(offscreenBackTexture, 2); commandbuffer->bind_texture(offscreenBackTexture, 2);
@ -1003,6 +1007,10 @@ void Renderer::createBRDF() {
void Renderer::create_histogram_resources() { void Renderer::create_histogram_resources() {
GFXComputePipelineCreateInfo create_info = {}; GFXComputePipelineCreateInfo create_info = {};
create_info.shaders.compute_path = "histogram.comp"; create_info.shaders.compute_path = "histogram.comp";
create_info.workgroup_size_x = 16;
create_info.workgroup_size_y = 16;
histogram_pipeline = gfx->create_compute_pipeline(create_info); histogram_pipeline = gfx->create_compute_pipeline(create_info);
histogram_buffer = gfx->create_buffer(nullptr, sizeof(uint) * 256, false, GFXBufferUsage::Storage);
} }

View file

@ -1,4 +1,5 @@
const float PI = 3.14159265359; const float PI = 3.14159265359;
const float EPSILON = 0.005;
const vec2 PoissonOffsets[64] = { const vec2 PoissonOffsets[64] = {
vec2(0.0617981, 0.07294159), vec2(0.0617981, 0.07294159),

View file

@ -1,10 +1,47 @@
#include "common.nocompile.glsl"
layout(local_size_x = 16, local_size_y = 16) in; layout(local_size_x = 16, local_size_y = 16) in;
layout(rgba32f, binding = 0) uniform image2D hdr_image; layout(rgba32f, binding = 0) uniform image2D hdr_image;
// adapated from https://bruop.github.io/exposure/ and http://www.alextardif.com/HistogramLuminance.html
// Taken from RTR vol 4 pg. 278
#define RGB_TO_LUM vec3(0.2125, 0.7154, 0.0721)
shared uint histogram_shared[256]; shared uint histogram_shared[256];
void main() { layout(std430, binding = 1) buffer HistogramBuffer {
histogram_shared[gl_LocalInvocationIndex] = 1; uint histogram[];
groupMemoryBarrier(); };
const vec4 params = vec4(-10.0, 1.0 / 12.0, 0.0, 0.0);
uint color_to_bin(const vec3 hdr_color, const float min_log_lum, const float inverse_log_lum_range) {
const float lum = dot(hdr_color, RGB_TO_LUM);
if (lum < EPSILON) {
return 0;
}
const float log_lum = clamp((log2(lum) - min_log_lum) * inverse_log_lum_range, 0.0, 1.0);
return uint(log_lum * 254.0 + 1.0);
}
void main() {
histogram_shared[gl_LocalInvocationIndex] = 0;
groupMemoryBarrier();
uvec2 dim = imageSize(hdr_image).xy;
if(gl_GlobalInvocationID.x < dim.x && gl_GlobalInvocationID.y < dim.y) {
vec3 hdr_color = imageLoad(hdr_image, ivec2(gl_GlobalInvocationID.xy)).xyz;
uint bin_index = color_to_bin(hdr_color, params.x, params.y);
atomicAdd(histogram_shared[bin_index], 1);
}
groupMemoryBarrier();
atomicAdd(histogram[gl_LocalInvocationIndex], histogram_shared[gl_LocalInvocationIndex]);
} }