From e6b59cb5d8de2b622a7011beac2c23102484b557 Mon Sep 17 00:00:00 2001 From: BreadFish64 Date: Tue, 7 Apr 2020 17:09:05 -0500 Subject: [PATCH] video_core: implement optimized D24S8->RGBA8 reinterpreters --- .../gl_format_reinterpreter.cpp | 170 +++++++++++++++++- 1 file changed, 168 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp index 2175c62bd..ee842a859 100644 --- a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp +++ b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp @@ -220,9 +220,175 @@ private: GLint d24s8_abgr_viewport_u_id; }; +class ShaderD24S8toRGBA8 final : public FormatReinterpreterBase { +public: + ShaderD24S8toRGBA8() { + constexpr std::string_view vs_source = R"( +out vec2 dst_coord; + +uniform mediump ivec2 dst_size; + +const vec2 vertices[4] = + vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0)); + +void main() { + gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0); + dst_coord = (vertices[gl_VertexID] / 2.0 + 0.5) * vec2(dst_size); +} +)"; + + constexpr std::string_view fs_source = R"( +in mediump vec2 dst_coord; + +out lowp vec4 frag_color; + +uniform highp sampler2D depth; +uniform lowp usampler2D stencil; +uniform mediump ivec2 dst_size; +uniform mediump ivec2 src_size; +uniform mediump ivec2 src_offset; + +void main() { + mediump ivec2 tex_coord; + if (src_size == dst_size) { + tex_coord = ivec2(dst_coord); + } else { + highp int tex_index = int(dst_coord.y) * dst_size.x + int(dst_coord.x); + mediump int y = tex_index / src_size.x; + tex_coord = ivec2(tex_index - y * src_size.x, y); + } + tex_coord -= src_offset; + + highp uint depth_val = + uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0)); + lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x; + highp uvec4 components = + uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu); + frag_color = vec4(components) / (exp2(8.0) - 1.0); +} +)"; + + program.Create(vs_source.data(), fs_source.data()); + dst_size_loc = glGetUniformLocation(program.handle, "dst_size"); + src_size_loc = glGetUniformLocation(program.handle, "src_size"); + src_offset_loc = glGetUniformLocation(program.handle, "src_offset"); + vao.Create(); + + auto state = OpenGLState::GetCurState(); + auto cur_program = state.draw.shader_program; + state.draw.shader_program = program.handle; + state.Apply(); + glUniform1i(glGetUniformLocation(program.handle, "stencil"), 1); + state.draw.shader_program = cur_program; + state.Apply(); + + // OES_texture_view doesn't seem to support D24S8 views, at least on adreno + // so instead it will do an intermediate copy before running through the shader + if (GLAD_GL_ARB_texture_view) { + texture_view_func = glTextureView; + } else { + LOG_INFO(Render_OpenGL, + "Texture views are unsupported, reinterpretation will do intermediate copy"); + temp_tex.Create(); + } + } + + void Reinterpret(GLuint src_tex, const Common::Rectangle& src_rect, GLuint read_fb_handle, + GLuint dst_tex, const Common::Rectangle& dst_rect, + GLuint draw_fb_handle) override { + OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState state; + state.texture_units[0].texture_2d = src_tex; + + if (texture_view_func) { + temp_tex.Create(); + glActiveTexture(GL_TEXTURE1); + texture_view_func(temp_tex.handle, GL_TEXTURE_2D, src_tex, GL_DEPTH24_STENCIL8, 0, 1, 0, + 1); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + } else if (src_rect.top > temp_rect.top || src_rect.right > temp_rect.right) { + temp_tex.Release(); + temp_tex.Create(); + state.texture_units[1].texture_2d = temp_tex.handle; + state.Apply(); + glActiveTexture(GL_TEXTURE1); + glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH24_STENCIL8, src_rect.right, src_rect.top); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + temp_rect = src_rect; + } + + state.texture_units[1].texture_2d = temp_tex.handle; + state.draw.draw_framebuffer = draw_fb_handle; + state.draw.shader_program = program.handle; + state.draw.vertex_array = vao.handle; + state.viewport = {static_cast(dst_rect.left), static_cast(dst_rect.bottom), + static_cast(dst_rect.GetWidth()), + static_cast(dst_rect.GetHeight())}; + state.Apply(); + + glActiveTexture(GL_TEXTURE1); + if (!texture_view_func) { + glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, + temp_tex.handle, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, + src_rect.GetWidth(), src_rect.GetHeight(), 1); + } + glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, + 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + + glUniform2i(dst_size_loc, dst_rect.GetWidth(), dst_rect.GetHeight()); + glUniform2i(src_size_loc, src_rect.GetWidth(), src_rect.GetHeight()); + glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + + if (texture_view_func) { + temp_tex.Release(); + } + } + +private: + decltype(glTextureView) texture_view_func = nullptr; + OGLProgram program{}; + GLint dst_size_loc{-1}, src_size_loc{-1}, src_offset_loc{-1}; + OGLVertexArray vao{}; + OGLTexture temp_tex{}; + Common::Rectangle temp_rect{0, 0, 0, 0}; +}; + +class CopyImageSubData final : public FormatReinterpreterBase { + void Reinterpret(GLuint src_tex, const Common::Rectangle& src_rect, GLuint read_fb_handle, + GLuint dst_tex, const Common::Rectangle& dst_rect, + GLuint draw_fb_handle) override { + glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, dst_tex, + GL_TEXTURE_2D, 0, dst_rect.left, dst_rect.bottom, 0, src_rect.GetWidth(), + src_rect.GetHeight(), 1); + } +}; + FormatReinterpreterOpenGL::FormatReinterpreterOpenGL() { - reinterpreters.emplace(PixelFormatPair{PixelFormat::RGBA8, PixelFormat::D24S8}, - std::make_unique()); + std::string_view vendor{reinterpret_cast(glGetString(GL_VENDOR))}; + if (vendor.find("NVIDIA") != vendor.npos) { + reinterpreters.emplace(PixelFormatPair{PixelFormat::RGBA8, PixelFormat::D24S8}, + std::make_unique()); + // Nvidia bends the spec and allows direct copies between color and depth formats + // might as well take advantage of it + LOG_INFO(Render_OpenGL, "Using glCopyImageSubData for D24S8 to RGBA8 reinterpretation"); + } else if ((GLAD_GL_ARB_stencil_texturing && GLAD_GL_ARB_texture_storage) || GLES) { + reinterpreters.emplace(PixelFormatPair{PixelFormat::RGBA8, PixelFormat::D24S8}, + std::make_unique()); + LOG_INFO(Render_OpenGL, "Using shader for D24S8 to RGBA8 reinterpretation"); + } else { + reinterpreters.emplace(PixelFormatPair{PixelFormat::RGBA8, PixelFormat::D24S8}, + std::make_unique()); + LOG_INFO(Render_OpenGL, "Using pbo for D24S8 to RGBA8 reinterpretation"); + } reinterpreters.emplace(PixelFormatPair{PixelFormat::RGB5A1, PixelFormat::RGBA4}, std::make_unique()); }