video_core: Refactor GLSL fragment emitter (#7093)

* video_core: Refactor GLSL fragment emitter

* shader: Add back custom normal maps
This commit is contained in:
GPUCode 2023-11-06 22:26:28 +02:00 committed by GitHub
parent 9b2a5926a6
commit 1f6393e7d5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 2340 additions and 2098 deletions

View File

@ -10,16 +10,20 @@ set(HASH_FILES
"${VIDEO_CORE}/renderer_opengl/gl_shader_util.h"
"${VIDEO_CORE}/renderer_vulkan/vk_shader_util.cpp"
"${VIDEO_CORE}/renderer_vulkan/vk_shader_util.h"
"${VIDEO_CORE}/shader/generator/glsl_fs_shader_gen.cpp"
"${VIDEO_CORE}/shader/generator/glsl_fs_shader_gen.h"
"${VIDEO_CORE}/shader/generator/glsl_shader_decompiler.cpp"
"${VIDEO_CORE}/shader/generator/glsl_shader_decompiler.h"
"${VIDEO_CORE}/shader/generator/glsl_shader_gen.cpp"
"${VIDEO_CORE}/shader/generator/glsl_shader_gen.h"
"${VIDEO_CORE}/shader/generator/pica_fs_config.cpp"
"${VIDEO_CORE}/shader/generator/pica_fs_config.h"
"${VIDEO_CORE}/shader/generator/shader_gen.cpp"
"${VIDEO_CORE}/shader/generator/shader_gen.h"
"${VIDEO_CORE}/shader/generator/shader_uniforms.cpp"
"${VIDEO_CORE}/shader/generator/shader_uniforms.h"
"${VIDEO_CORE}/shader/generator/spv_shader_gen.cpp"
"${VIDEO_CORE}/shader/generator/spv_shader_gen.h"
"${VIDEO_CORE}/shader/generator/spv_fs_shader_gen.cpp"
"${VIDEO_CORE}/shader/generator/spv_fs_shader_gen.h"
"${VIDEO_CORE}/shader/shader.cpp"
"${VIDEO_CORE}/shader/shader.h"
"${VIDEO_CORE}/pica.cpp"

View File

@ -21,16 +21,20 @@ add_custom_command(OUTPUT scm_rev.cpp
"${VIDEO_CORE}/renderer_opengl/gl_shader_util.h"
"${VIDEO_CORE}/renderer_vulkan/vk_shader_util.cpp"
"${VIDEO_CORE}/renderer_vulkan/vk_shader_util.h"
"${VIDEO_CORE}/shader/generator/glsl_fs_shader_gen.cpp"
"${VIDEO_CORE}/shader/generator/glsl_fs_shader_gen.h"
"${VIDEO_CORE}/shader/generator/glsl_shader_decompiler.cpp"
"${VIDEO_CORE}/shader/generator/glsl_shader_decompiler.h"
"${VIDEO_CORE}/shader/generator/glsl_shader_gen.cpp"
"${VIDEO_CORE}/shader/generator/glsl_shader_gen.h"
"${VIDEO_CORE}/shader/generator/pica_fs_config.cpp"
"${VIDEO_CORE}/shader/generator/pica_fs_config.h"
"${VIDEO_CORE}/shader/generator/shader_gen.cpp"
"${VIDEO_CORE}/shader/generator/shader_gen.h"
"${VIDEO_CORE}/shader/generator/shader_uniforms.cpp"
"${VIDEO_CORE}/shader/generator/shader_uniforms.h"
"${VIDEO_CORE}/shader/generator/spv_shader_gen.cpp"
"${VIDEO_CORE}/shader/generator/spv_shader_gen.h"
"${VIDEO_CORE}/shader/generator/spv_fs_shader_gen.cpp"
"${VIDEO_CORE}/shader/generator/spv_fs_shader_gen.h"
"${VIDEO_CORE}/shader/shader.cpp"
"${VIDEO_CORE}/shader/shader.h"
"${VIDEO_CORE}/pica.cpp"

View File

@ -135,16 +135,21 @@ add_library(video_core STATIC
renderer_vulkan/vk_texture_runtime.cpp
renderer_vulkan/vk_texture_runtime.h
shader/debug_data.h
shader/generator/glsl_fs_shader_gen.cpp
shader/generator/glsl_fs_shader_gen.h
shader/generator/glsl_shader_decompiler.cpp
shader/generator/glsl_shader_decompiler.h
shader/generator/glsl_shader_gen.cpp
shader/generator/glsl_shader_gen.h
shader/generator/pica_fs_config.cpp
shader/generator/pica_fs_config.h
shader/generator/profile.h
shader/generator/shader_gen.cpp
shader/generator/shader_gen.h
shader/generator/shader_uniforms.cpp
shader/generator/shader_uniforms.h
shader/generator/spv_shader_gen.cpp
shader/generator/spv_shader_gen.h
shader/generator/spv_fs_shader_gen.cpp
shader/generator/spv_fs_shader_gen.h
shader/shader.cpp
shader/shader.h
shader/shader_interpreter.cpp

View File

@ -7,6 +7,7 @@
#include "common/vector_math.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/regs_texturing.h"
#include "video_core/shader/generator/pica_fs_config.h"
#include "video_core/shader/generator/shader_uniforms.h"
namespace Memory {
@ -153,6 +154,7 @@ protected:
Pica::Regs& regs;
std::vector<HardwareVertex> vertex_batch;
Pica::Shader::UserConfig user_config{};
bool shader_dirty = true;
VSUniformBlockData vs_uniform_block_data{};
@ -166,4 +168,5 @@ protected:
std::array<Common::Vec4f, 256> proctex_lut_data{};
std::array<Common::Vec4f, 256> proctex_diff_lut_data{};
};
} // namespace VideoCore

View File

@ -179,8 +179,8 @@ void Driver::CheckExtensionSupport() {
arb_texture_compression_bptc = GLAD_GL_ARB_texture_compression_bptc;
clip_cull_distance = !is_gles || GLAD_GL_EXT_clip_cull_distance;
ext_texture_compression_s3tc = GLAD_GL_EXT_texture_compression_s3tc;
shader_framebuffer_fetch =
GLAD_GL_EXT_shader_framebuffer_fetch || GLAD_GL_ARM_shader_framebuffer_fetch;
ext_shader_framebuffer_fetch = GLAD_GL_EXT_shader_framebuffer_fetch;
arm_shader_framebuffer_fetch = GLAD_GL_ARM_shader_framebuffer_fetch;
blend_minmax_factor = GLAD_GL_AMD_blend_minmax_factor || GLAD_GL_NV_blend_minmax_factor;
is_suitable = GLAD_GL_VERSION_4_3 || GLAD_GL_ES_VERSION_3_1;
}

View File

@ -107,7 +107,15 @@ public:
/// Returns true if the implementation supports (EXT/ARM)_shader_framebuffer_fetch
bool HasShaderFramebufferFetch() const {
return shader_framebuffer_fetch;
return ext_shader_framebuffer_fetch || arm_shader_framebuffer_fetch;
}
/// Returns true if the implementation supports EXT_shader_framebuffer_fetch
bool HasExtFramebufferFetch() const {
return ext_shader_framebuffer_fetch;
}
/// Returns true if the implementation supports ARM_shader_framebuffer_fetch
bool HasArmShaderFramebufferFetch() const {
return arm_shader_framebuffer_fetch;
}
/// Returns true if the implementation supports (NV/AMD)_blend_minmax_factor
@ -136,7 +144,8 @@ private:
bool clip_cull_distance{};
bool ext_texture_compression_s3tc{};
bool arb_texture_compression_bptc{};
bool shader_framebuffer_fetch{};
bool arm_shader_framebuffer_fetch{};
bool ext_shader_framebuffer_fetch{};
bool blend_minmax_factor{};
std::string_view gl_version{};

View File

@ -426,7 +426,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
// Sync and bind the shader
if (shader_dirty) {
shader_manager.UseFragmentShader(regs, use_custom_normal);
shader_manager.UseFragmentShader(regs, user_config);
shader_dirty = false;
}
@ -479,7 +479,7 @@ void RasterizerOpenGL::SyncTextureUnits(const Framebuffer* framebuffer) {
// Reset transient draw state
state.color_buffer.texture_2d = 0;
use_custom_normal = false;
user_config = {};
const auto pica_textures = regs.texturing.GetTextures();
for (u32 texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
@ -577,20 +577,15 @@ void RasterizerOpenGL::BindMaterial(u32 texture_index, Surface& surface) {
return;
}
const auto bind_texture = [&](const TextureUnits::TextureUnit& unit, GLuint texture,
GLuint sampler) {
glActiveTexture(unit.Enum());
glBindTexture(GL_TEXTURE_2D, texture);
glBindSampler(unit.id, sampler);
};
const GLuint sampler = state.texture_units[texture_index].sampler;
if (surface.HasNormalMap()) {
if (regs.lighting.disable) {
LOG_WARNING(Render_OpenGL, "Custom normal map used but scene has no light enabled");
}
bind_texture(TextureUnits::TextureNormalMap, surface.Handle(2), sampler);
use_custom_normal = true;
glActiveTexture(TextureUnits::TextureNormalMap.Enum());
glBindTexture(GL_TEXTURE_2D, surface.Handle(2));
glBindSampler(TextureUnits::TextureNormalMap.id, sampler);
user_config.use_custom_normal.Assign(1);
}
}

View File

@ -155,7 +155,6 @@ private:
OGLTexture texture_buffer_lut_lf;
OGLTexture texture_buffer_lut_rg;
OGLTexture texture_buffer_lut_rgba;
bool use_custom_normal{};
bool emulate_minmax_blend{};
};

View File

@ -14,10 +14,12 @@
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/shader/generator/shader_uniforms.h"
#include "video_core/shader/generator/glsl_fs_shader_gen.h"
#include "video_core/shader/generator/profile.h"
#include "video_core/video_core.h"
using namespace Pica::Shader::Generator;
using Pica::Shader::FSConfig;
namespace OpenGL {
@ -146,17 +148,20 @@ private:
OGLShaderStage program;
};
template <typename KeyConfigType, std::string (*CodeGenerator)(const KeyConfigType&, bool),
GLenum ShaderType>
template <typename KeyConfigType, auto CodeGenerator, GLenum ShaderType>
class ShaderCache {
public:
explicit ShaderCache(bool separable) : separable(separable) {}
std::tuple<GLuint, std::optional<std::string>> Get(const KeyConfigType& config) {
explicit ShaderCache(bool separable_) : separable{separable_} {}
~ShaderCache() = default;
template <typename... Args>
std::tuple<GLuint, std::optional<std::string>> Get(const KeyConfigType& config,
Args&&... args) {
auto [iter, new_shader] = shaders.emplace(config, OGLShaderStage{separable});
OGLShaderStage& cached_shader = iter->second;
std::optional<std::string> result{};
if (new_shader) {
result = CodeGenerator(config, separable);
result = CodeGenerator(config, args...);
cached_shader.Create(result->c_str(), ShaderType);
}
return {cached_shader.GetHandle(), std::move(result)};
@ -243,8 +248,7 @@ using ProgrammableVertexShaders =
using FixedGeometryShaders =
ShaderCache<PicaFixedGSConfig, &GLSL::GenerateFixedGeometryShader, GL_GEOMETRY_SHADER>;
using FragmentShaders =
ShaderCache<PicaFSConfig, &GLSL::GenerateFragmentShader, GL_FRAGMENT_SHADER>;
using FragmentShaders = ShaderCache<FSConfig, &GLSL::GenerateFragmentShader, GL_FRAGMENT_SHADER>;
class ShaderProgramManager::Impl {
public:
@ -252,8 +256,24 @@ public:
: separable(separable), programmable_vertex_shaders(separable),
trivial_vertex_shader(driver, separable), fixed_geometry_shaders(separable),
fragment_shaders(separable), disk_cache(separable) {
if (separable)
if (separable) {
pipeline.Create();
}
profile = Pica::Shader::Profile{
.has_separable_shaders = separable,
.has_clip_planes = driver.HasClipCullDistance(),
.has_geometry_shader = true,
.has_custom_border_color = true,
.has_fragment_shader_interlock = false,
.has_blend_minmax_factor = driver.HasBlendMinMaxFactor(),
.has_minus_one_to_one_range = true,
.has_logic_op = !driver.IsOpenGLES(),
.has_gl_ext_framebuffer_fetch = driver.HasExtFramebufferFetch(),
.has_gl_arm_framebuffer_fetch = driver.HasArmShaderFramebufferFetch(),
.has_gl_nv_fragment_shader_interlock = driver.GetVendor() == Vendor::Nvidia,
.has_gl_intel_fragment_shader_interlock = driver.GetVendor() == Vendor::Intel,
.is_vulkan = false,
};
}
struct ShaderTuple {
@ -283,7 +303,7 @@ public:
"ShaderTuple layout changed!");
bool separable;
Pica::Shader::Profile profile{};
ShaderTuple current;
ProgrammableVertexShaders programmable_vertex_shaders;
@ -336,7 +356,7 @@ void ShaderProgramManager::UseTrivialVertexShader() {
void ShaderProgramManager::UseFixedGeometryShader(const Pica::Regs& regs) {
PicaFixedGSConfig gs_config(regs, driver.HasClipCullDistance());
auto [handle, _] = impl->fixed_geometry_shaders.Get(gs_config);
auto [handle, _] = impl->fixed_geometry_shaders.Get(gs_config, impl->separable);
impl->current.gs = handle;
impl->current.gs_hash = gs_config.Hash();
}
@ -346,12 +366,12 @@ void ShaderProgramManager::UseTrivialGeometryShader() {
impl->current.gs_hash = 0;
}
void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs, bool use_normal) {
PicaFSConfig config(regs, false, driver.IsOpenGLES(), false, driver.HasBlendMinMaxFactor(),
use_normal);
auto [handle, result] = impl->fragment_shaders.Get(config);
void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs,
const Pica::Shader::UserConfig& user) {
const FSConfig fs_config{regs, user, impl->profile};
auto [handle, result] = impl->fragment_shaders.Get(fs_config, impl->profile);
impl->current.fs = handle;
impl->current.fs_hash = config.Hash();
impl->current.fs_hash = fs_config.Hash();
// Save FS to the disk cache if it's a new shader
if (result) {
auto& disk_cache = impl->disk_cache;
@ -470,8 +490,8 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading,
impl->programmable_vertex_shaders.Inject(conf, decomp->second.code,
std::move(shader));
} else if (raw.GetProgramType() == ProgramType::FS) {
PicaFSConfig conf(raw.GetRawShaderConfig(), false, driver.IsOpenGLES(), false,
driver.HasBlendMinMaxFactor());
// TODO: Support UserConfig in disk shader cache
const FSConfig conf(raw.GetRawShaderConfig(), {}, impl->profile);
std::scoped_lock lock(mutex);
impl->fragment_shaders.Inject(conf, std::move(shader));
} else {
@ -581,14 +601,14 @@ void ShaderProgramManager::LoadDiskCache(const std::atomic_bool& stop_loading,
std::scoped_lock lock(mutex);
impl->programmable_vertex_shaders.Inject(conf, code, std::move(stage));
} else if (raw.GetProgramType() == ProgramType::FS) {
PicaFSConfig conf(raw.GetRawShaderConfig(), false, driver.IsOpenGLES(), false,
driver.HasBlendMinMaxFactor());
code = GLSL::GenerateFragmentShader(conf, impl->separable);
// TODO: Support UserConfig in disk shader cache
const FSConfig fs_config{raw.GetRawShaderConfig(), {}, impl->profile};
code = GLSL::GenerateFragmentShader(fs_config, impl->profile);
OGLShaderStage stage{impl->separable};
stage.Create(code.c_str(), GL_FRAGMENT_SHADER);
handle = stage.GetHandle();
std::scoped_lock lock(mutex);
impl->fragment_shaders.Inject(conf, std::move(stage));
impl->fragment_shaders.Inject(fs_config, std::move(stage));
} else {
// Unsupported shader type got stored somehow so nuke the cache
LOG_ERROR(Frontend, "failed to load raw ProgramType {}", raw.GetProgramType());

View File

@ -17,7 +17,8 @@ struct Regs;
namespace Pica::Shader {
struct ShaderSetup;
}
union UserConfig;
} // namespace Pica::Shader
namespace OpenGL {
@ -47,7 +48,7 @@ public:
void UseTrivialGeometryShader();
void UseFragmentShader(const Pica::Regs& config, bool use_normal);
void UseFragmentShader(const Pica::Regs& config, const Pica::Shader::UserConfig& user);
void ApplyTo(OpenGLState& state);

View File

@ -4,9 +4,9 @@
#include "common/thread_worker.h"
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/regs_pipeline.h"
#include "video_core/regs_rasterizer.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/shader/generator/glsl_shader_gen.h"
#include "video_core/shader/generator/spv_shader_gen.h"
namespace Common {

View File

@ -15,8 +15,12 @@
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/shader/generator/glsl_fs_shader_gen.h"
#include "video_core/shader/generator/glsl_shader_gen.h"
#include "video_core/shader/generator/spv_fs_shader_gen.h"
using namespace Pica::Shader::Generator;
using Pica::Shader::FSConfig;
MICROPROFILE_DEFINE(Vulkan_Bind, "Vulkan", "Pipeline Bind", MP_RGB(192, 32, 32));
@ -86,6 +90,17 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
trivial_vertex_shader{
instance, vk::ShaderStageFlagBits::eVertex,
GLSL::GenerateTrivialVertexShader(instance.IsShaderClipDistanceSupported(), true)} {
profile = Pica::Shader::Profile{
.has_separable_shaders = true,
.has_clip_planes = instance.IsShaderClipDistanceSupported(),
.has_geometry_shader = instance.UseGeometryShaders(),
.has_custom_border_color = instance.IsCustomBorderColorSupported(),
.has_fragment_shader_interlock = instance.IsFragmentShaderInterlockSupported(),
.has_blend_minmax_factor = false,
.has_minus_one_to_one_range = false,
.has_logic_op = !instance.NeedsLogicOpEmulation(),
.is_vulkan = true,
};
BuildLayout();
}
@ -403,35 +418,30 @@ void PipelineCache::UseTrivialGeometryShader() {
shader_hashes[ProgramType::GS] = 0;
}
void PipelineCache::UseFragmentShader(const Pica::Regs& regs) {
const PicaFSConfig config{regs, instance.IsFragmentShaderInterlockSupported(),
instance.NeedsLogicOpEmulation(),
!instance.IsCustomBorderColorSupported(), false};
const auto [it, new_shader] = fragment_shaders.try_emplace(config, instance);
void PipelineCache::UseFragmentShader(const Pica::Regs& regs,
const Pica::Shader::UserConfig& user) {
const FSConfig fs_config{regs, user, profile};
const auto [it, new_shader] = fragment_shaders.try_emplace(fs_config, instance);
auto& shader = it->second;
if (new_shader) {
const bool use_spirv = Settings::values.spirv_shader_gen.GetValue();
const auto texture0_type = config.state.texture0_type.Value();
const bool is_shadow = texture0_type == Pica::TexturingRegs::TextureConfig::Shadow2D ||
texture0_type == Pica::TexturingRegs::TextureConfig::ShadowCube ||
config.state.shadow_rendering.Value();
if (use_spirv && !is_shadow) {
const std::vector code = SPIRV::GenerateFragmentShader(config);
if (use_spirv && !fs_config.UsesShadowPipeline()) {
const std::vector code = SPIRV::GenerateFragmentShader(fs_config);
shader.module = CompileSPV(code, instance.GetDevice());
shader.MarkDone();
} else {
workers.QueueWork([config, device = instance.GetDevice(), &shader]() {
const std::string code = GLSL::GenerateFragmentShader(config, true);
shader.module = Compile(code, vk::ShaderStageFlagBits::eFragment, device);
workers.QueueWork([fs_config, this, &shader]() {
const std::string code = GLSL::GenerateFragmentShader(fs_config, profile);
shader.module =
Compile(code, vk::ShaderStageFlagBits::eFragment, instance.GetDevice());
shader.MarkDone();
});
}
}
current_shaders[ProgramType::FS] = &shader;
shader_hashes[ProgramType::FS] = config.Hash();
shader_hashes[ProgramType::FS] = fs_config.Hash();
}
void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view, vk::Sampler sampler) {

View File

@ -9,13 +9,18 @@
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
#include "video_core/shader/generator/glsl_shader_gen.h"
#include "video_core/shader/generator/spv_shader_gen.h"
#include "video_core/shader/generator/pica_fs_config.h"
#include "video_core/shader/generator/profile.h"
#include "video_core/shader/generator/shader_gen.h"
namespace Pica {
struct Regs;
}
namespace Pica::Shader {
struct ShaderSetup;
}
namespace Vulkan {
class Instance;
@ -62,7 +67,7 @@ public:
void UseTrivialGeometryShader();
/// Binds a fragment shader generated from PICA state
void UseFragmentShader(const Pica::Regs& regs);
void UseFragmentShader(const Pica::Regs& regs, const Pica::Shader::UserConfig& user);
/// Binds a texture to the specified binding
void BindTexture(u32 binding, vk::ImageView image_view, vk::Sampler sampler);
@ -98,6 +103,7 @@ private:
RenderpassCache& renderpass_cache;
DescriptorPool& pool;
Pica::Shader::Profile profile{};
vk::UniquePipelineCache pipeline_cache;
vk::UniquePipelineLayout pipeline_layout;
std::size_t num_worker_threads;
@ -118,7 +124,7 @@ private:
std::unordered_map<Pica::Shader::Generator::PicaVSConfig, Shader*> programmable_vertex_map;
std::unordered_map<std::string, Shader> programmable_vertex_cache;
std::unordered_map<Pica::Shader::Generator::PicaFixedGSConfig, Shader> fixed_geometry_shaders;
std::unordered_map<Pica::Shader::Generator::PicaFSConfig, Shader> fragment_shaders;
std::unordered_map<Pica::Shader::FSConfig, Shader> fragment_shaders;
Shader trivial_vertex_shader;
};

View File

@ -497,7 +497,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
// Sync and bind the shader
if (shader_dirty) {
pipeline_cache.UseFragmentShader(regs);
pipeline_cache.UseFragmentShader(regs, user_config);
shader_dirty = false;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,100 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/shader/generator/pica_fs_config.h"
namespace Pica::Shader::Generator::GLSL {
/**
 * Emits the GLSL source of a fragment shader for a given FSConfig and Profile.
 *
 * The configuration and profile are stored as reference members, not copies.
 * NOTE(review): they are presumably bound to the constructor arguments, in which case both
 * arguments must outlive the module - confirm against the constructor definition.
 */
class FragmentModule {
public:
explicit FragmentModule(const FSConfig& config, const Profile& profile);
~FragmentModule();
/// Emits GLSL source corresponding to the provided pica fragment configuration
std::string Generate();
private:
/// Undoes the host perspective transformation and applies the PICA one
void WriteDepth();
/// Emits code to emulate the scissor rectangle
void WriteScissor();
/// Writes the code to emulate fragment lighting
void WriteLighting();
/// Writes the code to emulate fog
void WriteFog();
/// Writes the code to emulate gas rendering
void WriteGas();
/// Writes the code to emulate shadow-map rendering
void WriteShadow();
/// Writes the code to emulate logic ops in the fragment shader
void WriteLogicOp();
/// Writes the code to emulate PICA min/max blending factors
void WriteBlending();
/// Writes the specified TEV stage source component(s)
void AppendSource(Pica::TexturingRegs::TevStageConfig::Source source, u32 tev_index);
/// Writes the color components to use for the specified TEV stage color modifier
void AppendColorModifier(Pica::TexturingRegs::TevStageConfig::ColorModifier modifier,
Pica::TexturingRegs::TevStageConfig::Source source, u32 tev_index);
/// Writes the alpha component to use for the specified TEV stage alpha modifier
void AppendAlphaModifier(Pica::TexturingRegs::TevStageConfig::AlphaModifier modifier,
Pica::TexturingRegs::TevStageConfig::Source source, u32 tev_index);
/// Writes the combiner function for the color components for the specified TEV stage operation
void AppendColorCombiner(Pica::TexturingRegs::TevStageConfig::Operation operation);
/// Writes the combiner function for the alpha component for the specified TEV stage operation
void AppendAlphaCombiner(Pica::TexturingRegs::TevStageConfig::Operation operation);
/// Writes the if-statement condition used to evaluate alpha testing
void WriteAlphaTestCondition(Pica::FramebufferRegs::CompareFunc func);
/// Writes the code to emulate the specified TEV stage
void WriteTevStage(u32 index);
// Procedural texture helpers. NOTE(review): their definitions are not visible from this
// header; judging by the parameter types they presumably emit the coordinate shift, the
// coordinate clamp and the combine/map expression respectively - confirm in the .cpp.
void AppendProcTexShiftOffset(std::string_view v, Pica::TexturingRegs::ProcTexShift mode,
Pica::TexturingRegs::ProcTexClamp clamp_mode);
void AppendProcTexClamp(std::string_view var, Pica::TexturingRegs::ProcTexClamp mode);
void AppendProcTexCombineAndMap(Pica::TexturingRegs::ProcTexCombiner combiner,
std::string_view offset);
// Declaration emitters. NOTE(review): presumably each one writes one section of the shader
// preamble (extensions, interface, bindings, helper functions, samplers) - confirm in the
// .cpp before relying on any particular call order.
void DefineExtensions();
void DefineInterface();
void DefineBindings();
void DefineHelpers();
void DefineLightingHelpers();
void DefineShadowHelpers();
void DefineProcTexSampler();
void DefineTexUnitSampler(u32 i);
private:
// Reference members: the module does not own the configuration or the profile.
const FSConfig& config;
const Profile& profile;
// NOTE(review): presumably the buffer the Write*/Append*/Define* methods append to - confirm.
std::string out;
// Both flags start out false; where they are set is not visible from this header.
bool use_blend_fallback{};
bool use_fragment_shader_interlock{};
};
/**
 * Generates the GLSL fragment shader program source code for the current Pica state
 * @param config ShaderCacheKey object generated for the current Pica state, used for the shader
 * configuration (NOTE: Use state in this struct only, not the Pica registers!)
 * @param profile Capability profile of the host renderer the shader is generated for
 * @returns String of the shader source code
 */
std::string GenerateFragmentShader(const FSConfig& config, const Profile& profile);
} // namespace Pica::Shader::Generator::GLSL

File diff suppressed because it is too large Load Diff

View File

@ -46,12 +46,4 @@ std::string GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const P
*/
std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader);
/**
* Generates the GLSL fragment shader program source code for the current Pica state
* @param config ShaderCacheKey object generated for the current Pica state, used for the shader
* configuration (NOTE: Use state in this struct only, not the Pica registers!)
* @returns String of the shader source code
*/
std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader);
} // namespace Pica::Shader::Generator::GLSL

View File

@ -0,0 +1,193 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/shader/generator/pica_fs_config.h"
namespace Pica::Shader {
/// Extracts the framebuffer and rasterizer state that influences fragment shader code.
/// The logic op and the min/max blend state are only taken from the registers when the
/// profile reports that the host cannot handle them itself.
FramebufferConfig::FramebufferConfig(const Pica::Regs& regs, const Profile& profile) {
    const auto& rasterizer = regs.rasterizer;
    const auto& merger = regs.framebuffer.output_merger;

    scissor_test_mode.Assign(rasterizer.scissor_test.mode);
    depthmap_enable.Assign(rasterizer.depthmap_enable);
    shadow_rendering.Assign(regs.framebuffer.IsShadowRendering());

    // A disabled alpha test is recorded as a test that always passes.
    if (merger.alpha_test.enable) {
        alpha_test_func.Assign(merger.alpha_test.func.Value());
    } else {
        alpha_test_func.Assign(Pica::FramebufferRegs::CompareFunc::Always);
    }

    // Emulate logic op in the shader if needed and not supported.
    const bool emulate_logic_op = !profile.has_logic_op && !merger.alphablend_enable;
    if (emulate_logic_op) {
        logic_op.Assign(merger.logic_op);
    } else {
        logic_op.Assign(Pica::FramebufferRegs::LogicOp::Copy);
    }

    // Blend state is only recorded when blending is on and min/max factors are unsupported.
    if (profile.has_blend_minmax_factor || !merger.alphablend_enable) {
        return;
    }

    const auto is_min_or_max = [](const auto equation) {
        return equation == Pica::FramebufferRegs::BlendEquation::Max ||
               equation == Pica::FramebufferRegs::BlendEquation::Min;
    };
    const auto& blending = merger.alpha_blending;

    if (const auto equation = blending.blend_equation_rgb.Value(); is_min_or_max(equation)) {
        rgb_blend.eq = equation;
        rgb_blend.src_factor = blending.factor_source_rgb;
        rgb_blend.dst_factor = blending.factor_dest_rgb;
    }
    if (const auto equation = blending.blend_equation_a.Value(); is_min_or_max(equation)) {
        alpha_blend.eq = equation;
        alpha_blend.src_factor = blending.factor_source_a;
        alpha_blend.dst_factor = blending.factor_dest_a;
    }
}
/// Extracts the texturing state that influences fragment shader code: texture unit setup,
/// fog, combiner buffer masks, border-color emulation flags and the raw TEV stage words.
TextureConfig::TextureConfig(const Pica::TexturingRegs& regs, const Profile& profile) {
    using WrapMode = Pica::TexturingRegs::TextureConfig::WrapMode;
    using Operation = Pica::TexturingRegs::TevStageConfig::Operation;

    texture0_type.Assign(regs.texture0.type);
    texture2_use_coord1.Assign(regs.main_config.texture2_use_coord1 != 0);

    // RGB update mask goes in the low nibble, alpha update mask in the high nibble.
    const auto& buffer_input = regs.tev_combiner_buffer_input;
    const auto rgb_mask = buffer_input.update_mask_rgb.Value();
    const auto alpha_mask = buffer_input.update_mask_a.Value();
    combiner_buffer_input.Assign(rgb_mask | (alpha_mask << 4));

    fog_mode.Assign(regs.fog_mode);
    fog_flip.Assign(regs.fog_flip != 0);
    shadow_texture_orthographic.Assign(regs.shadow.orthographic != 0);

    // Emulate custom border color if needed and not supported.
    const bool emulate_border = !profile.has_custom_border_color;
    const auto pica_textures = regs.GetTextures();
    for (u32 unit = 0; unit < 3; ++unit) {
        const auto& tex_cfg = pica_textures[unit].config;
        auto& border = texture_border_color[unit];
        border.enable_s.Assign(emulate_border && tex_cfg.wrap_s == WrapMode::ClampToBorder);
        border.enable_t.Assign(emulate_border && tex_cfg.wrap_t == WrapMode::ClampToBorder);
    }

    const auto& pica_stages = regs.GetTevStages();
    for (std::size_t stage = 0; stage < tev_stages.size(); ++stage) {
        const auto& src = pica_stages[stage];
        auto& dst = tev_stages[stage];

        dst.sources_raw = src.sources_raw;
        dst.modifiers_raw = src.modifiers_raw;
        dst.ops_raw = src.ops_raw;
        dst.scales_raw = src.scales_raw;

        // NOTE(review): presumably this strips the alpha-combiner bits, which a Dot3_RGBA
        // color operation would not use - confirm against the TevStageConfig layout.
        if (src.color_op == Operation::Dot3_RGBA) {
            dst.sources_raw &= 0xFFF;
            dst.modifiers_raw &= 0xFFF;
            dst.ops_raw &= 0xF;
        }
    }
}
/// Extracts the fragment lighting state that influences fragment shader code.
/// When lighting is disabled nothing is assigned and every field keeps its initial value.
LightConfig::LightConfig(const Pica::LightingRegs& regs) {
    if (regs.disable) {
        return;
    }

    const auto& cfg0 = regs.config0;
    const auto& cfg1 = regs.config1;

    enable.Assign(1);
    src_num.Assign(regs.max_light_index + 1);
    config.Assign(cfg0.config);
    enable_primary_alpha.Assign(cfg0.enable_primary_alpha);
    enable_secondary_alpha.Assign(cfg0.enable_secondary_alpha);
    bump_mode.Assign(cfg0.bump_mode);
    bump_selector.Assign(cfg0.bump_selector);
    bump_renorm.Assign(cfg0.disable_bump_renorm == 0);
    clamp_highlights.Assign(cfg0.clamp_highlights != 0);

    // The remaining shadow fields are only recorded when shadows are enabled.
    enable_shadow.Assign(cfg0.enable_shadow != 0);
    if (enable_shadow) {
        shadow_primary.Assign(cfg0.shadow_primary != 0);
        shadow_secondary.Assign(cfg0.shadow_secondary != 0);
        shadow_invert.Assign(cfg0.shadow_invert != 0);
        shadow_alpha.Assign(cfg0.shadow_alpha != 0);
        shadow_selector.Assign(cfg0.shadow_selector);
    }

    // Slot i describes the hardware light that light_enable maps to position i.
    for (u32 slot = 0; slot <= regs.max_light_index; ++slot) {
        const u32 hw_index = regs.light_enable.GetNum(slot);
        const auto& source = regs.light[hw_index].config;
        auto& entry = lights[slot];

        entry.num.Assign(hw_index);
        entry.directional.Assign(source.directional != 0);
        entry.two_sided_diffuse.Assign(source.two_sided_diffuse != 0);
        entry.geometric_factor_0.Assign(source.geometric_factor_0 != 0);
        entry.geometric_factor_1.Assign(source.geometric_factor_1 != 0);
        entry.dist_atten_enable.Assign(!regs.IsDistAttenDisabled(hw_index));
        entry.spot_atten_enable.Assign(!regs.IsSpotAttenDisabled(hw_index));
        entry.shadow_enable.Assign(!regs.IsShadowDisabled(hw_index));
    }

    // Records one lookup table: the enable bit is always written, while the input mode,
    // input selector and scale are only filled in for an enabled table.
    const auto setup_lut = [&regs](auto& lut, const bool enabled, const auto& abs_disabled,
                                   const auto& input, const auto& scale_selector) {
        lut.enable.Assign(enabled);
        if (!enabled) {
            return;
        }
        lut.abs_input.Assign(abs_disabled == 0);
        lut.type.Assign(input.Value());
        lut.scale = regs.lut_scale.GetScale(scale_selector);
    };

    const auto& abs_in = regs.abs_lut_input;
    const auto& lut_in = regs.lut_input;
    const auto& scales = regs.lut_scale;

    setup_lut(lut_d0, cfg1.disable_lut_d0 == 0, abs_in.disable_d0, lut_in.d0, scales.d0);
    setup_lut(lut_d1, cfg1.disable_lut_d1 == 0, abs_in.disable_d1, lut_in.d1, scales.d1);
    // This is a dummy field due to lack of the corresponding register
    setup_lut(lut_sp, true, abs_in.disable_sp, lut_in.sp, scales.sp);
    setup_lut(lut_fr, cfg1.disable_lut_fr == 0, abs_in.disable_fr, lut_in.fr, scales.fr);
    setup_lut(lut_rr, cfg1.disable_lut_rr == 0, abs_in.disable_rr, lut_in.rr, scales.rr);
    setup_lut(lut_rg, cfg1.disable_lut_rg == 0, abs_in.disable_rg, lut_in.rg, scales.rg);
    setup_lut(lut_rb, cfg1.disable_lut_rb == 0, abs_in.disable_rb, lut_in.rb, scales.rb);
}
/// Extracts the procedural texture state that influences fragment shader code.
/// When texture unit 3 is disabled nothing is assigned and every field keeps its initial value.
ProcTexConfig::ProcTexConfig(const Pica::TexturingRegs& regs) {
    const auto& main_cfg = regs.main_config;
    if (!main_cfg.texture3_enable) {
        return;
    }

    enable.Assign(1);
    coord.Assign(main_cfg.texture3_coordinates);

    // Coordinate handling, combiners and noise.
    const auto& proctex_regs = regs.proctex;
    u_clamp.Assign(proctex_regs.u_clamp);
    v_clamp.Assign(proctex_regs.v_clamp);
    color_combiner.Assign(proctex_regs.color_combiner);
    alpha_combiner.Assign(proctex_regs.alpha_combiner);
    separate_alpha.Assign(proctex_regs.separate_alpha);
    noise_enable.Assign(proctex_regs.noise_enable);
    u_shift.Assign(proctex_regs.u_shift);
    v_shift.Assign(proctex_regs.v_shift);

    // Lookup table geometry, level-of-detail range and filtering.
    const auto& lut_regs = regs.proctex_lut;
    lut_width = lut_regs.width;
    lod_min = lut_regs.lod_min;
    lod_max = lut_regs.lod_max;
    lut_filter.Assign(lut_regs.filter);

    // Per-level offsets into the lookup table.
    const auto& offset_regs = regs.proctex_lut_offset;
    lut_offset0 = offset_regs.level0;
    lut_offset1 = offset_regs.level1;
    lut_offset2 = offset_regs.level2;
    lut_offset3 = offset_regs.level3;
}
/// Builds the complete fragment shader configuration from the PICA registers.
/// The framebuffer and texture parts also consult the profile; the lighting and procedural
/// texture parts depend on the registers only, and the user configuration is copied as given.
FSConfig::FSConfig(const Pica::Regs& regs, const UserConfig& user_, const Profile& profile)
: framebuffer{regs, profile}, texture{regs.texturing, profile}, lighting{regs.lighting},
proctex{regs.texturing}, user{user_} {}
} // namespace Pica::Shader

View File

@ -0,0 +1,207 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/hash.h"
#include "video_core/regs.h"
#include "video_core/shader/generator/profile.h"
namespace Pica::Shader {
/// Blend equation plus source/destination factors for one blend channel group.
/// FramebufferConfig holds one instance for RGB and one for alpha.
struct BlendConfig {
Pica::FramebufferRegs::BlendEquation eq;
Pica::FramebufferRegs::BlendFactor src_factor;
Pica::FramebufferRegs::BlendFactor dst_factor;
};
/// Framebuffer and rasterizer state relevant to fragment shader generation.
struct FramebufferConfig {
explicit FramebufferConfig(const Pica::Regs& regs, const Profile& profile);
// All flags below share the storage of `raw`, which starts out as zero.
union {
u32 raw{};
BitField<0, 3, Pica::FramebufferRegs::CompareFunc> alpha_test_func;
BitField<3, 2, Pica::RasterizerRegs::ScissorMode> scissor_test_mode;
BitField<5, 1, Pica::RasterizerRegs::DepthBuffering> depthmap_enable;
BitField<6, 4, Pica::FramebufferRegs::LogicOp> logic_op;
BitField<10, 1, u32> shadow_rendering;
};
// Value-initialized by default; the constructor decides whether to fill them in.
BlendConfig rgb_blend{};
BlendConfig alpha_blend{};
};
// Rejects layouts with padding or other non-value bits.
// NOTE(review): presumably required because the configuration is hashed or compared as raw
// bytes - confirm against the Hash() implementation.
static_assert(std::has_unique_object_representations_v<FramebufferConfig>);
// Raw register words of a single TEV stage. There is deliberately no const_color member here;
// the conversion below reports const_color as 0.
struct TevStageConfigRaw {
u32 sources_raw;
u32 modifiers_raw;
u32 ops_raw;
u32 scales_raw;
// Implicit conversion to the register-level TEV stage type, copying the four raw words and
// zeroing const_color.
operator Pica::TexturingRegs::TevStageConfig() const noexcept {
return {
.sources_raw = sources_raw,
.modifiers_raw = modifiers_raw,
.ops_raw = ops_raw,
.const_color = 0,
.scales_raw = scales_raw,
};
}
};
// Per-texture-unit border flags, one bit for each texture coordinate axis (s and t).
// NOTE(review): presumably set when border color has to be handled in the shader for that
// axis; confirm against the TextureConfig constructor.
union TextureBorder {
BitField<0, 1, u32> enable_s;
BitField<1, 1, u32> enable_t;
};
// Texturing-side part of the fragment shader configuration: a packed 32-bit option word,
// per-unit border flags for three texture units and the raw words of six TEV stages.
// Every member is zero-initialized by default.
struct TextureConfig {
explicit TextureConfig(const Pica::TexturingRegs& regs, const Profile& profile);
union {
// Backing storage shared by all bitfields below (bits 0-16 are used).
u32 raw{};
BitField<0, 3, Pica::TexturingRegs::TextureConfig::TextureType> texture0_type;
BitField<3, 1, u32> texture2_use_coord1;
// 8-bit combiner buffer update mask: FSConfig tests bits 0-3 for color updates and
// bits 4-7 for alpha updates, one bit per TEV stage index below 4.
BitField<4, 8, u32> combiner_buffer_input;
BitField<12, 3, Pica::TexturingRegs::FogMode> fog_mode;
BitField<15, 1, u32> fog_flip;
BitField<16, 1, u32> shadow_texture_orthographic;
};
std::array<TextureBorder, 3> texture_border_color{};
std::array<TevStageConfigRaw, 6> tev_stages{};
};
static_assert(std::has_unique_object_representations_v<TextureConfig>);
/// Per-light flags packed into a single 16-bit word (bits 0-9 are used).
union Light {
    // Zero-initialized like the `raw` words of the sibling config types, so that a
    // default-initialized Light never carries indeterminate bits into the byte-wise
    // comparison and hash performed on FSConfig.
    u16 raw{};
    BitField<0, 3, u16> num;
    BitField<3, 1, u16> directional;
    BitField<4, 1, u16> two_sided_diffuse;
    BitField<5, 1, u16> dist_atten_enable;
    BitField<6, 1, u16> spot_atten_enable;
    BitField<7, 1, u16> geometric_factor_0;
    BitField<8, 1, u16> geometric_factor_1;
    BitField<9, 1, u16> shadow_enable;
};
static_assert(std::has_unique_object_representations_v<Light>);
/// Configuration of one lighting lookup table: a packed flag word plus a scale factor.
struct LutConfig {
    union {
        // Zero-initialized like the `raw` words of the sibling config structs. FSConfig is
        // compared with memcmp and hashed over its bytes, so a default-initialized
        // LutConfig must not contain indeterminate values.
        u32 raw{};
        BitField<0, 1, u32> enable;
        BitField<1, 1, u32> abs_input;
        BitField<2, 3, Pica::LightingRegs::LightingLutInput> type;
    };
    // Scale factor for this LUT; defaults to 0 for the same reason as `raw`.
    f32 scale{};
};
// Lighting part of the fragment shader configuration: a packed 32-bit option word, seven
// lookup table configs and eight per-light flag words. Every member is zero-initialized
// by default.
struct LightConfig {
explicit LightConfig(const Pica::LightingRegs& regs);
union {
// Backing storage shared by all bitfields below (bits 0-23 are used).
u32 raw{};
BitField<0, 1, u32> enable;
// NOTE(review): appears to be the number of leading entries of `lights` in use;
// confirm against the constructor.
BitField<1, 4, u32> src_num;
BitField<5, 2, Pica::LightingRegs::LightingBumpMode> bump_mode;
BitField<7, 2, u32> bump_selector;
BitField<9, 1, u32> bump_renorm;
BitField<10, 1, u32> clamp_highlights;
BitField<11, 4, Pica::LightingRegs::LightingConfig> config;
BitField<15, 1, u32> enable_primary_alpha;
BitField<16, 1, u32> enable_secondary_alpha;
BitField<17, 1, u32> enable_shadow;
BitField<18, 1, u32> shadow_primary;
BitField<19, 1, u32> shadow_secondary;
BitField<20, 1, u32> shadow_invert;
BitField<21, 1, u32> shadow_alpha;
BitField<22, 2, u32> shadow_selector;
};
// One config per lighting lookup table.
LutConfig lut_d0{};
LutConfig lut_d1{};
LutConfig lut_sp{};
LutConfig lut_fr{};
LutConfig lut_rr{};
LutConfig lut_rg{};
LutConfig lut_rb{};
std::array<Light, 8> lights{};
};
// Procedural texture part of the fragment shader configuration: a packed 32-bit option word
// followed by the LUT width, four LUT level offsets and the LOD range. Every member is
// zero-initialized by default.
struct ProcTexConfig {
explicit ProcTexConfig(const Pica::TexturingRegs& regs);
union {
// Backing storage shared by all bitfields below (bits 0-25 are used).
u32 raw{};
BitField<0, 1, u32> enable;
BitField<1, 2, u32> coord;
BitField<3, 3, Pica::TexturingRegs::ProcTexClamp> u_clamp;
BitField<6, 3, Pica::TexturingRegs::ProcTexClamp> v_clamp;
BitField<9, 4, Pica::TexturingRegs::ProcTexCombiner> color_combiner;
BitField<13, 4, Pica::TexturingRegs::ProcTexCombiner> alpha_combiner;
BitField<17, 3, Pica::TexturingRegs::ProcTexFilter> lut_filter;
BitField<20, 1, u32> separate_alpha;
BitField<21, 1, u32> noise_enable;
BitField<22, 2, Pica::TexturingRegs::ProcTexShift> u_shift;
BitField<24, 2, Pica::TexturingRegs::ProcTexShift> v_shift;
};
s32 lut_width{};
s32 lut_offset0{};
s32 lut_offset1{};
s32 lut_offset2{};
s32 lut_offset3{};
// Two 16-bit fields close the struct without tail padding, which the
// has_unique_object_representations static_assert after this struct relies on.
u16 lod_min{};
u16 lod_max{};
};
static_assert(std::has_unique_object_representations_v<ProcTexConfig>);
// User-controlled options that take part in the fragment shader configuration, packed into
// one zero-initialized 32-bit word. Only bit 0 is used.
union UserConfig {
u32 raw{};
BitField<0, 1, u32> use_custom_normal;
};
static_assert(std::has_unique_object_representations_v<UserConfig>);
struct FSConfig {
explicit FSConfig(const Pica::Regs& regs, const UserConfig& user, const Profile& profile);
[[nodiscard]] bool TevStageUpdatesCombinerBufferColor(u32 stage_index) const {
return (stage_index < 4) && (texture.combiner_buffer_input & (1 << stage_index));
}
[[nodiscard]] bool TevStageUpdatesCombinerBufferAlpha(u32 stage_index) const {
return (stage_index < 4) && ((texture.combiner_buffer_input >> 4) & (1 << stage_index));
}
[[nodiscard]] bool EmulateBlend() const {
return framebuffer.rgb_blend.eq != Pica::FramebufferRegs::BlendEquation::Add ||
framebuffer.alpha_blend.eq != Pica::FramebufferRegs::BlendEquation::Add;
}
[[nodiscard]] bool UsesShadowPipeline() const {
const auto texture0_type = texture.texture0_type.Value();
return texture0_type == Pica::TexturingRegs::TextureConfig::Shadow2D ||
texture0_type == Pica::TexturingRegs::TextureConfig::ShadowCube ||
framebuffer.shadow_rendering.Value();
}
bool operator==(const FSConfig& other) const noexcept {
return std::memcmp(this, &other, sizeof(FSConfig)) == 0;
}
std::size_t Hash() const noexcept {
return Common::ComputeHash64(this, sizeof(FSConfig));
}
FramebufferConfig framebuffer;
TextureConfig texture;
LightConfig lighting;
ProcTexConfig proctex;
UserConfig user;
};
} // namespace Pica::Shader
namespace std {
/// Allows FSConfig to be used as a key in unordered containers by forwarding to
/// FSConfig::Hash().
template <>
struct hash<Pica::Shader::FSConfig> {
    std::size_t operator()(const Pica::Shader::FSConfig& config) const noexcept {
        const std::size_t digest = config.Hash();
        return digest;
    }
};
} // namespace std

View File

@ -0,0 +1,25 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
namespace Pica::Shader {
// Set of boolean feature flags handed to the shader configuration constructors.
// Every flag defaults to false.
// NOTE(review): the names suggest these describe host GPU / driver capabilities and the
// active graphics API; confirm against the code that fills this struct in.
struct Profile {
bool has_separable_shaders{};
bool has_clip_planes{};
bool has_geometry_shader{};
bool has_custom_border_color{};
bool has_fragment_shader_interlock{};
bool has_blend_minmax_factor{};
bool has_minus_one_to_one_range{};
bool has_logic_op{};
bool has_gl_ext_framebuffer_fetch{};
bool has_gl_arm_framebuffer_fetch{};
bool has_gl_nv_fragment_shader_interlock{};
bool has_gl_intel_fragment_shader_interlock{};
bool is_vulkan{};
};
} // namespace Pica::Shader

View File

@ -9,218 +9,6 @@
namespace Pica::Shader::Generator {
// Fills `state` from the PICA registers and the emulation flags passed in.
//   regs                          - register file to read from.
//   has_fragment_shader_interlock - combined with shadow rendering to decide whether the
//                                   shader uses fragment shader interlock.
//   emulate_logic_op              - request logic op emulation; only takes effect while
//                                   alpha blending is disabled.
//   emulate_custom_border_color   - request border handling for ClampToBorder wrap modes.
//   emulate_blend_minmax_factor   - request blend emulation for Min/Max blend equations.
//   use_custom_normal_map         - stored as-is in the state.
// Fields belonging to a disabled feature are not written by this function.
PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, bool has_fragment_shader_interlock,
bool emulate_logic_op, bool emulate_custom_border_color,
bool emulate_blend_minmax_factor, bool use_custom_normal_map) {
state.scissor_test_mode.Assign(regs.rasterizer.scissor_test.mode);
state.depthmap_enable.Assign(regs.rasterizer.depthmap_enable);
// A disabled alpha test is recorded as CompareFunc::Always.
state.alpha_test_func.Assign(regs.framebuffer.output_merger.alpha_test.enable
? regs.framebuffer.output_merger.alpha_test.func.Value()
: Pica::FramebufferRegs::CompareFunc::Always);
state.texture0_type.Assign(regs.texturing.texture0.type);
state.texture2_use_coord1.Assign(regs.texturing.main_config.texture2_use_coord1 != 0);
// Border flags are set per axis, only when emulation is requested and the wrap mode of
// that axis is ClampToBorder.
const auto pica_textures = regs.texturing.GetTextures();
for (u32 tex_index = 0; tex_index < 3; tex_index++) {
const auto config = pica_textures[tex_index].config;
state.texture_border_color[tex_index].enable_s.Assign(
emulate_custom_border_color &&
config.wrap_s == Pica::TexturingRegs::TextureConfig::WrapMode::ClampToBorder);
state.texture_border_color[tex_index].enable_t.Assign(
emulate_custom_border_color &&
config.wrap_t == Pica::TexturingRegs::TextureConfig::WrapMode::ClampToBorder);
}
// Emulate logic op in the shader if not supported. This is mostly for mobile GPUs
const bool needs_emulate_logic_op =
emulate_logic_op && !regs.framebuffer.output_merger.alphablend_enable;
state.emulate_logic_op.Assign(needs_emulate_logic_op);
if (needs_emulate_logic_op) {
state.logic_op.Assign(regs.framebuffer.output_merger.logic_op);
} else {
state.logic_op.Assign(Pica::FramebufferRegs::LogicOp::NoOp);
}
// Copy relevant tev stages fields.
// We don't sync const_color here because of the high variance, it is a
// shader uniform instead.
const auto& tev_stages = regs.texturing.GetTevStages();
DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size());
for (std::size_t i = 0; i < tev_stages.size(); i++) {
const auto& tev_stage = tev_stages[i];
state.tev_stages[i].sources_raw = tev_stage.sources_raw;
state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw;
state.tev_stages[i].ops_raw = tev_stage.ops_raw;
state.tev_stages[i].scales_raw = tev_stage.scales_raw;
// For Dot3_RGBA color ops only the low bits of the raw words are kept.
// NOTE(review): presumably this drops the alpha half of each word, which this
// operation does not use - confirm against the TevStageConfig layout.
if (tev_stage.color_op == Pica::TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) {
state.tev_stages[i].sources_raw &= 0xFFF;
state.tev_stages[i].modifiers_raw &= 0xFFF;
state.tev_stages[i].ops_raw &= 0xF;
}
}
state.fog_mode.Assign(regs.texturing.fog_mode);
state.fog_flip.Assign(regs.texturing.fog_flip != 0);
// Low nibble: RGB update mask, high nibble: alpha update mask.
state.combiner_buffer_input.Assign(
regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() |
regs.texturing.tev_combiner_buffer_input.update_mask_a.Value() << 4);
// Fragment lighting
state.lighting.enable.Assign(!regs.lighting.disable);
if (state.lighting.enable) {
state.lighting.src_num.Assign(regs.lighting.max_light_index + 1);
// Slot `light_index` stores the hardware light number it maps to, plus that light's flags.
for (u32 light_index = 0; light_index < state.lighting.src_num; ++light_index) {
const u32 num = regs.lighting.light_enable.GetNum(light_index);
const auto& light = regs.lighting.light[num];
state.lighting.light[light_index].num.Assign(num);
state.lighting.light[light_index].directional.Assign(light.config.directional != 0);
state.lighting.light[light_index].two_sided_diffuse.Assign(
light.config.two_sided_diffuse != 0);
state.lighting.light[light_index].geometric_factor_0.Assign(
light.config.geometric_factor_0 != 0);
state.lighting.light[light_index].geometric_factor_1.Assign(
light.config.geometric_factor_1 != 0);
state.lighting.light[light_index].dist_atten_enable.Assign(
!regs.lighting.IsDistAttenDisabled(num));
state.lighting.light[light_index].spot_atten_enable.Assign(
!regs.lighting.IsSpotAttenDisabled(num));
state.lighting.light[light_index].shadow_enable.Assign(
!regs.lighting.IsShadowDisabled(num));
}
// For each lookup table, input mode, input type and scale are only recorded when the
// table is enabled.
state.lighting.lut_d0.enable.Assign(regs.lighting.config1.disable_lut_d0 == 0);
if (state.lighting.lut_d0.enable) {
state.lighting.lut_d0.abs_input.Assign(regs.lighting.abs_lut_input.disable_d0 == 0);
state.lighting.lut_d0.type.Assign(regs.lighting.lut_input.d0.Value());
state.lighting.lut_d0.scale =
regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
}
state.lighting.lut_d1.enable.Assign(regs.lighting.config1.disable_lut_d1 == 0);
if (state.lighting.lut_d1.enable) {
state.lighting.lut_d1.abs_input.Assign(regs.lighting.abs_lut_input.disable_d1 == 0);
state.lighting.lut_d1.type.Assign(regs.lighting.lut_input.d1.Value());
state.lighting.lut_d1.scale =
regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
}
// this is a dummy field due to lack of the corresponding register
state.lighting.lut_sp.enable.Assign(1);
state.lighting.lut_sp.abs_input.Assign(regs.lighting.abs_lut_input.disable_sp == 0);
state.lighting.lut_sp.type.Assign(regs.lighting.lut_input.sp.Value());
state.lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp);
state.lighting.lut_fr.enable.Assign(regs.lighting.config1.disable_lut_fr == 0);
if (state.lighting.lut_fr.enable) {
state.lighting.lut_fr.abs_input.Assign(regs.lighting.abs_lut_input.disable_fr == 0);
state.lighting.lut_fr.type.Assign(regs.lighting.lut_input.fr.Value());
state.lighting.lut_fr.scale =
regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
}
state.lighting.lut_rr.enable.Assign(regs.lighting.config1.disable_lut_rr == 0);
if (state.lighting.lut_rr.enable) {
state.lighting.lut_rr.abs_input.Assign(regs.lighting.abs_lut_input.disable_rr == 0);
state.lighting.lut_rr.type.Assign(regs.lighting.lut_input.rr.Value());
state.lighting.lut_rr.scale =
regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
}
state.lighting.lut_rg.enable.Assign(regs.lighting.config1.disable_lut_rg == 0);
if (state.lighting.lut_rg.enable) {
state.lighting.lut_rg.abs_input.Assign(regs.lighting.abs_lut_input.disable_rg == 0);
state.lighting.lut_rg.type.Assign(regs.lighting.lut_input.rg.Value());
state.lighting.lut_rg.scale =
regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
}
state.lighting.lut_rb.enable.Assign(regs.lighting.config1.disable_lut_rb == 0);
if (state.lighting.lut_rb.enable) {
state.lighting.lut_rb.abs_input.Assign(regs.lighting.abs_lut_input.disable_rb == 0);
state.lighting.lut_rb.type.Assign(regs.lighting.lut_input.rb.Value());
state.lighting.lut_rb.scale =
regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
}
state.lighting.config.Assign(regs.lighting.config0.config);
state.lighting.enable_primary_alpha.Assign(regs.lighting.config0.enable_primary_alpha);
state.lighting.enable_secondary_alpha.Assign(regs.lighting.config0.enable_secondary_alpha);
state.lighting.bump_mode.Assign(regs.lighting.config0.bump_mode);
state.lighting.bump_selector.Assign(regs.lighting.config0.bump_selector);
state.lighting.bump_renorm.Assign(regs.lighting.config0.disable_bump_renorm == 0);
state.lighting.clamp_highlights.Assign(regs.lighting.config0.clamp_highlights != 0);
state.lighting.enable_shadow.Assign(regs.lighting.config0.enable_shadow != 0);
// Shadow details are only recorded while lighting shadows are enabled.
if (state.lighting.enable_shadow) {
state.lighting.shadow_primary.Assign(regs.lighting.config0.shadow_primary != 0);
state.lighting.shadow_secondary.Assign(regs.lighting.config0.shadow_secondary != 0);
state.lighting.shadow_invert.Assign(regs.lighting.config0.shadow_invert != 0);
state.lighting.shadow_alpha.Assign(regs.lighting.config0.shadow_alpha != 0);
state.lighting.shadow_selector.Assign(regs.lighting.config0.shadow_selector);
}
}
// Procedural texture state is only recorded while texture unit 3 is enabled.
state.proctex.enable.Assign(regs.texturing.main_config.texture3_enable);
if (state.proctex.enable) {
state.proctex.coord.Assign(regs.texturing.main_config.texture3_coordinates);
state.proctex.u_clamp.Assign(regs.texturing.proctex.u_clamp);
state.proctex.v_clamp.Assign(regs.texturing.proctex.v_clamp);
state.proctex.color_combiner.Assign(regs.texturing.proctex.color_combiner);
state.proctex.alpha_combiner.Assign(regs.texturing.proctex.alpha_combiner);
state.proctex.separate_alpha.Assign(regs.texturing.proctex.separate_alpha);
state.proctex.noise_enable.Assign(regs.texturing.proctex.noise_enable);
state.proctex.u_shift.Assign(regs.texturing.proctex.u_shift);
state.proctex.v_shift.Assign(regs.texturing.proctex.v_shift);
state.proctex.lut_width = regs.texturing.proctex_lut.width;
state.proctex.lut_offset0 = regs.texturing.proctex_lut_offset.level0;
state.proctex.lut_offset1 = regs.texturing.proctex_lut_offset.level1;
state.proctex.lut_offset2 = regs.texturing.proctex_lut_offset.level2;
state.proctex.lut_offset3 = regs.texturing.proctex_lut_offset.level3;
state.proctex.lod_min = regs.texturing.proctex_lut.lod_min;
state.proctex.lod_max = regs.texturing.proctex_lut.lod_max;
state.proctex.lut_filter.Assign(regs.texturing.proctex_lut.filter);
}
// Blend state is only recorded for Min/Max equations, and only when emulation is requested
// and alpha blending is enabled. RGB and alpha are handled independently.
const auto alpha_eq = regs.framebuffer.output_merger.alpha_blending.blend_equation_a.Value();
const auto rgb_eq = regs.framebuffer.output_merger.alpha_blending.blend_equation_rgb.Value();
if (emulate_blend_minmax_factor && regs.framebuffer.output_merger.alphablend_enable) {
if (rgb_eq == Pica::FramebufferRegs::BlendEquation::Max ||
rgb_eq == Pica::FramebufferRegs::BlendEquation::Min) {
state.rgb_blend.emulate_blending = true;
state.rgb_blend.eq = rgb_eq;
state.rgb_blend.src_factor =
regs.framebuffer.output_merger.alpha_blending.factor_source_rgb;
state.rgb_blend.dst_factor =
regs.framebuffer.output_merger.alpha_blending.factor_dest_rgb;
}
if (alpha_eq == Pica::FramebufferRegs::BlendEquation::Max ||
alpha_eq == Pica::FramebufferRegs::BlendEquation::Min) {
state.alpha_blend.emulate_blending = true;
state.alpha_blend.eq = alpha_eq;
state.alpha_blend.src_factor =
regs.framebuffer.output_merger.alpha_blending.factor_source_a;
state.alpha_blend.dst_factor =
regs.framebuffer.output_merger.alpha_blending.factor_dest_a;
}
}
state.shadow_rendering.Assign(regs.framebuffer.output_merger.fragment_operation_mode ==
Pica::FramebufferRegs::FragmentOperationMode::Shadow);
state.shadow_texture_orthographic.Assign(regs.texturing.shadow.orthographic != 0);
// We only need fragment shader interlock when shadow rendering.
state.use_fragment_shader_interlock.Assign(state.shadow_rendering &&
has_fragment_shader_interlock);
state.use_custom_normal_map.Assign(use_custom_normal_map);
}
void PicaGSConfigState::Init(const Pica::Regs& regs, bool use_clip_planes_) {
use_clip_planes = use_clip_planes_;

View File

@ -28,141 +28,6 @@ enum Attributes {
ATTRIBUTE_VIEW,
};
// Raw register words of a single TEV stage. const_color is intentionally left out because it
// is not synced into the shader config; the conversion below reports it as 0.
struct TevStageConfigRaw {
u32 sources_raw;
u32 modifiers_raw;
u32 ops_raw;
u32 scales_raw;
// Explicit conversion to the register-level TEV stage type, copying the four raw words and
// zeroing const_color.
explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept {
return {
.sources_raw = sources_raw,
.modifiers_raw = modifiers_raw,
.ops_raw = ops_raw,
.const_color = 0,
.scales_raw = scales_raw,
};
}
};
// Plain-data state behind PicaFSConfig: one packed option word, per-texture border flags, the
// raw TEV stage words, and the lighting, procedural texture and blend sub-states.
struct PicaFSConfigState {
// Packed general options (bits 0-30 are used).
union {
BitField<0, 3, Pica::FramebufferRegs::CompareFunc> alpha_test_func;
BitField<3, 2, Pica::RasterizerRegs::ScissorMode> scissor_test_mode;
BitField<5, 3, Pica::TexturingRegs::TextureConfig::TextureType> texture0_type;
BitField<8, 1, u32> texture2_use_coord1;
// Low nibble: RGB update mask, high nibble: alpha update mask (see PicaFSConfig helpers).
BitField<9, 8, u32> combiner_buffer_input;
BitField<17, 1, Pica::RasterizerRegs::DepthBuffering> depthmap_enable;
BitField<18, 3, Pica::TexturingRegs::FogMode> fog_mode;
BitField<21, 1, u32> fog_flip;
BitField<22, 1, u32> emulate_logic_op;
BitField<23, 4, Pica::FramebufferRegs::LogicOp> logic_op;
BitField<27, 1, u32> shadow_rendering;
BitField<28, 1, u32> shadow_texture_orthographic;
BitField<29, 1, u32> use_fragment_shader_interlock;
BitField<30, 1, u32> use_custom_normal_map;
};
// Border flags for each of the three texture units, one bit per coordinate axis.
union {
BitField<0, 1, u32> enable_s;
BitField<1, 1, u32> enable_t;
} texture_border_color[3];
std::array<TevStageConfigRaw, 6> tev_stages;
struct {
// Per-light flags for up to eight lights, packed into 16 bits each.
union {
BitField<0, 3, u16> num;
BitField<3, 1, u16> directional;
BitField<4, 1, u16> two_sided_diffuse;
BitField<5, 1, u16> dist_atten_enable;
BitField<6, 1, u16> spot_atten_enable;
BitField<7, 1, u16> geometric_factor_0;
BitField<8, 1, u16> geometric_factor_1;
BitField<9, 1, u16> shadow_enable;
} light[8];
// Packed global lighting options (bits 0-23 are used).
union {
BitField<0, 1, u32> enable;
BitField<1, 4, u32> src_num;
BitField<5, 2, Pica::LightingRegs::LightingBumpMode> bump_mode;
BitField<7, 2, u32> bump_selector;
BitField<9, 1, u32> bump_renorm;
BitField<10, 1, u32> clamp_highlights;
BitField<11, 4, Pica::LightingRegs::LightingConfig> config;
BitField<15, 1, u32> enable_primary_alpha;
BitField<16, 1, u32> enable_secondary_alpha;
BitField<17, 1, u32> enable_shadow;
BitField<18, 1, u32> shadow_primary;
BitField<19, 1, u32> shadow_secondary;
BitField<20, 1, u32> shadow_invert;
BitField<21, 1, u32> shadow_alpha;
BitField<22, 2, u32> shadow_selector;
};
// One flag word plus scale per lighting lookup table.
struct {
union {
BitField<0, 1, u32> enable;
BitField<1, 1, u32> abs_input;
BitField<2, 3, Pica::LightingRegs::LightingLutInput> type;
};
float scale;
} lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb;
} lighting;
struct {
// Packed procedural texture options (bits 0-25 are used).
union {
BitField<0, 1, u32> enable;
BitField<1, 2, u32> coord;
BitField<3, 3, Pica::TexturingRegs::ProcTexClamp> u_clamp;
BitField<6, 3, Pica::TexturingRegs::ProcTexClamp> v_clamp;
BitField<9, 4, Pica::TexturingRegs::ProcTexCombiner> color_combiner;
BitField<13, 4, Pica::TexturingRegs::ProcTexCombiner> alpha_combiner;
BitField<17, 3, Pica::TexturingRegs::ProcTexFilter> lut_filter;
BitField<20, 1, u32> separate_alpha;
BitField<21, 1, u32> noise_enable;
BitField<22, 2, Pica::TexturingRegs::ProcTexShift> u_shift;
BitField<24, 2, Pica::TexturingRegs::ProcTexShift> v_shift;
};
s32 lut_width;
s32 lut_offset0;
s32 lut_offset1;
s32 lut_offset2;
s32 lut_offset3;
u8 lod_min;
u8 lod_max;
} proctex;
// Blend state for shader-side blend emulation, kept separately for RGB and alpha.
struct {
bool emulate_blending;
Pica::FramebufferRegs::BlendEquation eq;
Pica::FramebufferRegs::BlendFactor src_factor;
Pica::FramebufferRegs::BlendFactor dst_factor;
} rgb_blend, alpha_blend;
};
/**
 * Snapshot of all Pica register state that influences the generated GLSL fragment shader.
 * It doubles as the cache key for generated shader programs, so the shader generator must
 * read its inputs from this struct rather than from the Pica registers directly. Any register
 * state consulted behind its back would not be part of the key, and two shaders that should
 * differ could end up sharing one cache entry.
 */
struct PicaFSConfig : Common::HashableStruct<PicaFSConfigState> {
    PicaFSConfig(const Pica::Regs& regs, bool has_fragment_shader_interlock, bool emulate_logic_op,
                 bool emulate_custom_border_color, bool emulate_blend_minmax_factor,
                 bool use_custom_normal_map = false);

    /// True when TEV stage `stage_index` (only stages 0-3 qualify) has its bit set in the
    /// low nibble of the combiner buffer input mask.
    [[nodiscard]] bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
        if (stage_index >= 4) {
            return false;
        }
        const u32 update_mask = state.combiner_buffer_input.Value();
        return ((update_mask >> stage_index) & 1u) != 0;
    }

    /// True when TEV stage `stage_index` (only stages 0-3 qualify) has its bit set in the
    /// high nibble of the combiner buffer input mask.
    [[nodiscard]] bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
        if (stage_index >= 4) {
            return false;
        }
        const u32 update_mask = state.combiner_buffer_input.Value();
        return ((update_mask >> (stage_index + 4)) & 1u) != 0;
    }
};
enum class AttribLoadFlags {
Float = 1 << 0,
Sint = 1 << 1,
@ -238,13 +103,6 @@ struct PicaFixedGSConfig : Common::HashableStruct<PicaGSConfigState> {
} // namespace Pica::Shader::Generator
namespace std {
template <>
struct hash<Pica::Shader::Generator::PicaFSConfig> {
std::size_t operator()(const Pica::Shader::Generator::PicaFSConfig& k) const noexcept {
return k.Hash();
}
};
template <>
struct hash<Pica::Shader::Generator::PicaVSConfig> {
std::size_t operator()(const Pica::Shader::Generator::PicaVSConfig& k) const noexcept {

View File

@ -2,9 +2,9 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "core/core.h"
#include "core/telemetry_session.h"
#include "video_core/shader/generator/spv_shader_gen.h"
#include "video_core/shader/generator/spv_fs_shader_gen.h"
namespace Pica::Shader::Generator::SPIRV {
using Pica::FramebufferRegs;
using Pica::LightingRegs;
@ -12,12 +12,10 @@ using Pica::RasterizerRegs;
using Pica::TexturingRegs;
using TevStageConfig = TexturingRegs::TevStageConfig;
namespace Pica::Shader::Generator::SPIRV {
constexpr u32 SPIRV_VERSION_1_3 = 0x00010300;
FragmentModule::FragmentModule(Core::TelemetrySession& telemetry_, const PicaFSConfig& config_)
: Sirit::Module{SPIRV_VERSION_1_3}, telemetry{telemetry_}, config{config_} {
FragmentModule::FragmentModule(const FSConfig& config_)
: Sirit::Module{SPIRV_VERSION_1_3}, config{config_} {
DefineArithmeticTypes();
DefineUniformStructs();
DefineInterface();
@ -37,38 +35,32 @@ void FragmentModule::Generate() {
secondary_fragment_color = ConstF32(0.f, 0.f, 0.f, 0.f);
// Do not do any sort of processing if it's obvious we're not going to pass the alpha test
if (config.state.alpha_test_func == Pica::FramebufferRegs::CompareFunc::Never) {
if (config.framebuffer.alpha_test_func == Pica::FramebufferRegs::CompareFunc::Never) {
OpKill();
OpFunctionEnd();
return;
}
// Check if the fragment is outside scissor rectangle
// Append the scissor and depth tests
WriteDepth();
WriteScissor();
// Write shader bytecode to emulate all enabled PICA lights
if (config.state.lighting.enable) {
WriteLighting();
}
WriteLighting();
combiner_buffer = ConstF32(0.f, 0.f, 0.f, 0.f);
next_combiner_buffer = GetShaderDataMember(vec_ids.Get(4), ConstS32(26));
last_tex_env_out = rounded_primary_color;
// Write shader bytecode to emulate PICA TEV stages
for (std::size_t index = 0; index < config.state.tev_stages.size(); ++index) {
WriteTevStage(static_cast<s32>(index));
for (u32 index = 0; index < config.texture.tev_stages.size(); ++index) {
WriteTevStage(index);
}
WriteAlphaTestCondition(config.state.alpha_test_func);
// After perspective divide, OpenGL transform z_over_w from [-1, 1] to [near, far]. Here we use
// default near = 0 and far = 1, and undo the transformation to get the original z_over_w, then
// do our own transformation according to PICA specification.
WriteDepth();
WriteAlphaTestCondition(config.framebuffer.alpha_test_func);
// Emulate the fog
switch (config.state.fog_mode) {
switch (config.texture.fog_mode) {
case TexturingRegs::FogMode::Fog:
WriteFog();
break;
@ -80,29 +72,27 @@ void FragmentModule::Generate() {
}
Id color{Byteround(last_tex_env_out, 4)};
if (config.state.emulate_logic_op) {
switch (config.state.logic_op) {
case FramebufferRegs::LogicOp::Clear:
color = ConstF32(0.f, 0.f, 0.f, 0.f);
break;
case FramebufferRegs::LogicOp::Set:
color = ConstF32(1.f, 1.f, 1.f, 1.f);
break;
case FramebufferRegs::LogicOp::Copy:
// Take the color output as-is
break;
case FramebufferRegs::LogicOp::CopyInverted:
// out += "color = ~color;\n";
break;
case FramebufferRegs::LogicOp::NoOp:
// We need to discard the color, but not necessarily the depth. This is not possible
// with fragment shader alone, so we emulate this behavior with the color mask.
break;
default:
LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}",
static_cast<u32>(config.state.logic_op.Value()));
UNIMPLEMENTED();
}
switch (config.framebuffer.logic_op) {
case FramebufferRegs::LogicOp::Clear:
color = ConstF32(0.f, 0.f, 0.f, 0.f);
break;
case FramebufferRegs::LogicOp::Set:
color = ConstF32(1.f, 1.f, 1.f, 1.f);
break;
case FramebufferRegs::LogicOp::Copy:
// Take the color output as-is
break;
case FramebufferRegs::LogicOp::CopyInverted:
// out += "color = ~color;\n";
break;
case FramebufferRegs::LogicOp::NoOp:
// We need to discard the color, but not necessarily the depth. This is not possible
// with fragment shader alone, so we emulate this behavior with the color mask.
break;
default:
LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}",
static_cast<u32>(config.framebuffer.logic_op.Value()));
UNIMPLEMENTED();
}
// Write output color
@ -119,7 +109,7 @@ void FragmentModule::WriteDepth() {
const Id depth_scale{GetShaderDataMember(f32_id, ConstS32(2))};
const Id depth_offset{GetShaderDataMember(f32_id, ConstS32(3))};
depth = OpFma(f32_id, z_over_w, depth_scale, depth_offset);
if (config.state.depthmap_enable == Pica::RasterizerRegs::DepthBuffering::WBuffering) {
if (config.framebuffer.depthmap_enable == Pica::RasterizerRegs::DepthBuffering::WBuffering) {
const Id gl_frag_coord_w{
OpLoad(f32_id, OpAccessChain(input_pointer_id, gl_frag_coord_id, ConstU32(3u)))};
depth = OpFDiv(f32_id, depth, gl_frag_coord_w);
@ -128,7 +118,7 @@ void FragmentModule::WriteDepth() {
}
void FragmentModule::WriteScissor() {
if (config.state.scissor_test_mode == RasterizerRegs::ScissorMode::Disabled) {
if (config.framebuffer.scissor_test_mode == RasterizerRegs::ScissorMode::Disabled) {
return;
}
@ -149,7 +139,7 @@ void FragmentModule::WriteScissor() {
const Id cond2{OpFOrdLessThan(bvec_ids.Get(2), gl_frag_coord_xy, scissor_2)};
Id result{OpAll(bool_id, OpCompositeConstruct(bvec_ids.Get(4), cond1, cond2))};
if (config.state.scissor_test_mode == RasterizerRegs::ScissorMode::Include) {
if (config.framebuffer.scissor_test_mode == RasterizerRegs::ScissorMode::Include) {
result = OpLogicalNot(bool_id, result);
}
@ -167,7 +157,7 @@ void FragmentModule::WriteScissor() {
void FragmentModule::WriteFog() {
// Get index into fog LUT
Id fog_index{};
if (config.state.fog_flip) {
if (config.texture.fog_flip) {
fog_index = OpFMul(f32_id, OpFSub(f32_id, ConstF32(1.f), depth), ConstF32(128.f));
} else {
fog_index = OpFMul(f32_id, depth, ConstF32(128.f));
@ -201,14 +191,17 @@ void FragmentModule::WriteFog() {
void FragmentModule::WriteGas() {
// TODO: Implement me
telemetry.AddField(Common::Telemetry::FieldType::Session, "VideoCore_Pica_UseGasMode", true);
LOG_CRITICAL(Render, "Unimplemented gas mode");
OpKill();
OpFunctionEnd();
}
void FragmentModule::WriteLighting() {
const auto& lighting = config.state.lighting;
if (!config.lighting.enable) {
return;
}
const auto& lighting = config.lighting;
// Define lighting globals
Id diffuse_sum{ConstF32(0.f, 0.f, 0.f, 1.f)};
@ -363,7 +356,7 @@ void FragmentModule::WriteLighting() {
const Id sampler_index{ConstU32(static_cast<u32>(sampler))};
if (abs) {
// LUT index is in the range of (0.0, 1.0)
index = lighting.light[light_num].two_sided_diffuse
index = lighting.lights[light_num].two_sided_diffuse
? OpFAbs(f32_id, index)
: OpFMax(f32_id, index, ConstF32(0.f));
return lookup_lighting_lut_unsigned(sampler_index, index);
@ -375,11 +368,12 @@ void FragmentModule::WriteLighting() {
// Write the code to emulate each enabled light
for (u32 light_index = 0; light_index < lighting.src_num; ++light_index) {
const auto& light_config = lighting.light[light_index];
const auto& light_config = lighting.lights[light_index];
const auto GetLightMember = [&](s32 member) -> Id {
const Id member_type = member < 6 ? vec_ids.Get(3) : f32_id;
const Id light_num{ConstS32(static_cast<s32>(lighting.light[light_index].num.Value()))};
const Id light_num{
ConstS32(static_cast<s32>(lighting.lights[light_index].num.Value()))};
return GetShaderDataMember(member_type, ConstS32(24), light_num, ConstS32(member));
};
@ -595,7 +589,7 @@ void FragmentModule::WriteLighting() {
void FragmentModule::WriteTevStage(s32 index) {
const TexturingRegs::TevStageConfig stage =
static_cast<const TexturingRegs::TevStageConfig>(config.state.tev_stages[index]);
static_cast<const TexturingRegs::TevStageConfig>(config.texture.tev_stages[index]);
// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code)
const auto is_passthrough_tev_stage = [](const TevStageConfig& stage) {
@ -860,8 +854,6 @@ Id FragmentModule::AppendProcTexCombineAndMap(ProcTexCombiner combiner, Id u, Id
}
void FragmentModule::DefineTexSampler(u32 texture_unit) {
const PicaFSConfigState& state = config.state;
const Id func_type{TypeFunction(vec_ids.Get(4))};
sample_tex_unit_func[texture_unit] =
OpFunction(vec_ids.Get(4), spv::FunctionControlMask::MaskNone, func_type);
@ -869,14 +861,15 @@ void FragmentModule::DefineTexSampler(u32 texture_unit) {
const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)};
if (texture_unit == 0 && state.texture0_type == TexturingRegs::TextureConfig::Disabled) {
if (texture_unit == 0 &&
config.texture.texture0_type == TexturingRegs::TextureConfig::Disabled) {
OpReturnValue(zero_vec);
OpFunctionEnd();
return;
}
if (texture_unit == 3) {
if (state.proctex.enable) {
if (config.proctex.enable) {
OpReturnValue(ProcTexSampler());
} else {
OpReturnValue(zero_vec);
@ -888,10 +881,10 @@ void FragmentModule::DefineTexSampler(u32 texture_unit) {
const Id border_label{OpLabel()};
const Id not_border_label{OpLabel()};
u32 texcoord_num = texture_unit == 2 && state.texture2_use_coord1 ? 1 : texture_unit;
u32 texcoord_num = texture_unit == 2 && config.texture.texture2_use_coord1 ? 1 : texture_unit;
const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id[texcoord_num])};
auto& texture_border_color = state.texture_border_color[texture_unit];
const auto& texture_border_color = config.texture.texture_border_color[texture_unit];
if (texture_border_color.enable_s || texture_border_color.enable_t) {
const Id texcoord_s{OpCompositeExtract(f32_id, texcoord, 0)};
const Id texcoord_t{OpCompositeExtract(f32_id, texcoord, 1)};
@ -960,7 +953,7 @@ void FragmentModule::DefineTexSampler(u32 texture_unit) {
switch (texture_unit) {
case 0:
// Only unit 0 respects the texturing type
switch (state.texture0_type) {
switch (config.texture.texture0_type) {
case Pica::TexturingRegs::TextureConfig::Texture2D:
ret_val = sample_lod(tex0_id);
break;
@ -976,7 +969,8 @@ void FragmentModule::DefineTexSampler(u32 texture_unit) {
// return "shadowTextureCube(texcoord0, texcoord0_w)";
break;
default:
LOG_CRITICAL(Render, "Unhandled texture type {:x}", state.texture0_type.Value());
LOG_CRITICAL(Render, "Unhandled texture type {:x}",
config.texture.texture0_type.Value());
UNIMPLEMENTED();
ret_val = zero_vec;
break;
@ -999,7 +993,7 @@ void FragmentModule::DefineTexSampler(u32 texture_unit) {
Id FragmentModule::ProcTexSampler() {
// Define noise tables at the beginning of the function
if (config.state.proctex.noise_enable) {
if (config.proctex.noise_enable) {
noise1d_table =
DefineVar<false>(TypeArray(i32_id, ConstU32(16u)), spv::StorageClass::Function);
noise2d_table =
@ -1008,8 +1002,8 @@ Id FragmentModule::ProcTexSampler() {
lut_offsets = DefineVar<false>(TypeArray(i32_id, ConstU32(8u)), spv::StorageClass::Function);
Id uv{};
if (config.state.proctex.coord < 3) {
const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id[config.state.proctex.coord.Value()])};
if (config.proctex.coord < 3) {
const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id[config.proctex.coord.Value()])};
uv = OpFAbs(vec_ids.Get(2), texcoord);
} else {
LOG_CRITICAL(Render, "Unexpected proctex.coord >= 3");
@ -1027,26 +1021,24 @@ Id FragmentModule::ProcTexSampler() {
// unlike normal texture, the bias is inside the log2
const Id proctex_bias{GetShaderDataMember(f32_id, ConstS32(16))};
const Id bias{
OpFMul(f32_id, ConstF32(static_cast<f32>(config.state.proctex.lut_width)), proctex_bias)};
OpFMul(f32_id, ConstF32(static_cast<f32>(config.proctex.lut_width)), proctex_bias)};
const Id duv_xy{
OpFAdd(f32_id, OpCompositeExtract(f32_id, duv, 0), OpCompositeExtract(f32_id, duv, 1))};
Id lod{OpLog2(f32_id, OpFMul(f32_id, OpFAbs(f32_id, bias), duv_xy))};
lod = OpSelect(f32_id, OpFOrdEqual(bool_id, proctex_bias, ConstF32(0.f)), ConstF32(0.f), lod);
lod = OpFClamp(f32_id, lod,
ConstF32(std::max(0.0f, static_cast<float>(config.state.proctex.lod_min))),
ConstF32(std::min(7.0f, static_cast<float>(config.state.proctex.lod_max))));
lod =
OpFClamp(f32_id, lod, ConstF32(std::max(0.0f, static_cast<float>(config.proctex.lod_min))),
ConstF32(std::min(7.0f, static_cast<float>(config.proctex.lod_max))));
// Get shift offset before noise generation
const Id u_shift{AppendProcTexShiftOffset(OpCompositeExtract(f32_id, uv, 1),
config.state.proctex.u_shift,
config.state.proctex.u_clamp)};
config.proctex.u_shift, config.proctex.u_clamp)};
const Id v_shift{AppendProcTexShiftOffset(OpCompositeExtract(f32_id, uv, 0),
config.state.proctex.v_shift,
config.state.proctex.v_clamp)};
config.proctex.v_shift, config.proctex.v_clamp)};
// Generate noise
if (config.state.proctex.noise_enable) {
if (config.proctex.noise_enable) {
const Id proctex_noise_a{GetShaderDataMember(vec_ids.Get(2), ConstS32(21))};
const Id noise_coef{ProcTexNoiseCoef(uv)};
uv = OpFAdd(vec_ids.Get(2), uv,
@ -1059,16 +1051,16 @@ Id FragmentModule::ProcTexSampler() {
Id v{OpFAdd(f32_id, OpCompositeExtract(f32_id, uv, 1), v_shift)};
// Clamp
u = AppendProcTexClamp(u, config.state.proctex.u_clamp);
v = AppendProcTexClamp(v, config.state.proctex.v_clamp);
u = AppendProcTexClamp(u, config.proctex.u_clamp);
v = AppendProcTexClamp(v, config.proctex.v_clamp);
// Combine and map
const Id proctex_color_map_offset{GetShaderDataMember(i32_id, ConstS32(12))};
const Id lut_coord{AppendProcTexCombineAndMap(config.state.proctex.color_combiner, u, v,
proctex_color_map_offset)};
const Id lut_coord{
AppendProcTexCombineAndMap(config.proctex.color_combiner, u, v, proctex_color_map_offset)};
Id final_color{};
switch (config.state.proctex.lut_filter) {
switch (config.proctex.lut_filter) {
case ProcTexFilter::Linear:
case ProcTexFilter::Nearest: {
final_color = SampleProcTexColor(lut_coord, ConstS32(0));
@ -1090,9 +1082,9 @@ Id FragmentModule::ProcTexSampler() {
}
}
if (config.state.proctex.separate_alpha) {
if (config.proctex.separate_alpha) {
const Id proctex_alpha_map_offset{GetShaderDataMember(i32_id, ConstS32(13))};
const Id final_alpha{AppendProcTexCombineAndMap(config.state.proctex.alpha_combiner, u, v,
const Id final_alpha{AppendProcTexCombineAndMap(config.proctex.alpha_combiner, u, v,
proctex_alpha_map_offset)};
final_color = OpCompositeInsert(vec_ids.Get(4), final_alpha, final_color, 3);
}
@ -1189,13 +1181,11 @@ Id FragmentModule::ProcTexNoiseCoef(Id x) {
}
Id FragmentModule::SampleProcTexColor(Id lut_coord, Id level) {
const Id lut_width{
OpShiftRightArithmetic(i32_id, ConstS32(config.state.proctex.lut_width), level)};
const Id lut_width{OpShiftRightArithmetic(i32_id, ConstS32(config.proctex.lut_width), level)};
const Id lut_ptr{TypePointer(spv::StorageClass::Function, i32_id)};
// Offsets for level 4-7 seem to be hardcoded
InitTableS32(lut_offsets, config.state.proctex.lut_offset0, config.state.proctex.lut_offset1,
config.state.proctex.lut_offset2, config.state.proctex.lut_offset3, 0xF0, 0xF8,
0xFC, 0xFE);
InitTableS32(lut_offsets, config.proctex.lut_offset0, config.proctex.lut_offset1,
config.proctex.lut_offset2, config.proctex.lut_offset3, 0xF0, 0xF8, 0xFC, 0xFE);
const Id lut_offset{OpLoad(i32_id, OpAccessChain(lut_ptr, lut_offsets, level))};
// For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1]
lut_coord =
@ -1209,7 +1199,7 @@ Id FragmentModule::SampleProcTexColor(Id lut_coord, Id level) {
const Id proctex_lut_offset{GetShaderDataMember(i32_id, ConstS32(14))};
const Id lut_rgba{OpImage(image_buffer_id, texture_buffer_lut_rgba)};
switch (config.state.proctex.lut_filter) {
switch (config.proctex.lut_filter) {
case ProcTexFilter::Linear:
case ProcTexFilter::LinearMipmapLinear:
case ProcTexFilter::LinearMipmapNearest: {
@ -1549,9 +1539,8 @@ void FragmentModule::DefineInterface() {
Decorate(gl_frag_depth_id, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth);
}
std::vector<u32> GenerateFragmentShader(const PicaFSConfig& config) {
auto& telemetry = Core::System::GetInstance().TelemetrySession();
FragmentModule module{telemetry, config};
std::vector<u32> GenerateFragmentShader(const FSConfig& config) {
FragmentModule module{config};
module.Generate();
return module.Assemble();
}

View File

@ -7,11 +7,7 @@
#include <array>
#include <sirit/sirit.h>
#include "video_core/shader/generator/shader_gen.h"
namespace Core {
class TelemetrySession;
}
#include "video_core/shader/generator/pica_fs_config.h"
namespace Pica::Shader::Generator::SPIRV {
@ -34,7 +30,7 @@ class FragmentModule : public Sirit::Module {
static constexpr u32 NUM_NON_PROC_TEX_UNITS = 3;
public:
explicit FragmentModule(Core::TelemetrySession& telemetry, const PicaFSConfig& config);
explicit FragmentModule(const FSConfig& config);
~FragmentModule();
/// Emits SPIR-V bytecode corresponding to the provided pica fragment configuration
@ -218,8 +214,7 @@ private:
Id CompareShadow(Id pixel, Id z);
private:
Core::TelemetrySession& telemetry;
PicaFSConfig config;
const FSConfig& config;
Id void_id{};
Id bool_id{};
Id f32_id{};
@ -289,6 +284,6 @@ private:
* @param separable_shader generates shader that can be used for separate shader object
* @returns SPIR-V bytecode of the generated fragment shader
*/
std::vector<u32> GenerateFragmentShader(const PicaFSConfig& config);
std::vector<u32> GenerateFragmentShader(const FSConfig& config);
} // namespace Pica::Shader::Generator::SPIRV