Merge pull request #3281 from jroweboy/texcache-pt2
Texture Cache Rework
This commit is contained in:
commit
4befbddc34
@ -89,7 +89,7 @@ void Config::ReadValues() {
|
||||
Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", true);
|
||||
Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true);
|
||||
Settings::values.resolution_factor =
|
||||
(float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0);
|
||||
static_cast<u16>(sdl2_config->GetInteger("Renderer", "resolution_factor", 1));
|
||||
Settings::values.use_vsync = sdl2_config->GetBoolean("Renderer", "use_vsync", false);
|
||||
Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true);
|
||||
Settings::values.frame_limit =
|
||||
|
@ -75,7 +75,8 @@ void Config::ReadValues() {
|
||||
qt_config->beginGroup("Renderer");
|
||||
Settings::values.use_hw_renderer = qt_config->value("use_hw_renderer", true).toBool();
|
||||
Settings::values.use_shader_jit = qt_config->value("use_shader_jit", true).toBool();
|
||||
Settings::values.resolution_factor = qt_config->value("resolution_factor", 1.0).toFloat();
|
||||
Settings::values.resolution_factor =
|
||||
static_cast<u16>(qt_config->value("resolution_factor", 1).toInt());
|
||||
Settings::values.use_vsync = qt_config->value("use_vsync", false).toBool();
|
||||
Settings::values.use_frame_limit = qt_config->value("use_frame_limit", true).toBool();
|
||||
Settings::values.frame_limit = qt_config->value("frame_limit", 100).toInt();
|
||||
@ -240,7 +241,7 @@ void Config::SaveValues() {
|
||||
qt_config->beginGroup("Renderer");
|
||||
qt_config->setValue("use_hw_renderer", Settings::values.use_hw_renderer);
|
||||
qt_config->setValue("use_shader_jit", Settings::values.use_shader_jit);
|
||||
qt_config->setValue("resolution_factor", (double)Settings::values.resolution_factor);
|
||||
qt_config->setValue("resolution_factor", Settings::values.resolution_factor);
|
||||
qt_config->setValue("use_vsync", Settings::values.use_vsync);
|
||||
qt_config->setValue("use_frame_limit", Settings::values.use_frame_limit);
|
||||
qt_config->setValue("frame_limit", Settings::values.frame_limit);
|
||||
|
@ -23,81 +23,11 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent)
|
||||
|
||||
ConfigureGraphics::~ConfigureGraphics() {}
|
||||
|
||||
/// Entries of the resolution combobox, in combobox order. The enumerators are declared in
/// ascending order so that Auto has the underlying value 0 and ScaleNx has the underlying
/// value N; the two converters below rely on that layout.
enum class Resolution : int {
    Auto,
    Scale1x,
    Scale2x,
    Scale3x,
    Scale4x,
    Scale5x,
    Scale6x,
    Scale7x,
    Scale8x,
    Scale9x,
    Scale10x,
};

// Guard the layout the converters depend on, so that reordering the enum fails to compile
// instead of silently producing wrong factors.
static_assert(static_cast<int>(Resolution::Auto) == 0, "Auto must map to factor 0");
static_assert(static_cast<int>(Resolution::Scale10x) == 10, "ScaleNx must map to factor N");

/**
 * Converts a combobox entry to the resolution factor stored in the settings.
 *
 * @param option Selected combobox entry.
 * @return 0.f for Auto and for any value outside the enumeration, otherwise N for ScaleNx.
 */
float ToResolutionFactor(Resolution option) {
    const int index = static_cast<int>(option);
    const bool known = index >= static_cast<int>(Resolution::Auto) &&
                       index <= static_cast<int>(Resolution::Scale10x);
    return known ? static_cast<float>(index) : 0.f;
}

/**
 * Converts a stored resolution factor back to the matching combobox entry.
 *
 * The factor is matched against the whole numbers 0..10 with a small tolerance rather than
 * with exact floating-point equality, so a value that is off by representation error still
 * selects the intended entry.
 *
 * @param factor Resolution factor as stored in the settings.
 * @return The entry whose factor matches; Resolution::Auto when the factor is not (almost) a
 *         whole number, lies outside 0..10, or is NaN.
 */
Resolution FromResolutionFactor(float factor) {
    constexpr float tolerance = 1e-3f;
    constexpr float highest = static_cast<float>(static_cast<int>(Resolution::Scale10x));

    // Written as a negated conjunction so that NaN, which fails every comparison, is rejected.
    if (!(factor >= -tolerance && factor <= highest + tolerance)) {
        return Resolution::Auto;
    }

    // factor + 0.5f is positive at this point, so truncation rounds to the nearest integer.
    const int nearest = static_cast<int>(factor + 0.5f);
    const float error = factor - static_cast<float>(nearest);
    if (error > tolerance || error < -tolerance) {
        return Resolution::Auto;
    }

    return static_cast<Resolution>(nearest);
}
|
||||
|
||||
void ConfigureGraphics::setConfiguration() {
|
||||
ui->toggle_hw_renderer->setChecked(Settings::values.use_hw_renderer);
|
||||
ui->resolution_factor_combobox->setEnabled(Settings::values.use_hw_renderer);
|
||||
ui->toggle_shader_jit->setChecked(Settings::values.use_shader_jit);
|
||||
ui->resolution_factor_combobox->setCurrentIndex(
|
||||
static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor)));
|
||||
ui->resolution_factor_combobox->setCurrentIndex(Settings::values.resolution_factor);
|
||||
ui->toggle_vsync->setChecked(Settings::values.use_vsync);
|
||||
ui->toggle_frame_limit->setChecked(Settings::values.use_frame_limit);
|
||||
ui->frame_limit->setValue(Settings::values.frame_limit);
|
||||
@ -109,7 +39,7 @@ void ConfigureGraphics::applyConfiguration() {
|
||||
Settings::values.use_hw_renderer = ui->toggle_hw_renderer->isChecked();
|
||||
Settings::values.use_shader_jit = ui->toggle_shader_jit->isChecked();
|
||||
Settings::values.resolution_factor =
|
||||
ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex()));
|
||||
static_cast<u16>(ui->resolution_factor_combobox->currentIndex());
|
||||
Settings::values.use_vsync = ui->toggle_vsync->isChecked();
|
||||
Settings::values.use_frame_limit = ui->toggle_frame_limit->isChecked();
|
||||
Settings::values.frame_limit = ui->frame_limit->value();
|
||||
|
@ -29,7 +29,7 @@ struct Rectangle {
|
||||
T right;
|
||||
T bottom;
|
||||
|
||||
Rectangle() {}
|
||||
Rectangle() = default;
|
||||
|
||||
Rectangle(T left, T top, T right, T bottom)
|
||||
: left(left), top(top), right(right), bottom(bottom) {}
|
||||
|
@ -16,8 +16,8 @@ static const float TOP_SCREEN_ASPECT_RATIO =
|
||||
static const float BOT_SCREEN_ASPECT_RATIO =
|
||||
static_cast<float>(Core::kScreenBottomHeight) / Core::kScreenBottomWidth;
|
||||
|
||||
// Two revisions of GetScalingRatio() appear back to back in this listing and share the single
// closing brace at the end.
// float revision: the top screen's width divided by Core::kScreenTopWidth.
float FramebufferLayout::GetScalingRatio() const {
|
||||
return static_cast<float>(top_screen.GetWidth()) / Core::kScreenTopWidth;
|
||||
// u16 revision: ((width - 1) / Core::kScreenTopWidth) + 1, narrowed to u16.
// NOTE(review): presumably an integer division that rounds the ratio up to a whole number -
// confirm the type returned by top_screen.GetWidth() and the behaviour for a width of 0.
u16 FramebufferLayout::GetScalingRatio() const {
|
||||
return static_cast<u16>(((top_screen.GetWidth() - 1) / Core::kScreenTopWidth) + 1);
|
||||
}
|
||||
|
||||
// Finds the largest size subrectangle contained in window area that is confined to the aspect ratio
|
||||
|
@ -21,7 +21,7 @@ struct FramebufferLayout {
|
||||
* Returns the ratio of pixel size of the top screen, compared to the native size of the 3DS
|
||||
* screen.
|
||||
*/
|
||||
float GetScalingRatio() const;
|
||||
u16 GetScalingRatio() const;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -58,7 +58,6 @@ void VMManager::Reset() {
|
||||
|
||||
page_table.pointers.fill(nullptr);
|
||||
page_table.attributes.fill(Memory::PageType::Unmapped);
|
||||
page_table.cached_res_count.fill(0);
|
||||
|
||||
UpdatePageTableForVMA(initial_vma);
|
||||
}
|
||||
|
@ -465,7 +465,7 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
|
||||
command.dma_request.size, Memory::FlushMode::Flush);
|
||||
Memory::RasterizerFlushVirtualRegion(command.dma_request.dest_address,
|
||||
command.dma_request.size,
|
||||
Memory::FlushMode::FlushAndInvalidate);
|
||||
Memory::FlushMode::Invalidate);
|
||||
|
||||
// TODO(Subv): These memory accesses should not go through the application's memory mapping.
|
||||
// They should go through the GSP module's memory mapping.
|
||||
|
@ -96,20 +96,11 @@ static void MemoryFill(const Regs::MemoryFillConfig& config) {
|
||||
u8* start = Memory::GetPhysicalPointer(start_addr);
|
||||
u8* end = Memory::GetPhysicalPointer(end_addr);
|
||||
|
||||
// TODO: Consider always accelerating and returning vector of
|
||||
// regions that the accelerated fill did not cover to
|
||||
// reduce/eliminate the fill that the cpu has to do.
|
||||
// This would also mean that the flush below is not needed.
|
||||
// Fill should first flush all surfaces that touch but are
|
||||
// not completely within the fill range.
|
||||
// Then fill all completely covered surfaces, and return the
|
||||
// regions that were between surfaces or within the touching
|
||||
// ones for cpu to manually fill here.
|
||||
if (VideoCore::g_renderer->Rasterizer()->AccelerateFill(config))
|
||||
return;
|
||||
|
||||
Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(),
|
||||
config.GetEndAddress() - config.GetStartAddress());
|
||||
Memory::RasterizerInvalidateRegion(config.GetStartAddress(),
|
||||
config.GetEndAddress() - config.GetStartAddress());
|
||||
|
||||
if (config.fill_24bit) {
|
||||
// fill with 24-bit values
|
||||
@ -199,7 +190,7 @@ static void DisplayTransfer(const Regs::DisplayTransferConfig& config) {
|
||||
u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
|
||||
|
||||
Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size);
|
||||
Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
|
||||
Memory::RasterizerInvalidateRegion(config.GetPhysicalOutputAddress(), output_size);
|
||||
|
||||
for (u32 y = 0; y < output_height; ++y) {
|
||||
for (u32 x = 0; x < output_width; ++x) {
|
||||
@ -363,8 +354,10 @@ static void TextureCopy(const Regs::DisplayTransferConfig& config) {
|
||||
|
||||
size_t contiguous_output_size =
|
||||
config.texture_copy.size / output_width * (output_width + output_gap);
|
||||
Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(),
|
||||
static_cast<u32>(contiguous_output_size));
|
||||
// Only need to flush output if it has a gap
|
||||
const auto FlushInvalidate_fn = (output_gap != 0) ? Memory::RasterizerFlushAndInvalidateRegion
|
||||
: Memory::RasterizerInvalidateRegion;
|
||||
FlushInvalidate_fn(config.GetPhysicalOutputAddress(), static_cast<u32>(contiguous_output_size));
|
||||
|
||||
u32 remaining_input = input_width;
|
||||
u32 remaining_output = output_width;
|
||||
@ -570,4 +563,4 @@ void Shutdown() {
|
||||
LOG_DEBUG(HW_GPU, "shutdown OK");
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace GPU
|
||||
|
@ -50,7 +50,6 @@ static void MapPages(PageTable& page_table, u32 base, u32 size, u8* memory, Page
|
||||
|
||||
page_table.attributes[base] = type;
|
||||
page_table.pointers[base] = memory;
|
||||
page_table.cached_res_count[base] = 0;
|
||||
|
||||
base += 1;
|
||||
if (memory != nullptr)
|
||||
@ -200,7 +199,7 @@ void Write(const VAddr vaddr, const T data) {
|
||||
ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr);
|
||||
break;
|
||||
case PageType::RasterizerCachedMemory: {
|
||||
RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::FlushAndInvalidate);
|
||||
RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate);
|
||||
std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T));
|
||||
break;
|
||||
}
|
||||
@ -208,7 +207,7 @@ void Write(const VAddr vaddr, const T data) {
|
||||
WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data);
|
||||
break;
|
||||
case PageType::RasterizerCachedSpecial: {
|
||||
RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::FlushAndInvalidate);
|
||||
RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate);
|
||||
WriteMMIO<T>(GetMMIOHandler(vaddr), vaddr, data);
|
||||
break;
|
||||
}
|
||||
@ -334,7 +333,7 @@ u8* GetPhysicalPointer(PAddr address) {
|
||||
return target_pointer;
|
||||
}
|
||||
|
||||
void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) {
|
||||
void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached) {
|
||||
if (start == 0) {
|
||||
return;
|
||||
}
|
||||
@ -355,14 +354,10 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) {
|
||||
}
|
||||
VAddr vaddr = *maybe_vaddr;
|
||||
|
||||
u8& res_count = current_page_table->cached_res_count[vaddr >> PAGE_BITS];
|
||||
ASSERT_MSG(count_delta <= UINT8_MAX - res_count,
|
||||
"Rasterizer resource cache counter overflow!");
|
||||
ASSERT_MSG(count_delta >= -res_count, "Rasterizer resource cache counter underflow!");
|
||||
PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
|
||||
|
||||
// Switch page type to cached if now cached
|
||||
if (res_count == 0) {
|
||||
PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
|
||||
if (cached) {
|
||||
// Switch page type to cached if now cached
|
||||
switch (page_type) {
|
||||
case PageType::Unmapped:
|
||||
// It is not necessary for a process to have this region mapped into its address
|
||||
@ -378,13 +373,8 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) {
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
res_count += count_delta;
|
||||
|
||||
// Switch page type to uncached if now uncached
|
||||
if (res_count == 0) {
|
||||
PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
|
||||
} else {
|
||||
// Switch page type to uncached if now uncached
|
||||
switch (page_type) {
|
||||
case PageType::Unmapped:
|
||||
// It is not necessary for a process to have this region mapped into its address
|
||||
@ -414,52 +404,69 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) {
|
||||
}
|
||||
|
||||
/**
 * Asks the rasterizer to flush the given physical region.
 *
 * Does nothing when no renderer exists. The call is forwarded exactly once.
 *
 * @param start Physical start address of the region.
 * @param size Length of the region.
 */
void RasterizerFlushRegion(PAddr start, u32 size) {
    if (VideoCore::g_renderer == nullptr) {
        return;
    }

    VideoCore::g_renderer->Rasterizer()->FlushRegion(start, size);
}
|
||||
|
||||
/**
 * Asks the rasterizer to invalidate the given physical region.
 *
 * Does nothing when no renderer exists.
 *
 * @param start Physical start address of the region.
 * @param size Length of the region.
 */
void RasterizerInvalidateRegion(PAddr start, u32 size) {
    // Without a renderer there is no rasterizer to notify.
    if (!VideoCore::g_renderer) {
        return;
    }

    auto* rasterizer = VideoCore::g_renderer->Rasterizer();
    rasterizer->InvalidateRegion(start, size);
}
|
||||
|
||||
/**
 * Asks the rasterizer to flush and invalidate the given physical region.
 *
 * Does nothing when no renderer exists. The call is forwarded exactly once.
 *
 * @param start Physical start address of the region.
 * @param size Length of the region.
 */
void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size) {
    // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be
    // null here
    if (VideoCore::g_renderer == nullptr) {
        return;
    }

    VideoCore::g_renderer->Rasterizer()->FlushAndInvalidateRegion(start, size);
}
|
||||
|
||||
/**
 * Applies a cache maintenance operation to the parts of a virtual address range that lie in
 * the linear heap, the new linear heap, or VRAM.
 *
 * The range [start, start + size) is intersected with each of those three regions. Every
 * non-empty intersection is translated to a physical address and passed to the rasterizer
 * call selected by `mode`.
 *
 * @param start First virtual address of the range.
 * @param size Length of the range.
 * @param mode Operation to perform: flush, invalidate, or flush and invalidate.
 */
void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode) {
    // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be
    // null here
    if (VideoCore::g_renderer == nullptr) {
        return;
    }

    VAddr end = start + size;

    // Clips the requested range to [region_start, region_end) and forwards the overlap, if
    // any, to the rasterizer.
    auto CheckRegion = [&](VAddr region_start, VAddr region_end) {
        if (start >= region_end || end <= region_start) {
            // No overlap with region
            return;
        }

        VAddr overlap_start = std::max(start, region_start);
        VAddr overlap_end = std::min(end, region_end);

        PAddr physical_start = TryVirtualToPhysicalAddress(overlap_start).value();
        u32 overlap_size = overlap_end - overlap_start;

        auto* rasterizer = VideoCore::g_renderer->Rasterizer();
        switch (mode) {
        case FlushMode::Flush:
            rasterizer->FlushRegion(physical_start, overlap_size);
            break;
        case FlushMode::Invalidate:
            rasterizer->InvalidateRegion(physical_start, overlap_size);
            break;
        case FlushMode::FlushAndInvalidate:
            rasterizer->FlushAndInvalidateRegion(physical_start, overlap_size);
            break;
        }
    };

    CheckRegion(LINEAR_HEAP_VADDR, LINEAR_HEAP_VADDR_END);
    CheckRegion(NEW_LINEAR_HEAP_VADDR, NEW_LINEAR_HEAP_VADDR_END);
    CheckRegion(VRAM_VADDR, VRAM_VADDR_END);
}
|
||||
|
||||
u8 Read8(const VAddr addr) {
|
||||
@ -588,7 +595,7 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi
|
||||
}
|
||||
case PageType::RasterizerCachedMemory: {
|
||||
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
|
||||
FlushMode::FlushAndInvalidate);
|
||||
FlushMode::Invalidate);
|
||||
std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount);
|
||||
break;
|
||||
}
|
||||
@ -596,7 +603,7 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi
|
||||
MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
|
||||
DEBUG_ASSERT(handler);
|
||||
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
|
||||
FlushMode::FlushAndInvalidate);
|
||||
FlushMode::Invalidate);
|
||||
handler->WriteBlock(current_vaddr, src_buffer, copy_amount);
|
||||
break;
|
||||
}
|
||||
@ -648,7 +655,7 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const size
|
||||
}
|
||||
case PageType::RasterizerCachedMemory: {
|
||||
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
|
||||
FlushMode::FlushAndInvalidate);
|
||||
FlushMode::Invalidate);
|
||||
std::memset(GetPointerFromVMA(process, current_vaddr), 0, copy_amount);
|
||||
break;
|
||||
}
|
||||
@ -656,7 +663,7 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const size
|
||||
MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
|
||||
DEBUG_ASSERT(handler);
|
||||
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
|
||||
FlushMode::FlushAndInvalidate);
|
||||
FlushMode::Invalidate);
|
||||
handler->WriteBlock(current_vaddr, zeros.data(), copy_amount);
|
||||
break;
|
||||
}
|
||||
|
@ -72,12 +72,6 @@ struct PageTable {
|
||||
* the corresponding entry in `pointers` MUST be set to null.
|
||||
*/
|
||||
std::array<PageType, PAGE_TABLE_NUM_ENTRIES> attributes;
|
||||
|
||||
/**
|
||||
* Indicates the number of externally cached resources touching a page that should be
|
||||
* flushed before the memory is accessed
|
||||
*/
|
||||
std::array<u8, PAGE_TABLE_NUM_ENTRIES> cached_res_count;
|
||||
};
|
||||
|
||||
/// Physical memory regions as seen from the ARM11
|
||||
@ -245,16 +239,20 @@ boost::optional<VAddr> PhysicalToVirtualAddress(PAddr addr);
|
||||
u8* GetPhysicalPointer(PAddr address);
|
||||
|
||||
/**
|
||||
* Adds the supplied value to the rasterizer resource cache counter of each
|
||||
* page touching the region.
|
||||
* Mark each page touching the region as cached.
|
||||
*/
|
||||
void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta);
|
||||
void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached);
|
||||
|
||||
/**
|
||||
* Flushes any externally cached rasterizer resources touching the given region.
|
||||
*/
|
||||
void RasterizerFlushRegion(PAddr start, u32 size);
|
||||
|
||||
/**
|
||||
* Invalidates any externally cached rasterizer resources touching the given region.
|
||||
*/
|
||||
void RasterizerInvalidateRegion(PAddr start, u32 size);
|
||||
|
||||
/**
|
||||
* Flushes and invalidates any externally cached rasterizer resources touching the given region.
|
||||
*/
|
||||
@ -263,6 +261,8 @@ void RasterizerFlushAndInvalidateRegion(PAddr start, u32 size);
|
||||
/// Selects the cache maintenance operation that RasterizerFlushVirtualRegion requests from the
/// rasterizer for each overlapping region.
enum class FlushMode {
|
||||
/// Write back modified surfaces to RAM
/// (dispatched to the rasterizer's FlushRegion)
|
||||
Flush,
|
||||
/// Remove region from the cache
/// (dispatched to the rasterizer's InvalidateRegion)
|
||||
Invalidate,
|
||||
/// Write back modified surfaces to RAM, and also remove them from the cache
/// (dispatched to the rasterizer's FlushAndInvalidateRegion)
|
||||
FlushAndInvalidate,
|
||||
};
|
||||
|
@ -95,7 +95,7 @@ struct Values {
|
||||
// Renderer
|
||||
bool use_hw_renderer;
|
||||
bool use_shader_jit;
|
||||
float resolution_factor;
|
||||
u16 resolution_factor;
|
||||
bool use_vsync;
|
||||
bool use_frame_limit;
|
||||
u16 frame_limit;
|
||||
|
@ -20,7 +20,6 @@ TestEnvironment::TestEnvironment(bool mutable_memory_)
|
||||
|
||||
page_table->pointers.fill(nullptr);
|
||||
page_table->attributes.fill(Memory::PageType::Unmapped);
|
||||
page_table->cached_res_count.fill(0);
|
||||
|
||||
Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory);
|
||||
Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory);
|
||||
|
@ -38,6 +38,9 @@ public:
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory
|
||||
virtual void FlushRegion(PAddr addr, u32 size) = 0;
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be invalidated
|
||||
virtual void InvalidateRegion(PAddr addr, u32 size) = 0;
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory
|
||||
/// and invalidated
|
||||
virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0;
|
||||
|
@ -7,8 +7,8 @@
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
#include <glad/glad.h>
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/color.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/math_util.h"
|
||||
#include "common/microprofile.h"
|
||||
@ -23,6 +23,9 @@
|
||||
#include "video_core/renderer_opengl/pica_to_gl.h"
|
||||
#include "video_core/renderer_opengl/renderer_opengl.h"
|
||||
|
||||
using PixelFormat = SurfaceParams::PixelFormat;
|
||||
using SurfaceType = SurfaceParams::SurfaceType;
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
|
||||
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
|
||||
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
|
||||
@ -227,21 +230,68 @@ void RasterizerOpenGL::DrawTriangles() {
|
||||
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
|
||||
// Sync and bind the framebuffer surfaces
|
||||
CachedSurface* color_surface;
|
||||
CachedSurface* depth_surface;
|
||||
MathUtil::Rectangle<int> rect;
|
||||
std::tie(color_surface, depth_surface, rect) =
|
||||
res_cache.GetFramebufferSurfaces(regs.framebuffer.framebuffer);
|
||||
const bool has_stencil =
|
||||
regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8;
|
||||
|
||||
const bool write_color_fb =
|
||||
state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE ||
|
||||
state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE;
|
||||
|
||||
const bool write_depth_fb =
|
||||
(state.depth.test_enabled && state.depth.write_mask == GL_TRUE) ||
|
||||
(has_stencil && state.stencil.test_enabled && state.stencil.write_mask != 0);
|
||||
|
||||
const bool using_color_fb =
|
||||
regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 && write_color_fb;
|
||||
const bool using_depth_fb =
|
||||
regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 &&
|
||||
(write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0 ||
|
||||
(has_stencil && state.stencil.test_enabled));
|
||||
|
||||
MathUtil::Rectangle<s32> viewport_rect_unscaled{
|
||||
// These registers hold half-width and half-height, so must be multiplied by 2
|
||||
regs.rasterizer.viewport_corner.x, // left
|
||||
regs.rasterizer.viewport_corner.y + // top
|
||||
static_cast<s32>(Pica::float24::FromRaw(regs.rasterizer.viewport_size_y).ToFloat32() *
|
||||
2),
|
||||
regs.rasterizer.viewport_corner.x + // right
|
||||
static_cast<s32>(Pica::float24::FromRaw(regs.rasterizer.viewport_size_x).ToFloat32() *
|
||||
2),
|
||||
regs.rasterizer.viewport_corner.y // bottom
|
||||
};
|
||||
|
||||
Surface color_surface;
|
||||
Surface depth_surface;
|
||||
MathUtil::Rectangle<u32> surfaces_rect;
|
||||
std::tie(color_surface, depth_surface, surfaces_rect) =
|
||||
res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled);
|
||||
|
||||
const u16 res_scale = color_surface != nullptr
|
||||
? color_surface->res_scale
|
||||
: (depth_surface == nullptr ? 1u : depth_surface->res_scale);
|
||||
|
||||
MathUtil::Rectangle<u32> draw_rect{
|
||||
static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.left) +
|
||||
viewport_rect_unscaled.left * res_scale,
|
||||
surfaces_rect.left, surfaces_rect.right)), // Left
|
||||
static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.bottom) +
|
||||
viewport_rect_unscaled.top * res_scale,
|
||||
surfaces_rect.bottom, surfaces_rect.top)), // Top
|
||||
static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.left) +
|
||||
viewport_rect_unscaled.right * res_scale,
|
||||
surfaces_rect.left, surfaces_rect.right)), // Right
|
||||
static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.bottom) +
|
||||
viewport_rect_unscaled.bottom * res_scale,
|
||||
surfaces_rect.bottom, surfaces_rect.top))}; // Bottom
|
||||
|
||||
// Bind the framebuffer surfaces
|
||||
state.draw.draw_framebuffer = framebuffer.handle;
|
||||
state.Apply();
|
||||
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
|
||||
color_surface != nullptr ? color_surface->texture.handle : 0, 0);
|
||||
if (depth_surface != nullptr) {
|
||||
if (regs.framebuffer.framebuffer.depth_format ==
|
||||
Pica::FramebufferRegs::DepthFormat::D24S8) {
|
||||
if (has_stencil) {
|
||||
// attach both depth and stencil
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
|
||||
depth_surface->texture.handle, 0);
|
||||
@ -259,38 +309,30 @@ void RasterizerOpenGL::DrawTriangles() {
|
||||
}
|
||||
|
||||
// Sync the viewport
|
||||
// These registers hold half-width and half-height, so must be multiplied by 2
|
||||
GLsizei viewport_width =
|
||||
(GLsizei)Pica::float24::FromRaw(regs.rasterizer.viewport_size_x).ToFloat32() * 2;
|
||||
GLsizei viewport_height =
|
||||
(GLsizei)Pica::float24::FromRaw(regs.rasterizer.viewport_size_y).ToFloat32() * 2;
|
||||
state.viewport.x =
|
||||
static_cast<GLint>(surfaces_rect.left) + viewport_rect_unscaled.left * res_scale;
|
||||
state.viewport.y =
|
||||
static_cast<GLint>(surfaces_rect.bottom) + viewport_rect_unscaled.bottom * res_scale;
|
||||
state.viewport.width = static_cast<GLsizei>(viewport_rect_unscaled.GetWidth() * res_scale);
|
||||
state.viewport.height = static_cast<GLsizei>(viewport_rect_unscaled.GetHeight() * res_scale);
|
||||
|
||||
glViewport(
|
||||
(GLint)(rect.left + regs.rasterizer.viewport_corner.x * color_surface->res_scale_width),
|
||||
(GLint)(rect.bottom + regs.rasterizer.viewport_corner.y * color_surface->res_scale_height),
|
||||
(GLsizei)(viewport_width * color_surface->res_scale_width),
|
||||
(GLsizei)(viewport_height * color_surface->res_scale_height));
|
||||
|
||||
if (uniform_block_data.data.framebuffer_scale[0] != color_surface->res_scale_width ||
|
||||
uniform_block_data.data.framebuffer_scale[1] != color_surface->res_scale_height) {
|
||||
|
||||
uniform_block_data.data.framebuffer_scale[0] = color_surface->res_scale_width;
|
||||
uniform_block_data.data.framebuffer_scale[1] = color_surface->res_scale_height;
|
||||
if (uniform_block_data.data.framebuffer_scale != res_scale) {
|
||||
uniform_block_data.data.framebuffer_scale = res_scale;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
|
||||
// Scissor checks are window-, not viewport-relative, which means that if the cached texture
|
||||
// sub-rect changes, the scissor bounds also need to be updated.
|
||||
GLint scissor_x1 = static_cast<GLint>(
|
||||
rect.left + regs.rasterizer.scissor_test.x1 * color_surface->res_scale_width);
|
||||
GLint scissor_y1 = static_cast<GLint>(
|
||||
rect.bottom + regs.rasterizer.scissor_test.y1 * color_surface->res_scale_height);
|
||||
GLint scissor_x1 =
|
||||
static_cast<GLint>(surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale);
|
||||
GLint scissor_y1 =
|
||||
static_cast<GLint>(surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale);
|
||||
// x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when
|
||||
// scaling or doing multisampling.
|
||||
GLint scissor_x2 = static_cast<GLint>(
|
||||
rect.left + (regs.rasterizer.scissor_test.x2 + 1) * color_surface->res_scale_width);
|
||||
GLint scissor_y2 = static_cast<GLint>(
|
||||
rect.bottom + (regs.rasterizer.scissor_test.y2 + 1) * color_surface->res_scale_height);
|
||||
GLint scissor_x2 =
|
||||
static_cast<GLint>(surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale);
|
||||
GLint scissor_y2 = static_cast<GLint>(surfaces_rect.bottom +
|
||||
(regs.rasterizer.scissor_test.y2 + 1) * res_scale);
|
||||
|
||||
if (uniform_block_data.data.scissor_x1 != scissor_x1 ||
|
||||
uniform_block_data.data.scissor_x2 != scissor_x2 ||
|
||||
@ -311,7 +353,7 @@ void RasterizerOpenGL::DrawTriangles() {
|
||||
|
||||
if (texture.enabled) {
|
||||
texture_samplers[texture_index].SyncWithConfig(texture.config);
|
||||
CachedSurface* surface = res_cache.GetTextureSurface(texture);
|
||||
Surface surface = res_cache.GetTextureSurface(texture);
|
||||
if (surface != nullptr) {
|
||||
state.texture_units[texture_index].texture_2d = surface->texture.handle;
|
||||
} else {
|
||||
@ -380,6 +422,15 @@ void RasterizerOpenGL::DrawTriangles() {
|
||||
uniform_block_data.dirty = false;
|
||||
}
|
||||
|
||||
// Viewport can have negative offsets or larger
|
||||
// dimensions than our framebuffer sub-rect.
|
||||
// Enable scissor test to prevent drawing
|
||||
// outside of the framebuffer region
|
||||
state.scissor.enabled = true;
|
||||
state.scissor.x = draw_rect.left;
|
||||
state.scissor.y = draw_rect.bottom;
|
||||
state.scissor.width = draw_rect.GetWidth();
|
||||
state.scissor.height = draw_rect.GetHeight();
|
||||
state.Apply();
|
||||
|
||||
// Draw the vertex batch
|
||||
@ -387,16 +438,8 @@ void RasterizerOpenGL::DrawTriangles() {
|
||||
GL_STREAM_DRAW);
|
||||
glDrawArrays(GL_TRIANGLES, 0, (GLsizei)vertex_batch.size());
|
||||
|
||||
// Mark framebuffer surfaces as dirty
|
||||
// TODO: Restrict invalidation area to the viewport
|
||||
if (color_surface != nullptr) {
|
||||
color_surface->dirty = true;
|
||||
res_cache.FlushRegion(color_surface->addr, color_surface->size, color_surface, true);
|
||||
}
|
||||
if (depth_surface != nullptr) {
|
||||
depth_surface->dirty = true;
|
||||
res_cache.FlushRegion(depth_surface->addr, depth_surface->size, depth_surface, true);
|
||||
}
|
||||
// Disable scissor test
|
||||
state.scissor.enabled = false;
|
||||
|
||||
vertex_batch.clear();
|
||||
|
||||
@ -405,6 +448,22 @@ void RasterizerOpenGL::DrawTriangles() {
|
||||
state.texture_units[texture_index].texture_2d = 0;
|
||||
}
|
||||
state.Apply();
|
||||
|
||||
// Mark framebuffer surfaces as dirty
|
||||
MathUtil::Rectangle<u32> draw_rect_unscaled{
|
||||
draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale,
|
||||
draw_rect.bottom / res_scale};
|
||||
|
||||
if (color_surface != nullptr && write_color_fb) {
|
||||
auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled);
|
||||
res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval),
|
||||
color_surface);
|
||||
}
|
||||
if (depth_surface != nullptr && write_depth_fb) {
|
||||
auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled);
|
||||
res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval),
|
||||
depth_surface);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
|
||||
@ -893,227 +952,164 @@ void RasterizerOpenGL::FlushAll() {
|
||||
|
||||
void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) {
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
res_cache.FlushRegion(addr, size, nullptr, false);
|
||||
res_cache.FlushRegion(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) {
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
res_cache.InvalidateRegion(addr, size, nullptr);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) {
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
res_cache.FlushRegion(addr, size, nullptr, true);
|
||||
res_cache.FlushRegion(addr, size);
|
||||
res_cache.InvalidateRegion(addr, size, nullptr);
|
||||
}
|
||||
|
||||
bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Blits);
|
||||
|
||||
CachedSurface src_params;
|
||||
SurfaceParams src_params;
|
||||
src_params.addr = config.GetPhysicalInputAddress();
|
||||
// It's important to use the correct source input width to properly skip over parts of the input
|
||||
// image which will be cropped from the output but still affect the stride of the input image.
|
||||
src_params.width = config.input_width;
|
||||
// Using the output's height is fine because we don't read or skip over the remaining part of
|
||||
// the image, and it allows for smaller texture cache lookup rectangles.
|
||||
src_params.width = config.output_width;
|
||||
src_params.stride = config.input_width;
|
||||
src_params.height = config.output_height;
|
||||
src_params.is_tiled = !config.input_linear;
|
||||
src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.input_format);
|
||||
src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.input_format);
|
||||
src_params.UpdateParams();
|
||||
|
||||
CachedSurface dst_params;
|
||||
SurfaceParams dst_params;
|
||||
dst_params.addr = config.GetPhysicalOutputAddress();
|
||||
dst_params.width =
|
||||
config.scaling != config.NoScale ? config.output_width / 2 : config.output_width.Value();
|
||||
dst_params.height =
|
||||
config.scaling == config.ScaleXY ? config.output_height / 2 : config.output_height.Value();
|
||||
dst_params.width = config.scaling != config.NoScale ? config.output_width.Value() / 2
|
||||
: config.output_width.Value();
|
||||
dst_params.height = config.scaling == config.ScaleXY ? config.output_height.Value() / 2
|
||||
: config.output_height.Value();
|
||||
dst_params.is_tiled = config.input_linear != config.dont_swizzle;
|
||||
dst_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.output_format);
|
||||
dst_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.output_format);
|
||||
dst_params.UpdateParams();
|
||||
|
||||
MathUtil::Rectangle<int> src_rect;
|
||||
CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect);
|
||||
|
||||
if (src_surface == nullptr) {
|
||||
MathUtil::Rectangle<u32> src_rect;
|
||||
Surface src_surface;
|
||||
std::tie(src_surface, src_rect) =
|
||||
res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true);
|
||||
if (src_surface == nullptr)
|
||||
return false;
|
||||
}
|
||||
|
||||
// Adjust the source rectangle to take into account parts of the input lines being cropped
|
||||
if (config.input_width > config.output_width) {
|
||||
src_rect.right -= static_cast<int>((config.input_width - config.output_width) *
|
||||
src_surface->res_scale_width);
|
||||
}
|
||||
dst_params.res_scale = src_surface->res_scale;
|
||||
|
||||
// Require destination surface to have same resolution scale as source to preserve scaling
|
||||
dst_params.res_scale_width = src_surface->res_scale_width;
|
||||
dst_params.res_scale_height = src_surface->res_scale_height;
|
||||
|
||||
MathUtil::Rectangle<int> dst_rect;
|
||||
CachedSurface* dst_surface = res_cache.GetSurfaceRect(dst_params, true, false, dst_rect);
|
||||
|
||||
if (dst_surface == nullptr) {
|
||||
MathUtil::Rectangle<u32> dst_rect;
|
||||
Surface dst_surface;
|
||||
std::tie(dst_surface, dst_rect) =
|
||||
res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, false);
|
||||
if (dst_surface == nullptr)
|
||||
return false;
|
||||
}
|
||||
|
||||
// Don't accelerate if the src and dst surfaces are the same
|
||||
if (src_surface == dst_surface) {
|
||||
if (src_surface->is_tiled != dst_surface->is_tiled)
|
||||
std::swap(src_rect.top, src_rect.bottom);
|
||||
|
||||
if (config.flip_vertically)
|
||||
std::swap(src_rect.top, src_rect.bottom);
|
||||
|
||||
if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect))
|
||||
return false;
|
||||
}
|
||||
|
||||
if (config.flip_vertically) {
|
||||
std::swap(dst_rect.top, dst_rect.bottom);
|
||||
}
|
||||
|
||||
if (!res_cache.TryBlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
u32 dst_size = dst_params.width * dst_params.height *
|
||||
CachedSurface::GetFormatBpp(dst_params.pixel_format) / 8;
|
||||
dst_surface->dirty = true;
|
||||
res_cache.FlushRegion(config.GetPhysicalOutputAddress(), dst_size, dst_surface, true);
|
||||
res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) {
|
||||
// TODO(tfarley): Try to hardware accelerate this
|
||||
return false;
|
||||
}
|
||||
|
||||
bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Blits);
|
||||
using PixelFormat = CachedSurface::PixelFormat;
|
||||
using SurfaceType = CachedSurface::SurfaceType;
|
||||
|
||||
CachedSurface* dst_surface = res_cache.TryGetFillSurface(config);
|
||||
|
||||
if (dst_surface == nullptr) {
|
||||
u32 copy_size = Common::AlignDown(config.texture_copy.size, 16);
|
||||
if (copy_size == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
OpenGLState cur_state = OpenGLState::GetCurState();
|
||||
|
||||
SurfaceType dst_type = CachedSurface::GetFormatType(dst_surface->pixel_format);
|
||||
|
||||
GLuint old_fb = cur_state.draw.draw_framebuffer;
|
||||
cur_state.draw.draw_framebuffer = framebuffer.handle;
|
||||
// TODO: When scissor test is implemented, need to disable scissor test in cur_state here so
|
||||
// Clear call isn't affected
|
||||
cur_state.Apply();
|
||||
|
||||
if (dst_type == SurfaceType::Color || dst_type == SurfaceType::Texture) {
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
|
||||
dst_surface->texture.handle, 0);
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
|
||||
0);
|
||||
|
||||
GLfloat color_values[4] = {0.0f, 0.0f, 0.0f, 0.0f};
|
||||
|
||||
// TODO: Handle additional pixel format and fill value size combinations to accelerate more
|
||||
// cases
|
||||
// For instance, checking if fill value's bytes/bits repeat to allow filling
|
||||
// I8/A8/I4/A4/...
|
||||
// Currently only handles formats that are multiples of the fill value size
|
||||
|
||||
if (config.fill_24bit) {
|
||||
switch (dst_surface->pixel_format) {
|
||||
case PixelFormat::RGB8:
|
||||
color_values[0] = config.value_24bit_r / 255.0f;
|
||||
color_values[1] = config.value_24bit_g / 255.0f;
|
||||
color_values[2] = config.value_24bit_b / 255.0f;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
} else if (config.fill_32bit) {
|
||||
u32 value = config.value_32bit;
|
||||
|
||||
switch (dst_surface->pixel_format) {
|
||||
case PixelFormat::RGBA8:
|
||||
color_values[0] = (value >> 24) / 255.0f;
|
||||
color_values[1] = ((value >> 16) & 0xFF) / 255.0f;
|
||||
color_values[2] = ((value >> 8) & 0xFF) / 255.0f;
|
||||
color_values[3] = (value & 0xFF) / 255.0f;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
u16 value_16bit = config.value_16bit.Value();
|
||||
Math::Vec4<u8> color;
|
||||
|
||||
switch (dst_surface->pixel_format) {
|
||||
case PixelFormat::RGBA8:
|
||||
color_values[0] = (value_16bit >> 8) / 255.0f;
|
||||
color_values[1] = (value_16bit & 0xFF) / 255.0f;
|
||||
color_values[2] = color_values[0];
|
||||
color_values[3] = color_values[1];
|
||||
break;
|
||||
case PixelFormat::RGB5A1:
|
||||
color = Color::DecodeRGB5A1((const u8*)&value_16bit);
|
||||
color_values[0] = color[0] / 31.0f;
|
||||
color_values[1] = color[1] / 31.0f;
|
||||
color_values[2] = color[2] / 31.0f;
|
||||
color_values[3] = color[3];
|
||||
break;
|
||||
case PixelFormat::RGB565:
|
||||
color = Color::DecodeRGB565((const u8*)&value_16bit);
|
||||
color_values[0] = color[0] / 31.0f;
|
||||
color_values[1] = color[1] / 63.0f;
|
||||
color_values[2] = color[2] / 31.0f;
|
||||
break;
|
||||
case PixelFormat::RGBA4:
|
||||
color = Color::DecodeRGBA4((const u8*)&value_16bit);
|
||||
color_values[0] = color[0] / 15.0f;
|
||||
color_values[1] = color[1] / 15.0f;
|
||||
color_values[2] = color[2] / 15.0f;
|
||||
color_values[3] = color[3] / 15.0f;
|
||||
break;
|
||||
case PixelFormat::IA8:
|
||||
case PixelFormat::RG8:
|
||||
color_values[0] = (value_16bit >> 8) / 255.0f;
|
||||
color_values[1] = (value_16bit & 0xFF) / 255.0f;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
cur_state.color_mask.red_enabled = GL_TRUE;
|
||||
cur_state.color_mask.green_enabled = GL_TRUE;
|
||||
cur_state.color_mask.blue_enabled = GL_TRUE;
|
||||
cur_state.color_mask.alpha_enabled = GL_TRUE;
|
||||
cur_state.Apply();
|
||||
glClearBufferfv(GL_COLOR, 0, color_values);
|
||||
} else if (dst_type == SurfaceType::Depth) {
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
|
||||
dst_surface->texture.handle, 0);
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
|
||||
|
||||
GLfloat value_float;
|
||||
if (dst_surface->pixel_format == CachedSurface::PixelFormat::D16) {
|
||||
value_float = config.value_32bit / 65535.0f; // 2^16 - 1
|
||||
} else if (dst_surface->pixel_format == CachedSurface::PixelFormat::D24) {
|
||||
value_float = config.value_32bit / 16777215.0f; // 2^24 - 1
|
||||
}
|
||||
|
||||
cur_state.depth.write_mask = GL_TRUE;
|
||||
cur_state.Apply();
|
||||
glClearBufferfv(GL_DEPTH, 0, &value_float);
|
||||
} else if (dst_type == SurfaceType::DepthStencil) {
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
|
||||
dst_surface->texture.handle, 0);
|
||||
|
||||
GLfloat value_float = (config.value_32bit & 0xFFFFFF) / 16777215.0f; // 2^24 - 1
|
||||
GLint value_int = (config.value_32bit >> 24);
|
||||
|
||||
cur_state.depth.write_mask = GL_TRUE;
|
||||
cur_state.stencil.write_mask = 0xFF;
|
||||
cur_state.Apply();
|
||||
glClearBufferfi(GL_DEPTH_STENCIL, 0, value_float, value_int);
|
||||
u32 input_gap = config.texture_copy.input_gap * 16;
|
||||
u32 input_width = config.texture_copy.input_width * 16;
|
||||
if (input_width == 0 && input_gap != 0) {
|
||||
return false;
|
||||
}
|
||||
if (input_gap == 0 || input_width >= copy_size) {
|
||||
input_width = copy_size;
|
||||
input_gap = 0;
|
||||
}
|
||||
if (copy_size % input_width != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
cur_state.draw.draw_framebuffer = old_fb;
|
||||
// TODO: Return scissor test to previous value when scissor test is implemented
|
||||
cur_state.Apply();
|
||||
u32 output_gap = config.texture_copy.output_gap * 16;
|
||||
u32 output_width = config.texture_copy.output_width * 16;
|
||||
if (output_width == 0 && output_gap != 0) {
|
||||
return false;
|
||||
}
|
||||
if (output_gap == 0 || output_width >= copy_size) {
|
||||
output_width = copy_size;
|
||||
output_gap = 0;
|
||||
}
|
||||
if (copy_size % output_width != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
dst_surface->dirty = true;
|
||||
res_cache.FlushRegion(dst_surface->addr, dst_surface->size, dst_surface, true);
|
||||
SurfaceParams src_params;
|
||||
src_params.addr = config.GetPhysicalInputAddress();
|
||||
src_params.stride = input_width + input_gap; // stride in bytes
|
||||
src_params.width = input_width; // width in bytes
|
||||
src_params.height = copy_size / input_width;
|
||||
src_params.size = ((src_params.height - 1) * src_params.stride) + src_params.width;
|
||||
src_params.end = src_params.addr + src_params.size;
|
||||
|
||||
MathUtil::Rectangle<u32> src_rect;
|
||||
Surface src_surface;
|
||||
std::tie(src_surface, src_rect) = res_cache.GetTexCopySurface(src_params);
|
||||
if (src_surface == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (output_gap != 0 &&
|
||||
(output_width !=
|
||||
src_surface->BytesInPixels(src_rect.GetWidth() / src_surface->res_scale) *
|
||||
(src_surface->is_tiled ? 8 : 1) ||
|
||||
output_gap % src_surface->BytesInPixels(src_surface->is_tiled ? 64 : 1) != 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
SurfaceParams dst_params = *src_surface;
|
||||
dst_params.addr = config.GetPhysicalOutputAddress();
|
||||
dst_params.width = src_rect.GetWidth() / src_surface->res_scale;
|
||||
dst_params.stride =
|
||||
dst_params.width +
|
||||
src_surface->PixelsInBytes(src_surface->is_tiled ? output_gap / 8 : output_gap);
|
||||
dst_params.height = src_rect.GetHeight() / src_surface->res_scale;
|
||||
dst_params.res_scale = src_surface->res_scale;
|
||||
dst_params.UpdateParams();
|
||||
|
||||
// Since we are going to invalidate the gap if there is one, we will have to load it first
|
||||
const bool load_gap = output_gap != 0;
|
||||
MathUtil::Rectangle<u32> dst_rect;
|
||||
Surface dst_surface;
|
||||
std::tie(dst_surface, dst_rect) =
|
||||
res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, load_gap);
|
||||
if (src_surface == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dst_surface->type == SurfaceType::Texture) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!res_cache.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
res_cache.InvalidateRegion(dst_params.addr, dst_params.size, dst_surface);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) {
|
||||
Surface dst_surface = res_cache.GetFillSurface(config);
|
||||
if (dst_surface == nullptr)
|
||||
return false;
|
||||
|
||||
res_cache.InvalidateRegion(dst_surface->addr, dst_surface->size, dst_surface);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1125,16 +1121,19 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con
|
||||
}
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
|
||||
CachedSurface src_params;
|
||||
SurfaceParams src_params;
|
||||
src_params.addr = framebuffer_addr;
|
||||
src_params.width = config.width;
|
||||
src_params.width = std::min(config.width.Value(), pixel_stride);
|
||||
src_params.height = config.height;
|
||||
src_params.pixel_stride = pixel_stride;
|
||||
src_params.stride = pixel_stride;
|
||||
src_params.is_tiled = false;
|
||||
src_params.pixel_format = CachedSurface::PixelFormatFromGPUPixelFormat(config.color_format);
|
||||
src_params.pixel_format = SurfaceParams::PixelFormatFromGPUPixelFormat(config.color_format);
|
||||
src_params.UpdateParams();
|
||||
|
||||
MathUtil::Rectangle<int> src_rect;
|
||||
CachedSurface* src_surface = res_cache.GetSurfaceRect(src_params, false, true, src_rect);
|
||||
MathUtil::Rectangle<u32> src_rect;
|
||||
Surface src_surface;
|
||||
std::tie(src_surface, src_rect) =
|
||||
res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true);
|
||||
|
||||
if (src_surface == nullptr) {
|
||||
return false;
|
||||
@ -1144,8 +1143,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con
|
||||
u32 scaled_height = src_surface->GetScaledHeight();
|
||||
|
||||
screen_info.display_texcoords = MathUtil::Rectangle<float>(
|
||||
(float)src_rect.top / (float)scaled_height, (float)src_rect.left / (float)scaled_width,
|
||||
(float)src_rect.bottom / (float)scaled_height, (float)src_rect.right / (float)scaled_width);
|
||||
(float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width,
|
||||
(float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width);
|
||||
|
||||
screen_info.display_texture = src_surface->texture.handle;
|
||||
|
||||
@ -1158,8 +1157,8 @@ void RasterizerOpenGL::SamplerInfo::Create() {
|
||||
wrap_s = wrap_t = TextureConfig::Repeat;
|
||||
border_color = 0;
|
||||
|
||||
glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER,
|
||||
GL_LINEAR); // default is GL_LINEAR_MIPMAP_LINEAR
|
||||
// default is GL_LINEAR_MIPMAP_LINEAR
|
||||
glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
// Other attributes have correct defaults
|
||||
}
|
||||
|
||||
|
@ -43,6 +43,7 @@ public:
|
||||
void NotifyPicaRegisterChanged(u32 id) override;
|
||||
void FlushAll() override;
|
||||
void FlushRegion(PAddr addr, u32 size) override;
|
||||
void InvalidateRegion(PAddr addr, u32 size) override;
|
||||
void FlushAndInvalidateRegion(PAddr addr, u32 size) override;
|
||||
bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override;
|
||||
bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override;
|
||||
@ -135,7 +136,7 @@ private:
|
||||
// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
|
||||
// Not following that rule will cause problems on some AMD drivers.
|
||||
struct UniformData {
|
||||
alignas(8) GLvec2 framebuffer_scale;
|
||||
GLint framebuffer_scale;
|
||||
GLint alphatest_ref;
|
||||
GLfloat depth_scale;
|
||||
GLfloat depth_offset;
|
||||
@ -155,7 +156,7 @@ private:
|
||||
};
|
||||
|
||||
static_assert(
|
||||
sizeof(UniformData) == 0x470,
|
||||
sizeof(UniformData) == 0x460,
|
||||
"The size of the UniformData structure has changed, update the structure in the shader");
|
||||
static_assert(sizeof(UniformData) < 16384,
|
||||
"UniformData structure must be less than 16kb as per the OpenGL spec");
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -13,6 +13,7 @@
|
||||
#pragma GCC diagnostic ignored "-Wunused-local-typedefs"
|
||||
#endif
|
||||
#include <boost/icl/interval_map.hpp>
|
||||
#include <boost/icl/interval_set.hpp>
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
@ -20,21 +21,37 @@
|
||||
#include "common/assert.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/math_util.h"
|
||||
#include "core/hw/gpu.h"
|
||||
#include "video_core/regs_framebuffer.h"
|
||||
#include "video_core/regs_texturing.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
|
||||
namespace MathUtil {
|
||||
template <class T>
|
||||
struct Rectangle;
|
||||
}
|
||||
|
||||
struct CachedSurface;
|
||||
using Surface = std::shared_ptr<CachedSurface>;
|
||||
using SurfaceSet = std::set<Surface>;
|
||||
|
||||
using SurfaceCache = boost::icl::interval_map<PAddr, std::set<std::shared_ptr<CachedSurface>>>;
|
||||
using SurfaceRegions = boost::icl::interval_set<PAddr>;
|
||||
using SurfaceMap = boost::icl::interval_map<PAddr, Surface>;
|
||||
using SurfaceCache = boost::icl::interval_map<PAddr, SurfaceSet>;
|
||||
|
||||
struct CachedSurface {
|
||||
using SurfaceInterval = SurfaceCache::interval_type;
|
||||
static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() &&
|
||||
std::is_same<SurfaceMap::interval_type, SurfaceCache::interval_type>(),
|
||||
"incorrect interval types");
|
||||
|
||||
using SurfaceRect_Tuple = std::tuple<Surface, MathUtil::Rectangle<u32>>;
|
||||
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>;
|
||||
|
||||
using PageMap = boost::icl::interval_map<u32, int>;
|
||||
|
||||
enum class ScaleMatch {
|
||||
Exact, // only accept same res scale
|
||||
Upscale, // only allow higher scale than params
|
||||
Ignore // accept every scaled res
|
||||
};
|
||||
|
||||
struct SurfaceParams {
|
||||
enum class PixelFormat {
|
||||
// First 5 formats are shared between textures and color buffers
|
||||
RGBA8 = 0,
|
||||
@ -68,11 +85,12 @@ struct CachedSurface {
|
||||
Texture = 1,
|
||||
Depth = 2,
|
||||
DepthStencil = 3,
|
||||
Invalid = 4,
|
||||
Fill = 4,
|
||||
Invalid = 5
|
||||
};
|
||||
|
||||
static unsigned int GetFormatBpp(CachedSurface::PixelFormat format) {
|
||||
static const std::array<unsigned int, 18> bpp_table = {
|
||||
static constexpr unsigned int GetFormatBpp(PixelFormat format) {
|
||||
constexpr std::array<unsigned int, 18> bpp_table = {
|
||||
32, // RGBA8
|
||||
24, // RGB8
|
||||
16, // RGB5A1
|
||||
@ -93,8 +111,11 @@ struct CachedSurface {
|
||||
32, // D24S8
|
||||
};
|
||||
|
||||
ASSERT((unsigned int)format < ARRAY_SIZE(bpp_table));
|
||||
return bpp_table[(unsigned int)format];
|
||||
assert(static_cast<size_t>(format) < bpp_table.size());
|
||||
return bpp_table[static_cast<size_t>(format)];
|
||||
}
|
||||
unsigned int GetFormatBpp() const {
|
||||
return GetFormatBpp(pixel_format);
|
||||
}
|
||||
|
||||
static PixelFormat PixelFormatFromTextureFormat(Pica::TexturingRegs::TextureFormat format) {
|
||||
@ -142,7 +163,7 @@ struct CachedSurface {
|
||||
return false;
|
||||
}
|
||||
|
||||
static SurfaceType GetFormatType(PixelFormat pixel_format) {
|
||||
static constexpr SurfaceType GetFormatType(PixelFormat pixel_format) {
|
||||
if ((unsigned int)pixel_format < 5) {
|
||||
return SurfaceType::Color;
|
||||
}
|
||||
@ -162,31 +183,117 @@ struct CachedSurface {
|
||||
return SurfaceType::Invalid;
|
||||
}
|
||||
|
||||
/// Update the derived params "size", "end" and "type" (and "stride", when it was left at 0) from
|
||||
/// the already set "addr", "width", "height", "stride", "is_tiled" and "pixel_format"
|
||||
void UpdateParams() {
|
||||
if (stride == 0) { // no explicit stride was set: fall back to the surface width
|
||||
stride = width;
|
||||
}
|
||||
type = GetFormatType(pixel_format);
|
||||
size = !is_tiled ? BytesInPixels(stride * (height - 1) + width) // linear: last row spans only "width"
|
||||
: BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8); // tiled: same, per band of 8 rows
|
||||
end = addr + size; // exclusive end; GetInterval() builds right_open(addr, end)
|
||||
}
|
||||
|
||||
SurfaceInterval GetInterval() const {
|
||||
return SurfaceInterval::right_open(addr, end);
|
||||
}
|
||||
|
||||
// Returns the outer rectangle containing "interval"
|
||||
SurfaceParams FromInterval(SurfaceInterval interval) const;
|
||||
|
||||
SurfaceInterval GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const;
|
||||
|
||||
// Returns the region of the biggest valid rectangle within interval
|
||||
SurfaceInterval GetCopyableInterval(const Surface& src_surface) const;
|
||||
|
||||
u32 GetScaledWidth() const {
|
||||
return (u32)(width * res_scale_width);
|
||||
return width * res_scale;
|
||||
}
|
||||
|
||||
u32 GetScaledHeight() const {
|
||||
return (u32)(height * res_scale_height);
|
||||
return height * res_scale;
|
||||
}
|
||||
|
||||
PAddr addr;
|
||||
u32 size;
|
||||
MathUtil::Rectangle<u32> GetRect() const {
|
||||
return {0, height, width, 0};
|
||||
}
|
||||
|
||||
PAddr min_valid;
|
||||
PAddr max_valid;
|
||||
MathUtil::Rectangle<u32> GetScaledRect() const {
|
||||
return {0, GetScaledHeight(), GetScaledWidth(), 0};
|
||||
}
|
||||
|
||||
u32 PixelsInBytes(u32 size) const {
|
||||
return size * CHAR_BIT / GetFormatBpp(pixel_format);
|
||||
}
|
||||
|
||||
u32 BytesInPixels(u32 pixels) const {
|
||||
return pixels * GetFormatBpp(pixel_format) / CHAR_BIT;
|
||||
}
|
||||
|
||||
bool ExactMatch(const SurfaceParams& other_surface) const;
|
||||
bool CanSubRect(const SurfaceParams& sub_surface) const;
|
||||
bool CanExpand(const SurfaceParams& expanded_surface) const;
|
||||
bool CanTexCopy(const SurfaceParams& texcopy_params) const;
|
||||
|
||||
MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const;
|
||||
MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const;
|
||||
|
||||
PAddr addr = 0;
|
||||
PAddr end = 0;
|
||||
u32 size = 0;
|
||||
|
||||
u32 width = 0;
|
||||
u32 height = 0;
|
||||
u32 stride = 0;
|
||||
u16 res_scale = 1;
|
||||
|
||||
bool is_tiled = false;
|
||||
PixelFormat pixel_format = PixelFormat::Invalid;
|
||||
SurfaceType type = SurfaceType::Invalid;
|
||||
};
|
||||
|
||||
struct CachedSurface : SurfaceParams {
|
||||
bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const;
|
||||
bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const;
|
||||
|
||||
bool IsRegionValid(SurfaceInterval interval) const {
|
||||
return (invalid_regions.find(interval) == invalid_regions.end());
|
||||
}
|
||||
|
||||
bool IsSurfaceFullyInvalid() const {
|
||||
return (invalid_regions & GetInterval()) == SurfaceRegions(GetInterval());
|
||||
}
|
||||
|
||||
bool registered = false;
|
||||
SurfaceRegions invalid_regions;
|
||||
|
||||
u32 fill_size = 0; /// Number of bytes to read from fill_data
|
||||
std::array<u8, 4> fill_data;
|
||||
|
||||
OGLTexture texture;
|
||||
u32 width;
|
||||
u32 height;
|
||||
/// Stride between lines, in pixels. Only valid for images in linear format.
|
||||
u32 pixel_stride = 0;
|
||||
float res_scale_width = 1.f;
|
||||
float res_scale_height = 1.f;
|
||||
|
||||
bool is_tiled;
|
||||
PixelFormat pixel_format;
|
||||
bool dirty;
|
||||
static constexpr unsigned int GetGLBytesPerPixel(PixelFormat format) {
|
||||
// OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
|
||||
return format == PixelFormat::Invalid
|
||||
? 0
|
||||
: (format == PixelFormat::D24 || GetFormatType(format) == SurfaceType::Texture)
|
||||
? 4
|
||||
: SurfaceParams::GetFormatBpp(format) / 8;
|
||||
}
|
||||
|
||||
std::unique_ptr<u8[]> gl_buffer;
|
||||
size_t gl_buffer_size = 0;
|
||||
|
||||
// Read/Write data in 3DS memory to/from gl_buffer
|
||||
void LoadGLBuffer(PAddr load_start, PAddr load_end);
|
||||
void FlushGLBuffer(PAddr flush_start, PAddr flush_end);
|
||||
|
||||
// Upload/Download data in gl_buffer in/to this surface's texture
|
||||
void UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle,
|
||||
GLuint draw_fb_handle);
|
||||
void DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle,
|
||||
GLuint draw_fb_handle);
|
||||
};
|
||||
|
||||
class RasterizerCacheOpenGL : NonCopyable {
|
||||
@ -194,46 +301,78 @@ public:
|
||||
RasterizerCacheOpenGL();
|
||||
~RasterizerCacheOpenGL();
|
||||
|
||||
/// Blits one texture to another
|
||||
void BlitTextures(GLuint src_tex, GLuint dst_tex, CachedSurface::SurfaceType type,
|
||||
const MathUtil::Rectangle<int>& src_rect,
|
||||
const MathUtil::Rectangle<int>& dst_rect);
|
||||
/// Blit one surface's texture to another
|
||||
bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle<u32>& src_rect,
|
||||
const Surface& dst_surface, const MathUtil::Rectangle<u32>& dst_rect);
|
||||
|
||||
/// Attempt to blit one surface's texture to another
|
||||
bool TryBlitSurfaces(CachedSurface* src_surface, const MathUtil::Rectangle<int>& src_rect,
|
||||
CachedSurface* dst_surface, const MathUtil::Rectangle<int>& dst_rect);
|
||||
void ConvertD24S8toABGR(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect,
|
||||
GLuint dst_tex, const MathUtil::Rectangle<u32>& dst_rect);
|
||||
|
||||
/// Loads a texture from 3DS memory to OpenGL and caches it (if not already cached)
|
||||
CachedSurface* GetSurface(const CachedSurface& params, bool match_res_scale,
|
||||
bool load_if_create);
|
||||
/// Copy one surface's region to another
|
||||
void CopySurface(const Surface& src_surface, const Surface& dst_surface,
|
||||
SurfaceInterval copy_interval);
|
||||
|
||||
/// Load a texture from 3DS memory to OpenGL and cache it (if not already cached)
|
||||
Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale,
|
||||
bool load_if_create);
|
||||
|
||||
/// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from
|
||||
/// 3DS memory to OpenGL and caches it (if not already cached)
|
||||
CachedSurface* GetSurfaceRect(const CachedSurface& params, bool match_res_scale,
|
||||
bool load_if_create, MathUtil::Rectangle<int>& out_rect);
|
||||
SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale,
|
||||
bool load_if_create);
|
||||
|
||||
/// Gets a surface based on the texture configuration
|
||||
CachedSurface* GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config);
|
||||
/// Get a surface based on the texture configuration
|
||||
Surface GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config);
|
||||
|
||||
/// Gets the color and depth surfaces and rect (resolution scaled) based on the framebuffer
|
||||
/// configuration
|
||||
std::tuple<CachedSurface*, CachedSurface*, MathUtil::Rectangle<int>> GetFramebufferSurfaces(
|
||||
const Pica::FramebufferRegs::FramebufferConfig& config);
|
||||
/// Get the color and depth surfaces based on the framebuffer configuration
|
||||
SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
|
||||
const MathUtil::Rectangle<s32>& viewport_rect);
|
||||
|
||||
/// Attempt to get a surface that exactly matches the fill region and format
|
||||
CachedSurface* TryGetFillSurface(const GPU::Regs::MemoryFillConfig& config);
|
||||
/// Get a surface that matches the fill config
|
||||
Surface GetFillSurface(const GPU::Regs::MemoryFillConfig& config);
|
||||
|
||||
/// Write the surface back to memory
|
||||
void FlushSurface(CachedSurface* surface);
|
||||
/// Get a surface that matches a "texture copy" display transfer config
|
||||
SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params);
|
||||
|
||||
/// Write any cached resources overlapping the region back to memory (if dirty) and optionally
|
||||
/// invalidate them in the cache
|
||||
void FlushRegion(PAddr addr, u32 size, const CachedSurface* skip_surface, bool invalidate);
|
||||
/// Write any cached resources overlapping the region back to memory (if dirty)
|
||||
void FlushRegion(PAddr addr, u32 size, Surface flush_surface = nullptr);
|
||||
|
||||
/// Mark region as being invalidated by region_owner (nullptr if 3DS memory)
|
||||
void InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner);
|
||||
|
||||
/// Flush all cached resources tracked by this cache manager
|
||||
void FlushAll();
|
||||
|
||||
private:
|
||||
void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface);
|
||||
|
||||
/// Update surface's texture for given region when necessary
|
||||
void ValidateSurface(const Surface& surface, PAddr addr, u32 size);
|
||||
|
||||
/// Create a new surface
|
||||
Surface CreateSurface(const SurfaceParams& params);
|
||||
|
||||
/// Register surface into the cache
|
||||
void RegisterSurface(const Surface& surface);
|
||||
|
||||
/// Remove surface from the cache
|
||||
void UnregisterSurface(const Surface& surface);
|
||||
|
||||
/// Increase/decrease the number of surfaces in pages touching the specified region
|
||||
void UpdatePagesCachedCount(PAddr addr, u32 size, int delta);
|
||||
|
||||
SurfaceCache surface_cache;
|
||||
OGLFramebuffer transfer_framebuffers[2];
|
||||
PageMap cached_pages;
|
||||
SurfaceMap dirty_regions;
|
||||
SurfaceSet remove_surfaces;
|
||||
|
||||
OGLFramebuffer read_framebuffer;
|
||||
OGLFramebuffer draw_framebuffer;
|
||||
|
||||
OGLVertexArray attributeless_vao;
|
||||
OGLBuffer d24s8_abgr_buffer;
|
||||
GLsizeiptr d24s8_abgr_buffer_size;
|
||||
OGLShader d24s8_abgr_shader;
|
||||
GLint d24s8_abgr_tbo_size_u_id;
|
||||
GLint d24s8_abgr_viewport_u_id;
|
||||
};
|
||||
|
@ -36,7 +36,7 @@ public:
|
||||
if (handle == 0)
|
||||
return;
|
||||
glDeleteTextures(1, &handle);
|
||||
OpenGLState::ResetTexture(handle);
|
||||
OpenGLState::GetCurState().ResetTexture(handle).Apply();
|
||||
handle = 0;
|
||||
}
|
||||
|
||||
@ -69,7 +69,7 @@ public:
|
||||
if (handle == 0)
|
||||
return;
|
||||
glDeleteSamplers(1, &handle);
|
||||
OpenGLState::ResetSampler(handle);
|
||||
OpenGLState::GetCurState().ResetSampler(handle).Apply();
|
||||
handle = 0;
|
||||
}
|
||||
|
||||
@ -102,7 +102,7 @@ public:
|
||||
if (handle == 0)
|
||||
return;
|
||||
glDeleteProgram(handle);
|
||||
OpenGLState::ResetProgram(handle);
|
||||
OpenGLState::GetCurState().ResetProgram(handle).Apply();
|
||||
handle = 0;
|
||||
}
|
||||
|
||||
@ -135,7 +135,7 @@ public:
|
||||
if (handle == 0)
|
||||
return;
|
||||
glDeleteBuffers(1, &handle);
|
||||
OpenGLState::ResetBuffer(handle);
|
||||
OpenGLState::GetCurState().ResetBuffer(handle).Apply();
|
||||
handle = 0;
|
||||
}
|
||||
|
||||
@ -168,7 +168,7 @@ public:
|
||||
if (handle == 0)
|
||||
return;
|
||||
glDeleteVertexArrays(1, &handle);
|
||||
OpenGLState::ResetVertexArray(handle);
|
||||
OpenGLState::GetCurState().ResetVertexArray(handle).Apply();
|
||||
handle = 0;
|
||||
}
|
||||
|
||||
@ -201,7 +201,7 @@ public:
|
||||
if (handle == 0)
|
||||
return;
|
||||
glDeleteFramebuffers(1, &handle);
|
||||
OpenGLState::ResetFramebuffer(handle);
|
||||
OpenGLState::GetCurState().ResetFramebuffer(handle).Apply();
|
||||
handle = 0;
|
||||
}
|
||||
|
||||
|
@ -41,7 +41,7 @@ struct LightSrc {
|
||||
};
|
||||
|
||||
layout (std140) uniform shader_data {
|
||||
vec2 framebuffer_scale;
|
||||
int framebuffer_scale;
|
||||
int alphatest_ref;
|
||||
float depth_scale;
|
||||
float depth_offset;
|
||||
|
@ -69,6 +69,17 @@ OpenGLState::OpenGLState() {
|
||||
draw.uniform_buffer = 0;
|
||||
draw.shader_program = 0;
|
||||
|
||||
scissor.enabled = false;
|
||||
scissor.x = 0;
|
||||
scissor.y = 0;
|
||||
scissor.width = 0;
|
||||
scissor.height = 0;
|
||||
|
||||
viewport.x = 0;
|
||||
viewport.y = 0;
|
||||
viewport.width = 0;
|
||||
viewport.height = 0;
|
||||
|
||||
clip_distance = {};
|
||||
}
|
||||
|
||||
@ -193,7 +204,7 @@ void OpenGLState::Apply() const {
|
||||
// Lighting LUTs
|
||||
if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) {
|
||||
glActiveTexture(TextureUnits::LightingLUT.Enum());
|
||||
glBindTexture(GL_TEXTURE_BUFFER, cur_state.lighting_lut.texture_buffer);
|
||||
glBindTexture(GL_TEXTURE_BUFFER, lighting_lut.texture_buffer);
|
||||
}
|
||||
|
||||
// Fog LUT
|
||||
@ -260,6 +271,26 @@ void OpenGLState::Apply() const {
|
||||
glUseProgram(draw.shader_program);
|
||||
}
|
||||
|
||||
// Scissor test
|
||||
if (scissor.enabled != cur_state.scissor.enabled) {
|
||||
if (scissor.enabled) {
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
} else {
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
}
|
||||
}
|
||||
|
||||
if (scissor.x != cur_state.scissor.x || scissor.y != cur_state.scissor.y ||
|
||||
scissor.width != cur_state.scissor.width || scissor.height != cur_state.scissor.height) {
|
||||
glScissor(scissor.x, scissor.y, scissor.width, scissor.height);
|
||||
}
|
||||
|
||||
if (viewport.x != cur_state.viewport.x || viewport.y != cur_state.viewport.y ||
|
||||
viewport.width != cur_state.viewport.width ||
|
||||
viewport.height != cur_state.viewport.height) {
|
||||
glViewport(viewport.x, viewport.y, viewport.width, viewport.height);
|
||||
}
|
||||
|
||||
// Clip distance
|
||||
for (size_t i = 0; i < clip_distance.size(); ++i) {
|
||||
if (clip_distance[i] != cur_state.clip_distance[i]) {
|
||||
@ -274,62 +305,68 @@ void OpenGLState::Apply() const {
|
||||
cur_state = *this;
|
||||
}
|
||||
|
||||
void OpenGLState::ResetTexture(GLuint handle) {
|
||||
for (auto& unit : cur_state.texture_units) {
|
||||
OpenGLState& OpenGLState::ResetTexture(GLuint handle) {
|
||||
for (auto& unit : texture_units) {
|
||||
if (unit.texture_2d == handle) {
|
||||
unit.texture_2d = 0;
|
||||
}
|
||||
}
|
||||
if (cur_state.lighting_lut.texture_buffer == handle)
|
||||
cur_state.lighting_lut.texture_buffer = 0;
|
||||
if (cur_state.fog_lut.texture_buffer == handle)
|
||||
cur_state.fog_lut.texture_buffer = 0;
|
||||
if (cur_state.proctex_noise_lut.texture_buffer == handle)
|
||||
cur_state.proctex_noise_lut.texture_buffer = 0;
|
||||
if (cur_state.proctex_color_map.texture_buffer == handle)
|
||||
cur_state.proctex_color_map.texture_buffer = 0;
|
||||
if (cur_state.proctex_alpha_map.texture_buffer == handle)
|
||||
cur_state.proctex_alpha_map.texture_buffer = 0;
|
||||
if (cur_state.proctex_lut.texture_buffer == handle)
|
||||
cur_state.proctex_lut.texture_buffer = 0;
|
||||
if (cur_state.proctex_diff_lut.texture_buffer == handle)
|
||||
cur_state.proctex_diff_lut.texture_buffer = 0;
|
||||
if (lighting_lut.texture_buffer == handle)
|
||||
lighting_lut.texture_buffer = 0;
|
||||
if (fog_lut.texture_buffer == handle)
|
||||
fog_lut.texture_buffer = 0;
|
||||
if (proctex_noise_lut.texture_buffer == handle)
|
||||
proctex_noise_lut.texture_buffer = 0;
|
||||
if (proctex_color_map.texture_buffer == handle)
|
||||
proctex_color_map.texture_buffer = 0;
|
||||
if (proctex_alpha_map.texture_buffer == handle)
|
||||
proctex_alpha_map.texture_buffer = 0;
|
||||
if (proctex_lut.texture_buffer == handle)
|
||||
proctex_lut.texture_buffer = 0;
|
||||
if (proctex_diff_lut.texture_buffer == handle)
|
||||
proctex_diff_lut.texture_buffer = 0;
|
||||
return *this;
|
||||
}
|
||||
|
||||
void OpenGLState::ResetSampler(GLuint handle) {
|
||||
for (auto& unit : cur_state.texture_units) {
|
||||
OpenGLState& OpenGLState::ResetSampler(GLuint handle) {
|
||||
for (auto& unit : texture_units) {
|
||||
if (unit.sampler == handle) {
|
||||
unit.sampler = 0;
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
void OpenGLState::ResetProgram(GLuint handle) {
|
||||
if (cur_state.draw.shader_program == handle) {
|
||||
cur_state.draw.shader_program = 0;
|
||||
OpenGLState& OpenGLState::ResetProgram(GLuint handle) {
|
||||
if (draw.shader_program == handle) {
|
||||
draw.shader_program = 0;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
void OpenGLState::ResetBuffer(GLuint handle) {
|
||||
if (cur_state.draw.vertex_buffer == handle) {
|
||||
cur_state.draw.vertex_buffer = 0;
|
||||
OpenGLState& OpenGLState::ResetBuffer(GLuint handle) {
|
||||
if (draw.vertex_buffer == handle) {
|
||||
draw.vertex_buffer = 0;
|
||||
}
|
||||
if (cur_state.draw.uniform_buffer == handle) {
|
||||
cur_state.draw.uniform_buffer = 0;
|
||||
if (draw.uniform_buffer == handle) {
|
||||
draw.uniform_buffer = 0;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
void OpenGLState::ResetVertexArray(GLuint handle) {
|
||||
if (cur_state.draw.vertex_array == handle) {
|
||||
cur_state.draw.vertex_array = 0;
|
||||
OpenGLState& OpenGLState::ResetVertexArray(GLuint handle) {
|
||||
if (draw.vertex_array == handle) {
|
||||
draw.vertex_array = 0;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
void OpenGLState::ResetFramebuffer(GLuint handle) {
|
||||
if (cur_state.draw.read_framebuffer == handle) {
|
||||
cur_state.draw.read_framebuffer = 0;
|
||||
OpenGLState& OpenGLState::ResetFramebuffer(GLuint handle) {
|
||||
if (draw.read_framebuffer == handle) {
|
||||
draw.read_framebuffer = 0;
|
||||
}
|
||||
if (cur_state.draw.draw_framebuffer == handle) {
|
||||
cur_state.draw.draw_framebuffer = 0;
|
||||
if (draw.draw_framebuffer == handle) {
|
||||
draw.draw_framebuffer = 0;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
@ -124,25 +124,40 @@ public:
|
||||
GLuint shader_program; // GL_CURRENT_PROGRAM
|
||||
} draw;
|
||||
|
||||
// Scissor state: the enable flag for the scissor test plus the rectangle that
// Apply() passes to glScissor(x, y, width, height).
struct {
|
||||
bool enabled; // GL_SCISSOR_TEST
|
||||
GLint x; // 1st argument of glScissor()
|
||||
GLint y; // 2nd argument of glScissor()
|
||||
GLsizei width; // 3rd argument of glScissor()
|
||||
GLsizei height; // 4th argument of glScissor()
|
||||
} scissor;
|
||||
|
||||
// Viewport state: the rectangle that Apply() passes to
// glViewport(x, y, width, height).
struct {
|
||||
GLint x; // 1st argument of glViewport()
|
||||
GLint y; // 2nd argument of glViewport()
|
||||
GLsizei width; // 3rd argument of glViewport()
|
||||
GLsizei height; // 4th argument of glViewport()
|
||||
} viewport;
|
||||
|
||||
std::array<bool, 2> clip_distance; // GL_CLIP_DISTANCE
|
||||
|
||||
OpenGLState();
|
||||
|
||||
/// Get the currently active OpenGL state
|
||||
static const OpenGLState& GetCurState() {
|
||||
static OpenGLState GetCurState() {
|
||||
return cur_state;
|
||||
}
|
||||
|
||||
/// Apply this state as the current OpenGL state
|
||||
void Apply() const;
|
||||
|
||||
/// Resets and unbinds any references to the given resource in the current OpenGL state
|
||||
static void ResetTexture(GLuint handle);
|
||||
static void ResetSampler(GLuint handle);
|
||||
static void ResetProgram(GLuint handle);
|
||||
static void ResetBuffer(GLuint handle);
|
||||
static void ResetVertexArray(GLuint handle);
|
||||
static void ResetFramebuffer(GLuint handle);
|
||||
/// Resets any references to the given resource
|
||||
OpenGLState& ResetTexture(GLuint handle);
|
||||
OpenGLState& ResetSampler(GLuint handle);
|
||||
OpenGLState& ResetProgram(GLuint handle);
|
||||
OpenGLState& ResetBuffer(GLuint handle);
|
||||
OpenGLState& ResetVertexArray(GLuint handle);
|
||||
OpenGLState& ResetFramebuffer(GLuint handle);
|
||||
|
||||
private:
|
||||
static OpenGLState cur_state;
|
||||
|
@ -11,7 +11,7 @@ namespace Pica {
|
||||
namespace Shader {
|
||||
struct OutputVertex;
|
||||
}
|
||||
}
|
||||
} // namespace Pica
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
@ -22,6 +22,7 @@ class SWRasterizer : public RasterizerInterface {
|
||||
void NotifyPicaRegisterChanged(u32 id) override {}
|
||||
void FlushAll() override {}
|
||||
void FlushRegion(PAddr addr, u32 size) override {}
|
||||
void InvalidateRegion(PAddr addr, u32 size) override {}
|
||||
void FlushAndInvalidateRegion(PAddr addr, u32 size) override {}
|
||||
};
|
||||
|
||||
|
@ -9,9 +9,9 @@
|
||||
namespace VideoCore {
|
||||
|
||||
// 8x8 Z-Order coordinate from 2D coordinates
|
||||
static inline u32 MortonInterleave(u32 x, u32 y) {
|
||||
static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15};
|
||||
static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a};
|
||||
static constexpr u32 MortonInterleave(u32 x, u32 y) {
|
||||
constexpr u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15};
|
||||
constexpr u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a};
|
||||
return xlut[x % 8] + ylut[y % 8];
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user