Compare commits

...

8 commits

Author SHA1 Message Date
Jakly 96b96734cf
Merge 4199c99cb6 into 10798c3464 2024-05-07 17:11:27 +00:00
Jaklyy 4199c99cb6 fix a bunch of shit 2024-05-07 13:07:08 -04:00
Jaklyy 1f411c17d0 correctly handle shadow masks with the raster scfg bit 2024-05-06 23:38:46 -04:00
Nadia Holmquist Pedersen 10798c3464 fix README build badges finally 2024-05-05 08:40:37 +02:00
Nadia Holmquist Pedersen ee2c6cc7c2 actually add the cmake script too 2024-05-05 08:14:00 +02:00
Nadia Holmquist Pedersen 474bf6e784 Set default optimization flags less intrusively 2024-05-05 08:10:21 +02:00
Nadia Holmquist Pedersen 35cea5e1d7 Fix zstd ROM loading issues
* fix use-after-free of inContent
* don't try to free the DStream twice
2024-05-04 18:16:24 +02:00
Jakly 6112aa120a
Pu region sizing/bounds fix (#2024)
* fix the pu region's end point overflowing

According to gericom it cannot overflow at all

* set a minimum and a better maximum for the pu region size

* fix pu logging

* PU regions with a size of 31 always take up the entire address space

also tweak some logging a little more

* start is actually force aligned by size, oops

* small tweaks

* hopefully more clear code

* math is for nerds
2024-05-02 17:44:59 +02:00
9 changed files with 69 additions and 27 deletions

View file

@ -8,6 +8,7 @@ endif()
set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
set(CMAKE_USER_MAKE_RULES_OVERRIDE "${CMAKE_SOURCE_DIR}/cmake/DefaultBuildFlags.cmake")
option(USE_VCPKG "Use vcpkg for dependency packages" OFF)
if (USE_VCPKG)
@ -78,14 +79,6 @@ if (ENABLE_LTO)
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
endif()
if (CMAKE_CXX_COMPILER_ID STREQUAL GNU)
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -Og")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og")
endif()
string(REPLACE "-O2" "-O3" CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}")
string(REPLACE "-O2" "-O3" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
if (NOT APPLE)
set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} -s")
endif()

View file

@ -6,10 +6,9 @@
<a href="https://www.gnu.org/licenses/gpl-3.0" alt="License: GPLv3"><img src="https://img.shields.io/badge/License-GPL%20v3-%23ff554d.svg"></a>
<a href="https://kiwiirc.com/client/irc.badnik.net/?nick=IRC-Source_?#melonds" alt="IRC channel: #melonds"><img src="https://img.shields.io/badge/IRC%20chat-%23melonds-%23dd2e44.svg"></a>
<br>
<a href="https://github.com/melonDS-emu/melonDS/actions?query=workflow%3A%22CMake+Build+%28Windows+x86-64%29%22+event%3Apush"><img src="https://img.shields.io/github/actions/workflow/status/melonDS-emu/melonDS/build-windows.yml?label=Windows%20x86-64&logo=GitHub&branch=master"></img></a>
<a href="https://github.com/melonDS-emu/melonDS/actions?query=workflow%3A%22CMake+Build+%28Ubuntu+x86-64%29%22+event%3Apush"><img src="https://img.shields.io/github/actions/workflow/status/melonDS-emu/melonDS/build-ubuntu.yml?label=Linux%20x86-64&logo=GitHub"></img></a>
<a href="https://github.com/melonDS-emu/melonDS/actions?query=workflow%3A%22CMake+Build+%28Ubuntu+aarch64%29%22+event%3Apush"><img src="https://img.shields.io/github/actions/workflow/status/melonDS-emu/melonDS/build-ubuntu-aarch64.yml?label=Linux%20ARM64&logo=GitHub"></img></a>
<a href="https://github.com/melonDS-emu/melonDS/actions/workflows/build-macos-universal.yml?query=event%3Apush"><img src="https://img.shields.io/github/actions/workflow/status/melonDS-emu/melonDS/build-macos.yml?label=macOS%20Universal&logo=GitHub"></img></a>
<a href="https://github.com/melonDS-emu/melonDS/actions/workflows/build-windows.yml?query=event%3Apush"><img src="https://github.com/melonDS-emu/melonDS/actions/workflows/build-windows.yml/badge.svg" /></a>
<a href="https://github.com/melonDS-emu/melonDS/actions/workflows/build-ubuntu.yml?query=event%3Apush"><img src="https://github.com/melonDS-emu/melonDS/actions/workflows/build-ubuntu.yml/badge.svg" /></a>
<a href="https://github.com/melonDS-emu/melonDS/actions/workflows/build-macos.yml?query=event%3Apush"><img src="https://github.com/melonDS-emu/melonDS/actions/workflows/build-macos.yml/badge.svg" /></a>
</p>
DS emulator, sorta

View file

@ -0,0 +1,9 @@
# Default per-configuration compiler flags for C and C++.
# NOTE(review): presumably this is the script referenced through
# CMAKE_USER_MAKE_RULES_OVERRIDE (cmake/DefaultBuildFlags.cmake) - confirm
# against the top-level CMakeLists.txt.
#
# Only the *_INIT variables are touched here. Per the CMake documentation these
# seed the matching CMAKE_<LANG>_FLAGS_<CONFIG> cache entries the first time a
# build tree is configured, so flags supplied by the user are not overwritten.

# When the C compiler identifies as GNU, Debug builds start from "-g -Og".
if (CMAKE_C_COMPILER_ID STREQUAL GNU)
set(CMAKE_C_FLAGS_DEBUG_INIT "-g -Og")
endif()
# Same default for the C++ compiler, checked separately from the C compiler.
if (CMAKE_CXX_COMPILER_ID STREQUAL GNU)
set(CMAKE_CXX_FLAGS_DEBUG_INIT "-g -Og")
endif()
# Raise the initial Release optimization level: every "-O2" in the initial
# Release flags becomes "-O3". If the string does not contain "-O2" the value
# is left unchanged. This runs for every compiler, not just GNU.
string(REPLACE "-O2" "-O3" CMAKE_C_FLAGS_RELEASE_INIT "${CMAKE_C_FLAGS_RELEASE_INIT}")
string(REPLACE "-O2" "-O3" CMAKE_CXX_FLAGS_RELEASE_INIT "${CMAKE_CXX_FLAGS_RELEASE_INIT}")

View file

@ -186,10 +186,14 @@ void ARMv5::UpdatePURegion(u32 n)
return;
}
u32 start = rgn >> 12;
u32 sz = 2 << ((rgn >> 1) & 0x1F);
u32 end = start + (sz >> 12);
// TODO: check alignment of start
// notes:
// * min size of a pu region is 4KiB (12 bits)
// * size is calculated as size + 1, but the 12 lsb of address space are ignored, therefore we need it as size + 1 - 12, or size - 11
// * pu regions are aligned based on their size
u32 size = std::max((int)((rgn>>1) & 0x1F) - 11, 0); // obtain the size, subtract 11 and clamp to a min of 0.
u32 start = ((rgn >> 12) >> size) << size; // determine the start offset, and use shifts to force alignment with a multiple of the size.
u32 end = start + (1<<size); // add 1 left shifted by size to start to determine end point
// dont need to bounds check the end point because the force alignment inherently prevents it from breaking
u8 usermask = 0;
u8 privmask = 0;
@ -239,7 +243,7 @@ void ARMv5::UpdatePURegion(u32 n)
"PU region %d: %08X-%08X, user=%02X priv=%02X, %08X/%08X\n",
n,
start << 12,
end << 12,
(end << 12) - 1,
usermask,
privmask,
PU_DataRW,
@ -579,12 +583,12 @@ void ARMv5::CP15Write(u32 id, u32 val)
std::snprintf(log_output,
sizeof(log_output),
"PU: region %d = %08X : %s, %08X-%08X\n",
"PU: region %d = %08X : %s, start: %08X size: %02X\n",
(id >> 4) & 0xF,
val,
val & 1 ? "enabled" : "disabled",
val & 0xFFFFF000,
(val & 0xFFFFF000) + (2 << ((val & 0x3E) >> 1))
(val & 0x3E) >> 1
);
Log(LogLevel::Debug, "%s", log_output);
// Some implementations of Log imply a newline, so we build up the line before printing it

View file

@ -299,6 +299,7 @@ void GPU3D::Reset() noexcept
NumPolygons = 0;
CurRAMBank = 0;
ShadowSent = true;
FlushRequest = 0;
FlushAttributes = 0;
@ -358,6 +359,7 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
file->Var32(&RenderClearAttr2);
file->Bool32(&RenderRasterRev);
file->Bool32(&ShadowSent);
file->Var16(&RenderXPos);
@ -463,6 +465,7 @@ void GPU3D::DoSavestate(Savestate* file) noexcept
file->Bool32(&poly->FacingView);
file->Bool32(&poly->Translucent);
file->Bool32(&poly->ClearStencil);
file->Bool32(&poly->IsShadowMask);
file->Bool32(&poly->IsShadow);
@ -1225,6 +1228,18 @@ void GPU3D::SubmitPolygon() noexcept
poly->IsShadowMask = ((CurPolygonAttr & 0x3F000030) == 0x00000030);
poly->IsShadow = ((CurPolygonAttr & 0x30) == 0x30) && !poly->IsShadowMask;
// yes we need specifically the rasterizer bit, not the gx bit
poly->ClearStencil = false;
if (NDS.GetSCFGRasterBit() && (FlushAttributes & 1) && poly->Translucent)
{
if (poly->IsShadowMask && ShadowSent)
{
ShadowSent = false;
poly->ClearStencil = true;
}
}
if (poly->IsShadow) ShadowSent = true;
if (!poly->Translucent) NumOpaquePolygons++;
@ -2056,6 +2071,7 @@ void GPU3D::ExecuteCommand() noexcept
case 0x50: // flush
VertexPipelineCmdDelayed4();
ShadowSent = true;
FlushRequest = 1;
FlushAttributes = entry.Param & 0x3;
CycleCount = 325;

View file

@ -68,6 +68,7 @@ struct Polygon
bool FacingView;
bool Translucent;
bool ClearStencil; // used by the dsi's revised rasterizer mode
bool IsShadowMask;
bool IsShadow;
@ -328,6 +329,8 @@ public:
// used to fix stencil buffer's frame-to-frame persistence not working properly under extreme misuse of shadow masks/shadows
bool ForceRerender = false;
bool DontRerenderLoop = false;
bool ShadowSent = false; // used to track whether the clear stencil flag should be set when the raster scfg bit is active
u32 FlushRequest = 0;
u32 FlushAttributes = 0;

View file

@ -125,6 +125,8 @@ void SoftRenderer::Reset(GPU& gpu)
ShadowRendered[0] = false;
ShadowRendered[1] = false;
ShadowRenderedi[0] = false;
ShadowRenderedi[1] = false;
SetupRenderThread(gpu);
EnableRenderThread();
}
@ -762,10 +764,9 @@ void SoftRenderer::RenderShadowMaskScanline(const GPU3D& gpu3d, RendererPolygon*
bool (*fnDepthTest)(s32 dstz, s32 z, u32 attr, u32 dstattr, u8 flags);
// stencil buffer is only cleared when beginning a shadow mask after a shadow polygon is rendered
// the "Revised" Rasterizer Circuit bugs out stencil buffer clearing
// TODO: under some circumstances that i am struggling to reproduce it is possible for this bit to actually *fix* bugs with misused shadow masks
// while this is in effect, toggling the scfg bit appears to glitch the stencil buffer for a frame?
if (ShadowRendered[y&0x1] && !gpu3d.RenderRasterRev)
// the state of whether a polygon was or wasn't rendered can persist between frames
// the Revised Rasterizer Circuit handles clearing the stencil buffer elsewhere
if (ShadowRendered[y&0x1] && (!gpu3d.RenderRasterRev || polygon->ClearStencil))
{
StencilCleared = true;
memset(&StencilBuffer[256 * (y&0x1)], 0, 256);
@ -961,7 +962,7 @@ void SoftRenderer::RenderPolygonScanline(GPU& gpu, RendererPolygon* rp, s32 y)
if (polygon->IsShadow)
{
ShadowRendered[y&0x1] = true;
if (!gpu.GPU3D.RenderRasterRev || !polygon->Translucent) ShadowRendered[y&0x1] = true;
if (wireframe) return; // TODO: this probably still counts towards timings.
if (!StencilCleared)
{
@ -1593,6 +1594,22 @@ void SoftRenderer::RenderScanline(GPU& gpu, s32 y, int npolys)
{
RendererPolygon* rp = &PolygonList[i];
Polygon* polygon = rp->PolyData;
//we actually handle clearing the stencil buffer here when the revision bit is set, this allows for a polygon to clear it on every scanline, even ones it isn't part of.
if (gpu.GPU3D.RenderRasterRev)
{
if (polygon->ClearStencil && polygon->Translucent && ShadowRenderedi[(y&0x1)])
{
StencilCleared = true;
memset(&StencilBuffer[256 * (y&0x1)], 0, 256);
ShadowRenderedi[(y&0x1)] = false;
}
else if (polygon->IsShadow && polygon->Translucent)
{
ShadowRenderedi[(y&0x1)] = true;
}
}
if (y >= polygon->YTop && (y < polygon->YBottom || (y == polygon->YTop && polygon->YBottom == polygon->YTop)))
{

View file

@ -510,6 +510,7 @@ private:
u8 StencilBuffer[256*2];
bool ShadowRendered[2];
bool ShadowRenderedi[2];
bool StencilCleared;
bool Enabled;

View file

@ -948,8 +948,8 @@ u32 DecompressROM(const u8* inContent, const u32 inSize, unique_ptr<u8[]>& outCo
if (realSize != ZSTD_CONTENTSIZE_UNKNOWN)
{
outContent = make_unique<u8[]>(realSize);
u64 decompressed = ZSTD_decompress(outContent.get(), realSize, inContent, inSize);
auto newOutContent = make_unique<u8[]>(realSize);
u64 decompressed = ZSTD_decompress(newOutContent.get(), realSize, inContent, inSize);
if (ZSTD_isError(decompressed))
{
@ -957,6 +957,7 @@ u32 DecompressROM(const u8* inContent, const u32 inSize, unique_ptr<u8[]>& outCo
return 0;
}
outContent = std::move(newOutContent);
return realSize;
}
else
@ -1011,7 +1012,6 @@ u32 DecompressROM(const u8* inContent, const u32 inSize, unique_ptr<u8[]>& outCo
}
} while (inBuf.pos < inBuf.size);
ZSTD_freeDStream(dStream);
outContent = make_unique<u8[]>(outBuf.pos);
memcpy(outContent.get(), outBuf.dst, outBuf.pos);