Move memory trapping infrastructure outside of NCE

This commit is contained in:
lynxnb 2024-03-01 00:00:15 +01:00 committed by TheASVigilante
parent 9dd03dc2bb
commit 83111c2ae7
21 changed files with 319 additions and 264 deletions

View file

@ -141,6 +141,7 @@ add_library(skyline SHARED
${source_DIR}/skyline/common/spin_lock.cpp
${source_DIR}/skyline/common/uuid.cpp
${source_DIR}/skyline/common/trace.cpp
${source_DIR}/skyline/common/trap_manager.cpp
${source_DIR}/skyline/logger/logger.cpp
${source_DIR}/skyline/nce/guest.S
${source_DIR}/skyline/nce.cpp

View file

@ -3,6 +3,8 @@
#pragma once
#include <list>
#include <vector>
#include "utils.h"
#include "span.h"

View file

@ -0,0 +1,173 @@
// SPDX-License-Identifier: GPL-3.0-or-later
// Copyright © 2024 Strato Team and Contributors (https://github.com/strato-emu/)
#include <sys/mman.h>
#include "trace.h"
#include "trap_manager.h"
namespace skyline {
/**
 * @brief Constructs a callback entry with the supplied initial protection level
 * @note All callbacks are taken by value and moved into their corresponding members
 */
CallbackEntry::CallbackEntry(TrapProtection protection, LockCallback lockCallback, TrapCallback readCallback, TrapCallback writeCallback)
    : protection{protection},
      lockCallback{std::move(lockCallback)},
      readCallback{std::move(readCallback)},
      writeCallback{std::move(writeCallback)} {}
/**
 * @brief Wraps a group handle of the trap map into an opaque trap handle
 */
constexpr TrapHandle::TrapHandle(const TrapMap::GroupHandle &handle)
    : TrapMap::GroupHandle(handle) {}
TrapHandle TrapManager::CreateTrap(span<span<u8>> regions, const LockCallback &lockCallback, const TrapCallback &readCallback, const TrapCallback &writeCallback) {
    TRACE_EVENT("host", "TrapManager::CreateTrap");

    std::scoped_lock guard{trapMutex};

    // The entry is registered with no protection, the regions only become trapped once TrapRegions(...) is called on the returned handle
    return TrapHandle{trapMap.Insert(regions, CallbackEntry{TrapProtection::None, lockCallback, readCallback, writeCallback})};
}
void TrapManager::TrapRegions(TrapHandle handle, bool writeOnly) {
    TRACE_EVENT("host", "TrapManager::TrapRegions");

    std::scoped_lock guard{trapMutex};

    // Record the requested level on the entry first so that the reprotection below observes it
    const TrapProtection requested{writeOnly ? TrapProtection::WriteOnly : TrapProtection::ReadWrite};
    handle->value.protection = requested;
    ReprotectIntervals(handle->intervals, requested);
}
void TrapManager::RemoveTrap(TrapHandle handle) {
    TRACE_EVENT("host", "TrapManager::RemoveTrap");

    std::scoped_lock guard{trapMutex};

    // The entry stays registered in the map, it simply stops requiring any protection
    constexpr TrapProtection untrapped{TrapProtection::None};
    handle->value.protection = untrapped;
    ReprotectIntervals(handle->intervals, untrapped);
}
void TrapManager::DeleteTrap(TrapHandle handle) {
    TRACE_EVENT("host", "TrapManager::DeleteTrap");

    std::scoped_lock guard{trapMutex};

    // Drop the entry's protection requirement and reprotect while it is still in the map, only then is it unregistered
    constexpr TrapProtection untrapped{TrapProtection::None};
    handle->value.protection = untrapped;
    ReprotectIntervals(handle->intervals, untrapped);

    trapMap.Remove(handle);
}
/**
 * @brief Applies page protections to every supplied interval, taking the other entries registered in the trap map into account
 * @param intervals The intervals to reprotect, each one is aligned to the page size before mprotect is called on it
 * @param protection The protection level that the calling entry now requires
 * @note This doesn't lock trapMutex itself, the callers in this file (TrapRegions/RemoveTrap/DeleteTrap) hold it while calling
 * @note The result of mprotect is not checked
 */
void TrapManager::ReprotectIntervals(const std::vector<TrapMap::Interval> &intervals, TrapProtection protection) {
TRACE_EVENT("host", "TrapManager::ReprotectIntervals");
// Helper that walks all intervals and applies whatever PROT_* flags the supplied functor computes for the page-aligned region
auto reprotectIntervalsWithFunction = [&intervals](auto getProtection) {
for (auto region : intervals) {
region = region.Align(constant::PageSize);
mprotect(region.start, region.Size(), getProtection(region));
}
};
// We need to determine the lowest protection possible for the given interval
switch (protection) {
case TrapProtection::None:
// The caller requires nothing, so the result is dictated by the most restrictive entry overlapping the region
reprotectIntervalsWithFunction([&](auto region) {
auto entries{trapMap.GetRange(region)};
// Despite the name, this tracks the highest protection level seen across the overlapping entries
TrapProtection lowestProtection{TrapProtection::None};
for (const auto &entry : entries) {
auto entryProtection{entry.get().protection};
if (entryProtection > lowestProtection) {
lowestProtection = entryProtection;
// ReadWrite is the maximum level so there's no point in scanning any further
if (entryProtection == TrapProtection::ReadWrite)
return PROT_NONE;
}
}
switch (lowestProtection) {
case TrapProtection::None:
return PROT_READ | PROT_WRITE | PROT_EXEC;
case TrapProtection::WriteOnly:
return PROT_READ | PROT_EXEC;
case TrapProtection::ReadWrite:
return PROT_NONE;
}
});
break;
case TrapProtection::WriteOnly:
// Writes must be trapped at the very least, any overlapping ReadWrite entry forces all accesses to be trapped instead
reprotectIntervalsWithFunction([&](auto region) {
auto entries{trapMap.GetRange(region)};
for (const auto &entry : entries)
if (entry.get().protection == TrapProtection::ReadWrite)
return PROT_NONE;
return PROT_READ | PROT_EXEC;
});
break;
case TrapProtection::ReadWrite:
reprotectIntervalsWithFunction([&](auto region) {
return PROT_NONE; // No checks are needed as this is already the highest level of protection
});
break;
}
}
// The instance that the static TrapManager::TrapHandler entry point operates on, null until InstallStaticInstance() has been called
// NOTE(review): nothing in this file resets this pointer when the installed TrapManager is destroyed - confirm the instance outlives all users of the static handler
static TrapManager *staticTrap{nullptr};
// Registers this instance as the one used by the static trap handler, replacing any previously installed instance
void TrapManager::InstallStaticInstance() {
staticTrap = this;
}
/**
 * @brief Static entry point that forwards a faulting access to the installed TrapManager instance
 * @param address The address that was trapped
 * @param write If the access was a write
 * @return If the access was handled by a registered trap, false when no instance has been installed
 */
bool TrapManager::TrapHandler(u8 *address, bool write) {
    // This is reached from the signal handlers, so a missing instance is reported as "not handled" rather than asserted on
    if (staticTrap == nullptr)
        return false;

    // Dispatch to the instance handler, calling TrapHandler here would just recurse into this very function
    return staticTrap->HandleTrap(address, write);
}
/**
 * @brief Runs the callbacks of every trap entry covering the faulting page and relaxes the page protection accordingly
 * @param address The address that was trapped
 * @param write If the access was a write
 * @return If any entry was registered for the page, false means the fault doesn't belong to a trap
 * @note A callback returning false signals that it would block: trapMutex is released, the entry's lockCallback is run and the entire lookup is retried
 */
bool TrapManager::HandleTrap(u8 *address, bool write) {
    TRACE_EVENT("host", "TrapManager::TrapHandler");

    LockCallback lockCallback{};
    while (true) {
        if (lockCallback) {
            // We want to avoid a deadlock of holding trapMutex while locking the resource inside a callback while another thread holding the resource's mutex waits on trapMutex, we solve this by quitting the loop if a callback would be blocking and attempt to lock the resource externally
            lockCallback();
            lockCallback = {};
        }

        std::scoped_lock lock(trapMutex);

        // Retrieve any callbacks for the page that was faulted
        auto [entries, intervals]{trapMap.GetAlignedRecursiveRange<constant::PageSize>(address)};
        if (entries.empty())
            return false; // There's no callbacks associated with this page

        // Do callbacks for every entry in the intervals
        if (write) {
            for (auto entryRef : entries) {
                auto &entry{entryRef.get()};
                if (entry.protection == TrapProtection::None)
                    // We don't need to do the callback if the entry doesn't require any protection already
                    continue;

                if (!entry.writeCallback()) {
                    lockCallback = entry.lockCallback;
                    break;
                }
                entry.protection = TrapProtection::None; // We don't need to protect this entry anymore
            }
            if (lockCallback)
                continue; // We need to retry the loop because a callback was blocking
        } else {
            bool allNone{true}; // If all entries require no protection, we can protect to allow all accesses
            for (auto entryRef : entries) {
                auto &entry{entryRef.get()};
                if (entry.protection < TrapProtection::ReadWrite) {
                    // We don't need to do the callback if the entry can already handle read accesses
                    allNone = allNone && entry.protection == TrapProtection::None;
                    continue;
                }

                if (!entry.readCallback()) {
                    lockCallback = entry.lockCallback;
                    break;
                }
                entry.protection = TrapProtection::WriteOnly; // We only need to trap writes to this entry
                allNone = false; // This entry still requires writes to be trapped, so the page must not become writable
            }
            if (lockCallback)
                continue; // We need to retry the loop because a callback was blocking

            // Writes may only be permitted after a read fault when no entry needs any protection at all
            write = allNone;
        }

        int permission{PROT_READ | (write ? PROT_WRITE : 0) | PROT_EXEC};
        for (const auto &interval : intervals)
            // Reprotect the interval to the lowest protection level that the callbacks performed allow
            mprotect(interval.start, interval.Size(), permission);

        return true;
    }
}
}

View file

@ -0,0 +1,99 @@
// SPDX-License-Identifier: GPL-3.0-or-later
// Copyright © 2024 Strato Team and Contributors (https://github.com/strato-emu/)
#pragma once
#include <functional>
#include <mutex>
#include "interval_map.h"
namespace skyline {
/**
* @brief The level of protection that is required for a callback entry
* @note The enumerators are ordered from least to most restrictive, the trap manager compares them with relational operators so this ordering must be preserved
*/
enum class TrapProtection {
None = 0, //!< No protection is required
WriteOnly = 1, //!< Only write protection is required
ReadWrite = 2, //!< Both read and write protection are required
};
using TrapCallback = std::function<bool()>; //!< A read/write access callback, a false return makes the trap handler run the entry's LockCallback and retry
using LockCallback = std::function<void()>; //!< A callback run by the trap handler, without trapMutex held, after a TrapCallback returned false
/**
* @brief The value stored in the trap map for every group of trapped regions: its current protection level alongside its callbacks
*/
struct CallbackEntry {
TrapProtection protection; //!< The least restrictive protection that this callback needs to have
LockCallback lockCallback; //!< Called when one of the access callbacks below reports that it would block
TrapCallback readCallback, writeCallback; //!< Called on a trapped read/write access respectively
CallbackEntry(TrapProtection protection, LockCallback lockCallback, TrapCallback readCallback, TrapCallback writeCallback);
};
using TrapMap = IntervalMap<u8 *, CallbackEntry>; //!< Maps intervals of addresses to the callback entry registered for them
/**
* @brief An opaque handle to a group of trapped regions
* @note The constructor and the underlying group handle are private, only TrapManager can create or dereference a handle
*/
class TrapHandle : private TrapMap::GroupHandle {
constexpr TrapHandle(const TrapMap::GroupHandle &handle);
friend class TrapManager;
};
/**
* @brief Manages the trapping of memory regions via page protections and dispatches accesses to them to registered callbacks
*/
class TrapManager {
public:
/**
* @brief Creates a region of guest memory that can be trapped with a callback for when an access to it has been made
* @param lockCallback A callback to lock the resource that is being trapped, it must block until the resource is locked but unlock it prior to returning
* @param readCallback A callback for read accesses to the trapped region, it must not block and return a boolean if it would block
* @param writeCallback A callback for write accesses to the trapped region, it must not block and return a boolean if it would block
* @note The handle **must** be deleted using DeleteTrap before the TrapManager instance is destroyed
* @note It is UB to supply a region of host memory rather than guest memory
* @note This doesn't trap the region in itself, any trapping must be done via TrapRegions(...)
*/
TrapHandle CreateTrap(span<span<u8>> regions, const LockCallback &lockCallback, const TrapCallback &readCallback, const TrapCallback &writeCallback);
/**
* @brief Re-traps a region of memory after protections were removed
* @param writeOnly If the trap is optimally for write-only accesses, this is not guaranteed
*/
void TrapRegions(TrapHandle handle, bool writeOnly);
/**
* @brief Removes protections from a region of memory
*/
void RemoveTrap(TrapHandle handle);
/**
* @brief Deletes a trap handle and removes the protection from the region
*/
void DeleteTrap(TrapHandle handle);
/**
* @brief Handles a trap
* @param address The address that was trapped
* @param write If the access was a write
* @return If the access should be allowed
*/
bool HandleTrap(u8 *address, bool write);
/**
* @brief Installs this instance as the static instance used by the trap handler
*/
void InstallStaticInstance();
/**
* @brief The trap manager handler function
* @note This is the static counterpart of HandleTrap and takes the same arguments, an instance has to be installed with InstallStaticInstance beforehand
*/
static bool TrapHandler(u8 *address, bool write);
private:
/**
* @brief Reprotects the intervals to the least restrictive protection given the supplied protection
*/
void ReprotectIntervals(const std::vector<TrapMap::Interval> &intervals, TrapProtection protection);
private:
std::mutex trapMutex; //!< Synchronizes the accesses to the trap map
TrapMap trapMap; //!< A map of all intervals and corresponding callbacks that have been registered
};
}

View file

@ -25,7 +25,7 @@ namespace skyline::gpu {
// We can't just capture this in the lambda since the lambda could exceed the lifetime of the buffer
std::weak_ptr<Buffer> weakThis{shared_from_this()};
trapHandle = gpu.state.nce->CreateTrap(*guest, [weakThis] {
trapHandle = gpu.state.process->trap.CreateTrap(*guest, [weakThis] {
auto buffer{weakThis.lock()};
if (!buffer)
return;
@ -346,7 +346,7 @@ namespace skyline::gpu {
if (dirtyState == DirtyState::GpuDirty)
return;
gpu.state.nce->TrapRegions(*trapHandle, false); // This has to occur prior to any synchronization as it'll skip trapping
gpu.state.process->trap.TrapRegions(*trapHandle, false); // This has to occur prior to any synchronization as it'll skip trapping
if (dirtyState == DirtyState::CpuDirty)
SynchronizeHost(true); // Will transition the Buffer to Clean
@ -369,7 +369,7 @@ namespace skyline::gpu {
Buffer::Buffer(LinearAllocatorState<> &delegateAllocator, GPU &gpu, GuestBuffer guest, size_t id, bool direct)
: gpu{gpu},
guest{guest},
mirror{gpu.state.process->memory.CreateMirror(guest)},
mirror{gpu.state.process->memory.CreateMirror(guest)},
delegate{delegateAllocator.EmplaceUntracked<BufferDelegate>(this)},
isDirect{direct},
id{id},
@ -392,7 +392,7 @@ namespace skyline::gpu {
Buffer::~Buffer() {
if (trapHandle)
gpu.state.nce->DeleteTrap(*trapHandle);
gpu.state.process->trap.DeleteTrap(*trapHandle);
SynchronizeGuest(true);
if (mirror.valid())
munmap(mirror.data(), mirror.size());
@ -430,7 +430,7 @@ namespace skyline::gpu {
void Buffer::Invalidate() {
if (trapHandle) {
gpu.state.nce->DeleteTrap(*trapHandle);
gpu.state.process->trap.DeleteTrap(*trapHandle);
trapHandle = {};
}
@ -455,7 +455,7 @@ namespace skyline::gpu {
AdvanceSequence(); // We are modifying GPU backing contents so advance to the next sequence
if (!skipTrap)
gpu.state.nce->TrapRegions(*trapHandle, true); // Trap any future CPU writes to this buffer, must be done before the memcpy so that any modifications during the copy are tracked
gpu.state.process->trap.TrapRegions(*trapHandle, true); // Trap any future CPU writes to this buffer, must be done before the memcpy so that any modifications during the copy are tracked
}
std::memcpy(backing->data(), mirror.data(), mirror.size());
@ -483,7 +483,7 @@ namespace skyline::gpu {
}
if (!skipTrap)
gpu.state.nce->TrapRegions(*trapHandle, true);
gpu.state.process->trap.TrapRegions(*trapHandle, true);
return true;
}

View file

@ -6,7 +6,7 @@
#include <boost/functional/hash.hpp>
#include <common/linear_allocator.h>
#include <common/spin_lock.h>
#include <nce.h>
#include <common/trap_manager.h>
#include <gpu/tag_allocator.h>
#include "usage_tracker.h"
#include "megabuffer.h"
@ -60,7 +60,7 @@ namespace skyline::gpu {
std::optional<memory::Buffer> backing;
std::optional<memory::ImportedBuffer> directBacking;
std::optional<nce::NCE::TrapHandle> trapHandle{}; //!< (Staged) The handle of the traps for the guest mappings
std::optional<TrapHandle> trapHandle{}; //!< (Staged) The handle of the traps for the guest mappings
enum class DirtyState {
Clean, //!< The CPU mappings are in sync with the GPU buffer

View file

@ -30,7 +30,7 @@ namespace skyline::gpu::interconnect {
soc::gm20b::ChannelContext &channelCtx;
CommandExecutor &executor;
GPU &gpu;
nce::NCE &nce;
TrapManager &trap;
kernel::MemoryManager &memory;
};

View file

@ -28,7 +28,7 @@ namespace skyline::gpu::interconnect {
auto newIt{mirrorMap.emplace(blockMapping.data(), std::make_unique<MirrorEntry>(ctx.memory.CreateMirror(blockMapping)))};
// We need to create the trap after allocating the entry so that we have an `invalid` pointer we can pass in
auto trapHandle{ctx.nce.CreateTrap(blockMapping, [mutex = &trapMutex]() {
auto trapHandle{ctx.trap.CreateTrap(blockMapping, [mutex = &trapMutex]() {
std::scoped_lock lock{*mutex};
return;
}, []() { return true; }, [entry = newIt.first->second.get(), mutex = &trapMutex]() {
@ -42,7 +42,7 @@ namespace skyline::gpu::interconnect {
})};
// Write only trap
ctx.nce.TrapRegions(trapHandle, true);
ctx.trap.TrapRegions(trapHandle, true);
entry = newIt.first->second.get();
entry->trap = trapHandle;
@ -64,7 +64,7 @@ namespace skyline::gpu::interconnect {
entry->dirty = false;
if (entry->trapCount <= MirrorEntry::SkipTrapThreshold)
ctx.nce.TrapRegions(*entry->trap, true);
ctx.trap.TrapRegions(*entry->trap, true);
} else if (auto it{entry->cache.find(blockMapping.data() + blockOffset)}; it != entry->cache.end()) {
return it->second;
}

View file

@ -4,6 +4,7 @@
#pragma once
#include <tsl/robin_map.h>
#include <common/trap_manager.h>
#include "common.h"
namespace skyline::gpu::interconnect {
@ -18,7 +19,7 @@ namespace skyline::gpu::interconnect {
struct MirrorEntry {
span<u8> mirror;
tsl::robin_map<u8 *, std::pair<ShaderBinary, u64>> cache;
std::optional<nce::NCE::TrapHandle> trap;
std::optional<TrapHandle> trap;
static constexpr u32 SkipTrapThreshold{20}; //!< Threshold for the number of times a mirror trap needs to be hit before we fallback to always hashing
u32 trapCount{}; //!< The number of times the trap has been hit, used to avoid trapping in cases where the constant retraps would harm performance

View file

@ -11,11 +11,11 @@
namespace skyline::gpu::interconnect::kepler_compute {
KeplerCompute::KeplerCompute(GPU &gpu,
soc::gm20b::ChannelContext &channelCtx,
nce::NCE &nce,
TrapManager &trap,
kernel::MemoryManager &memoryManager,
DirtyManager &manager,
const EngineRegisterBundle &registerBundle)
: ctx{channelCtx, channelCtx.executor, gpu, nce, memoryManager},
: ctx{channelCtx, channelCtx.executor, gpu, trap, memoryManager},
pipelineState{manager, registerBundle.pipelineStateRegisters},
samplers{manager, registerBundle.samplerPoolRegisters},
textures{manager, registerBundle.texturePoolRegisters} {

View file

@ -6,6 +6,7 @@
#include <gpu/descriptor_allocator.h>
#include <gpu/interconnect/common/samplers.h>
#include <gpu/interconnect/common/textures.h>
#include <common/trap_manager.h>
#include "constant_buffers.h"
#include "pipeline_state.h"
@ -34,7 +35,7 @@ namespace skyline::gpu::interconnect::kepler_compute {
public:
KeplerCompute(GPU &gpu,
soc::gm20b::ChannelContext &channelCtx,
nce::NCE &nce,
TrapManager &trap,
kernel::MemoryManager &memoryManager,
DirtyManager &manager,
const EngineRegisterBundle &registerBundle);

View file

@ -13,11 +13,11 @@
namespace skyline::gpu::interconnect::maxwell3d {
Maxwell3D::Maxwell3D(GPU &gpu,
soc::gm20b::ChannelContext &channelCtx,
nce::NCE &nce,
TrapManager &trap,
skyline::kernel::MemoryManager &memoryManager,
DirtyManager &manager,
const EngineRegisterBundle &registerBundle)
: ctx{channelCtx, channelCtx.executor, gpu, nce, memoryManager},
: ctx{channelCtx, channelCtx.executor, gpu, trap, memoryManager},
activeState{manager, registerBundle.activeStateRegisters},
clearEngineRegisters{registerBundle.clearRegisters},
constantBuffers{manager, registerBundle.constantBufferSelectorRegisters},

View file

@ -7,6 +7,7 @@
#include <gpu/interconnect/common/samplers.h>
#include <gpu/interconnect/common/textures.h>
#include <soc/gm20b/gmmu.h>
#include <common/trap_manager.h>
#include "common.h"
#include "active_state.h"
#include "constant_buffers.h"
@ -83,7 +84,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
Maxwell3D(GPU &gpu,
soc::gm20b::ChannelContext &channelCtx,
nce::NCE &nce,
TrapManager &trap,
kernel::MemoryManager &memoryManager,
DirtyManager &manager,
const EngineRegisterBundle &registerBundle);

View file

@ -155,7 +155,7 @@ namespace skyline::gpu {
// We can't just capture `this` in the lambda since the lambda could exceed the lifetime of the buffer
std::weak_ptr<Texture> weakThis{weak_from_this()};
trapHandle = gpu.state.nce->CreateTrap(mappings, [weakThis] {
trapHandle = gpu.state.process->trap.CreateTrap(mappings, [weakThis] {
auto texture{weakThis.lock()};
if (!texture)
return;
@ -633,7 +633,7 @@ namespace skyline::gpu {
Texture::~Texture() {
SynchronizeGuest(true);
if (trapHandle)
gpu.state.nce->DeleteTrap(*trapHandle);
gpu.state.process->trap.DeleteTrap(*trapHandle);
if (alignedMirror.valid())
munmap(alignedMirror.data(), alignedMirror.size());
}
@ -745,7 +745,7 @@ namespace skyline::gpu {
if (gpuDirty && dirtyState == DirtyState::Clean) {
// If a texture is Clean then we can just transition it to being GPU dirty and retrap it
dirtyState = DirtyState::GpuDirty;
gpu.state.nce->TrapRegions(*trapHandle, false);
gpu.state.process->trap.TrapRegions(*trapHandle, false);
FreeGuest();
return;
} else if (dirtyState != DirtyState::CpuDirty) {
@ -753,7 +753,7 @@ namespace skyline::gpu {
}
dirtyState = gpuDirty ? DirtyState::GpuDirty : DirtyState::Clean;
gpu.state.nce->TrapRegions(*trapHandle, !gpuDirty); // Trap any future CPU reads (optionally) + writes to this texture
gpu.state.process->trap.TrapRegions(*trapHandle, !gpuDirty); // Trap any future CPU reads (optionally) + writes to this texture
}
// From this point on Clean -> CPU dirty state transitions can occur, GPU dirty -> * transitions will always require the full lock to be held and thus won't occur
@ -791,7 +791,7 @@ namespace skyline::gpu {
std::scoped_lock lock{stateMutex};
if (gpuDirty && dirtyState == DirtyState::Clean) {
dirtyState = DirtyState::GpuDirty;
gpu.state.nce->TrapRegions(*trapHandle, false);
gpu.state.process->trap.TrapRegions(*trapHandle, false);
FreeGuest();
return;
} else if (dirtyState != DirtyState::CpuDirty) {
@ -799,7 +799,7 @@ namespace skyline::gpu {
}
dirtyState = gpuDirty ? DirtyState::GpuDirty : DirtyState::Clean;
gpu.state.nce->TrapRegions(*trapHandle, !gpuDirty); // Trap any future CPU reads (optionally) + writes to this texture
gpu.state.process->trap.TrapRegions(*trapHandle, !gpuDirty); // Trap any future CPU reads (optionally) + writes to this texture
}
auto stagingBuffer{SynchronizeHostImpl()};
@ -829,7 +829,7 @@ namespace skyline::gpu {
if (cpuDirty && dirtyState == DirtyState::Clean) {
dirtyState = DirtyState::CpuDirty;
if (!skipTrap)
gpu.state.nce->DeleteTrap(*trapHandle);
gpu.state.process->trap.DeleteTrap(*trapHandle);
return;
} else if (dirtyState != DirtyState::GpuDirty) {
return;
@ -867,9 +867,9 @@ namespace skyline::gpu {
if (!skipTrap)
if (cpuDirty)
gpu.state.nce->DeleteTrap(*trapHandle);
gpu.state.process->trap.DeleteTrap(*trapHandle);
else
gpu.state.nce->TrapRegions(*trapHandle, true); // Trap any future CPU writes to this texture
gpu.state.process->trap.TrapRegions(*trapHandle, true); // Trap any future CPU writes to this texture
}
std::shared_ptr<TextureView> Texture::GetView(vk::ImageViewType type, vk::ImageSubresourceRange range, texture::Format pFormat, vk::ComponentMapping mapping) {

View file

@ -7,7 +7,7 @@
#include <range/v3/algorithm.hpp>
#include <common/spin_lock.h>
#include <common/lockable_shared_ptr.h>
#include <nce.h>
#include "common/trap_manager.h"
#include <gpu/tag_allocator.h>
#include <gpu/memory_manager.h>
#include <gpu/usage_tracker.h>
@ -384,7 +384,7 @@ namespace skyline::gpu {
span<u8> mirror{}; //!< A contiguous mirror of all the guest mappings to allow linear access on the CPU
span<u8> alignedMirror{}; //!< The mirror mapping aligned to page size to reflect the full mapping
std::optional<nce::NCE::TrapHandle> trapHandle{}; //!< The handle of the traps for the guest mappings
std::optional<TrapHandle> trapHandle{}; //!< The handle of the traps for the guest mappings
enum class DirtyState {
Clean, //!< The CPU mappings are in sync with the GPU texture
CpuDirty, //!< The CPU mappings have been modified but the GPU texture is not up to date

View file

@ -17,7 +17,9 @@ namespace skyline::kernel::type {
return memory + (constant::TlsSlotSize * index++);
}
KProcess::KProcess(const DeviceState &state) : memory(state), KSyncObject(state, KType::KProcess) {}
KProcess::KProcess(const DeviceState &state) : memory(state), KSyncObject(state, KType::KProcess) {
trap.InstallStaticInstance();
}
KProcess::~KProcess() {
std::scoped_lock guard{threadMutex};

View file

@ -3,6 +3,7 @@
#pragma once
#include <common/trap_manager.h>
#include <vfs/npdm.h>
#include "KThread.h"
#include "KTransferMemory.h"
@ -23,6 +24,7 @@ namespace skyline {
class KProcess : public KSyncObject {
public: // We have intermittent public/private members to ensure proper construction/destruction order
MemoryManager memory;
TrapManager trap;
private:
std::mutex threadMutex; //!< Synchronizes thread creation to prevent a race between thread creation and thread killing

View file

@ -3,7 +3,7 @@
#include <fstream>
#include <cxxabi.h>
#include <unistd.h>
#include <linux/elf.h>
#include "common/signal.h"
#include "common/trace.h"
#include "os.h"
@ -150,7 +150,7 @@ namespace skyline::nce {
if (signal == SIGSEGV)
// If we get a guest access violation then we want to handle any accesses that may be from a trapped region
if (state.nce->TrapHandler(reinterpret_cast<u8 *>(info->si_addr), true))
if (TrapManager::TrapHandler(reinterpret_cast<u8 *>(info->si_addr), true))
return;
if (signal != SIGINT) {
@ -180,10 +180,8 @@ namespace skyline::nce {
*tls = nullptr;
}
static NCE *staticNce{nullptr}; //!< A static instance of NCE for use in the signal handler
void NCE::HostSignalHandler(int signal, siginfo *info, ucontext *ctx) {
if (staticNce && staticNce->TrapHandler(reinterpret_cast<u8 *>(info->si_addr), true))
if (TrapManager::TrapHandler(reinterpret_cast<u8 *>(info->si_addr), true))
return;
bool runningUnderDebugger{[]() {
@ -233,15 +231,10 @@ namespace skyline::nce {
NCE::NCE(const DeviceState &state) : state(state) {
signal::SetTlsRestorer(&NceTlsRestorer);
staticNce = this;
signal::SetGuestSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, nce::NCE::SignalHandler);
signal::SetHostSignalHandler({SIGSEGV}, nce::NCE::HostSignalHandler);
}
NCE::~NCE() {
staticNce = nullptr;
}
constexpr size_t TrampolineSize{18}; // Size of the main SVC trampoline function in u32 units
/**
@ -597,152 +590,4 @@ namespace skyline::nce {
hookIndex++;
}
}
NCE::CallbackEntry::CallbackEntry(TrapProtection protection, LockCallback lockCallback, TrapCallback readCallback, TrapCallback writeCallback) : protection{protection}, lockCallback{std::move(lockCallback)}, readCallback{std::move(readCallback)}, writeCallback{std::move(writeCallback)} {}
void NCE::ReprotectIntervals(const std::vector<TrapMap::Interval> &intervals, TrapProtection protection) {
TRACE_EVENT("host", "NCE::ReprotectIntervals");
auto reprotectIntervalsWithFunction = [&intervals](auto getProtection) {
for (auto region : intervals) {
region = region.Align(constant::PageSize);
mprotect(region.start, region.Size(), getProtection(region));
}
};
// We need to determine the lowest protection possible for the given interval
if (protection == TrapProtection::None) {
reprotectIntervalsWithFunction([&](auto region) {
auto entries{trapMap.GetRange(region)};
TrapProtection lowestProtection{TrapProtection::None};
for (const auto &entry : entries) {
auto entryProtection{entry.get().protection};
if (entryProtection > lowestProtection) {
lowestProtection = entryProtection;
if (entryProtection == TrapProtection::ReadWrite)
return PROT_NONE;
}
}
switch (lowestProtection) {
case TrapProtection::None:
return PROT_READ | PROT_WRITE | PROT_EXEC;
case TrapProtection::WriteOnly:
return PROT_READ | PROT_EXEC;
case TrapProtection::ReadWrite:
return PROT_NONE;
}
});
} else if (protection == TrapProtection::WriteOnly) {
reprotectIntervalsWithFunction([&](auto region) {
auto entries{trapMap.GetRange(region)};
for (const auto &entry : entries)
if (entry.get().protection == TrapProtection::ReadWrite)
return PROT_NONE;
return PROT_READ | PROT_EXEC;
});
} else if (protection == TrapProtection::ReadWrite) {
reprotectIntervalsWithFunction([&](auto region) {
return PROT_NONE; // No checks are needed as this is already the highest level of protection
});
}
}
bool NCE::TrapHandler(u8 *address, bool write) {
TRACE_EVENT("host", "NCE::TrapHandler");
LockCallback lockCallback{};
while (true) {
if (lockCallback) {
// We want to avoid a deadlock of holding trapMutex while locking the resource inside a callback while another thread holding the resource's mutex waits on trapMutex, we solve this by quitting the loop if a callback would be blocking and attempt to lock the resource externally
lockCallback();
lockCallback = {};
}
std::scoped_lock lock(trapMutex);
// Retrieve any callbacks for the page that was faulted
auto [entries, intervals]{trapMap.GetAlignedRecursiveRange<constant::PageSize>(address)};
if (entries.empty())
return false; // There's no callbacks associated with this page
// Do callbacks for every entry in the intervals
if (write) {
for (auto entryRef : entries) {
auto &entry{entryRef.get()};
if (entry.protection == TrapProtection::None)
// We don't need to do the callback if the entry doesn't require any protection already
continue;
if (!entry.writeCallback()) {
lockCallback = entry.lockCallback;
break;
}
entry.protection = TrapProtection::None; // We don't need to protect this entry anymore
}
if (lockCallback)
continue; // We need to retry the loop because a callback was blocking
} else {
bool allNone{true}; // If all entries require no protection, we can protect to allow all accesses
for (auto entryRef : entries) {
auto &entry{entryRef.get()};
if (entry.protection < TrapProtection::ReadWrite) {
// We don't need to do the callback if the entry can already handle read accesses
allNone = allNone && entry.protection == TrapProtection::None;
continue;
}
if (!entry.readCallback()) {
lockCallback = entry.lockCallback;
break;
}
entry.protection = TrapProtection::WriteOnly; // We only need to trap writes to this entry
}
if (lockCallback)
continue; // We need to retry the loop because a callback was blocking
write = allNone;
}
int permission{PROT_READ | (write ? PROT_WRITE : 0) | PROT_EXEC};
for (const auto &interval : intervals)
// Reprotect the interval to the lowest protection level that the callbacks performed allow
mprotect(interval.start, interval.Size(), permission);
return true;
}
}
constexpr NCE::TrapHandle::TrapHandle(const TrapMap::GroupHandle &handle) : TrapMap::GroupHandle(handle) {}
NCE::TrapHandle NCE::CreateTrap(span<span<u8>> regions, const LockCallback &lockCallback, const TrapCallback &readCallback, const TrapCallback &writeCallback) {
TRACE_EVENT("host", "NCE::CreateTrap");
std::scoped_lock lock{trapMutex};
TrapHandle handle{trapMap.Insert(regions, CallbackEntry{TrapProtection::None, lockCallback, readCallback, writeCallback})};
return handle;
}
void NCE::TrapRegions(TrapHandle handle, bool writeOnly) {
TRACE_EVENT("host", "NCE::TrapRegions");
std::scoped_lock lock{trapMutex};
auto protection{writeOnly ? TrapProtection::WriteOnly : TrapProtection::ReadWrite};
handle->value.protection = protection;
ReprotectIntervals(handle->intervals, protection);
}
void NCE::RemoveTrap(TrapHandle handle) {
TRACE_EVENT("host", "NCE::RemoveTrap");
std::scoped_lock lock{trapMutex};
handle->value.protection = TrapProtection::None;
ReprotectIntervals(handle->intervals, TrapProtection::None);
}
void NCE::DeleteTrap(TrapHandle handle) {
TRACE_EVENT("host", "NCE::DeleteTrap");
std::scoped_lock lock{trapMutex};
handle->value.protection = TrapProtection::None;
ReprotectIntervals(handle->intervals, TrapProtection::None);
trapMap.Remove(handle);
}
}

View file

@ -3,11 +3,8 @@
#pragma once
#include <sys/wait.h>
#include <linux/elf.h>
#include "common.h"
#include "hle/symbol_hooks.h"
#include "common/interval_map.h"
namespace skyline::nce {
/**
@ -19,37 +16,6 @@ namespace skyline::nce {
std::vector<hle::HookedSymbol> hookedSymbols; //!< The list of symbols that are hooked, these have a specific ordering that is hardcoded into the hooked functions
/**
* @brief The level of protection that is required for a callback entry
*/
enum class TrapProtection {
None = 0, //!< No protection is required
WriteOnly = 1, //!< Only write protection is required
ReadWrite = 2, //!< Both read and write protection are required
};
using TrapCallback = std::function<bool()>;
using LockCallback = std::function<void()>;
struct CallbackEntry {
TrapProtection protection; //!< The least restrictive protection that this callback needs to have
LockCallback lockCallback;
TrapCallback readCallback, writeCallback;
CallbackEntry(TrapProtection protection, LockCallback lockCallback, TrapCallback readCallback, TrapCallback writeCallback);
};
std::mutex trapMutex; //!< Synchronizes the accesses to the trap map
using TrapMap = IntervalMap<u8*, CallbackEntry>;
TrapMap trapMap; //!< A map of all intervals and corresponding callbacks that have been registered
/**
* @brief Reprotects the intervals to the least restrictive protection given the supplied protection
*/
void ReprotectIntervals(const std::vector<TrapMap::Interval>& intervals, TrapProtection protection);
bool TrapHandler(u8* address, bool write);
static void SvcHandler(u16 svcId, ThreadContext *ctx);
/**
@ -98,8 +64,6 @@ namespace skyline::nce {
*/
NCE(const DeviceState &state);
~NCE();
struct PatchData {
size_t size; //!< Size of the .patch section
std::vector<size_t> offsets; //!< Offsets in .text of instructions that need to be patched
@ -117,41 +81,5 @@ namespace skyline::nce {
static size_t GetHookSectionSize(span<hle::HookedSymbolEntry> entries);
void WriteHookSection(span<hle::HookedSymbolEntry> entries, span<u32> hookSection);
/**
* @brief An opaque handle to a group of trapped region
*/
class TrapHandle : private TrapMap::GroupHandle {
constexpr TrapHandle(const TrapMap::GroupHandle &handle);
friend NCE;
};
/**
* @brief Creates a region of guest memory that can be trapped with a callback for when an access to it has been made
* @param lockCallback A callback to lock the resource that is being trapped, it must block until the resource is locked but unlock it prior to returning
* @param readCallback A callback for read accesses to the trapped region, it must not block and return a boolean if it would block
* @param writeCallback A callback for write accesses to the trapped region, it must not block and return a boolean if it would block
* @note The handle **must** be deleted using DeleteTrap before the NCE instance is destroyed
* @note It is UB to supply a region of host memory rather than guest memory
* @note This doesn't trap the region in itself, any trapping must be done via TrapRegions(...)
*/
TrapHandle CreateTrap(span<span<u8>> regions, const LockCallback& lockCallback, const TrapCallback& readCallback, const TrapCallback& writeCallback);
/**
* @brief Re-traps a region of memory after protections were removed
* @param writeOnly If the trap is optimally for write-only accesses, this is not guarenteed
*/
void TrapRegions(TrapHandle handle, bool writeOnly);
/**
* @brief Removes protections from a region of memory
*/
void RemoveTrap(TrapHandle handle);
/**
* @brief Deletes a trap handle and removes the protection from the region
*/
void DeleteTrap(TrapHandle handle);
};
}

View file

@ -20,7 +20,7 @@ namespace skyline::soc::gm20b::engine {
channelCtx{channelCtx},
i2m{state, channelCtx},
dirtyManager{registers},
interconnect{*state.gpu, channelCtx, *state.nce, state.process->memory, dirtyManager, MakeEngineRegisters(registers)} {}
interconnect{*state.gpu, channelCtx, state.process->trap, state.process->memory, dirtyManager, MakeEngineRegisters(registers)} {}
__attribute__((always_inline)) void KeplerCompute::CallMethod(u32 method, u32 argument) {
LOGV("Called method in Kepler compute: 0x{:X} args: 0x{:X}", method, argument);

View file

@ -108,7 +108,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
syncpoints{state.soc->host1x.syncpoints},
i2m{state, channelCtx},
dirtyManager{registers},
interconnect{*state.gpu, channelCtx, *state.nce, state.process->memory, dirtyManager, MakeEngineRegisters(registers)},
interconnect{*state.gpu, channelCtx, state.process->trap, state.process->memory, dirtyManager, MakeEngineRegisters(registers)},
channelCtx{channelCtx} {
channelCtx.executor.AddFlushCallback([this]() { FlushEngineState(); });
InitializeRegisters();