Implement utils::memory_map_fd (partial)

Improve JIT profiling dump format (data + name, mmap)
Improve objdump interception util (better speed, fix bugs)
Rename spu_ubertrampoline to __ub+number
This commit is contained in:
Nekotekina 2022-01-26 04:48:20 +03:00
parent ffe00e8619
commit dba2baba9c
5 changed files with 139 additions and 68 deletions

View file

@ -24,34 +24,61 @@ void jit_announce(uptr func, usz size, std::string_view name)
return;
}
if (!name.empty())
// If directory ASMJIT doesn't exist, nothing will be written
static constexpr u64 c_dump_size = 0x1'0000'0000;
static constexpr u64 c_index_size = c_dump_size / 16;
static atomic_t<u64> g_index_off = 0;
static atomic_t<u64> g_data_off = c_index_size;
static void* g_asm = []() -> void*
{
// If directory ASMJIT doesn't exist, nothing will be written
static const fs::file s_asm = []()
{
fs::remove_all(fs::get_cache_dir() + "/ASMJIT/", false);
fs::remove_all(fs::get_cache_dir() + "/ASMJIT/", false);
return fs::file(fmt::format("%s/ASMJIT/.objects", fs::get_cache_dir()), fs::rewrite + fs::append);
}();
fs::file objs(fmt::format("%s/ASMJIT/.objects", fs::get_cache_dir()), fs::read + fs::rewrite);
if (s_asm)
if (!objs || !objs.trunc(c_dump_size))
{
// Dump object: addr + size + bytes
s_asm.write(fmt::format("%s%s%s",
std::string_view(reinterpret_cast<char*>(&func), 8),
std::string_view(reinterpret_cast<char*>(&size), 8),
std::string_view(reinterpret_cast<char*>(func), size)));
return nullptr;
}
if (s_asm && name[0] != '_')
{
// Save some objects separately
fs::file dump(fmt::format("%s/ASMJIT/%s", fs::get_cache_dir(), name), fs::rewrite);
return utils::memory_map_fd(objs.get_handle(), c_dump_size, utils::protection::rw);
}();
if (dump)
{
dump.write(reinterpret_cast<uchar*>(func), size);
}
// Record this function in the memory-mapped dump file.
// Skipped when the mapping is unavailable (g_asm is null) or the function is not smaller than c_index_size.
if (g_asm && size < c_index_size)
{
// Index record stored at the start of the mapping (16 bytes per entry).
// The objdump interception utility reads records with the same layout.
struct entry
{
u64 addr; // RPCS3 process address (stored last, see below)
u32 size; // Function size in bytes
u32 off; // Offset of the function bytes from the start of the mapping
};
// Write index entry at the beginning of file, and data + NTS name at fixed offset
// Both cursors are advanced atomically, so each call gets its own index slot and data range
const u64 index_off = g_index_off.fetch_add(1);
// Function bytes + name + one byte reserved for the name terminator
const u64 size_all = size + name.size() + 1;
const u64 data_off = g_data_off.fetch_add(size_all);
// If either index or data area is exhausted, nothing will be written
// (the cursors have still been advanced, so later calls are dropped as well)
if (index_off < c_index_size / sizeof(entry) && data_off + size_all < c_dump_size)
{
entry& index = static_cast<entry*>(g_asm)[index_off];
// Copy the machine code, then the name right after it
std::memcpy(static_cast<char*>(g_asm) + data_off, reinterpret_cast<char*>(func), size);
// NOTE(review): the terminating zero byte is not written explicitly; this presumably relies on
// the freshly truncated file being zero-filled - confirm
std::memcpy(static_cast<char*>(g_asm) + data_off + size, name.data(), name.size());
// data_off fits in 32 bits because data_off + size_all < c_dump_size (4 GiB)
index.size = static_cast<u32>(size);
index.off = static_cast<u32>(data_off);
// Address is stored last with an atomic store; the reader treats addr == 0 as end of index,
// so this presumably publishes the entry only after the other fields are filled - confirm ordering guarantees
atomic_storage<u64>::store(index.addr, func);
}
}
// Additionally dump the raw bytes of named objects into their own file.
// Only done when the main dump mapping exists, and skipped for unnamed objects
// and for names starting with '_'.
if (g_asm && !name.empty() && name[0] != '_')
{
// Save some objects separately
// File is created (or overwritten) as <cache dir>/ASMJIT/<name>
fs::file dump(fmt::format("%s/ASMJIT/%s", fs::get_cache_dir(), name), fs::rewrite);
// Failure to open the file is silently ignored (best effort)
if (dump)
{
dump.write(reinterpret_cast<uchar*>(func), size);
}
}

View file

@ -21,10 +21,7 @@
#include <cstdint>
#include <unistd.h>
#include <sys/file.h>
#include <sys/wait.h>
#include <sys/sendfile.h>
#include <spawn.h>
#include <unordered_map>
#include <sys/mman.h>
#include <string>
#include <vector>
#include <charconv>
@ -53,58 +50,92 @@ int main(int argc, char* argv[])
// Get cache path
home += "/rpcs3/ASMJIT/";
// Get object names
// Get objects
int fd = open((home + ".objects").c_str(), O_RDONLY);
if (fd < 0)
return 1;
// Addr -> offset;size in .objects
std::unordered_map<std::uint64_t, std::pair<std::uint64_t, std::uint64_t>> objects;
// Map 4GiB (full size)
const auto data = mmap(nullptr, 0x10000'0000, PROT_READ, MAP_SHARED, fd, 0);
while (true)
struct entry
{
// Size is name size, not object size
std::uint64_t ptr, size;
if (read(fd, &ptr, 8) != 8 || read(fd, &size, 8) != 8)
break;
std::uint64_t off = lseek(fd, 0, SEEK_CUR);
objects.emplace(ptr, std::make_pair(off, size));
lseek(fd, size, SEEK_CUR);
}
std::uint64_t addr;
std::uint32_t size;
std::uint32_t off;
};
// Index part (precedes actual data)
const auto index = static_cast<const entry*>(data);
const entry* found = nullptr;
std::string out_file;
std::vector<std::string> args;
std::uint64_t addr = 0;
for (int i = 0; i < argc; i++)
{
// Replace args
std::string arg = argv[i];
if (arg.find("--start-address=0x") == 0)
if (std::uintptr_t(data) != -1 && arg.find("--start-address=0x") == 0)
{
// Decode address and try to find the object
std::uint64_t addr = -1;
std::from_chars(arg.data() + strlen("--start-address=0x"), arg.data() + arg.size(), addr, 16);
if (objects.count(addr))
for (int j = 0; j < 0x100'0000; j++)
{
// Extract object into a tmp file
lseek(fd, objects[addr].first, SEEK_SET);
const int fd2 = open("/tmp/rpcs3.objdump.tmp", O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
sendfile(fd2, fd, nullptr, objects[addr].second);
close(fd2);
if (index[j].addr == 0)
{
break;
}
if (index[j].addr == addr)
{
found = index + j;
break;
}
}
if (found)
{
	// Extract object into a new file (read file name from the mapped memory).
	// The zero-terminated name is stored right after the function bytes.
	const char* const base = static_cast<const char*>(data);
	const char* const name = base + found->off + found->size;

	if (name[0])
	{
		// Named object: place it next to the dump, under its own name
		out_file = home + name;
	}
	else
	{
		// Unnamed object: use a per-process temporary file, removing any stale one first
		out_file = "/tmp/rpcs3.objdump." + std::to_string(getpid());
		unlink(out_file.c_str());
	}

	// O_EXCL: don't overwrite if the file already exists (open fails and the existing file is reused)
	const int fd2 = open(out_file.c_str(), O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);

	// open() returns -1 on failure; 0 is a valid descriptor (e.g. when stdin is closed)
	if (fd2 >= 0)
	{
		write(fd2, base + found->off, found->size);
		close(fd2);
	}

	// Make objdump print addresses relative to the original process address
	args.emplace_back("--adjust-vma=" + to_hex(addr));
	continue;
}
}
if (objects.count(addr) && arg.find("--stop-address=0x") == 0)
if (found && arg.find("--stop-address=0x") == 0)
{
continue;
}
if (objects.count(addr) && arg == "-d")
if (found && arg == "-d")
{
arg = "-D";
}
@ -117,14 +148,14 @@ int main(int argc, char* argv[])
args.emplace_back(std::move(arg));
}
if (objects.count(addr))
if (found)
{
args.pop_back();
args.emplace_back("-b");
args.emplace_back("binary");
args.emplace_back("-m");
args.emplace_back("i386");
args.emplace_back("/tmp/rpcs3.objdump.tmp");
args.emplace_back("i386:x86-64");
args.emplace_back(std::move(out_file));
}
args[0] = "/usr/bin/objdump";
@ -138,12 +169,11 @@ int main(int argc, char* argv[])
new_argv.push_back(nullptr);
if (objects.count(addr))
if (found)
{
int fds[2];
pipe(fds);
// objdump is broken; fix address truncation
if (fork() > 0)
{
close(fds[1]);
@ -158,20 +188,6 @@ int main(int argc, char* argv[])
if (c == '\n')
{
// Replace broken address
if ((buf[0] >= '0' && buf[0] <= '9') || (buf[0] >= 'a' && buf[0] <= 'f'))
{
std::uint64_t ptr = -1;
auto cvt = std::from_chars(buf.data(), buf.data() + buf.size(), ptr, 16);
if (cvt.ec == std::errc() && ptr < addr)
{
auto fix = to_hex((ptr - std::uint32_t(addr)) + addr, false);
write(STDOUT_FILENO, fix.data(), fix.size());
buf = std::string(cvt.ptr);
}
}
write(STDOUT_FILENO, buf.data(), buf.size());
buf.clear();
}

View file

@ -1074,7 +1074,9 @@ spu_function_t spu_runtime::rebuild_ubertrampoline(u32 id_inst)
workload.clear();
result = reinterpret_cast<spu_function_t>(reinterpret_cast<u64>(wxptr));
jit_announce(wxptr, raw - wxptr, "spu_ubertrampoline");
std::string fname;
fmt::append(fname, "__ub%u", m_flat_list.size());
jit_announce(wxptr, raw - wxptr, fname);
}
if (auto _old = stuff_it->trampoline.compare_and_swap(nullptr, result))

View file

@ -5,6 +5,12 @@
namespace utils
{
// Platform-native file handle type: a pointer-sized handle on Windows,
// an integer file descriptor elsewhere (passed directly to mmap by memory_map_fd)
#ifdef _WIN32
using native_handle = void*;
#else
using native_handle = int;
#endif
// Memory protection type
enum class protection
{
@ -43,6 +49,9 @@ namespace utils
// Lock pages in memory
bool memory_lock(void* pointer, usz size);
// Map file descriptor: maps `size` bytes from the start of the file as shared memory
// with the given protection. Returns nullptr on failure (always nullptr on Windows: not implemented yet)
void* memory_map_fd(native_handle fd, usz size, protection prot);
// Shared memory handle
class shm
{

View file

@ -296,6 +296,23 @@ namespace utils
#endif
}
// Map the first `size` bytes of the file referred to by `fd` as a shared mapping.
// fd:   native file handle (file descriptor on non-Windows platforms)
// size: number of bytes to map, starting at file offset 0
// prot: requested protection (converted to native flags with unary +, presumably defined elsewhere in this file)
// Returns the mapping address, or nullptr on failure.
// Not implemented on Windows yet: always returns nullptr there.
void* memory_map_fd(native_handle fd, usz size, protection prot)
{
#ifdef _WIN32
	// TODO
	return nullptr;
#else
	const auto result = ::mmap(nullptr, size, +prot, MAP_SHARED, fd, 0);

	// mmap signals failure with MAP_FAILED rather than a null pointer; normalize it for callers
	if (result == MAP_FAILED)
	{
		[[unlikely]] return nullptr;
	}

	return result;
#endif
}
shm::shm(u32 size, u32 flags)
: m_flags(flags)
, m_size(utils::align(size, 0x10000))