refactor: make replaceBulk truly bulk-processing, try to optimise a bit

Miiverse extra load times have gone from 15 seconds to 2 seconds, though that's mostly because I changed the search range 😎
This commit is contained in:
Ash Logan 2023-08-11 19:56:20 +10:00
parent 50b1c9a594
commit d62765bab9
4 changed files with 54 additions and 32 deletions

View file

@ -33,7 +33,7 @@ CFLAGS := -g -Wall -O2 -ffunction-sections \
CFLAGS += $(INCLUDE) -D__WIIU__ -D__WUT__ -D__WUPS__
CXXFLAGS := $(CFLAGS)
CXXFLAGS := $(CFLAGS) -std=c++20
ASFLAGS := -g $(ARCH)
LDFLAGS = -g $(ARCH) $(RPXSPECS) -Wl,-Map,$(notdir $*.map)

View file

@ -21,7 +21,6 @@
#include <optional>
#include <coreinit/debug.h>
#include <coreinit/filesystem.h>
#include <coreinit/memory.h>
#include <nsysnet/nssl.h>
#include "ca_pem.h" // generated at buildtime
@ -36,32 +35,32 @@ const char wave_new[] = {
0x00, 0x00, 0x00, 0x00, 0x2E, 0x70, 0x72, 0x65, 0x74, 0x65, 0x6E, 0x64,
0x6F, 0x2E, 0x63, 0x63, 0x00
};
// Byte pattern that replaceBulk searches for; the `replacements` table pairs it
// with juxt_purple_highlight as the bytes to write over each match.
// NOTE(review): looks like four 4-byte RGBA colour values — confirm.
const char miiverse_green_highlight[] = {
const unsigned char miiverse_green_highlight[] = {
0x82, 0xff, 0x05, 0xff, 0x82, 0xff, 0x05, 0xff, 0x1d, 0xff, 0x04, 0xff, 0x1d, 0xff, 0x04, 0xff
};
// Bytes written over every match of miiverse_green_highlight (see `replacements`).
// The same 4-byte group 5d 4a 9a ff is repeated four times, so the length
// equals that of the pattern it replaces (16 bytes).
const char juxt_purple_highlight[] = {
const unsigned char juxt_purple_highlight[] = {
0x5d, 0x4a, 0x9a, 0xff, 0x5d, 0x4a, 0x9a, 0xff, 0x5d, 0x4a, 0x9a, 0xff, 0x5d, 0x4a, 0x9a, 0xff
};
// Byte pattern that replaceBulk searches for; paired with juxt_purple_touch1
// in the `replacements` table.
// NOTE(review): presumably two RGBA colours (alpha 0x00, then alpha 0xff) — confirm.
const char miiverse_green_touch1[] = {
const unsigned char miiverse_green_touch1[] = {
0x94, 0xd9, 0x2a, 0x00, 0x57, 0xbd, 0x12, 0xff
};
// Bytes written over every match of miiverse_green_touch1 (see `replacements`).
// Same length as the pattern it replaces (8 bytes).
const char juxt_purple_touch1[] = {
const unsigned char juxt_purple_touch1[] = {
0x5d, 0x4a, 0x9a, 0x00, 0x5d, 0x4a, 0x9a, 0xff
};
// Byte pattern that replaceBulk searches for; paired with juxt_purple_touch2
// in the `replacements` table. Holds the same two 4-byte groups as
// miiverse_green_touch1 with their leading three bytes swapped.
const char miiverse_green_touch2[] = {
const unsigned char miiverse_green_touch2[] = {
0x57, 0xbd, 0x12, 0x00, 0x94, 0xd9, 0x2a, 0xff
};
// Bytes written over every match of miiverse_green_touch2 (see `replacements`).
// The contents are byte-for-byte identical to juxt_purple_touch1.
const char juxt_purple_touch2[] = {
const unsigned char juxt_purple_touch2[] = {
0x5d, 0x4a, 0x9a, 0x00, 0x5d, 0x4a, 0x9a, 0xff
};
static std::optional <FSFileHandle> rootca_pem_handle{};
// Search/replace table handed to replaceBulk in one call: wherever the `orig`
// bytes are found in memory, the matching `repl` bytes are copied over them.
const replacement replacements[] = {
    {.orig = miiverse_green_highlight, .repl = juxt_purple_highlight},
    {.orig = miiverse_green_touch1,    .repl = juxt_purple_touch1},
    {.orig = miiverse_green_touch2,    .repl = juxt_purple_touch2},
};
static std::optional<FSFileHandle> rootca_pem_handle{};
DECL_FUNCTION(int, FSOpenFile, FSClient *client, FSCmdBlock *block, char *path, const char *mode, uint32_t *handle,
int error) {
@ -101,17 +100,11 @@ DECL_FUNCTION(FSStatus, FSReadFile, FSClient *client, FSCmdBlock *block, uint8_t
}
if (rootca_pem_handle && *rootca_pem_handle == handle) {
memset(buffer, 0, size);
strcpy((char *) buffer, (const char *) ca_pem);
strlcpy((char *) buffer, (const char *) ca_pem, size * count);
//this can't be done above (in the FSOpenFile hook) since it's not loaded yet.
replaceBulk(0x10000000, 0x10000000, miiverse_green_highlight, sizeof(miiverse_green_highlight),
juxt_purple_highlight, sizeof(juxt_purple_highlight));
replaceBulk(0x10000000, 0x10000000, miiverse_green_touch1, sizeof(miiverse_green_touch1), juxt_purple_touch1,
sizeof(juxt_purple_touch1));
replaceBulk(0x10000000, 0x10000000, miiverse_green_touch2, sizeof(miiverse_green_touch2), juxt_purple_touch2,
sizeof(juxt_purple_touch2));
//the hardcoded offsets suck but they really are at Random Places In The Heap
replaceBulk(0x11000000, 0x02000000, replacements);
return (FSStatus) count;
}

View file

@ -17,6 +17,7 @@
#include <kernel/kernel.h>
#include <coreinit/memorymap.h>
#include <algorithm>
#include <vector>
bool replace(uint32_t start, uint32_t size, const char* original_val, size_t original_val_sz, const char* new_val, size_t new_val_sz) {
for (uint32_t addr = start; addr < start + size - original_val_sz; addr++) {
@ -32,13 +33,34 @@ bool replace(uint32_t start, uint32_t size, const char* original_val, size_t ori
return false;
}
void replaceBulk(uint32_t start, uint32_t size, const char* original_val, size_t original_val_sz, const char* new_val, size_t new_val_sz) {
for (uint32_t addr = start; addr < start + size - original_val_sz; addr++) {
int ret = memcmp(original_val, (void*)addr, original_val_sz);
if (ret == 0) {
DEBUG_FUNCTION_LINE("found bulk @%08x", addr);
KernelCopyData(OSEffectiveToPhysical(addr), OSEffectiveToPhysical((uint32_t)new_val), new_val_sz);
DEBUG_FUNCTION_LINE("new bulk @%08x", addr);
/**
 * Single-pass bulk search-and-replace over a window of memory.
 *
 * Every address in [start, start + size) is compared against the `orig` bytes
 * of each table entry. On the first entry that matches, that entry's `repl`
 * bytes are copied over the match with KernelCopyData and the remaining
 * entries are skipped for that address. After the scan, one debug line per
 * entry reports how many times it was applied.
 *
 * @param start         first effective address to examine
 * @param size          length of the window in bytes; start + size - 1 must
 *                      not wrap around the 32-bit address space
 * @param replacements  search/replace pairs; earlier entries take priority
 *                      when several match at the same address
 *
 * A match is only attempted when the whole `orig` pattern fits inside the
 * window, so the scan never reads past start + size. `repl.size_bytes()`
 * bytes are written per match; callers should keep `repl` the same length as
 * `orig` so the write stays inside the window as well.
 */
void replaceBulk(uint32_t start, uint32_t size, std::span<const replacement> replacements) {
    // Nothing to look for: skip walking the (potentially huge) window.
    if (replacements.empty()) {
        return;
    }

    // One hit counter per table entry, reported once the scan is done.
    std::vector<int> counts(replacements.size(), 0);

    // Iterating by offset (rather than comparing against start + size - N)
    // cannot underflow when the window is shorter than a pattern.
    for (uint32_t offset = 0; offset < size; ++offset) {
        const uint32_t addr = start + offset;
        const uint32_t remaining = size - offset;
        const void* const candidate = reinterpret_cast<const void*>(static_cast<uintptr_t>(addr));

        for (std::size_t i = 0; i < replacements.size(); ++i) {
            const auto& entry = replacements[i];
            const std::size_t pattern_sz = entry.orig.size_bytes();

            // An empty pattern would "match" everywhere; a pattern longer than
            // what is left of the window would read out of bounds.
            if (pattern_sz == 0 || pattern_sz > remaining) {
                continue;
            }
            if (memcmp(candidate, entry.orig.data(), pattern_sz) != 0) {
                continue;
            }

            KernelCopyData(
                OSEffectiveToPhysical(addr),
                OSEffectiveToPhysical(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(entry.repl.data()))),
                entry.repl.size_bytes()
            );
            ++counts[i];
            break; // don't check the other replacements
        }
    }

    for (const int c : counts) {
        DEBUG_FUNCTION_LINE("replaced %d times", c);
    }
}

View file

@ -16,6 +16,13 @@
#include <cstdint>
#include <cstddef>
#include <span>
bool replace(uint32_t start, uint32_t size, const char* original_val, size_t original_val_sz, const char* new_val, size_t new_val_sz);
void replaceBulk(uint32_t start, uint32_t size, const char* original_val, size_t original_val_sz, const char* new_val, size_t new_val_sz);
// One search/replace pair consumed by replaceBulk. Both members are
// non-owning views, so the bytes they refer to must outlive the call.
struct replacement {
std::span<const uint8_t> orig; // bytes to look for in memory
std::span<const uint8_t> repl; // bytes copied over each match of `orig`
};
// Walks addresses from `start` across a `size`-byte range, checking all
// entries of `replacements` in one pass. Wherever the bytes in memory equal an
// entry's `orig`, that entry's `repl` is copied over them; only the first
// matching entry is applied at each address.
void replaceBulk(uint32_t start, uint32_t size, std::span<const replacement> replacements);