- Full TLB in ARAM for GC (-DARAM_TLBCACHE & TLB-Cache.c)

- GC ROM cache is ~3.93MB (63x64KB blocks) in ARAM
- Moved ARQ_Init and ARQ_Reset to ARAM_manager_init
- TODO: revise the ROM cache, or possibly move it out of ARAM into L1
This commit is contained in:
emukidid 2010-04-27 22:17:27 +00:00
parent 7105989276
commit aba3e986c1
8 changed files with 142 additions and 78 deletions

View file

@ -8,7 +8,7 @@ CFLAGS = -g -O3 -Wall $(MACHDEP) $(INCLUDE) \
-DCPU_SHUTDOWN -DSPC700_SHUTDOWN -DVAR_CYCLES -DSOUND \
-DNOASM -DNGC -DNOASM -DPIXEL_FORMAT=RGB565 \
-fno-exceptions -Wno-unused-parameter -pipe \
-DUSE_GUI -DNGC -DHW_DOL -DGLN64_GX -DUSE_TLB_CACHE -DARAM_BLOCKCACHE \
-DUSE_GUI -DNGC -DHW_DOL -DGLN64_GX -DUSE_TLB_CACHE -DARAM_BLOCKCACHE -DARAM_TLBCACHE \
-DTHREADED_AUDIO -DUSE_RECOMP_CACHE -DPPC_DYNAREC -DFASTMEM -DRELEASE -DMENU_V2\
#-DSHOW_DEBUG #-DUSE_TLB_CACHE \
#-DPPC_DYNAREC -DNO_BT -DPROFILE #-DSHOW_DEBUG #-DDEBUGON #-DPRINTGECKO
@ -75,7 +75,7 @@ OBJ =main/rom_gc.o \
r4300/ARAM-blocks.o \
r4300/Recomp-Cache-Heap.o \
gc_memory/tlb.o \
gc_memory/TLB-Cache-hash.o \
gc_memory/TLB-Cache.o \
gc_memory/memory.o \
gc_memory/ARAM.o \
gc_memory/dma.o \

View file

@ -24,6 +24,7 @@
#include <malloc.h>
#include <stdio.h>
#include <ogc/aram.h>
#include <ogc/arqueue.h>
#include "ARAM.h"
typedef struct {
@ -42,8 +43,9 @@ static int initialized=0;
AR_GetSize() == 16,777,216 bytes
bytes type
1. 16,384 DSP reserved
2. 245,760 empty
3. 12,320,768 ROM cache (47*256kb blocks)
2. 49,152 empty
3. 4,128,768 ROM cache (63*64kb blocks)
4. 8,388,608 TLB LUTs
5. 4,194,304 blocks (dynarec)
*/
@ -52,9 +54,9 @@ void ARAM_manager_init(void){
AR_Init(NULL, 0);
max_blocks = (AR_GetSize() - (256*1024) - (4*1024*1024))/BLOCK_SIZE;
max_blocks = (AR_GetSize() - (64*1024) - (4*1024*1024) - (8*1024*1024))/BLOCK_SIZE;
ARAM_blocks = malloc(max_blocks * sizeof(ARAM_block));
int i, addr = 256*1024;
int i, addr = 64*1024;
for(i=0; i<max_blocks; ++i){
ARAM_blocks[i].valid = FALSE;
ARAM_blocks[i].addr = addr;
@ -65,6 +67,8 @@ void ARAM_manager_init(void){
alloced_blocks = 0;
initialized = 1;
ARQ_Reset();
ARQ_Init();
}
void ARAM_manager_deinit(void){

View file

@ -24,7 +24,7 @@
#ifndef ARAM_H
#define ARAM_H
#define BLOCK_SIZE (256*1024)
#define BLOCK_SIZE (64*1024)
void ARAM_manager_init(void);
void ARAM_manager_deinit(void);

View file

@ -1,5 +1,5 @@
/**
* Wii64 - TLB-Cache-hash.c
* Wii64 - TLB-Cache-hash.c (Deprecated)
* Copyright (C) 2007, 2008, 2009 Mike Slegeir
* Copyright (C) 2007, 2008, 2009 emu_kidid
*

View file

@ -1,11 +1,13 @@
/**
* Wii64 - TLB-Cache.c (Deprecated)
* Copyright (C) 2007, 2008, 2009 emu_kidid
* Copyright (C) 2007, 2008, 2009 Mike Slegeir
*
* This is how the TLB LUT should be accessed, this way it won't waste RAM
* This is how the TLB LUT should be accessed; this way it uses ARAM rather than wasting RAM
*
* Wii64 homepage: http://www.emulatemii.com
* email address: tehpola@gmail.com
* email address: emukidid@gmail.com
*
*
* This program is free software; you can redistribute it and/
@ -20,81 +22,138 @@
*
**/
/*
FIXME: The DMA transfers seem to overflow small buffers
*/
#ifdef ARAM_TLBCACHE
#include <ogc/arqueue.h>
#include <gccore.h>
#include <stdlib.h>
#include <malloc.h>
#include <string.h>
#include <stdio.h>
#include "ARAM.h"
#include "TLB-Cache.h"
#include "../gui/DEBUG.h"
static ARQRequest ARQ_request_TLB;
//Selector values passed as 'type' to ARAM_ReadTLBBlock/ARAM_WriteTLBBlock
#define TLB_W_TYPE 0
#define TLB_R_TYPE 1
//Base addresses added to the block offset to form the ARAM address of each LUT
#define TLB_W_CACHE_ADDR 0x400000
#define TLB_R_CACHE_ADDR 0x800000
//Number of LUT entries held in the in-memory tlb_w_block/tlb_r_block windows
#define CACHED_TLB_ENTRIES 1024
//Size of one window in bytes (assumes 4-byte entries - TODO confirm for non-32-bit targets).
//Parenthesized so the macro stays one value inside larger expressions (e.g. x / CACHED_TLB_SIZE).
#define CACHED_TLB_SIZE (CACHED_TLB_ENTRIES * 4)
//TLB LUT W
unsigned long tlb_w_block[CACHED_TLB_ENTRIES] __attribute__((aligned(32))); //last 4kb chunk of ptrs pulled
static u32 tlb_w_dirty = 0;
static u32 tlb_w_last_addr = 0;
//TLB LUT R
unsigned long tlb_r_block[CACHED_TLB_ENTRIES] __attribute__((aligned(32))); //last 4kb chunk of ptrs pulled
static u32 tlb_r_dirty = 0;
static u32 tlb_r_last_addr = 0;
static int* TLB_LUT_r, * TLB_LUT_w;
// We must move 32-byte aligned, 32-byte chunks of the TLB
static unsigned int value[8] __attribute__((aligned(32)));
static ARQRequest ARQ_request;
void TLBCache_init(void){
ARQ_Init();
// FIXME: I'm going to assume that nothings in the ARAM yet
ARAM_block_alloc_contiguous(&TLB_LUT_r, 'T', 4);
ARAM_block_alloc_contiguous(&TLB_LUT_w, 'T', 4);
// Zero out the LUTs
int chunkSize = ARQ_GetChunkSize(), offset = 0;
char* zeroes = memalign(32, chunkSize);
memset(zeroes, 0, chunkSize);
DCFlushRange(zeroes, chunkSize);
while(offset < 4 * 1024 * 1024){
ARQ_PostRequest(&ARQ_request, 0x0, AR_MRAMTOARAM, ARQ_PRIO_HI,
TLB_LUT_r + offset, zeroes, chunkSize);
ARQ_PostRequest(&ARQ_request, 0x0, AR_MRAMTOARAM, ARQ_PRIO_HI,
TLB_LUT_w + offset, zeroes, chunkSize);
offset += chunkSize;
int i = 0;
for(i=0;i<0x100000;i++) {
TLBCache_set_w(i,0);
TLBCache_set_r(i,0);
}
free(zeroes);
}
void TLBCache_deinit(void){
ARAM_block_free_contiguous(&TLB_LUT_r, 4);
ARAM_block_free_contiguous(&TLB_LUT_w, 4);
TLBCache_init();
}
unsigned int inline TLBCache_get_r(unsigned int page){
//printf("TLBCache_get_r(%08x)\n", page);
DCInvalidateRange(value, 32);
ARQ_PostRequest(&ARQ_request, 0x718, AR_ARAMTOMRAM, ARQ_PRIO_LO,
TLB_LUT_r + (page&(~0x7)), value, 32);
return value[page&0x7];
//Returns the read-LUT entry for 'page'.
//Only one CACHED_TLB_ENTRIES-sized window of the LUT is resident (tlb_r_block);
//when 'page' falls outside it, the window is swapped through ARAM first.
//No range check is performed on 'page'.
unsigned int TLBCache_get_r(unsigned int page){
	const unsigned int slot = page % CACHED_TLB_ENTRIES;   //index inside the window
	const int wanted_block = (int)((page - slot) * 4);     //byte offset of the window holding 'page'

	//hit: the resident window already covers this page
	if(wanted_block == tlb_r_last_addr)
		return tlb_r_block[slot];

	//miss: save the resident window if it was modified...
	if(tlb_r_dirty) {
		ARAM_WriteTLBBlock(tlb_r_last_addr, TLB_R_TYPE);
		tlb_r_dirty = 0;
	}
	//...then bring in the window we need and remember where it starts
	ARAM_ReadTLBBlock(wanted_block, TLB_R_TYPE);
	tlb_r_last_addr = wanted_block;

	return tlb_r_block[slot];
}
unsigned int inline TLBCache_get_w(unsigned int page){
//printf("TLBCache_get_w(%08x)\n", page);
DCInvalidateRange(value, 32);
ARQ_PostRequest(&ARQ_request, 0x718, AR_ARAMTOMRAM, ARQ_PRIO_LO,
TLB_LUT_w + (page&(~0x7)), value, 32);
return value[page&0x7];
//Returns the write-LUT entry for 'page'.
//Only one CACHED_TLB_ENTRIES-sized window of the LUT is resident (tlb_w_block);
//when 'page' falls outside it, the window is swapped through ARAM first.
//No range check is performed on 'page'.
unsigned int TLBCache_get_w(unsigned int page){
	const unsigned int slot = page % CACHED_TLB_ENTRIES;   //index inside the window
	const int wanted_block = (int)((page - slot) * 4);     //byte offset of the window holding 'page'

	//hit: the resident window already covers this page
	if(wanted_block == tlb_w_last_addr)
		return tlb_w_block[slot];

	//miss: save the resident window if it was modified...
	if(tlb_w_dirty) {
		ARAM_WriteTLBBlock(tlb_w_last_addr, TLB_W_TYPE);
		tlb_w_dirty = 0;
	}
	//...then bring in the window we need and remember where it starts
	ARAM_ReadTLBBlock(wanted_block, TLB_W_TYPE);
	tlb_w_last_addr = wanted_block;

	return tlb_w_block[slot];
}
void inline TLBCache_set_r(unsigned int page, unsigned int val){
//printf("TLBCache_set_r(%08x, %08x)\n", page, val);
DCInvalidateRange(value, 32);
ARQ_PostRequest(&ARQ_request, 0x718, AR_ARAMTOMRAM, ARQ_PRIO_LO,
TLB_LUT_r + (page&(~0x7)), value, 32);
value[page&0x7] = val;
DCFlushRange(value, 32);
ARQ_PostRequest(&ARQ_request, 0x718, AR_MRAMTOARAM, ARQ_PRIO_LO,
TLB_LUT_r + (page&(~0x7)), value, 32);
//Stores 'val' as the read-LUT entry for 'page' and marks the resident window dirty.
//If 'page' lies outside the resident window, that window is written to ARAM
//(unconditionally - this path does not consult tlb_r_dirty) and the needed
//window is loaded before the store. No range check is performed on 'page'.
void TLBCache_set_r(unsigned int page, unsigned int val){
	const unsigned int slot = page % CACHED_TLB_ENTRIES;   //index inside the window
	const int wanted_block = (int)((page - slot) * 4);     //byte offset of the window holding 'page'
	const int is_miss = (wanted_block != tlb_r_last_addr);

	if(is_miss) {
		ARAM_WriteTLBBlock(tlb_r_last_addr, TLB_R_TYPE);   //push the old window out
		ARAM_ReadTLBBlock(wanted_block, TLB_R_TYPE);       //pull the new window in
		tlb_r_last_addr = wanted_block;
	}

	tlb_r_dirty = 1;              //the window now differs from what ARAM holds
	tlb_r_block[slot] = val;
}
void inline TLBCache_set_w(unsigned int page, unsigned int val){
//printf("TLBCache_set_w(%08x, %08x)\n", page, val);
DCInvalidateRange(value, 32);
ARQ_PostRequest(&ARQ_request, 0x718, AR_ARAMTOMRAM, ARQ_PRIO_LO,
TLB_LUT_w + (page&(~0x7)), value, 32);
value[page&0x7] = val;
DCFlushRange(value, 32);
ARQ_PostRequest(&ARQ_request, 0x718, AR_MRAMTOARAM, ARQ_PRIO_LO,
TLB_LUT_w + (page&(~0x7)), value, 32);
//Stores 'val' as the write-LUT entry for 'page' and marks the resident window dirty.
//If 'page' lies outside the resident window, that window is written to ARAM
//(unconditionally - this path does not consult tlb_w_dirty) and the needed
//window is loaded before the store. No range check is performed on 'page'.
void TLBCache_set_w(unsigned int page, unsigned int val){
	const unsigned int slot = page % CACHED_TLB_ENTRIES;   //index inside the window
	const int wanted_block = (int)((page - slot) * 4);     //byte offset of the window holding 'page'
	const int is_miss = (wanted_block != tlb_w_last_addr);

	if(is_miss) {
		ARAM_WriteTLBBlock(tlb_w_last_addr, TLB_W_TYPE);   //push the old window out
		ARAM_ReadTLBBlock(wanted_block, TLB_W_TYPE);       //pull the new window in
		tlb_w_last_addr = wanted_block;
	}

	tlb_w_dirty = 1;              //the window now differs from what ARAM holds
	tlb_w_block[slot] = val;
}
//Streams the whole write LUT (pages 0..0xFFFFF, in ascending order) to 'f'.
//Each entry is emitted as the raw bytes of an unsigned long; the result of
//gzwrite is not inspected.
void TLBCache_dump_w(gzFile *f) {
	unsigned int page;
	for(page = 0; page < 0x100000; ++page) {
		unsigned long entry = TLBCache_get_w(page);
		gzwrite(f, &entry, sizeof entry);
	}
}
//Streams the whole read LUT (pages 0..0xFFFFF, in ascending order) to 'f'.
//Each entry is emitted as the raw bytes of an unsigned long; the result of
//gzwrite is not inspected.
void TLBCache_dump_r(gzFile *f) {
	unsigned int page;
	for(page = 0; page < 0x100000; ++page) {
		unsigned long entry = TLBCache_get_r(page);
		gzwrite(f, &entry, sizeof entry);
	}
}
//Loads one CACHED_TLB_SIZE-byte window of a TLB LUT from ARAM into its buffer.
//  addr: byte offset of the window inside the selected LUT
//  type: TLB_W_TYPE selects the write LUT / tlb_w_block; any other value
//        selects the read LUT / tlb_r_block
//NOTE(review): the data cache is invalidated after ARQ_PostRequest returns, so this
//relies on the request having finished by then - confirm against libogc.
void ARAM_ReadTLBBlock(u32 addr, int type)
{
	int lut_base;   //where the chosen LUT starts
	int mram_buf;   //address of the buffer that receives the window

	if(type == TLB_W_TYPE) {
		lut_base = TLB_W_CACHE_ADDR;
		mram_buf = (int)&tlb_w_block[0];
	} else {
		lut_base = TLB_R_CACHE_ADDR;
		mram_buf = (int)&tlb_r_block[0];
	}

	ARQ_PostRequest(&ARQ_request_TLB, 0x2EAD, AR_ARAMTOMRAM, ARQ_PRIO_LO,
	                (int)(lut_base + addr), mram_buf, CACHED_TLB_SIZE);
	//drop any cached copy of the buffer so later reads see the transferred data
	DCInvalidateRange((void*)mram_buf, CACHED_TLB_SIZE);
}
//Stores the buffered CACHED_TLB_SIZE-byte window of a TLB LUT back into ARAM.
//  addr: byte offset of the window inside the selected LUT
//  type: TLB_W_TYPE selects the write LUT / tlb_w_block; any other value
//        selects the read LUT / tlb_r_block
void ARAM_WriteTLBBlock(u32 addr, int type)
{
	int lut_base;   //where the chosen LUT starts
	int mram_buf;   //address of the buffer holding the window

	if(type == TLB_W_TYPE) {
		lut_base = TLB_W_CACHE_ADDR;
		mram_buf = (int)&tlb_w_block[0];
	} else {
		lut_base = TLB_R_CACHE_ADDR;
		mram_buf = (int)&tlb_r_block[0];
	}

	//flush the buffer out of the data cache before it is transferred
	DCFlushRange((void*)mram_buf, CACHED_TLB_SIZE);
	ARQ_PostRequest(&ARQ_request_TLB, 0x10AD, AR_MRAMTOARAM, ARQ_PRIO_HI,
	                (int)(lut_base + addr), mram_buf, CACHED_TLB_SIZE);
}
#endif

View file

@ -54,6 +54,9 @@ void inline TLBCache_set_w(unsigned int page, unsigned int val);
void TLBCache_dump_r(gzFile *f);
void TLBCache_dump_w(gzFile *f);
void ARAM_ReadTLBBlock(unsigned int addr, int type);
void ARAM_WriteTLBBlock(unsigned int addr, int type);
#endif
#endif

View file

@ -54,7 +54,7 @@ void LoadingBar_showBar(float percent, const char* string);
#define BLOCK_MASK (BLOCK_SIZE-1)
#define OFFSET_MASK (0xFFFFFFFF-BLOCK_MASK)
#define BLOCK_SHIFT (18) //only change ME and BLOCK_SIZE in gc_memory/aram.h
#define BLOCK_SHIFT (16) //only change ME and BLOCK_SIZE in gc_memory/aram.h
#define MAX_ROMSIZE (64*1024*1024)
#define NUM_BLOCKS (MAX_ROMSIZE/BLOCK_SIZE)
@ -68,14 +68,14 @@ static char readBefore = 0;
#define L1_BLOCK_SIZE (4*1024) //63 * 4kb = ~256kb
#define L1_BLOCK_MASK (L1_BLOCK_SIZE-1)
#define L1_BLOCK_SHIFT (12)
#define L1_NUM_BLOCKS (63) //16368kb / 256kb = 63
#define L1_NUM_BLOCKS (8) //8 * 4kb = 32kb of L1 cache
static u8 L1[L1_NUM_BLOCKS][L1_BLOCK_SIZE];
static int L1tag[L1_NUM_BLOCKS];
static u32 L1LRU[L1_NUM_BLOCKS];
static u32 nextL1LRUValue;
#endif
ARQRequest ARQ_request;
static ARQRequest ARQ_request_ROM;
extern void showLoadProgress(float progress);
extern void pauseAudio(void);
extern void resumeAudio(void);
@ -83,8 +83,6 @@ extern BOOL hasLoadedROM;
void ROMCache_init(fileBrowser_file* file){
readBefore = 0; //de-init byteswapping
ARQ_Reset();
ARQ_Init();
ROM_too_big = (file->size) > (ARAM_block_available_contiguous() * BLOCK_SIZE);
ROM_size = (file->size);
#ifdef USE_ROM_CACHE_L1
@ -119,7 +117,7 @@ static void inline ROMCache_load_block(char* block, int rom_offset){
bytes_read = romFile_readFile(ROM_file, buffer, bytes_to_read);
byte_swap(buffer, bytes_read);
DCFlushRange(buffer, bytes_read);
ARQ_PostRequest(&ARQ_request, 0x10AD, ARQ_MRAMTOARAM, ARQ_PRIO_HI,
ARQ_PostRequest(&ARQ_request_ROM, 0x10AD, ARQ_MRAMTOARAM, ARQ_PRIO_HI,
block + offset, buffer, bytes_read);
offset += bytes_read;
@ -153,7 +151,7 @@ void ARAM_ReadFromBlock(char *block,int startOffset, int bytes, char *dest)
char* buffer = memalign(32,bytes);
ARQ_PostRequest(&ARQ_request, 0x2EAD, AR_ARAMTOMRAM, ARQ_PRIO_LO,
ARQ_PostRequest(&ARQ_request_ROM, 0x2EAD, AR_ARAMTOMRAM, ARQ_PRIO_LO,
block + startOffset, buffer, bytes);
DCInvalidateRange(buffer, bytes);
memcpy(dest, buffer+(originalStartOffset%32), originalBytes);
@ -256,7 +254,7 @@ int ROMCache_load(fileBrowser_file* file){
}
byte_swap((char*)buffer, bytes_read);
DCFlushRange(buffer, bytes_read);
ARQ_PostRequest(&ARQ_request, 0x10AD, AR_MRAMTOARAM, ARQ_PRIO_HI,
ARQ_PostRequest(&ARQ_request_ROM, 0x10AD, AR_MRAMTOARAM, ARQ_PRIO_HI,
block + offset, buffer, bytes_read);
offset += bytes_read;
@ -290,7 +288,7 @@ int ROMCache_load(fileBrowser_file* file){
}
byte_swap((char*)buffer, bytes_read);
DCFlushRange(buffer, bytes_read);
ARQ_PostRequest(&ARQ_request, 0x10AD, AR_MRAMTOARAM, ARQ_PRIO_HI,
ARQ_PostRequest(&ARQ_request_ROM, 0x10AD, AR_MRAMTOARAM, ARQ_PRIO_HI,
ROM + offset, buffer, bytes_read);
offset += bytes_read;

View file

@ -31,12 +31,12 @@
#include "ARAM-blocks.h"
#include "../gui/DEBUG.h"
extern ARQRequest ARQ_request;
static ARQRequest ARQ_request_blocks;
//Base address added to the block offset to form the ARAM address of the block-pointer table
#define BLOCKS_CACHE_ADDR 0xC00000
//Number of PowerPC_block pointers held in the in-memory cached_block window
#define CACHED_BLOCK_ENTRIES 1024
//Size of one window in bytes (assumes 4-byte pointers - TODO confirm for non-32-bit targets).
//Parenthesized so the macro stays one value inside larger expressions (e.g. x / CACHED_BLOCK_SIZE).
#define CACHED_BLOCK_SIZE (CACHED_BLOCK_ENTRIES * 4)
PowerPC_block* cached_block[CACHED_BLOCK_SIZE/sizeof(PowerPC_block*)] __attribute__((aligned(32))); //last 4kb chunk of ptrs pulled
PowerPC_block* cached_block[CACHED_BLOCK_ENTRIES] __attribute__((aligned(32))); //last 4kb chunk of ptrs pulled
static u32 cached_dirty = 0;
static u32 cached_last_addr = 0;
@ -69,7 +69,7 @@ void blocks_set(u32 addr, PowerPC_block* ptr){
//addr == addr of 4kb block of PowerPC_block ptrs to pull out from ARAM
void ARAM_ReadBlock(u32 addr)
{
ARQ_PostRequest(&ARQ_request, 0x2EAD, AR_ARAMTOMRAM, ARQ_PRIO_LO,
ARQ_PostRequest(&ARQ_request_blocks, 0x2EAD, AR_ARAMTOMRAM, ARQ_PRIO_LO,
(int)(BLOCKS_CACHE_ADDR + addr), (int)&cached_block[0], CACHED_BLOCK_SIZE);
DCInvalidateRange((void*)&cached_block, CACHED_BLOCK_SIZE);
}
@ -78,7 +78,7 @@ void ARAM_ReadBlock(u32 addr)
void ARAM_WriteBlock(u32 addr)
{
DCFlushRange((void*)&cached_block, CACHED_BLOCK_SIZE);
ARQ_PostRequest(&ARQ_request, 0x10AD, AR_MRAMTOARAM, ARQ_PRIO_HI,
ARQ_PostRequest(&ARQ_request_blocks, 0x10AD, AR_MRAMTOARAM, ARQ_PRIO_HI,
(int)(BLOCKS_CACHE_ADDR + addr), (int)&cached_block[0], CACHED_BLOCK_SIZE);
}