Committed new interpreter gen.

This commit is contained in:
Cody Brocious 2012-06-21 08:43:54 -04:00
parent be4fdedaa1
commit 5c3ba53a4c
10 changed files with 588 additions and 20 deletions

View file

@ -1,2 +1,8 @@
#include <stdio.h>
#include <stdlib.h>
// Project-wide spelling of the null pointer constant.
#define null NULL
// Diagnostic helpers. Each expands to a single statement (do { } while(0)) so
// that `if(cond) WARN("..."); else ...` parses as intended; invoke them with a
// trailing semicolon.
// WARN: print msg followed by a newline on stdout and continue.
#define WARN(msg) do { printf("%s\n", (msg)); } while(0)
// ASSERT: if comp evaluates to false, print msg and terminate with exit status 1.
#define ASSERT(comp, msg) do { if(!(comp)) { printf("%s\n", (msg)); exit(1); } } while(0)
// BAIL: unconditionally print msg and terminate with exit status 1.
#define BAIL(msg) do { printf("%s\n", (msg)); exit(1); } while(0)

View file

@ -1,18 +1,20 @@
#include <stdint.h>
// Translates a byte offset inside emulated memory into a host pointer.
// Relies on a `MemoryBase` pointer being visible at the expansion site.
// The whole expansion is parenthesized so it behaves as one primary expression.
#define EMU_ADDR(off) ((void *) (((uint8_t *) MemoryBase) + (off)))
class CPU {
public:
CPU(void *base) : MemoryBase(base),
Eip(0),
Eax(0), Ebx(0), Ecx(0), Edx(0),
Ebp(0), Esp(0), Ebi(0), Edi(0) {}
Ebp(0), Esp(0), Edi(0), Esi(0) {}
virtual ~CPU() {}
virtual void Run() {}
uint32_t Eax, Ebx, Ecx, Edx,
Ebp, Esp, Ebi, Edi;
Ebp, Esp, Edi, Esi;
uint32_t Eip;
void *MemoryBase;

View file

@ -0,0 +1,90 @@
// Implemented instruction bodies. Arguments are parenthesized so that an
// expression argument (e.g. `x | y`) is evaluated as a unit.
// INSN_ADD: a = a + b. `a` must be an assignable expression.
#define INSN_ADD(a, b) { (a) = (a) + (b); }
// INSN_JMP: advance the `eip` variable visible at the expansion site by `a`.
#define INSN_JMP(a) { eip += (a); }
// Placeholder instruction bodies. Each macro ignores its operands and only
// prints "STUB Instruction <NAME>" through WARN. The parameter count of each
// macro matches the number of operands passed at the call site.
#define INSN_AAA() { WARN("STUB Instruction AAA"); }
#define INSN_AAD(a) { WARN("STUB Instruction AAD"); }
#define INSN_AAM(a) { WARN("STUB Instruction AAM"); }
#define INSN_AAS() { WARN("STUB Instruction AAS"); }
#define INSN_ADC(a, b) { WARN("STUB Instruction ADC"); }
#define INSN_AND(a, b) { WARN("STUB Instruction AND"); }
#define INSN_ARPL(a, b) { WARN("STUB Instruction ARPL"); }
#define INSN_BOUND(a, b) { WARN("STUB Instruction BOUND"); }
#define INSN_CALL(a) { WARN("STUB Instruction CALL"); }
#define INSN_CDQ() { WARN("STUB Instruction CDQ"); }
#define INSN_CLC() { WARN("STUB Instruction CLC"); }
#define INSN_CLD() { WARN("STUB Instruction CLD"); }
#define INSN_CLI() { WARN("STUB Instruction CLI"); }
#define INSN_CMC() { WARN("STUB Instruction CMC"); }
#define INSN_CMP(a, b) { WARN("STUB Instruction CMP"); }
#define INSN_CMPS(a, b) { WARN("STUB Instruction CMPS"); }
#define INSN_CWDE() { WARN("STUB Instruction CWDE"); }
#define INSN_DAA() { WARN("STUB Instruction DAA"); }
#define INSN_DAS() { WARN("STUB Instruction DAS"); }
#define INSN_DEC(a) { WARN("STUB Instruction DEC"); }
#define INSN_ENTER(a, b) { WARN("STUB Instruction ENTER"); }
#define INSN_HLT() { WARN("STUB Instruction HLT"); }
#define INSN_IMUL(a, b, c) { WARN("STUB Instruction IMUL"); }
#define INSN_IN(a, b) { WARN("STUB Instruction IN"); }
#define INSN_INC(a) { WARN("STUB Instruction INC"); }
#define INSN_INS(a, b) { WARN("STUB Instruction INS"); }
#define INSN_INT(a) { WARN("STUB Instruction INT"); }
#define INSN_INT1() { WARN("STUB Instruction INT1"); }
#define INSN_INT3() { WARN("STUB Instruction INT3"); }
#define INSN_INTO() { WARN("STUB Instruction INTO"); }
#define INSN_IRET() { WARN("STUB Instruction IRET"); }
#define INSN_JB(a) { WARN("STUB Instruction JB"); }
#define INSN_JBE(a) { WARN("STUB Instruction JBE"); }
#define INSN_JCXZ(a) { WARN("STUB Instruction JCXZ"); }
#define INSN_JL(a) { WARN("STUB Instruction JL"); }
#define INSN_JLE(a) { WARN("STUB Instruction JLE"); }
#define INSN_JNB(a) { WARN("STUB Instruction JNB"); }
#define INSN_JNBE(a) { WARN("STUB Instruction JNBE"); }
#define INSN_JNL(a) { WARN("STUB Instruction JNL"); }
#define INSN_JNLE(a) { WARN("STUB Instruction JNLE"); }
#define INSN_JNO(a) { WARN("STUB Instruction JNO"); }
#define INSN_JNP(a) { WARN("STUB Instruction JNP"); }
#define INSN_JNS(a) { WARN("STUB Instruction JNS"); }
#define INSN_JNZ(a) { WARN("STUB Instruction JNZ"); }
#define INSN_JO(a) { WARN("STUB Instruction JO"); }
#define INSN_JP(a) { WARN("STUB Instruction JP"); }
#define INSN_JS(a) { WARN("STUB Instruction JS"); }
#define INSN_JZ(a) { WARN("STUB Instruction JZ"); }
#define INSN_LAHF() { WARN("STUB Instruction LAHF"); }
#define INSN_LDS(a, b) { WARN("STUB Instruction LDS"); }
#define INSN_LEA(a, b) { WARN("STUB Instruction LEA"); }
#define INSN_LEAVE() { WARN("STUB Instruction LEAVE"); }
#define INSN_LES(a, b) { WARN("STUB Instruction LES"); }
#define INSN_LODS(a, b) { WARN("STUB Instruction LODS"); }
#define INSN_LOOP(a) { WARN("STUB Instruction LOOP"); }
#define INSN_LOOPNZ(a) { WARN("STUB Instruction LOOPNZ"); }
#define INSN_LOOPZ(a) { WARN("STUB Instruction LOOPZ"); }
#define INSN_MOV(a, b) { WARN("STUB Instruction MOV"); }
#define INSN_MOVS(a, b) { WARN("STUB Instruction MOVS"); }
#define INSN_NOP() { WARN("STUB Instruction NOP"); }
#define INSN_OR(a, b) { WARN("STUB Instruction OR"); }
#define INSN_OUT(a, b) { WARN("STUB Instruction OUT"); }
#define INSN_OUTS(a, b) { WARN("STUB Instruction OUTS"); }
#define INSN_POP(a) { WARN("STUB Instruction POP"); }
#define INSN_POPA() { WARN("STUB Instruction POPA"); }
#define INSN_POPF() { WARN("STUB Instruction POPF"); }
#define INSN_PUSH(a) { WARN("STUB Instruction PUSH"); }
#define INSN_PUSHA() { WARN("STUB Instruction PUSHA"); }
#define INSN_PUSHF() { WARN("STUB Instruction PUSHF"); }
#define INSN_RETfar_off(a) { WARN("STUB Instruction RETfar_off"); }
#define INSN_RETfar() { WARN("STUB Instruction RETfar"); }
#define INSN_RETnear_off(a) { WARN("STUB Instruction RETnear_off"); }
#define INSN_RETnear() { WARN("STUB Instruction RETnear"); }
#define INSN_SAHF() { WARN("STUB Instruction SAHF"); }
#define INSN_SALC() { WARN("STUB Instruction SALC"); }
#define INSN_SBB(a, b) { WARN("STUB Instruction SBB"); }
#define INSN_SCAS(a, b) { WARN("STUB Instruction SCAS"); }
#define INSN_STC() { WARN("STUB Instruction STC"); }
#define INSN_STD() { WARN("STUB Instruction STD"); }
#define INSN_STI() { WARN("STUB Instruction STI"); }
#define INSN_STOS(a, b) { WARN("STUB Instruction STOS"); }
#define INSN_SUB(a, b) { WARN("STUB Instruction SUB"); }
#define INSN_TEST(a, b) { WARN("STUB Instruction TEST"); }
#define INSN_WAIT() { WARN("STUB Instruction WAIT"); }
#define INSN_XCHG(a, b) { WARN("STUB Instruction XCHG"); }
#define INSN_XLAT() { WARN("STUB Instruction XLAT"); }
#define INSN_XOR(a, b) { WARN("STUB Instruction XOR"); }

View file

@ -1,4 +1,5 @@
#include "Interpreter.hpp"
#include "Instructions.hpp"
void Interpreter::Run() {
while(true) {
@ -25,25 +26,10 @@ void Interpreter::Run() {
uint8_t op = *eip++;
Eip++;
printf("Op %02x\n", op);
printf("Eax %08x\n", Eax);
// Instructions
switch(op) {
case 0x05: {// ADD rAX, Iz
int32_t val;
if(opsize) { // 16-bit
val = *(int16_t *) eip;
eip += 2;
} else {
val = *(int32_t *) eip;
eip += 4;
}
Eax += val;
printf("%08x\n", Eax);
break;
}
case 0xEB: {// JMP Ib
eip += (int8_t) *eip++;
break;
}
#include "Interpreter_inst.cgen"
default: {
printf("Undefined opcode %02x @ %08X\n", op, Eip);
exit(1);
@ -52,3 +38,32 @@ void Interpreter::Run() {
Eip = (uint32_t) (eip - (uint8_t *) MemoryBase);
}
}
// Resolves a ModRM r/m operand to a host pointer.
// Not implemented yet: the arguments are ignored and a null pointer is
// returned for every call.
void *Interpreter::RmAddr(uint8_t mod, uint8_t rm, int size) {
	void *operand = null;
	return operand;
}
// Returns a host pointer to the storage backing general-purpose register `reg`.
//
// reg:  register index 0..7, mapped as 0=Eax, 1=Ecx, 2=Edx, 3=Ebx, 4=Esp,
//       5=Ebp, 6=Esi, 7=Edi.
// size: operand width in bytes. For size == 1, indices 4..7 name the high-byte
//       registers (AH, CH, DH, BH), i.e. the second byte of Eax, Ecx, Edx, Ebx.
//
// The returned pointer addresses the first byte of the 32-bit register (or the
// second byte for a high-byte register).
// NOTE(review): the byte-offset aliasing assumes a little-endian host -- confirm.
// An index outside 0..7 is reported and terminates the process, instead of
// returning an uninitialized pointer.
void *Interpreter::Reg(int reg, int size) {
	size_t off = 0;
	// AH is index 4, so the high-byte range starts at 4 (not 5).
	if(size == 1 && reg >= 4) {
		reg -= 4;
		off = 1;
	}
	uint32_t *regaddr = 0;
	switch(reg) {
		case 0: regaddr = &Eax; break;
		case 1: regaddr = &Ecx; break;
		case 2: regaddr = &Edx; break;
		case 3: regaddr = &Ebx; break;
		case 4: regaddr = &Esp; break;
		case 5: regaddr = &Ebp; break;
		case 6: regaddr = &Esi; break;
		case 7: regaddr = &Edi; break;
		default: {
			printf("Invalid register index %d (size %d)\n", reg, size);
			exit(1);
		}
	}
	return ((uint8_t *) regaddr) + off;
}
// Returns a pointer to the storage of segment register `reg`.
// Not implemented yet: the index is ignored and a null pointer is returned.
uint16_t *Interpreter::SegReg(int reg) {
	uint16_t *segment = null;
	return segment;
}

View file

@ -6,4 +6,8 @@ public:
// Constructs an interpreter; `base` is forwarded unchanged to the CPU
// base-class constructor.
Interpreter(void *base) : CPU(base) {
}
// Runs the interpreter's instruction loop.
void Run();
// Resolves a ModRM r/m operand (mod and rm fields, operand width `size` in
// bytes) to a pointer. The current definition is a stub that returns null.
void *RmAddr(uint8_t mod, uint8_t rm, int size);
// Returns a pointer to the storage of general-purpose register `reg` for an
// operand of `size` bytes.
void *Reg(int reg, int size);
// Returns a pointer to segment register `reg`. The current definition is a
// stub that returns null.
uint16_t *SegReg(int reg);
};

View file

@ -3,7 +3,7 @@
int main(void) {
uint8_t memory[11];
memory[0] = 0x05; *((uint32_t *) (memory+1)) = 0x00000001;
memory[5] = 0x66; memory[6] = 0x05; *((uint32_t *) (memory+7)) = 0x00000002;
memory[5] = 0x66; memory[6] = 0x05; *((uint16_t *) (memory+7)) = 0x0007;
memory[9] = 0xEB; memory[10] = 0xF5;
Interpreter interpreter((void *) memory);
interpreter.Run();

View file

@ -0,0 +1,167 @@
from opcodes import opcodes
import sys
# One-byte opcodes are keyed by a plain int; multi-byte opcodes are keyed by a
# tuple and are not handled by this generator.
onebyte = sorted(key for key in opcodes if not isinstance(key, tuple))

# Destination for the generated `case` bodies of the interpreter's switch.
fp = open('Core/CPU/Interpreter/Interpreter_inst.cgen', 'w')

# Register name -> (register index, width in bytes). The 32-bit names are also
# registered under their 'R'-prefixed spelling (EAX -> RAX) with the same
# 4-byte width.
registers = {}
for width, names in (
        (1, 'AL CL DL BL AH CH DH BH'),
        (2, 'AX CX DX BX SP BP SI DI'),
        (4, 'EAX ECX EDX EBX ESP EBP ESI EDI')):
    for index, name in enumerate(names.split(' ')):
        registers[name] = (index, width)
        if width == 4:
            registers[name.replace('E', 'R')] = (index, width)

# Segment register name -> index handed to SegReg() in the generated code.
segmentRegisters = {'CS': 0, 'DS': 1, 'SS': 2, 'ES': 3, 'FS': 4, 'GS': 5}

# Register width in bytes -> C type used for the generated pointer.
size2type = {1: 'uint8_t', 2: 'uint16_t', 4: 'uint32_t'}
def emit(line):
    """Write one newline-terminated line of generated C++ to the .cgen file."""
    fp.write(line + '\n')


def unsupported(operand):
    """Abort generation for an operand code this script cannot translate.

    Raises ValueError (rather than using `assert`, which `python -O` strips).
    """
    raise ValueError('Unsupported operand encoding: %r' % (operand, ))


# Operand size suffix -> (C type, width in bytes).
suffixTypes = {'b': ('int8_t', 1), 'w': ('int16_t', 2), 'd': ('int32_t', 4)}

# Emit one `case 0xNN: { ... }` per one-byte opcode. Each case decodes the
# operands into locals named _0, _1, ... and then invokes INSN_<mnemonic> on
# them.
for opcd in onebyte:
    insn, operands = opcodes[opcd]
    emit('case 0x%02X: { // %s' % (opcd, '%s %s' % (insn, ','.join(operands))))

    # hasModrm: some operand is decoded from a ModRM byte.
    # opsizeDependent: some operand's width follows the operand-size setting
    # (size suffix 'v' or 'z').
    hasModrm = False
    opsizeDependent = False
    for operand in operands:
        if operand[0] in 'MGERPQNVWU' and (len(operand) == 1 or operand[1] not in 'S'):
            hasModrm = True
        if len(operand) > 1 and operand[1] in 'vz':
            opsizeDependent = True
    if hasModrm:
        emit('\tuint8_t mod = *eip >> 6, reg = (*eip >> 3) & 0x7, rm = *eip & 0x7;')
        emit('\teip++;')

    def generateSet(opsize):
        """Emit operand decoding plus the INSN_ call for one operand size.

        opsize is True for the 16-bit variant and False for the 32-bit one.
        """
        params = []

        def addr(ctype, expr):
            # Operand passed by reference: declare a pointer, pass (*_N).
            emit('\t\t%s *_%i = (%s *) %s;' % (ctype, i, ctype, expr))
            params.append('(*_%i)' % i)

        def value(ctype, expr):
            # Operand passed by value: declare a local, pass _N.
            emit('\t\t%s _%i = (%s) %s;' % (ctype, i, ctype, expr))
            params.append('_%i' % i)

        def sizedType(suffix):
            if suffix not in suffixTypes:
                unsupported(operand)
            return suffixTypes[suffix]

        for i, operand in enumerate(operands):
            if len(operand) == 2:
                a, b = operand
                if opsizeDependent and b in 'vz':
                    b = 'w' if opsize else 'd'
            else:
                a = b = None

            if operand.upper() in registers:
                reg, size = registers[operand.upper()]
                addr(size2type[size], 'Reg(%i, %i)' % (reg, size))
            elif operand in segmentRegisters:
                addr('int16_t', 'SegReg(%i)' % segmentRegisters[operand])
            elif a == 'E':
                ctype, width = sizedType(b)
                addr(ctype, 'RmAddr(mod, rm, %i)' % width)
            elif a == 'G':
                ctype, width = sizedType(b)
                addr(ctype, 'Reg(reg, %i)' % width)
            elif a in ('I', 'J'):
                ctype, width = sizedType(b)
                if width == 1:
                    value(ctype, '*eip++')
                else:
                    value(ctype, '*(%s *) eip' % ctype)
                    emit('\t\teip += %i;' % width)
            elif a == 'M':
                if b == 'a':
                    # mod is a 2-bit field; 3 (binary 11) selects a register
                    # operand, which M operands do not allow.
                    emit('\t\tASSERT(mod != 3, "M instructions require memory operands");')
                    if opsize:
                        addr('int16_t', 'RmAddr(mod, rm, 2)')
                    else:
                        addr('int32_t', 'RmAddr(mod, rm, 4)')
                elif b == 'p':
                    emit('\t\tBAIL("Mp instructions unsupported.");')
                    addr('void', '0')
                else:
                    unsupported(operand)
            elif a == 'X':
                ctype, width = sizedType(b)
                addr(ctype, 'EMU_ADDR(Esi)')
            elif a == 'Y':
                emit('\t\tWARN("Y operands use data segment.");')
                ctype, width = sizedType(b)
                addr(ctype, 'EMU_ADDR(Edi)')
            elif a == 'A':
                emit('\t\tWARN("Address and no ADSIZE: support.");')
                if b != 'p':
                    unsupported(operand)
                addr('int32_t', 'eip')
                # A 16:32 far pointer occupies 6 bytes in the instruction
                # stream (4-byte offset followed by a 2-byte selector).
                emit('\t\teip += 6;')
            elif a == 'O':
                emit('\t\tWARN("Address and no ADSIZE: support.");')
                ctype, width = sizedType(b)
                addr(ctype, 'EMU_ADDR(*(int32_t *) eip)')
                emit('\t\teip += 4;')
            else:
                unsupported(operand)
        emit('\t\tINSN_%s(%s)' % (insn, ', '.join(params)))

    if opsizeDependent:
        emit('\tif(opsize) {')
        generateSet(True)
        emit('\t} else {')
        generateSet(False)
        emit('\t}')
    else:
        emit('\t{')
        generateSet(False)
        emit('\t}')
    emit('\tbreak;')
    emit('}')

# All case bodies are written; flush and release the output file.
fp.close()
# Print a stub INSN_ macro for every distinct (mnemonic, operand count) pair on
# stdout. Pairs are collected in first-seen order and then ordered by mnemonic.
stubs = []
for mnemonic, args in opcodes.values():
    entry = (mnemonic, len(args))
    if entry in stubs:
        continue
    stubs.append(entry)
stubs.sort(key=lambda entry: entry[0])

for mnemonic, arity in stubs:
    # Macro parameters are named a, b, c, ... in order.
    letters = [chr(ord('a') + n) for n in range(arity)]
    macro = '#define INSN_%s(%s) { WARN("STUB Instruction %s"); }' % (mnemonic, ', '.join(letters), mnemonic)
    sys.stdout.write(macro + '\n')

View file

@ -0,0 +1,23 @@
# Opcode table loaded from opcodes.txt.
# Key:   the opcode byte as an int for one-byte opcodes, or a tuple of ints for
#        multi-byte opcodes.
# Value: (mnemonic, list of operand codes).
# Each input line is "HEX MNEMONIC [OPERAND,OPERAND,...]"; entries whose
# mnemonic starts with '-' are placeholders and are skipped.
opcodes = {}
with open('Core/Generators/opcodes.txt') as fp:
    for lineno, line in enumerate(fp, 1):
        fields = line.split()
        if not fields:
            # Tolerate blank lines instead of failing on the unpack.
            continue
        if len(fields) < 2 or len(fields) > 3:
            raise ValueError('opcodes.txt:%i: expected "HEX INSN [OPERANDS]", got %r' % (lineno, line))
        hexcode, insn = fields[0], fields[1]
        if insn[0] == '-':
            continue
        operands = fields[2].split(',') if len(fields) == 3 else []
        # Two hex digits per opcode byte.
        ops = tuple(int(hexcode[i:i+2], 16) for i in range(0, len(hexcode), 2))
        if len(ops) == 1:
            ops, = ops
        opcodes[ops] = insn, operands
__all__ = ('opcodes', )

256
Core/Generators/opcodes.txt Normal file
View file

@ -0,0 +1,256 @@
00 ADD Eb,Gb
01 ADD Ev,Gv
02 ADD Gb,Eb
03 ADD Gv,Ev
04 ADD AL,Ib
05 ADD rAX,Iz
06 PUSH ES
07 POP ES
10 ADC Eb,Gb
11 ADC Ev,Gv
12 ADC Gb,Eb
13 ADC Gv,Ev
14 ADC AL,Ib
15 ADC rAX,Iz
16 PUSH SS
17 POP SS
20 AND Eb,Gb
21 AND Ev,Gv
22 AND Gb,Eb
23 AND Gv,Ev
24 AND AL,Ib
25 AND rAx,Iz
26 -
27 DAA
30 XOR Eb,Gb
31 XOR Ev,Gv
32 XOR Gb,Eb
33 XOR Gv,Ev
34 XOR AL,Ib
35 XOR rAX,Iz
36 -
37 AAA
40 INC eAX
41 INC eCX
42 INC eDX
43 INC eBX
44 INC eSP
45 INC eBP
46 INC eSI
47 INC eDI
50 PUSH rAX
51 PUSH rCX
52 PUSH rDX
53 PUSH rBX
54 PUSH rSP
55 PUSH rBP
56 PUSH rSI
57 PUSH rDI
60 PUSHA
61 POPA
62 BOUND Gv,Ma
63 ARPL Ew,Gw
64 -
65 -
66 -
67 -
70 JO Jb
71 JNO Jb
72 JB Jb
73 JNB Jb
74 JZ Jb
75 JNZ Jb
76 JBE Jb
77 JNBE Jb
80 -group
81 -group
82 -group
83 -group
84 TEST Eb,Gb
85 TEST Ev,Gv
86 XCHG Eb,Gb
87 XCHG Ev,Gv
90 NOP
91 XCHG rCX,rAX
92 XCHG rDX,rAX
93 XCHG rBX,rAX
94 XCHG rSP,rAX
95 XCHG rBP,rAX
96 XCHG rSI,rAX
97 XCHG rDI,rAX
A0 MOV AL,Ob
A1 MOV rAX,Ov
A2 MOV Ob,AL
A3 MOV Ov,rAX
A4 MOVS Yb,Xb
A5 MOVS Yv,Xv
A6 CMPS Yb,Xb
A7 CMPS Yv,Xv
B0 MOV AL,Ib
B1 MOV CL,Ib
B2 MOV DL,Ib
B3 MOV BL,Ib
B4 MOV AH,Ib
B5 MOV CH,Ib
B6 MOV DH,Ib
B7 MOV BH,Ib
C0 -group
C1 -group
C2 RETnear_off Iw
C3 RETnear
C4 LES Gv,Mp
C5 LDS Gv,Mp
C6 -group
C7 -group
d0 -group
d1 -group
d2 -group
d3 -group
d4 AAM Ib
d5 AAD Ib
d6 SALC
d7 XLAT
e0 LOOPNZ Jb
e1 LOOPZ Jb
e2 LOOP Jb
e3 JCXZ Jb
e4 IN AL,Ib
e5 IN eAX,Ib
e6 OUT Ib,AL
e7 OUT Ib,eAX
f0 -
f1 INT1
f2 -
f3 -
f4 HLT
f5 CMC
f6 -group
f7 -group
08 OR Eb,Gb
09 OR Ev,Gv
0A OR Gb,Eb
0B OR Gv,Ev
0C OR AL,Ib
0D OR rAX,Iz
0E PUSH CS
0F -group
18 SBB Eb,Gb
19 SBB Ev,Gv
1A SBB Gb,Eb
1B SBB Gv,Ev
1C SBB AL,Ib
1D SBB rAX,Iz
1E PUSH DS
1F POP DS
28 SUB Eb,Gb
29 SUB Ev,Gv
2A SUB Gb,Eb
2B SUB Gv,Ev
2C SUB AL,Ib
2D SUB rAX,Iz
2E -
2F DAS
38 CMP Eb,Gb
39 CMP Ev,Gv
3A CMP Gb,Eb
3B CMP Gv,Ev
3C CMP AL,Ib
3D CMP rAX,Iz
3E -
3F AAS
48 DEC eAX
49 DEC eCX
4A DEC eDX
4B DEC eBX
4C DEC eSP
4D DEC eBP
4E DEC eSI
4F DEC eDI
58 POP rAX
59 POP rCX
5A POP rDX
5B POP rBX
5C POP rSP
5D POP rBP
5E POP rSI
5F POP rDI
68 PUSH Iz
69 IMUL Gv,Ev,Iz
6A PUSH Ib
6B IMUL Gv,Ev,Ib
6C INS Yb,DX
6D INS Yz,DX
6E OUTS DX,Xb
6F OUTS DX,Xz
78 JS Jb
79 JNS Jb
7A JP Jb
7B JNP Jb
7C JL Jb
7D JNL Jb
7E JLE Jb
7F JNLE Jb
88 MOV Eb,Gb
89 MOV Ev,Gv
8A MOV Gb,Eb
8B MOV Gv,Ev
8C --unknown
8D LEA Gv,Ma
8E --unknown
8F -group
98 CWDE
99 CDQ
9A CALL Ap
9B WAIT
9C PUSHF
9D POPF
9E SAHF
9F LAHF
A8 TEST AL,Ib
A9 TEST rAX,Iz
AA STOS Yb,AL
AB STOS Yv,rAX
AC LODS AL,Xb
AD LODS rAX,Xv
AE SCAS Yb,AL
AF SCAS Yv,rAX
B8 MOV rAX,Iv
B9 MOV rCX,Iv
BA MOV rDX,Iv
BB MOV rBX,Iv
BC MOV rSP,Iv
BD MOV rBP,Iv
BE MOV rSI,Iv
BF MOV rDI,Iv
C8 ENTER Iw,Ib
C9 LEAVE
CA RETfar_off Iw
CB RETfar
CC INT3
CD INT Ib
CE INTO
CF IRET
D8 -group
D9 -group
DA -group
DB -group
DC -group
DD -group
DE -group
DF -group
E8 CALL Jz
E9 JMP Jz
EA JMP Ap
EB JMP Jb
EC IN AL,DX
ED IN eAX,DX
EE OUT DX,AL
EF OUT DX,eAX
F8 CLC
F9 STC
FA CLI
FB STI
FC CLD
FD STD
FE -group
FF -group

View file

@ -1 +1,6 @@
env = Environment()
# Regenerate the interpreter's opcode switch body. The generator script writes
# the .cgen file itself; what it prints on stdout is discarded.
cgen = env.Command('Core/CPU/Interpreter/Interpreter_inst.cgen', 'Core/Generators/InterpreterGen.py', 'python $SOURCE > /dev/null')
# The generator imports the opcodes module, which reads opcodes.txt, so a
# change to either one must trigger regeneration as well.
Depends(cgen, ['Core/Generators/opcodes.txt', 'Core/Generators/opcodes.py'])
Program('interpreter', Glob('Core/CPU/Interpreter/*.cpp'))