mirror of
https://github.com/n64dev/cen64.git
synced 2024-05-22 22:08:10 -04:00
8b89df2fdc
Replaced all references to simulation with emulation. Updated copyright year. Updated .gitignore to reduce chances of random files being uploaded to the repo. Added .gitattributes to normalize all text files, and to ignore binary files (which includes the logo and the NEC PDF).
140 lines
2.5 KiB
ArmAsm
140 lines
2.5 KiB
ArmAsm
//
// arch/x86_64/rsp/gcc/vch.s
//
// CEN64: Cycle-Accurate Nintendo 64 Emulator.
// Copyright (C) 2015, Tyler J. Stachecki.
//
// This file is subject to the terms and conditions defined in
// 'LICENSE', which is part of this source code package.
//

.include "rsp/gcc/defs.h"

.text

# RSP_VCH: symbol declaration and platform-specific entry stub.
#
# The code that follows the label operates on 16-bit lanes of %xmm0 and
# %xmm1 and compares against %xmm2, which it expects to hold zero.
# The named registers used by the body (acc_lo, vcc_lo, vcc_hi, vco_lo,
# vco_hi, vce) are not defined in this file; presumably they come from
# rsp/gcc/defs.h -- TODO confirm.
.ifdef __MINGW__
.globl RSP_VCH
.def RSP_VCH; .scl 2; .type 32; .endef
.seh_proc RSP_VCH
.ifndef __VECTORCALL__
RSP_VCH:
  # Fetch both 128-bit operands through the pointers held in %r8 and %r9
  # (movdqa faults unless the addresses are 16-byte aligned), then clear
  # %xmm2 so the body can use it as the zero vector.
  movdqa (%r8), %xmm0
  movdqa (%r9), %xmm1
  pxor %xmm2, %xmm2
.endif
# NOTE(review): with both __MINGW__ and __VECTORCALL__ defined, neither the
# RSP_VCH label nor the loads above are assembled from this file -- confirm
# that configuration is either unsupported or provides the label elsewhere.
.else
# Non-MinGW targets: the operands are used as they arrive in %xmm0, %xmm1
# and %xmm2; nothing is loaded here. Presumably the caller passes them in
# registers and supplies zero in %xmm2 -- TODO confirm against the caller.
.global RSP_VCH
.type RSP_VCH, @function
RSP_VCH:
.endif

.ifdef __AVX__
  # AVX variant. Every instruction uses the VEX encoding so the routine
  # never mixes legacy-SSE and VEX forms.
  #
  # In the comments below, in0/in1 are the values of %xmm0/%xmm1 on entry
  # and %xmm2 is expected to hold zero. The comments assume the named
  # registers do not alias %xmm0-%xmm4 -- TODO confirm in rsp/gcc/defs.h.
  vpxor %xmm0, %xmm1, vco_lo
  vpsraw $0xF, vco_lo, vco_lo              # vco_lo = sign mask: 0xFFFF where in0 and in1 differ in sign
  vpxor %xmm0, vco_lo, %xmm3
  vpsubw vco_lo, %xmm3, %xmm3              # xmm3 = sign ? -in0 : in0 (negate via xor then subtract -1)
  vpsubw %xmm3, %xmm1, %xmm4               # xmm4 = diff = in1 - xmm3
  vpsraw $0xF, %xmm0, %xmm0                # xmm0 = 0xFFFF where in0 < 0
  vpcmpeqw %xmm4, %xmm2, acc_lo            # acc_lo = (diff == 0)

  # vce
  vpcmpeqw vco_lo, %xmm4, vce              # vce = (diff == sign mask)
  vpand vco_lo, vce, vce                   # keep only lanes where the sign mask is set (diff == -1)

  # !eq
  vpor acc_lo, vce, vco_hi                 # vco_hi = (diff == 0) | vce
  vpcmpeqw %xmm2, vco_hi, vco_hi           # invert: 0xFFFF where neither condition held

  # le/ge
  vpcmpgtw %xmm2, %xmm4, %xmm4             # xmm4 = (diff > 0), signed compare
  vpor %xmm4, acc_lo, acc_lo               # acc_lo = (diff >= 0)
  vpblendvb vco_lo, %xmm0, acc_lo, vcc_hi  # vcc_hi = sign ? (in0 < 0) : (diff >= 0)
  vpcmpeqw %xmm2, %xmm4, %xmm4             # xmm4 = !(diff > 0) = (diff <= 0)
  vpblendvb vco_lo, %xmm4, %xmm0, vcc_lo   # vcc_lo = sign ? (diff <= 0) : (in0 < 0)

  # vd
  vpblendvb vco_lo, vcc_lo, vcc_hi, %xmm2  # xmm2 = sign ? vcc_lo : vcc_hi (overwrites the zero vector)
  vpblendvb %xmm2, %xmm3, %xmm1, %xmm0     # xmm0 = xmm2 ? xmm3 : in1
  vmovdqa %xmm0, acc_lo                    # result is left in both %xmm0 and acc_lo
  retq

.elseif __SSE4_1__ == 1
  # SSE4.1 variant. The non-VEX pblendvb takes its mask implicitly from
  # %xmm0, so the sign mask is built in %xmm0 and the final select mask is
  # moved into %xmm0 before the last blend.
  #
  # In the comments below, in0/in1 are the values of %xmm0/%xmm1 on entry
  # and %xmm2 is expected to hold zero. The comments assume the named
  # registers do not alias %xmm0-%xmm4 -- TODO confirm in rsp/gcc/defs.h.
  movdqa %xmm1, acc_lo                     # acc_lo = in1
  movdqa %xmm0, vcc_lo                     # vcc_lo = in0
  movdqa %xmm0, %xmm3                      # xmm3 = in0
  pxor %xmm1, %xmm0
  psraw $0xF, %xmm0                        # xmm0 = sign mask: 0xFFFF where in0 and in1 differ in sign
  pxor %xmm0, %xmm3
  psubw %xmm0, %xmm3                       # xmm3 = sign ? -in0 : in0 (negate via xor then subtract -1)
  psubw %xmm3, %xmm1                       # xmm1 = diff = in1 - xmm3
  pxor %xmm4, %xmm4                        # xmm4 = 0
  psraw $0xF, vcc_lo                       # vcc_lo = 0xFFFF where in0 < 0
  pcmpeqw %xmm1, %xmm4                     # xmm4 = (diff == 0)

  # vce
  movdqa %xmm0, vce
  pcmpeqw %xmm1, vce                       # vce = (diff == sign mask)
  pand %xmm1, vce                          # matching lanes keep diff's value, so only diff == -1 survives

  # !eq
  movdqa vce, vco_hi
  por %xmm4, vco_hi                        # vco_hi = vce | (diff == 0)
  pcmpeqw %xmm2, vco_hi                    # invert: 0xFFFF where neither condition held

  # le/ge
  pcmpgtw %xmm2, %xmm1                     # xmm1 = (diff > 0), signed compare
  por %xmm1, %xmm4                         # xmm4 = (diff >= 0)
  movdqa %xmm4, vcc_hi
  pblendvb %xmm0, vcc_lo, vcc_hi           # vcc_hi = sign ? (in0 < 0) : (diff >= 0)
  pcmpeqw %xmm2, %xmm1                     # xmm1 = !(diff > 0) = (diff <= 0)
  pblendvb %xmm0, %xmm1, vcc_lo            # vcc_lo = sign ? (diff <= 0) : (in0 < 0)

  # vd
  movdqa %xmm0, vco_lo                     # save the sign mask before %xmm0 is reused
  pblendvb %xmm0, vcc_lo, %xmm4            # xmm4 = sign ? vcc_lo : (diff >= 0)
  movdqa %xmm4, %xmm0                      # select mask becomes the implicit blend mask
  pblendvb %xmm0, %xmm3, acc_lo            # acc_lo = mask ? xmm3 : in1
  movdqa acc_lo, %xmm0                     # result is left in both acc_lo and %xmm0
  retq

.else
  # Fallback variant without blend instructions: selections are done with
  # and/add/sub arithmetic on all-ones / all-zeros lane masks.
  #
  # In the comments below, in0/in1 are the values of %xmm0/%xmm1 on entry
  # and %xmm2 is expected to hold zero. The comments assume the named
  # registers do not alias %xmm0-%xmm3 -- TODO confirm in rsp/gcc/defs.h.
  # Independent dependency chains are interleaved; each comment describes
  # the destination register of its own line.
  movdqa %xmm0, vco_lo
  movdqa %xmm1, vce                        # vce = in1
  pxor %xmm1, vco_lo
  movdqa %xmm1, vco_hi                     # vco_hi = in1
  psraw $0xF, vco_lo                       # vco_lo = sign mask: 0xFFFF where in0 and in1 differ in sign
  movdqa %xmm0, acc_lo
  movdqa %xmm1, vcc_lo                     # vcc_lo = in1
  pxor vco_lo, acc_lo                      # acc_lo = in0 ^ sign
  movdqa %xmm1, vcc_hi                     # vcc_hi = in1
  pcmpeqw acc_lo, vce                      # vce = (in1 == (in0 ^ sign))
  pand vco_lo, vcc_lo                      # vcc_lo = sign ? in1 : 0
  por vco_lo, vcc_hi                       # vcc_hi = sign ? -1 : in1
  psubw vco_lo, acc_lo                     # acc_lo = sign ? -in0 : in0
  pand vco_lo, vce                         # vce = sign && (in1 == ~in0)
  paddw %xmm0, vcc_lo                      # vcc_lo = in0 + (sign ? in1 : 0)
  pcmpeqw acc_lo, vco_hi                   # vco_hi = (in1 == acc_lo)
  pminsw %xmm0, vcc_hi                     # vcc_hi = signed min(vcc_hi, in0)
  paddsw vco_lo, vcc_lo                    # saturating add of the mask: subtracts 1 where sign is set
  por vce, vco_hi                          # vco_hi = (in1 == acc_lo) | vce
  psraw $0xF, vcc_lo                       # vcc_lo = 0xFFFF where the sum above is negative
  pcmpeqw %xmm0, vcc_hi                    # vcc_hi = 0xFFFF where in0 was the minimum
  movdqa vcc_lo, %xmm3
  pcmpeqw %xmm2, vco_hi                    # invert: 0xFFFF where neither condition held
  psubw vcc_hi, %xmm3
  psubw %xmm1, acc_lo                      # acc_lo = (sign ? -in0 : in0) - in1
  pand vco_lo, %xmm3
  paddw vcc_hi, %xmm3                      # xmm3 = sign ? vcc_lo : vcc_hi (arithmetic select)
  pand %xmm3, acc_lo
  paddw %xmm1, acc_lo                      # acc_lo = xmm3 ? (sign ? -in0 : in0) : in1
  movdqa acc_lo, %xmm0                     # result is left in both acc_lo and %xmm0
  retq
.endif

# Close the function: end the SEH procedure region opened by .seh_proc on
# MinGW, otherwise record the symbol size as the distance from the RSP_VCH
# label to this point.
.ifdef __MINGW__
.seh_endproc
.else
.size RSP_VCH,.-RSP_VCH
.endif