diff options
Diffstat (limited to 'src/core/arm')
-rw-r--r-- | src/core/arm/arm_interface.h | 2 | ||||
-rw-r--r-- | src/core/arm/disassembler/load_symbol_map.cpp | 2 | ||||
-rw-r--r-- | src/core/arm/disassembler/load_symbol_map.h | 2 | ||||
-rw-r--r-- | src/core/arm/dyncom/arm_dyncom.cpp | 2 | ||||
-rw-r--r-- | src/core/arm/dyncom/arm_dyncom.h | 2 | ||||
-rw-r--r-- | src/core/arm/dyncom/arm_dyncom_dec.h | 2 | ||||
-rw-r--r-- | src/core/arm/dyncom/arm_dyncom_interpreter.cpp | 322 | ||||
-rw-r--r-- | src/core/arm/dyncom/arm_dyncom_interpreter.h | 2 | ||||
-rw-r--r-- | src/core/arm/dyncom/arm_dyncom_run.cpp | 1 | ||||
-rw-r--r-- | src/core/arm/interpreter/arm_interpreter.cpp | 2 | ||||
-rw-r--r-- | src/core/arm/interpreter/arm_interpreter.h | 2 | ||||
-rw-r--r-- | src/core/arm/interpreter/armemu.cpp | 581 | ||||
-rw-r--r-- | src/core/arm/skyeye_common/vfp/vfpsingle.cpp | 7 |
13 files changed, 707 insertions, 222 deletions
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index 3ae528562..c59355339 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -1,5 +1,5 @@ // Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 +// Licensed under GPLv2 or any later version // Refer to the license.txt file included. #pragma once diff --git a/src/core/arm/disassembler/load_symbol_map.cpp b/src/core/arm/disassembler/load_symbol_map.cpp index 55278474b..13d26d170 100644 --- a/src/core/arm/disassembler/load_symbol_map.cpp +++ b/src/core/arm/disassembler/load_symbol_map.cpp @@ -1,5 +1,5 @@ // Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 +// Licensed under GPLv2 or any later version // Refer to the license.txt file included. #include <string> diff --git a/src/core/arm/disassembler/load_symbol_map.h b/src/core/arm/disassembler/load_symbol_map.h index 837cca99b..d28c551c3 100644 --- a/src/core/arm/disassembler/load_symbol_map.h +++ b/src/core/arm/disassembler/load_symbol_map.h @@ -1,5 +1,5 @@ // Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 +// Licensed under GPLv2 or any later version // Refer to the license.txt file included. #pragma once diff --git a/src/core/arm/dyncom/arm_dyncom.cpp b/src/core/arm/dyncom/arm_dyncom.cpp index 6c8ea211e..6d4fb1b48 100644 --- a/src/core/arm/dyncom/arm_dyncom.cpp +++ b/src/core/arm/dyncom/arm_dyncom.cpp @@ -1,5 +1,5 @@ // Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 +// Licensed under GPLv2 or any later version // Refer to the license.txt file included. #include "core/arm/skyeye_common/armcpu.h" diff --git a/src/core/arm/dyncom/arm_dyncom.h b/src/core/arm/dyncom/arm_dyncom.h index 51eea41ed..6fa2a0ba7 100644 --- a/src/core/arm/dyncom/arm_dyncom.h +++ b/src/core/arm/dyncom/arm_dyncom.h @@ -1,5 +1,5 @@ // Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 +// Licensed under GPLv2 or any later version // Refer to the license.txt file included. #pragma once diff --git a/src/core/arm/dyncom/arm_dyncom_dec.h b/src/core/arm/dyncom/arm_dyncom_dec.h index 19d94f369..70eb96e93 100644 --- a/src/core/arm/dyncom/arm_dyncom_dec.h +++ b/src/core/arm/dyncom/arm_dyncom_dec.h @@ -56,8 +56,6 @@ #define RN ((instr >> 16) & 0xF) /*xxxx xxxx xxxx xxxx xxxx xxxx xxxx 1111 */ #define RM (instr & 0xF) -#define BIT(n) ((instr >> (n)) & 1) -#define BITS(a,b) ((instr >> (a)) & ((1 << (1+(b)-(a)))-1)) /* CP15 registers */ #define OPCODE_1 BITS(21, 23) diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp index 68012bffd..460001b1a 100644 --- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp +++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp @@ -1019,6 +1019,15 @@ typedef struct _arm_inst { char component[0]; } arm_inst; +typedef struct generic_arm_inst { + u32 Ra; + u32 Rm; + u32 Rn; + u32 Rd; + u8 op1; + u8 op2; +} generic_arm_inst; + typedef struct _adc_inst { unsigned int I; unsigned int S; @@ -1266,6 +1275,13 @@ typedef struct _smla_inst { unsigned int Rn; } smla_inst; +typedef struct umaal_inst { + unsigned int Rn; + unsigned int Rm; + unsigned int RdHi; + unsigned int RdLo; +} umaal_inst; + typedef struct _umlal_inst { unsigned int S; unsigned int Rm; @@ -2374,15 +2390,41 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(pld)(unsigned int inst, int index) return inst_base; } ARM_INST_PTR INTERPRETER_TRANSLATE(qadd)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("QADD"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(qadd16)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("QADD16"); } ARM_INST_PTR INTERPRETER_TRANSLATE(qadd8)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("QADD8"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(qaddsubx)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("QADDSUBX"); } +ARM_INST_PTR INTERPRETER_TRANSLATE(qadd16)(unsigned int inst, int index) +{ + arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst)); + generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->Rm = BITS(inst, 0, 3); + inst_cream->Rn = BITS(inst, 16, 19); + inst_cream->Rd = BITS(inst, 12, 15); + inst_cream->op1 = BITS(inst, 20, 21); + inst_cream->op2 = BITS(inst, 5, 7); + + return inst_base; +} +ARM_INST_PTR INTERPRETER_TRANSLATE(qaddsubx)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(qadd16)(inst, index); +} ARM_INST_PTR INTERPRETER_TRANSLATE(qdadd)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("QDADD"); } ARM_INST_PTR INTERPRETER_TRANSLATE(qdsub)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("QDSUB"); } ARM_INST_PTR INTERPRETER_TRANSLATE(qsub)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("QSUB"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(qsub16)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("QSUB16"); } ARM_INST_PTR INTERPRETER_TRANSLATE(qsub8)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("QSUB8"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(qsubaddx)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("QSUBADDX"); } +ARM_INST_PTR INTERPRETER_TRANSLATE(qsub16)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(qadd16)(inst, index); +} +ARM_INST_PTR INTERPRETER_TRANSLATE(qsubaddx)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(qadd16)(inst, index); +} ARM_INST_PTR INTERPRETER_TRANSLATE(rev)(unsigned int inst, int index) { arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(rev_inst)); @@ -2462,9 +2504,29 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(rsc)(unsigned int inst, int index) } return inst_base; } -ARM_INST_PTR INTERPRETER_TRANSLATE(sadd16)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SADD16"); } ARM_INST_PTR INTERPRETER_TRANSLATE(sadd8)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SADD8"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(saddsubx)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SADDSUBX"); } +ARM_INST_PTR INTERPRETER_TRANSLATE(sadd16)(unsigned int inst, int index) +{ + arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst)); + generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->Rm = BITS(inst, 0, 3); + inst_cream->Rn = BITS(inst, 16, 19); + inst_cream->Rd = BITS(inst, 12, 15); + inst_cream->op1 = BITS(inst, 20, 21); + inst_cream->op2 = BITS(inst, 5, 7); + + return inst_base; +} +ARM_INST_PTR INTERPRETER_TRANSLATE(saddsubx)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(sadd16)(inst, index); +} ARM_INST_PTR INTERPRETER_TRANSLATE(sbc)(unsigned int inst, int index) { arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(sbc_inst)); @@ -2489,7 +2551,24 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(sbc)(unsigned int inst, int index) } return inst_base; } -ARM_INST_PTR INTERPRETER_TRANSLATE(sel)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SEL"); } +ARM_INST_PTR INTERPRETER_TRANSLATE(sel)(unsigned int inst, int index) +{ + arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(generic_arm_inst)); + generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->Rm = BITS(inst, 0, 3); + inst_cream->Rn = BITS(inst, 16, 19); + inst_cream->Rd = BITS(inst, 12, 15); + inst_cream->op1 = BITS(inst, 20, 22); + inst_cream->op2 = BITS(inst, 5, 7); + + return inst_base; +} ARM_INST_PTR INTERPRETER_TRANSLATE(setend)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SETEND"); } ARM_INST_PTR INTERPRETER_TRANSLATE(shadd16)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SHADD16"); } ARM_INST_PTR INTERPRETER_TRANSLATE(shadd8)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SHADD8"); } @@ -2630,9 +2709,15 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(smusd)(unsigned int inst, int index) { UNI ARM_INST_PTR INTERPRETER_TRANSLATE(srs)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SRS"); } ARM_INST_PTR INTERPRETER_TRANSLATE(ssat)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SSAT"); } ARM_INST_PTR INTERPRETER_TRANSLATE(ssat16)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SSAT16"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(ssub16)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SSUB16"); } ARM_INST_PTR INTERPRETER_TRANSLATE(ssub8)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SSUB8"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(ssubaddx)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("SSUBADDX"); } +ARM_INST_PTR INTERPRETER_TRANSLATE(ssub16)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(sadd16)(inst, index); +} +ARM_INST_PTR INTERPRETER_TRANSLATE(ssubaddx)(unsigned int inst, int index) +{ + return INTERPRETER_TRANSLATE(sadd16)(inst, index); +} ARM_INST_PTR INTERPRETER_TRANSLATE(stc)(unsigned int inst, int index) { arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(stc_inst)); @@ -3010,7 +3095,26 @@ ARM_INST_PTR INTERPRETER_TRANSLATE(uhaddsubx)(unsigned int inst, int index) { UN ARM_INST_PTR INTERPRETER_TRANSLATE(uhsub16)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("UHSUB16"); } ARM_INST_PTR INTERPRETER_TRANSLATE(uhsub8)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("UHSUB8"); } ARM_INST_PTR INTERPRETER_TRANSLATE(uhsubaddx)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("UHSUBADDX"); } -ARM_INST_PTR INTERPRETER_TRANSLATE(umaal)(unsigned int inst, int index) { UNIMPLEMENTED_INSTRUCTION("UMAAL"); } +ARM_INST_PTR INTERPRETER_TRANSLATE(umaal)(unsigned int inst, int index) +{ + arm_inst* const inst_base = (arm_inst*)AllocBuffer(sizeof(arm_inst) + sizeof(umaal_inst)); + umaal_inst* const inst_cream = (umaal_inst*)inst_base->component; + + inst_base->cond = BITS(inst, 28, 31); + inst_base->idx = index; + inst_base->br = NON_BRANCH; + inst_base->load_r15 = 0; + + inst_cream->Rm = BITS(inst, 8, 11); + inst_cream->Rn = BITS(inst, 0, 3); + inst_cream->RdLo = BITS(inst, 12, 15); + inst_cream->RdHi = BITS(inst, 16, 19); + + if (CHECK_RM || CHECK_RN) + inst_base->load_r15 = 1; + + return inst_base; +} ARM_INST_PTR INTERPRETER_TRANSLATE(umlal)(unsigned int inst, int index) { arm_inst *inst_base = (arm_inst *)AllocBuffer(sizeof(arm_inst) + sizeof(umlal_inst)); @@ -3720,9 +3824,9 @@ unsigned InterpreterMainLoop(ARMul_State* state) #define INC_ICOUNTER cpu->icounter++; \ if(cpu->Reg[15] > 0xc0000000) \ cpu->kernel_icounter++; - //if (debug_function(core)) \ + /*if (debug_function(core)) \ if (core->check_int_flag) \ - goto END + goto END*/ //LOG_TRACE(Core_ARM11, "icounter is %llx pc is %x\n", cpu->icounter, cpu->Reg[15]) #else #define INC_ICOUNTER ; @@ -3943,18 +4047,18 @@ unsigned InterpreterMainLoop(ARMul_State* state) #define UPDATE_NFLAG(dst) (cpu->NFlag = BIT(dst, 31) ? 1 : 0) #define UPDATE_ZFLAG(dst) (cpu->ZFlag = dst ? 0 : 1) -// #define UPDATE_CFLAG(dst, lop, rop) (cpu->CFlag = ((ISNEG(lop) && ISPOS(rop)) || \ +/* #define UPDATE_CFLAG(dst, lop, rop) (cpu->CFlag = ((ISNEG(lop) && ISPOS(rop)) || \ (ISNEG(lop) && ISPOS(dst)) || \ - (ISPOS(rop) && ISPOS(dst)))) + (ISPOS(rop) && ISPOS(dst)))) */ #define UPDATE_CFLAG(dst, lop, rop) (cpu->CFlag = ((dst < lop) || (dst < rop))) #define UPDATE_CFLAG_CARRY_FROM_ADD(lop, rop, flag) (cpu->CFlag = (((uint64_t) lop + (uint64_t) rop + (uint64_t) flag) > 0xffffffff) ) #define UPDATE_CFLAG_NOT_BORROW_FROM_FLAG(lop, rop, flag) (cpu->CFlag = ((uint64_t) lop >= ((uint64_t) rop + (uint64_t) flag))) #define UPDATE_CFLAG_NOT_BORROW_FROM(lop, rop) (cpu->CFlag = (lop >= rop)) #define UPDATE_CFLAG_WITH_NOT(dst, lop, rop) (cpu->CFlag = !(dst < lop)) #define UPDATE_CFLAG_WITH_SC cpu->CFlag = cpu->shifter_carry_out -// #define UPDATE_CFLAG_WITH_NOT(dst, lop, rop) cpu->CFlag = !((ISNEG(lop) && ISPOS(rop)) || \ +/* #define UPDATE_CFLAG_WITH_NOT(dst, lop, rop) cpu->CFlag = !((ISNEG(lop) && ISPOS(rop)) || \ (ISNEG(lop) && ISPOS(dst)) || \ - (ISPOS(rop) && ISPOS(dst))) + (ISPOS(rop) && ISPOS(dst))) */ #define UPDATE_VFLAG(dst, lop, rop) (cpu->VFlag = (((lop < 0) && (rop < 0) && (dst >= 0)) || \ ((lop >= 0) && (rop) >= 0 && (dst < 0)))) #define UPDATE_VFLAG_WITH_NOT(dst, lop, rop) (cpu->VFlag = !(((lop < 0) && (rop < 0) && (dst >= 0)) || \ @@ -5483,15 +5587,69 @@ unsigned InterpreterMainLoop(ARMul_State* state) GOTO_NEXT_INST; } QADD_INST: - QADD16_INST: QADD8_INST: + + QADD16_INST: QADDSUBX_INST: + QSUB16_INST: + QSUBADDX_INST: + { + INC_ICOUNTER; + if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { + generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component; + const s16 rm_lo = (RM & 0xFFFF); + const s16 rm_hi = ((RM >> 16) & 0xFFFF); + const s16 rn_lo = (RN & 0xFFFF); + const s16 rn_hi = ((RN >> 16) & 0xFFFF); + const u8 op2 = inst_cream->op2; + + s32 lo_result = 0; + s32 hi_result = 0; + + // QADD16 + if (op2 == 0x00) { + lo_result = (rn_lo + rm_lo); + hi_result = (rn_hi + rm_hi); + } + // QASX + else if (op2 == 0x01) { + lo_result = (rn_lo - rm_hi); + hi_result = (rn_hi + rm_lo); + } + // QSAX + else if (op2 == 0x02) { + lo_result = (rn_lo + rm_hi); + hi_result = (rn_hi - rm_lo); + } + // QSUB16 + else if (op2 == 0x03) { + lo_result = (rn_lo - rm_lo); + hi_result = (rn_hi - rm_hi); + } + + if (lo_result > 0x7FFF) + lo_result = 0x7FFF; + else if (lo_result < -0x8000) + lo_result = -0x8000; + + if (hi_result > 0x7FFF) + hi_result = 0x7FFF; + else if (hi_result < -0x8000) + hi_result = -0x8000; + + RD = (lo_result & 0xFFFF) | ((hi_result & 0xFFFF) << 16); + } + + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(generic_arm_inst)); + FETCH_INST; + GOTO_NEXT_INST; + } + QDADD_INST: QDSUB_INST: QSUB_INST: - QSUB16_INST: QSUB8_INST: - QSUBADDX_INST: REV_INST: { INC_ICOUNTER; @@ -5600,9 +5758,71 @@ unsigned InterpreterMainLoop(ARMul_State* state) FETCH_INST; GOTO_NEXT_INST; } - SADD16_INST: SADD8_INST: + + SADD16_INST: SADDSUBX_INST: + SSUBADDX_INST: + SSUB16_INST: + { + INC_ICOUNTER; + if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { + generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component; + + const s16 rn_lo = (RN & 0xFFFF); + const s16 rn_hi = ((RN >> 16) & 0xFFFF); + const s16 rm_lo = (RM & 0xFFFF); + const s16 rm_hi = ((RM >> 16) & 0xFFFF); + + s32 lo_result = 0; + s32 hi_result = 0; + + // SADD16 + if (inst_cream->op2 == 0x00) { + lo_result = (rn_lo + rm_lo); + hi_result = (rn_hi + rm_hi); + } + // SASX + else if (inst_cream->op2 == 0x01) { + lo_result = (rn_lo - rm_hi); + hi_result = (rn_hi + rm_lo); + } + // SSAX + else if (inst_cream->op2 == 0x02) { + lo_result = (rn_lo + rm_hi); + hi_result = (rn_hi - rm_lo); + } + // SSUB16 + else if (inst_cream->op2 == 0x03) { + lo_result = (rn_lo - rm_lo); + hi_result = (rn_hi - rm_hi); + } + + RD = (lo_result & 0xFFFF) | ((hi_result & 0xFFFF) << 16); + + if (lo_result >= 0) { + cpu->Cpsr |= (1 << 16); + cpu->Cpsr |= (1 << 17); + } else { + cpu->Cpsr &= ~(1 << 16); + cpu->Cpsr &= ~(1 << 17); + } + + if (hi_result >= 0) { + cpu->Cpsr |= (1 << 18); + cpu->Cpsr |= (1 << 19); + } else { + cpu->Cpsr &= ~(1 << 18); + cpu->Cpsr &= ~(1 << 19); + } + } + + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(generic_arm_inst)); + FETCH_INST; + GOTO_NEXT_INST; + } + SBC_INST: { INC_ICOUNTER; @@ -5641,7 +5861,47 @@ unsigned InterpreterMainLoop(ARMul_State* state) FETCH_INST; GOTO_NEXT_INST; } + SEL_INST: + { + INC_ICOUNTER; + if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { + generic_arm_inst* const inst_cream = (generic_arm_inst*)inst_base->component; + + const u32 to = RM; + const u32 from = RN; + const u32 cpsr = cpu->Cpsr; + + u32 result; + if (cpsr & (1 << 16)) + result = from & 0xff; + else + result = to & 0xff; + + if (cpsr & (1 << 17)) + result |= from & 0x0000ff00; + else + result |= to & 0x0000ff00; + + if (cpsr & (1 << 18)) + result |= from & 0x00ff0000; + else + result |= to & 0x00ff0000; + + if (cpsr & (1 << 19)) + result |= from & 0xff000000; + else + result |= to & 0xff000000; + + RD = result; + } + + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(generic_arm_inst)); + FETCH_INST; + GOTO_NEXT_INST; + } + SETEND_INST: SHADD16_INST: SHADD8_INST: @@ -5825,9 +6085,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) SRS_INST: SSAT_INST: SSAT16_INST: - SSUB16_INST: SSUB8_INST: - SSUBADDX_INST: STC_INST: { INC_ICOUNTER; @@ -6374,6 +6632,26 @@ unsigned InterpreterMainLoop(ARMul_State* state) UHSUB8_INST: UHSUBADDX_INST: UMAAL_INST: + { + INC_ICOUNTER; + if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) { + umaal_inst* const inst_cream = (umaal_inst*)inst_base->component; + + const u32 rm = RM; + const u32 rn = RN; + const u32 rd_lo = RDLO; + const u32 rd_hi = RDHI; + + const u64 result = (rm * rn) + rd_lo + rd_hi; + + RDLO = (result & 0xFFFFFFFF); + RDHI = ((result >> 32) & 0xFFFFFFFF); + } + cpu->Reg[15] += GET_INST_SIZE(cpu); + INC_PC(sizeof(umaal_inst)); + FETCH_INST; + GOTO_NEXT_INST; + } UMLAL_INST: { INC_ICOUNTER; diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.h b/src/core/arm/dyncom/arm_dyncom_interpreter.h index 3a2462f55..4791ea25f 100644 --- a/src/core/arm/dyncom/arm_dyncom_interpreter.h +++ b/src/core/arm/dyncom/arm_dyncom_interpreter.h @@ -1,5 +1,5 @@ // Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 +// Licensed under GPLv2 or any later version // Refer to the license.txt file included. #pragma once diff --git a/src/core/arm/dyncom/arm_dyncom_run.cpp b/src/core/arm/dyncom/arm_dyncom_run.cpp index a2026cbf3..b66b92cf5 100644 --- a/src/core/arm/dyncom/arm_dyncom_run.cpp +++ b/src/core/arm/dyncom/arm_dyncom_run.cpp @@ -29,7 +29,6 @@ void switch_mode(arm_core_t *core, uint32_t mode) { - uint32_t tmp1, tmp2; if (core->Mode == mode) { //Mode not changed. //printf("mode not changed\n"); diff --git a/src/core/arm/interpreter/arm_interpreter.cpp b/src/core/arm/interpreter/arm_interpreter.cpp index e2aa5ce92..be04fc1a1 100644 --- a/src/core/arm/interpreter/arm_interpreter.cpp +++ b/src/core/arm/interpreter/arm_interpreter.cpp @@ -1,5 +1,5 @@ // Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 +// Licensed under GPLv2 or any later version // Refer to the license.txt file included. #include "core/arm/interpreter/arm_interpreter.h" diff --git a/src/core/arm/interpreter/arm_interpreter.h b/src/core/arm/interpreter/arm_interpreter.h index ed53d997c..b685215a0 100644 --- a/src/core/arm/interpreter/arm_interpreter.h +++ b/src/core/arm/interpreter/arm_interpreter.h @@ -1,5 +1,5 @@ // Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 +// Licensed under GPLv2 or any later version // Refer to the license.txt file included. #pragma once diff --git a/src/core/arm/interpreter/armemu.cpp b/src/core/arm/interpreter/armemu.cpp index ec40881f8..610e04f10 100644 --- a/src/core/arm/interpreter/armemu.cpp +++ b/src/core/arm/interpreter/armemu.cpp @@ -1356,7 +1356,13 @@ mainswitch: } break; - case 0x04: /* SUB reg */ + case 0x04: /* SUB reg */ + // Signifies UMAAL + if (state->is_v6 && BITS(4, 7) == 0x09) { + if (handle_v6_insn(state, instr)) + break; + } + #ifdef MODET if (BITS (4, 7) == 0xB) { /* STRH immediate offset, no write-back, down, post indexed. */ @@ -3103,12 +3109,18 @@ mainswitch: state->Reg[idest] = (state->Reg[rfis] & 0xFFFF) | ((state->Reg[rlast] << ishi) & 0xFFFF0000); break; } else if ((instr & 0x70) == 0x50) { //pkhtb - u8 idest = BITS(12, 15); - u8 rfis = BITS(16, 19); - u8 rlast = BITS(0, 3); - u8 ishi = BITS(7, 11); - if (ishi == 0)ishi = 0x20; - state->Reg[idest] = (((int)(state->Reg[rlast]) >> (int)(ishi))& 0xFFFF) | ((state->Reg[rfis]) & 0xFFFF0000); + const u8 rd_idx = BITS(12, 15); + const u8 rn_idx = BITS(16, 19); + const u8 rm_idx = BITS(0, 3); + const u8 imm5 = BITS(7, 11); + + ARMword val; + if (imm5 >= 32) + val = (state->Reg[rm_idx] >> 31); + else + val = (state->Reg[rm_idx] >> imm5); + + state->Reg[rd_idx] = (val & 0xFFFF) | ((state->Reg[rn_idx]) & 0xFFFF0000); break; } else if (BIT (4)) { #ifdef MODE32 @@ -5669,16 +5681,29 @@ L_stm_s_takeabort: /* Attempt to emulate an ARMv6 instruction. Returns non-zero upon success. */ - static int - handle_v6_insn (ARMul_State * state, ARMword instr) { - ARMword lhs, temp; - - switch (BITS (20, 27)) { + static int handle_v6_insn(ARMul_State* state, ARMword instr) { + switch (BITS(20, 27)) { case 0x03: printf ("Unhandled v6 insn: ldr\n"); break; - case 0x04: - printf ("Unhandled v6 insn: umaal\n"); + case 0x04: // UMAAL + { + const u8 rm_idx = BITS(8, 11); + const u8 rn_idx = BITS(0, 3); + const u8 rd_lo_idx = BITS(12, 15); + const u8 rd_hi_idx = BITS(16, 19); + + const u32 rm_val = state->Reg[rm_idx]; + const u32 rn_val = state->Reg[rn_idx]; + const u32 rd_lo_val = state->Reg[rd_lo_idx]; + const u32 rd_hi_val = state->Reg[rd_hi_idx]; + + const u64 result = (rn_val * rm_val) + rd_lo_val + rd_hi_val; + + state->Reg[rd_lo_idx] = (result & 0xFFFFFFFF); + state->Reg[rd_hi_idx] = ((result >> 32) & 0xFFFFFFFF); + return 1; + } break; case 0x06: printf ("Unhandled v6 insn: mls/str\n"); @@ -5691,7 +5716,7 @@ L_stm_s_takeabort: /* strex */ u32 l = LHSReg; u32 r = RHSReg; - lhs = LHS; + u32 lhs = LHS; bool enter = false; @@ -5716,7 +5741,7 @@ L_stm_s_takeabort: case 0x19: /* ldrex */ if (BITS(4, 7) == 0x9) { - lhs = LHS; + u32 lhs = LHS; state->currentexaddr = lhs; state->currentexval = ARMul_ReadWord(state, lhs); @@ -5735,7 +5760,7 @@ L_stm_s_takeabort: case 0x1c: if (BITS(4, 7) == 0x9) { /* strexb */ - lhs = LHS; + u32 lhs = LHS; bool enter = false; @@ -5765,11 +5790,11 @@ L_stm_s_takeabort: case 0x1d: if ((BITS(4, 7)) == 0x9) { /* ldrexb */ - temp = LHS; - LoadByte(state, instr, temp, LUNSIGNED); + u32 lhs = LHS; + LoadByte(state, instr, lhs, LUNSIGNED); - state->currentexaddr = temp; - state->currentexval = (u32)ARMul_ReadByte(state, temp); + state->currentexaddr = lhs; + state->currentexval = (u32)ARMul_ReadByte(state, lhs); //state->Reg[BITS(12, 15)] = ARMul_LoadByte(state, state->Reg[BITS(16, 19)]); //printf("ldrexb\n"); @@ -5799,83 +5824,188 @@ L_stm_s_takeabort: case 0x3f: printf ("Unhandled v6 insn: rbit\n"); break; - case 0x61: - if ((instr & 0xFF0) == 0xf70) { //ssub16 - u8 tar = BITS(12, 15); - u8 src1 = BITS(16, 19); - u8 src2 = BITS(0, 3); - s16 a1 = (state->Reg[src1] & 0xFFFF); - s16 a2 = ((state->Reg[src1] >> 0x10) & 0xFFFF); - s16 b1 = (state->Reg[src2] & 0xFFFF); - s16 b2 = ((state->Reg[src2] >> 0x10) & 0xFFFF); - state->Reg[tar] = ((a1 - a2) & 0xFFFF) | (((b1 - b2) & 0xFFFF) << 0x10); - return 1; - } else if ((instr & 0xFF0) == 0xf10) { //sadd16 - u8 tar = BITS(12, 15); - u8 src1 = BITS(16, 19); - u8 src2 = BITS(0, 3); - s16 a1 = (state->Reg[src1] & 0xFFFF); - s16 a2 = ((state->Reg[src1] >> 0x10) & 0xFFFF); - s16 b1 = (state->Reg[src2] & 0xFFFF); - s16 b2 = ((state->Reg[src2] >> 0x10) & 0xFFFF); - state->Reg[tar] = ((a1 + a2) & 0xFFFF) | (((b1 + b2) & 0xFFFF) << 0x10); - return 1; - } else if ((instr & 0xFF0) == 0xf50) { //ssax - u8 tar = BITS(12, 15); - u8 src1 = BITS(16, 19); - u8 src2 = BITS(0, 3); - s16 a1 = (state->Reg[src1] & 0xFFFF); - s16 a2 = ((state->Reg[src1] >> 0x10) & 0xFFFF); - s16 b1 = (state->Reg[src2] & 0xFFFF); - s16 b2 = ((state->Reg[src2] >> 0x10) & 0xFFFF); - state->Reg[tar] = ((a1 + b2) & 0xFFFF) | (((a2 - b1) & 0xFFFF) << 0x10); + case 0x61: // SADD16, SASX, SSAX, and SSUB16 + if ((instr & 0xFF0) == 0xf10 || (instr & 0xFF0) == 0xf30 || + (instr & 0xFF0) == 0xf50 || (instr & 0xFF0) == 0xf70) + { + const u8 rd_idx = BITS(12, 15); + const u8 rm_idx = BITS(0, 3); + const u8 rn_idx = BITS(16, 19); + const s16 rn_lo = (state->Reg[rn_idx] & 0xFFFF); + const s16 rn_hi = ((state->Reg[rn_idx] >> 16) & 0xFFFF); + const s16 rm_lo = (state->Reg[rm_idx] & 0xFFFF); + const s16 rm_hi = ((state->Reg[rm_idx] >> 16) & 0xFFFF); + + s32 lo_result; + s32 hi_result; + + // SADD16 + if ((instr & 0xFF0) == 0xf10) { + lo_result = (rn_lo + rm_lo); + hi_result = (rn_hi + rm_hi); + } + // SASX + else if ((instr & 0xFF0) == 0xf30) { + lo_result = (rn_lo - rm_hi); + hi_result = (rn_hi + rm_lo); + } + // SSAX + else if ((instr & 0xFF0) == 0xf50) { + lo_result = (rn_lo + rm_hi); + hi_result = (rn_hi - rm_lo); + } + // SSUB16 + else { + lo_result = (rn_lo - rm_lo); + hi_result = (rn_hi - rm_hi); + } + + state->Reg[rd_idx] = (lo_result & 0xFFFF) | ((hi_result & 0xFFFF) << 16); + + if (lo_result >= 0) { + state->Cpsr |= (1 << 16); + state->Cpsr |= (1 << 17); + } else { + state->Cpsr &= ~(1 << 16); + state->Cpsr &= ~(1 << 17); + } + + if (hi_result >= 0) { + state->Cpsr |= (1 << 18); + state->Cpsr |= (1 << 19); + } else { + state->Cpsr &= ~(1 << 18); + state->Cpsr &= ~(1 << 19); + } return 1; - } else if ((instr & 0xFF0) == 0xf30) { //sasx - u8 tar = BITS(12, 15); - u8 src1 = BITS(16, 19); - u8 src2 = BITS(0, 3); - s16 a1 = (state->Reg[src1] & 0xFFFF); - s16 a2 = ((state->Reg[src1] >> 0x10) & 0xFFFF); - s16 b1 = (state->Reg[src2] & 0xFFFF); - s16 b2 = ((state->Reg[src2] >> 0x10) & 0xFFFF); - state->Reg[tar] = ((a1 - b2) & 0xFFFF) | (((a2 + b1) & 0xFFFF) << 0x10); + } + // SADD8/SSUB8 + else if ((instr & 0xFF0) == 0xf90 || (instr & 0xFF0) == 0xff0) + { + const u8 rd_idx = BITS(12, 15); + const u8 rm_idx = BITS(0, 3); + const u8 rn_idx = BITS(16, 19); + const u32 rm_val = state->Reg[rm_idx]; + const u32 rn_val = state->Reg[rn_idx]; + + u8 lo_val1; + u8 lo_val2; + u8 hi_val1; + u8 hi_val2; + + // SADD8 + if ((instr & 0xFF0) == 0xf90) { + lo_val1 = (u8)((rn_val & 0xFF) + (rm_val & 0xFF)); + lo_val2 = (u8)(((rn_val >> 8) & 0xFF) + ((rm_val >> 8) & 0xFF)); + hi_val1 = (u8)(((rn_val >> 16) & 0xFF) + ((rm_val >> 16) & 0xFF)); + hi_val2 = (u8)(((rn_val >> 24) & 0xFF) + ((rm_val >> 24) & 0xFF)); + + if (lo_val1 & 0x80) + state->Cpsr |= (1 << 16); + else + state->Cpsr &= ~(1 << 16); + + if (lo_val2 & 0x80) + state->Cpsr |= (1 << 17); + else + state->Cpsr &= ~(1 << 17); + + if (hi_val1 & 0x80) + state->Cpsr |= (1 << 18); + else + state->Cpsr &= ~(1 << 18); + + if (hi_val2 & 0x80) + state->Cpsr |= (1 << 19); + else + state->Cpsr &= ~(1 << 19); + } + // SSUB8 + else { + lo_val1 = (u8)((rn_val & 0xFF) - (rm_val & 0xFF)); + lo_val2 = (u8)(((rn_val >> 8) & 0xFF) - ((rm_val >> 8) & 0xFF)); + hi_val1 = (u8)(((rn_val >> 16) & 0xFF) - ((rm_val >> 16) & 0xFF)); + hi_val2 = (u8)(((rn_val >> 24) & 0xFF) - ((rm_val >> 24) & 0xFF)); + + if (!(lo_val1 & 0x80)) + state->Cpsr |= (1 << 16); + else + state->Cpsr &= ~(1 << 16); + + if (!(lo_val2 & 0x80)) + state->Cpsr |= (1 << 17); + else + state->Cpsr &= ~(1 << 17); + + if (!(hi_val1 & 0x80)) + state->Cpsr |= (1 << 18); + else + state->Cpsr &= ~(1 << 18); + + if (!(hi_val2 & 0x80)) + state->Cpsr |= (1 << 19); + else + state->Cpsr &= ~(1 << 19); + } + + state->Reg[rd_idx] = (lo_val1 | lo_val2 << 8 | hi_val1 << 16 | hi_val2 << 24); return 1; - } else printf ("Unhandled v6 insn: sadd/ssub/ssax/sasx\n"); + } + else { + printf("Unhandled v6 insn: %08x", instr); + } break; - case 0x62: - if ((instr & 0xFF0) == 0xf70) { //QSUB16 - u8 tar = BITS(12, 15); - u8 src1 = BITS(16, 19); - u8 src2 = BITS(0, 3); - s16 a1 = (state->Reg[src1] & 0xFFFF); - s16 a2 = ((state->Reg[src1] >> 0x10) & 0xFFFF); - s16 b1 = (state->Reg[src2] & 0xFFFF); - s16 b2 = ((state->Reg[src2] >> 0x10) & 0xFFFF); - s32 res1 = (a1 - b1); - s32 res2 = (a2 - b2); - if (res1 > 0x7FFF) res1 = 0x7FFF; - if (res2 > 0x7FFF) res2 = 0x7FFF; - if (res1 < 0x7FFF) res1 = -0x8000; - if (res2 < 0x7FFF) res2 = -0x8000; - state->Reg[tar] = (res1 & 0xFFFF) | ((res2 & 0xFFFF) << 0x10); - return 1; - } else if ((instr & 0xFF0) == 0xf10) { //QADD16 - u8 tar = BITS(12, 15); - u8 src1 = BITS(16, 19); - u8 src2 = BITS(0, 3); - s16 a1 = (state->Reg[src1] & 0xFFFF); - s16 a2 = ((state->Reg[src1] >> 0x10) & 0xFFFF); - s16 b1 = (state->Reg[src2] & 0xFFFF); - s16 b2 = ((state->Reg[src2] >> 0x10) & 0xFFFF); - s32 res1 = (a1 + b1); - s32 res2 = (a2 + b2); - if (res1 > 0x7FFF) res1 = 0x7FFF; - if (res2 > 0x7FFF) res2 = 0x7FFF; - if (res1 < 0x7FFF) res1 = -0x8000; - if (res2 < 0x7FFF) res2 = -0x8000; - state->Reg[tar] = ((res1) & 0xFFFF) | (((res2) & 0xFFFF) << 0x10); + case 0x62: // QADD16, QASX, QSAX, and QSUB16 + if ((instr & 0xFF0) == 0xf10 || (instr & 0xFF0) == 0xf30 || + (instr & 0xFF0) == 0xf50 || (instr & 0xFF0) == 0xf70) + { + const u8 rd_idx = BITS(12, 15); + const u8 rn_idx = BITS(16, 19); + const u8 rm_idx = BITS(0, 3); + const s16 rm_lo = (state->Reg[rm_idx] & 0xFFFF); + const s16 rm_hi = ((state->Reg[rm_idx] >> 0x10) & 0xFFFF); + const s16 rn_lo = (state->Reg[rn_idx] & 0xFFFF); + const s16 rn_hi = ((state->Reg[rn_idx] >> 0x10) & 0xFFFF); + + s32 lo_result; + s32 hi_result; + + // QADD16 + if ((instr & 0xFF0) == 0xf10) { + lo_result = (rn_lo + rm_lo); + hi_result = (rn_hi + rm_hi); + } + // QASX + else if ((instr & 0xFF0) == 0xf30) { + lo_result = (rn_lo - rm_hi); + hi_result = (rn_hi + rm_lo); + } + // QSAX + else if ((instr & 0xFF0) == 0xf50) { + lo_result = (rn_lo + rm_hi); + hi_result = (rn_hi - rm_lo); + } + // QSUB16 + else { + lo_result = (rn_lo - rm_lo); + hi_result = (rn_hi - rm_hi); + } + + if (lo_result > 0x7FFF) + lo_result = 0x7FFF; + else if (lo_result < -0x8000) + lo_result = -0x8000; + + if (hi_result > 0x7FFF) + hi_result = 0x7FFF; + else if (hi_result < -0x8000) + hi_result = -0x8000; + + state->Reg[rd_idx] = (lo_result & 0xFFFF) | ((hi_result & 0xFFFF) << 16); return 1; - } else printf ("Unhandled v6 insn: qadd16/qsub16\n"); + } else { + printf("Unhandled v6 insn: %08x", BITS(20, 27)); + } break; case 0x63: printf ("Unhandled v6 insn: shadd/shsub\n"); @@ -5925,11 +6055,29 @@ L_stm_s_takeabort: b2 = ((u8)(from >> 8) + (u8)(to >> 8)); b3 = ((u8)(from >> 16) + (u8)(to >> 16)); b4 = ((u8)(from >> 24) + (u8)(to >> 24)); - if (b1 & 0xffffff00) state->Cpsr |= (1 << 16); - if (b2 & 0xffffff00) state->Cpsr |= (1 << 17); - if (b3 & 0xffffff00) state->Cpsr |= (1 << 18); - if (b4 & 0xffffff00) state->Cpsr |= (1 << 19); + + if (b1 & 0xffffff00) + state->Cpsr |= (1 << 16); + else + state->Cpsr &= ~(1 << 16); + + if (b2 & 0xffffff00) + state->Cpsr |= (1 << 17); + else + state->Cpsr &= ~(1 << 17); + + if (b3 & 0xffffff00) + state->Cpsr |= (1 << 18); + else + state->Cpsr &= ~(1 << 18); + + + if (b4 & 0xffffff00) + state->Cpsr |= (1 << 19); + else + state->Cpsr &= ~(1 << 19); } + state->Reg[rd] = (u32)(b1 | (b2 & 0xff) << 8 | (b3 & 0xff) << 16 | (b4 & 0xff) << 24); return 1; } @@ -6016,22 +6164,28 @@ L_stm_s_takeabort: //ichfly //SSAT16 { - u8 tar = BITS(12, 15); - u8 src = BITS(0, 3); - u8 val = BITS(16, 19) + 1; - s16 a1 = (state->Reg[src]); - s16 a2 = (state->Reg[src] >> 0x10); - s16 min = (s16)(0x8000 >> (16 - val)); - s16 max = 0x7FFF >> (16 - val); - if (min > a1) a1 = min; - if (max < a1) a1 = max; - if (min > a2) a2 = min; - if (max < a2) a2 = max; - u32 temp2 = ((u32)(a2)) << 0x10; - state->Reg[tar] = (a1 & 0xFFFF) | (temp2); + const u8 rd_idx = BITS(12, 15); + const u8 rn_idx = BITS(0, 3); + const u8 num_bits = BITS(16, 19) + 1; + const s16 min = -(0x8000 >> (16 - num_bits)); + const s16 max = (0x7FFF >> (16 - num_bits)); + s16 rn_lo = (state->Reg[rn_idx]); + s16 rn_hi = (state->Reg[rn_idx] >> 16); + + if (rn_lo > max) + rn_lo = max; + else if (rn_lo < min) + rn_lo = min; + + if (rn_hi > max) + rn_hi = max; + else if (rn_hi < min) + rn_hi = min; + + state->Reg[rd_idx] = (rn_lo & 0xFFFF) | ((rn_hi & 0xFFFF) << 16); + return 1; } - return 1; default: break; } @@ -6044,7 +6198,7 @@ L_stm_s_takeabort: break; } - Rm = ((state->Reg[BITS(0, 3)] >> ror) & 0xFF); + Rm = ((state->Reg[BITS(0, 3)] >> ror) & 0xFF) | (((state->Reg[BITS(0, 3)] << (32 - ror)) & 0xFF) & 0xFF); if (Rm & 0x80) Rm |= 0xffffff00; @@ -6053,11 +6207,12 @@ L_stm_s_takeabort: state->Reg[BITS(12, 15)] = Rm; else /* SXTAB */ - state->Reg[BITS(12, 15)] += Rm; + state->Reg[BITS(12, 15)] = state->Reg[BITS(16, 19)] + Rm; return 1; } - case 0x6b: { + case 0x6b: + { ARMword Rm; int ror = -1; @@ -6075,10 +6230,10 @@ L_stm_s_takeabort: ror = 24; break; - case 0xf3: + case 0xf3: // REV DEST = ((RHS & 0xFF) << 24) | ((RHS & 0xFF00)) << 8 | ((RHS & 0xFF0000) >> 8) | ((RHS & 0xFF000000) >> 24); return 1; - case 0xfb: + case 0xfb: // REV16 DEST = ((RHS & 0xFF) << 8) | ((RHS & 0xFF00)) >> 8 | ((RHS & 0xFF0000) << 8) | ((RHS & 0xFF000000) >> 8); return 1; default: @@ -6088,7 +6243,7 @@ L_stm_s_takeabort: if (ror == -1) break; - Rm = ((state->Reg[BITS(0, 3)] >> ror) & 0xFFFF); + Rm = ((state->Reg[BITS(0, 3)] >> ror) & 0xFFFF) | (((state->Reg[BITS(0, 3)] << (32 - ror)) & 0xFFFF) & 0xFFFF); if (Rm & 0x8000) Rm |= 0xffff0000; @@ -6101,18 +6256,33 @@ L_stm_s_takeabort: return 1; } - case 0x6c: - if ((instr & 0xf03f0) == 0xf0070) { //uxtb16 - u8 src1 = BITS(0, 3); - u8 tar = BITS(12, 15); - u32 base = state->Reg[src1]; - u32 shamt = BITS(9,10)* 8; - u32 in = ((base << (32 - shamt)) | (base >> shamt)); - state->Reg[tar] = in & 0x00FF00FF; - return 1; - } else - printf ("Unhandled v6 insn: uxtab16\n"); - break; + case 0x6c: // UXTB16 and UXTAB16 + { + const u8 rm_idx = BITS(0, 3); + const u8 rn_idx = BITS(16, 19); + const u8 rd_idx = BITS(12, 15); + const u32 rm_val = state->Reg[rm_idx]; + const u32 rn_val = state->Reg[rn_idx]; + const u32 rotation = BITS(10, 11) * 8; + const u32 rotated_rm = ((rm_val << (32 - rotation)) | (rm_val >> rotation)); + + // UXTB16 + if ((instr & 0xf03f0) == 0xf0070) { + state->Reg[rd_idx] = rotated_rm & 0x00FF00FF; + } + else { // UXTAB16 + const u8 lo_rotated = (rotated_rm & 0xFF); + const u16 lo_result = (rn_val & 0xFFFF) + (u16)lo_rotated; + + const u8 hi_rotated = (rotated_rm >> 16) & 0xFF; + const u16 hi_result = (rn_val >> 16) + (u16)hi_rotated; + + state->Reg[rd_idx] = ((hi_result << 16) | (lo_result & 0xFFFF)); + } + + return 1; + } + break; case 0x6e: { ARMword Rm; int ror = -1; @@ -6136,18 +6306,27 @@ L_stm_s_takeabort: //ichfly //USAT16 { - u8 tar = BITS(12, 15); - u8 src = BITS(0, 3); - u8 val = BITS(16, 19); - s16 a1 = (state->Reg[src]); - s16 a2 = (state->Reg[src] >> 0x10); - s16 max = 0xFFFF >> (16 - val); - if (max < a1) a1 = max; - if (max < a2) a2 = max; - u32 temp2 = ((u32)(a2)) << 0x10; - state->Reg[tar] = (a1 & 0xFFFF) | (temp2); + const u8 rd_idx = BITS(12, 15); + const u8 rn_idx = BITS(0, 3); + const u8 num_bits = BITS(16, 19); + const s16 max = 0xFFFF >> (16 - num_bits); + s16 rn_lo = (state->Reg[rn_idx]); + s16 rn_hi = (state->Reg[rn_idx] >> 16); + + if (max < rn_lo) + rn_lo = max; + else if (rn_lo < 0) + rn_lo = 0; + + if (max < rn_hi) + rn_hi = max; + else if (rn_hi < 0) + rn_hi = 0; + + state->Reg[rd_idx] = (rn_lo & 0xFFFF) | ((rn_hi << 16) & 0xFFFF); + return 1; } - return 1; + default: break; } @@ -6160,7 +6339,7 @@ L_stm_s_takeabort: break; } - Rm = ((state->Reg[BITS(0, 3)] >> ror) & 0xFF); + Rm = ((state->Reg[BITS(0, 3)] >> ror) & 0xFF) | (((state->Reg[BITS(0, 3)] << (32 - ror)) & 0xFF) & 0xFF); if (BITS(16, 19) == 0xf) /* UXTB */ @@ -6190,9 +6369,13 @@ L_stm_s_takeabort: ror = 24; break; - case 0xfb: - printf("Unhandled v6 insn: revsh\n"); - return 0; + case 0xfb: // REVSH + { + DEST = ((RHS & 0xFF) << 8) | ((RHS & 0xFF00) >> 8); + if (DEST & 0x8000) + DEST |= 0xffff0000; + return 1; + } default: break; } @@ -6200,13 +6383,13 @@ L_stm_s_takeabort: if (ror == -1) break; - Rm = ((state->Reg[BITS(0, 3)] >> ror) & 0xFFFF); + Rm = ((state->Reg[BITS(0, 3)] >> ror) & 0xFFFF) | (((state->Reg[BITS(0, 3)] << (32 - ror)) & 0xFFFF) & 0xFFFF); /* UXT */ /* state->Reg[BITS (12, 15)] = Rm; */ /* dyf add */ if (BITS(16, 19) == 0xf) { - state->Reg[BITS(12, 15)] = (Rm >> (8 * BITS(10, 11))) & 0x0000FFFF; + state->Reg[BITS(12, 15)] = Rm; } else { /* UXTAH */ @@ -6214,7 +6397,7 @@ L_stm_s_takeabort: // printf("rd is %x rn is %x rm is %x rotate is %x\n", state->Reg[BITS (12, 15)], state->Reg[BITS (16, 19)] // , Rm, BITS(10, 11)); // printf("icounter is %lld\n", state->NumInstrs); - state->Reg[BITS(12, 15)] = (state->Reg[BITS(16, 19)] >> (8 * (BITS(10, 11)))) + Rm; + state->Reg[BITS(12, 15)] = state->Reg[BITS(16, 19)] + Rm; // printf("rd is %x\n", state->Reg[BITS (12, 15)]); // exit(-1); } @@ -6222,45 +6405,46 @@ L_stm_s_takeabort: return 1; } case 0x70: - if ((instr & 0xf0d0) == 0xf010) { //smuad //ichfly - u8 tar = BITS(16, 19); - u8 src1 = BITS(0, 3); - u8 src2 = BITS(8, 11); - u8 swap = BIT(5); - s16 a1 = (state->Reg[src1] & 0xFFFF); - s16 a2 = ((state->Reg[src1] >> 0x10) & 0xFFFF); - s16 b1 = swap ? ((state->Reg[src2] >> 0x10) & 0xFFFF) : (state->Reg[src2] & 0xFFFF); - s16 b2 = swap ? (state->Reg[src2] & 0xFFFF) : ((state->Reg[src2] >> 0x10) & 0xFFFF); - state->Reg[tar] = a1*a2 + b1*b2; - return 1; - - } else if ((instr & 0xf0d0) == 0xf050) { //smusd - u8 tar = BITS(16, 19); - u8 src1 = BITS(0, 3); - u8 src2 = BITS(8, 11); - u8 swap = BIT(5); - s16 a1 = (state->Reg[src1] & 0xFFFF); - s16 a2 = ((state->Reg[src1] >> 0x10) & 0xFFFF); - s16 b1 = swap ? ((state->Reg[src2] >> 0x10) & 0xFFFF) : (state->Reg[src2] & 0xFFFF); - s16 b2 = swap ? (state->Reg[src2] & 0xFFFF) : ((state->Reg[src2] >> 0x10) & 0xFFFF); - state->Reg[tar] = a1*a2 - b1*b2; - return 1; - } else if ((instr & 0xd0) == 0x10) { //smlad - u8 tar = BITS(16, 19); - u8 src1 = BITS(0, 3); - u8 src2 = BITS(8, 11); - u8 src3 = BITS(12, 15); - u8 swap = BIT(5); - - u32 a3 = state->Reg[src3]; - - s16 a1 = (state->Reg[src1] & 0xFFFF); - s16 a2 = ((state->Reg[src1] >> 0x10) & 0xFFFF); - s16 b1 = swap ? ((state->Reg[src2] >> 0x10) & 0xFFFF) : (state->Reg[src2] & 0xFFFF); - s16 b2 = swap ? (state->Reg[src2] & 0xFFFF) : ((state->Reg[src2] >> 0x10) & 0xFFFF); - state->Reg[tar] = a1*a2 + b1*b2 + a3; + // ichfly + // SMUAD, SMUSD, SMLAD, and SMLSD + if ((instr & 0xf0d0) == 0xf010 || (instr & 0xf0d0) == 0xf050 || + (instr & 0xd0) == 0x10 || (instr & 0xd0) == 0x50) + { + const u8 rd_idx = BITS(16, 19); + const u8 rn_idx = BITS(0, 3); + const u8 rm_idx = BITS(8, 11); + const u8 ra_idx = BITS(12, 15); + const bool do_swap = (BIT(5) == 1); + + u32 rm_val = state->Reg[rm_idx]; + const u32 rn_val = state->Reg[rn_idx]; + + if (do_swap) + rm_val = (((rm_val & 0xFFFF) << 16) | (rm_val >> 16)); + + const s16 rm_lo = (rm_val & 0xFFFF); + const s16 rm_hi = ((rm_val >> 16) & 0xFFFF); + const s16 rn_lo = (rn_val & 0xFFFF); + const s16 rn_hi = ((rn_val >> 16) & 0xFFFF); + + // SMUAD + if ((instr & 0xf0d0) == 0xf010) { + state->Reg[rd_idx] = (rn_lo * rm_lo) + (rn_hi * rm_hi); + } + // SMUSD + else if ((instr & 0xf0d0) == 0xf050) { + state->Reg[rd_idx] = (rn_lo * rm_lo) - (rn_hi * rm_hi); + } + // SMLAD + else if ((instr & 0xd0) == 0x10) { + state->Reg[rd_idx] = (rn_lo * rm_lo) + (rn_hi * rm_hi) + (s32)state->Reg[ra_idx]; + } + // SMLSD + else { + state->Reg[rd_idx] = ((rn_lo * rm_lo) - (rn_hi * rm_hi)) + (s32)state->Reg[ra_idx]; + } return 1; - } else printf ("Unhandled v6 insn: smuad/smusd/smlad/smlsd\n"); + } break; case 0x74: printf ("Unhandled v6 insn: smlald/smlsld\n"); @@ -6269,7 +6453,30 @@ L_stm_s_takeabort: printf ("Unhandled v6 insn: smmla/smmls/smmul\n"); break; case 0x78: - printf ("Unhandled v6 insn: usad/usada8\n"); + if (BITS(20, 24) == 0x18) + { + const u8 rm_idx = BITS(8, 11); + const u8 rn_idx = BITS(0, 3); + const u8 rd_idx = BITS(16, 19); + + const u32 rm_val = state->Reg[rm_idx]; + const u32 rn_val = state->Reg[rn_idx]; + + const u8 diff1 = (u8)std::labs((rn_val & 0xFF) - (rm_val & 0xFF)); + const u8 diff2 = (u8)std::labs(((rn_val >> 8) & 0xFF) - ((rm_val >> 8) & 0xFF)); + const u8 diff3 = (u8)std::labs(((rn_val >> 16) & 0xFF) - ((rm_val >> 16) & 0xFF)); + const u8 diff4 = (u8)std::labs(((rn_val >> 24) & 0xFF) - ((rm_val >> 24) & 0xFF)); + + u32 finalDif = (diff1 + diff2 + diff3 + diff4); + + // Op is USADA8 if true. + const u8 ra_idx = BITS(12, 15); + if (ra_idx != 15) + finalDif += state->Reg[ra_idx]; + + state->Reg[rd_idx] = finalDif; + return 1; + } break; case 0x7a: printf ("Unhandled v6 insn: usbfx\n"); diff --git a/src/core/arm/skyeye_common/vfp/vfpsingle.cpp b/src/core/arm/skyeye_common/vfp/vfpsingle.cpp index 871900497..6c33d8b78 100644 --- a/src/core/arm/skyeye_common/vfp/vfpsingle.cpp +++ b/src/core/arm/skyeye_common/vfp/vfpsingle.cpp @@ -614,7 +614,7 @@ static u32 vfp_single_ftoui(ARMul_State* state, int sd, int unused, s32 m, u32 f exceptions |= FPSCR_IDC; if (tm & VFP_NAN) - vsm.sign = 0; + vsm.sign = 1; if (vsm.exponent >= 127 + 32) { d = vsm.sign ? 0 : 0xffffffff; @@ -1148,7 +1148,10 @@ static u32 vfp_single_fsub(ARMul_State* state, int sd, int sn, s32 m, u32 fpscr) /* * Subtraction is addition with one sign inverted. */ - return vfp_single_fadd(state, sd, sn, vfp_single_packed_negate(m), fpscr); + if (m != 0x7FC00000) // Only negate if m isn't NaN. + m = vfp_single_packed_negate(m); + + return vfp_single_fadd(state, sd, sn, m, fpscr); } /* |