From a1137cdf678f449d13969235fa611bc871a8589c Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Sun, 16 Sep 2007 20:29:59 +0000 Subject: [PATCH] Add SSE5 (new AMD SSE) instructions support. svn path=/trunk/yasm/; revision=1953 --- modules/arch/x86/gen_x86_insn.py | 427 +++- modules/arch/x86/tests/Makefile.inc | 6 + modules/arch/x86/tests/sse5-all.asm | 509 +++++ modules/arch/x86/tests/sse5-all.hex | 2727 +++++++++++++++++++++++ modules/arch/x86/tests/sse5-basic.asm | 12 + modules/arch/x86/tests/sse5-basic.hex | 59 + modules/arch/x86/tests/sse5-err.asm | 116 + modules/arch/x86/tests/sse5-err.errwarn | 84 + modules/arch/x86/x86arch.h | 14 +- modules/arch/x86/x86bc.c | 40 +- modules/arch/x86/x86cpu.gperf | 6 + modules/arch/x86/x86expr.c | 7 +- modules/arch/x86/x86id.c | 64 +- 13 files changed, 3987 insertions(+), 84 deletions(-) create mode 100644 modules/arch/x86/tests/sse5-all.asm create mode 100644 modules/arch/x86/tests/sse5-all.hex create mode 100644 modules/arch/x86/tests/sse5-basic.asm create mode 100644 modules/arch/x86/tests/sse5-basic.hex create mode 100644 modules/arch/x86/tests/sse5-err.asm create mode 100644 modules/arch/x86/tests/sse5-err.errwarn diff --git a/modules/arch/x86/gen_x86_insn.py b/modules/arch/x86/gen_x86_insn.py index 26fbf650..e891634e 100755 --- a/modules/arch/x86/gen_x86_insn.py +++ b/modules/arch/x86/gen_x86_insn.py @@ -32,7 +32,7 @@ ordered_cpus = [ "P4", "IA64", "Hammer"] ordered_cpu_features = [ "FPU", "Cyrix", "AMD", "MMX", "3DNow", "SMM", "SSE", "SSE2", - "SSE3", "SVM", "PadLock", "SSSE3", "SSE41", "SSE42", "SSE4a"] + "SSE3", "SVM", "PadLock", "SSSE3", "SSE41", "SSE42", "SSE4a", "SSE5"] unordered_cpu_features = ["Priv", "Prot", "Undoc", "Obs"] def cpu_lcd(cpu1, cpu2): @@ -182,6 +182,9 @@ class GroupForm(object): else: raise KeyError("missing opcode") + # DREX opcode0 field + self.drex_oc0 = kwargs.pop("drex_oc0", 0) and 0x08 or 0 + # Build operands string (C array initializer) self.operands = kwargs.pop("operands") for op in 
self.operands: @@ -201,6 +204,8 @@ class GroupForm(object): self.cpu.add("586") if op.dest == "EA64": self.cpu.add("64") + if op.dest == "DREX": + self.drex_oc0 |= 0x80 # Modifiers self.modifiers = kwargs.pop("modifiers", []) @@ -280,6 +285,8 @@ class GroupForm(object): "%d" % (self.opersize or 0), "%d" % (self.def_opersize_64 or 0), self.special_prefix or "0", + self.drex_oc0 and + ("0x%02X" % self.drex_oc0) or "0", "%d" % self.opcode_len, opcodes_str, "%d" % (self.spare or 0), @@ -4650,6 +4657,23 @@ add_group("sse4imm", operands=[Operand(type="SIMDReg", size=128, dest="Spare"), Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"), Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) +for sz in [32, 64]: + add_group("sse4m%dimm" % sz, + cpu=["SSE41"], + modifiers=["Op2Add"], + prefix=0x66, + opcode=[0x0F, 0x3A, 0x00], + operands=[Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="SIMDReg", size=128, dest="EA"), + Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) + add_group("sse4m%dimm" % sz, + cpu=["SSE41"], + modifiers=["Op2Add"], + prefix=0x66, + opcode=[0x0F, 0x3A, 0x00], + operands=[Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="Mem", size=sz, relaxed=True, dest="EA"), + Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) add_insn("blendpd", "sse4imm", modifiers=[0x0D]) add_insn("blendps", "sse4imm", modifiers=[0x0C]) @@ -4659,8 +4683,8 @@ add_insn("mpsadbw", "sse4imm", modifiers=[0x42]) add_insn("pblendw", "sse4imm", modifiers=[0x0E]) add_insn("roundpd", "sse4imm", modifiers=[0x09]) add_insn("roundps", "sse4imm", modifiers=[0x08]) -add_insn("roundsd", "sse4imm", modifiers=[0x0B]) -add_insn("roundss", "sse4imm", modifiers=[0x0A]) +add_insn("roundsd", "sse4m64imm", modifiers=[0x0B]) +add_insn("roundss", "sse4m32imm", modifiers=[0x0A]) add_group("sse4xmm0", cpu=["SSE41"], @@ -4843,20 +4867,21 @@ add_group("pinsrq", add_insn("pinsrq", "pinsrq") -add_group("sse4m64", - cpu=["SSE41"], - modifiers=["Op2Add"], - 
prefix=0x66, - opcode=[0x0F, 0x38, 0x00], - operands=[Operand(type="SIMDReg", size=128, dest="Spare"), - Operand(type="Mem", size=64, relaxed=True, dest="EA")]) -add_group("sse4m64", - cpu=["SSE41"], - modifiers=["Op2Add"], - prefix=0x66, - opcode=[0x0F, 0x38, 0x00], - operands=[Operand(type="SIMDReg", size=128, dest="Spare"), - Operand(type="SIMDReg", size=128, dest="EA")]) +for sz in [16, 32, 64]: + add_group("sse4m%d" % sz, + cpu=["SSE41"], + modifiers=["Op2Add"], + prefix=0x66, + opcode=[0x0F, 0x38, 0x00], + operands=[Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="Mem", size=sz, relaxed=True, dest="EA")]) + add_group("sse4m%d" % sz, + cpu=["SSE41"], + modifiers=["Op2Add"], + prefix=0x66, + opcode=[0x0F, 0x38, 0x00], + operands=[Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="SIMDReg", size=128, dest="EA")]) add_insn("pmovsxbw", "sse4m64", modifiers=[0x20]) add_insn("pmovsxwd", "sse4m64", modifiers=[0x23]) @@ -4865,41 +4890,11 @@ add_insn("pmovzxbw", "sse4m64", modifiers=[0x30]) add_insn("pmovzxwd", "sse4m64", modifiers=[0x33]) add_insn("pmovzxdq", "sse4m64", modifiers=[0x35]) -add_group("sse4m32", - cpu=["SSE41"], - modifiers=["Op2Add"], - prefix=0x66, - opcode=[0x0F, 0x38, 0x00], - operands=[Operand(type="SIMDReg", size=128, dest="Spare"), - Operand(type="Mem", size=32, relaxed=True, dest="EA")]) -add_group("sse4m32", - cpu=["SSE41"], - modifiers=["Op2Add"], - prefix=0x66, - opcode=[0x0F, 0x38, 0x00], - operands=[Operand(type="SIMDReg", size=128, dest="Spare"), - Operand(type="SIMDReg", size=128, dest="EA")]) - add_insn("pmovsxbd", "sse4m32", modifiers=[0x21]) add_insn("pmovsxwq", "sse4m32", modifiers=[0x24]) add_insn("pmovzxbd", "sse4m32", modifiers=[0x31]) add_insn("pmovzxwq", "sse4m32", modifiers=[0x34]) -add_group("sse4m16", - cpu=["SSE41"], - modifiers=["Op2Add"], - prefix=0x66, - opcode=[0x0F, 0x38, 0x00], - operands=[Operand(type="SIMDReg", size=128, dest="Spare"), - Operand(type="Mem", size=16, relaxed=True, 
dest="EA")]) -add_group("sse4m16", - cpu=["SSE41"], - modifiers=["Op2Add"], - prefix=0x66, - opcode=[0x0F, 0x38, 0x00], - operands=[Operand(type="SIMDReg", size=128, dest="Spare"), - Operand(type="SIMDReg", size=128, dest="EA")]) - add_insn("pmovsxbq", "sse4m16", modifiers=[0x22]) add_insn("pmovzxbq", "sse4m16", modifiers=[0x32]) @@ -4970,6 +4965,344 @@ add_group("movntss", add_insn("movntss", "movntss") +##################################################################### +# AMD SSE5 instructions +##################################################################### + +add_group("sse5com", + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x25, 0x00], + drex_oc0=0, + operands=[Operand(type="SIMDReg", size=128, dest="DREX"), + Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"), + Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) + +for sz in [32, 64]: + add_group("sse5com%d" % sz, + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x25, 0x00], + drex_oc0=0, + operands=[Operand(type="SIMDReg", size=128, dest="DREX"), + Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="SIMDReg", size=128, dest="EA"), + Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) + add_group("sse5com%d" % sz, + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x25, 0x00], + drex_oc0=0, + operands=[Operand(type="SIMDReg", size=128, dest="DREX"), + Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="Mem", size=sz, relaxed=True, dest="EA"), + Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) + +add_insn("comps", "sse5com", modifiers=[0x2C]) +add_insn("compd", "sse5com", modifiers=[0x2D]) +add_insn("comss", "sse5com32", modifiers=[0x2E]) +add_insn("comsd", "sse5com64", modifiers=[0x2F]) + +add_insn("pcomb", "sse5com", modifiers=[0x4C]) +add_insn("pcomw", "sse5com", modifiers=[0x4D]) +add_insn("pcomd", "sse5com", modifiers=[0x4E]) +add_insn("pcomq", "sse5com", modifiers=[0x4F]) + 
+add_insn("pcomub", "sse5com", modifiers=[0x6C]) +add_insn("pcomuw", "sse5com", modifiers=[0x6D]) +add_insn("pcomud", "sse5com", modifiers=[0x6E]) +add_insn("pcomuq", "sse5com", modifiers=[0x6F]) + +add_group("cvtph2ps", + cpu=["SSE5"], + opcode=[0x0F, 0x7A, 0x30], + operands=[Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="SIMDReg", size=128, dest="EA")]) +add_group("cvtph2ps", + cpu=["SSE5"], + opcode=[0x0F, 0x7A, 0x30], + operands=[Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="Mem", size=64, relaxed=True, dest="EA")]) + +add_insn("cvtph2ps", "cvtph2ps") + +add_group("cvtps2ph", + cpu=["SSE5"], + opcode=[0x0F, 0x7A, 0x31], + operands=[Operand(type="SIMDReg", size=128, dest="EA"), + Operand(type="SIMDReg", size=128, dest="Spare")]) +add_group("cvtps2ph", + cpu=["SSE5"], + opcode=[0x0F, 0x7A, 0x31], + operands=[Operand(type="Mem", size=64, relaxed=True, dest="EA"), + Operand(type="SIMDReg", size=128, dest="Spare")]) + +add_insn("cvtps2ph", "cvtps2ph") + +add_group("sse5arith", + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x24, 0x00], + drex_oc0=0, + operands=[Operand(type="SIMDReg", size=128, dest="DREX"), + Operand(type="SIMDRegMatch0", size=128, dest=None), + Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")]) +add_group("sse5arith", + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x24, 0x00], + drex_oc0=1, + operands=[Operand(type="SIMDReg", size=128, dest="DREX"), + Operand(type="SIMDRegMatch0", size=128, dest=None), + Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"), + Operand(type="SIMDReg", size=128, dest="Spare")]) +add_group("sse5arith", + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x24, 0x04], + drex_oc0=0, + operands=[Operand(type="SIMDReg", size=128, dest="DREX"), + Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"), + Operand(type="SIMDRegMatch0", size=128, 
dest=None)]) +add_group("sse5arith", + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x24, 0x04], + drex_oc0=1, + operands=[Operand(type="SIMDReg", size=128, dest="DREX"), + Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"), + Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="SIMDRegMatch0", size=128, dest=None)]) + +for sz in [32, 64]: + add_group("sse5arith%d" % sz, + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x24, 0x00], + drex_oc0=0, + operands=[Operand(type="SIMDReg", size=128, dest="DREX"), + Operand(type="SIMDRegMatch0", size=128, dest=None), + Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="SIMDReg", size=128, dest="EA")]) + add_group("sse5arith%d" % sz, + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x24, 0x00], + drex_oc0=0, + operands=[Operand(type="SIMDReg", size=128, dest="DREX"), + Operand(type="SIMDRegMatch0", size=128, dest=None), + Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="Mem", size=sz, relaxed=True, dest="EA")]) + add_group("sse5arith%d" % sz, + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x24, 0x00], + drex_oc0=1, + operands=[Operand(type="SIMDReg", size=128, dest="DREX"), + Operand(type="SIMDRegMatch0", size=128, dest=None), + Operand(type="SIMDReg", size=128, dest="EA"), + Operand(type="SIMDReg", size=128, dest="Spare")]) + add_group("sse5arith%d" % sz, + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x24, 0x00], + drex_oc0=1, + operands=[Operand(type="SIMDReg", size=128, dest="DREX"), + Operand(type="SIMDRegMatch0", size=128, dest=None), + Operand(type="Mem", size=sz, relaxed=True, dest="EA"), + Operand(type="SIMDReg", size=128, dest="Spare")]) + add_group("sse5arith%d" % sz, + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x24, 0x04], + drex_oc0=0, + operands=[Operand(type="SIMDReg", size=128, dest="DREX"), + Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="SIMDReg", size=128, dest="EA"), + 
Operand(type="SIMDRegMatch0", size=128, dest=None)]) + add_group("sse5arith%d" % sz, + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x24, 0x04], + drex_oc0=0, + operands=[Operand(type="SIMDReg", size=128, dest="DREX"), + Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="Mem", size=sz, relaxed=True, dest="EA"), + Operand(type="SIMDRegMatch0", size=128, dest=None)]) + add_group("sse5arith%d" % sz, + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x24, 0x04], + drex_oc0=1, + operands=[Operand(type="SIMDReg", size=128, dest="DREX"), + Operand(type="SIMDReg", size=128, dest="EA"), + Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="SIMDRegMatch0", size=128, dest=None)]) + add_group("sse5arith%d" % sz, + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x24, 0x04], + drex_oc0=1, + operands=[Operand(type="SIMDReg", size=128, dest="DREX"), + Operand(type="Mem", size=sz, relaxed=True, dest="EA"), + Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="SIMDRegMatch0", size=128, dest=None)]) + +add_insn("fmaddps", "sse5arith", modifiers=[0x00]) +add_insn("fmaddpd", "sse5arith", modifiers=[0x01]) +add_insn("fmaddss", "sse5arith32", modifiers=[0x02]) +add_insn("fmaddsd", "sse5arith64", modifiers=[0x03]) + +add_insn("fmsubps", "sse5arith", modifiers=[0x08]) +add_insn("fmsubpd", "sse5arith", modifiers=[0x09]) +add_insn("fmsubss", "sse5arith32", modifiers=[0x0A]) +add_insn("fmsubsd", "sse5arith64", modifiers=[0x0B]) + +add_insn("fnmaddps", "sse5arith", modifiers=[0x10]) +add_insn("fnmaddpd", "sse5arith", modifiers=[0x11]) +add_insn("fnmaddss", "sse5arith32", modifiers=[0x12]) +add_insn("fnmaddsd", "sse5arith64", modifiers=[0x13]) + +add_insn("fnmsubps", "sse5arith", modifiers=[0x18]) +add_insn("fnmsubpd", "sse5arith", modifiers=[0x19]) +add_insn("fnmsubss", "sse5arith32", modifiers=[0x1A]) +add_insn("fnmsubsd", "sse5arith64", modifiers=[0x1B]) + +add_insn("pcmov", "sse5arith", modifiers=[0x22]) + +add_insn("permps", 
"sse5arith", modifiers=[0x20]) +add_insn("permpd", "sse5arith", modifiers=[0x21]) +add_insn("pperm", "sse5arith", modifiers=[0x23]) + +add_group("sse5two", + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x7A, 0x00], + operands=[Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")]) +for sz in [32, 64]: + add_group("sse5two%d" % sz, + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x7A, 0x00], + operands=[Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="SIMDReg", size=128, dest="EA")]) + add_group("sse5two%d" % sz, + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x7A, 0x00], + operands=[Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="Mem", size=sz, relaxed=True, dest="EA")]) + +add_insn("frczps", "sse5two", modifiers=[0x10]) +add_insn("frczpd", "sse5two", modifiers=[0x11]) +add_insn("frczss", "sse5two32", modifiers=[0x12]) +add_insn("frczsd", "sse5two64", modifiers=[0x13]) + +add_insn("phaddbw", "sse5two", modifiers=[0x41]) +add_insn("phaddbd", "sse5two", modifiers=[0x42]) +add_insn("phaddbq", "sse5two", modifiers=[0x43]) +add_insn("phaddwd", "sse5two", modifiers=[0x46]) +add_insn("phaddwq", "sse5two", modifiers=[0x47]) +add_insn("phadddq", "sse5two", modifiers=[0x4B]) + +add_insn("phaddubw", "sse5two", modifiers=[0x51]) +add_insn("phaddubd", "sse5two", modifiers=[0x52]) +add_insn("phaddubq", "sse5two", modifiers=[0x53]) +add_insn("phadduwd", "sse5two", modifiers=[0x56]) +add_insn("phadduwq", "sse5two", modifiers=[0x57]) +add_insn("phaddudq", "sse5two", modifiers=[0x5B]) + +add_insn("phsubbw", "sse5two", modifiers=[0x61]) +add_insn("phsubwd", "sse5two", modifiers=[0x62]) +add_insn("phsubdq", "sse5two", modifiers=[0x63]) + +add_group("sse5pmacs", + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x24, 0x00], + drex_oc0=0, + operands=[Operand(type="SIMDReg", size=128, dest="DREX"), + Operand(type="SIMDReg", size=128, dest="Spare"), + 
Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"), + Operand(type="SIMDRegMatch0", size=128, dest=None)]) + +add_insn("pmacsww", "sse5pmacs", modifiers=[0x95]) +add_insn("pmacswd", "sse5pmacs", modifiers=[0x96]) +add_insn("pmacsdql", "sse5pmacs", modifiers=[0x97]) +add_insn("pmacsdd", "sse5pmacs", modifiers=[0x9E]) +add_insn("pmacsdqh", "sse5pmacs", modifiers=[0x9F]) + +add_insn("pmacssww", "sse5pmacs", modifiers=[0x85]) +add_insn("pmacsswd", "sse5pmacs", modifiers=[0x86]) +add_insn("pmacssdql", "sse5pmacs", modifiers=[0x87]) +add_insn("pmacssdd", "sse5pmacs", modifiers=[0x8E]) +add_insn("pmacssdqh", "sse5pmacs", modifiers=[0x8F]) + +add_insn("pmadcsswd", "sse5pmacs", modifiers=[0xA6]) +add_insn("pmadcswd", "sse5pmacs", modifiers=[0xB6]) + +add_group("sse5prot", + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x24, 0x40], + drex_oc0=0, + operands=[Operand(type="SIMDReg", size=128, dest="DREX"), + Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")]) +add_group("sse5prot", + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x24, 0x40], + drex_oc0=1, + operands=[Operand(type="SIMDReg", size=128, dest="DREX"), + Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"), + Operand(type="SIMDReg", size=128, dest="Spare")]) +add_group("sse5prot", + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x7B, 0x40], + operands=[Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"), + Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) + +add_insn("protb", "sse5prot", modifiers=[0x00]) +add_insn("protw", "sse5prot", modifiers=[0x01]) +add_insn("protd", "sse5prot", modifiers=[0x02]) +add_insn("protq", "sse5prot", modifiers=[0x03]) + +add_group("sse5psh", + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x24, 0x44], + drex_oc0=0, + operands=[Operand(type="SIMDReg", size=128, dest="DREX"), + Operand(type="SIMDReg", size=128, 
dest="Spare"), + Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")]) +add_group("sse5psh", + cpu=["SSE5"], + modifiers=["Op2Add"], + opcode=[0x0F, 0x24, 0x44], + drex_oc0=1, + operands=[Operand(type="SIMDReg", size=128, dest="DREX"), + Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"), + Operand(type="SIMDReg", size=128, dest="Spare")]) + +add_insn("pshlb", "sse5psh", modifiers=[0x00]) +add_insn("pshlw", "sse5psh", modifiers=[0x01]) +add_insn("pshld", "sse5psh", modifiers=[0x02]) +add_insn("pshlq", "sse5psh", modifiers=[0x03]) + +add_insn("pshab", "sse5psh", modifiers=[0x04]) +add_insn("pshaw", "sse5psh", modifiers=[0x05]) +add_insn("pshad", "sse5psh", modifiers=[0x06]) +add_insn("pshaq", "sse5psh", modifiers=[0x07]) + +# roundps, roundpd, roundss, roundsd, ptest are in SSE4.1 + ##################################################################### # AMD 3DNow! instructions ##################################################################### diff --git a/modules/arch/x86/tests/Makefile.inc b/modules/arch/x86/tests/Makefile.inc index 3fc08dab..60406bfc 100644 --- a/modules/arch/x86/tests/Makefile.inc +++ b/modules/arch/x86/tests/Makefile.inc @@ -144,6 +144,12 @@ EXTRA_DIST += modules/arch/x86/tests/sse4.asm EXTRA_DIST += modules/arch/x86/tests/sse4.hex EXTRA_DIST += modules/arch/x86/tests/sse4-err.asm EXTRA_DIST += modules/arch/x86/tests/sse4-err.errwarn +EXTRA_DIST += modules/arch/x86/tests/sse5-all.asm +EXTRA_DIST += modules/arch/x86/tests/sse5-all.hex +EXTRA_DIST += modules/arch/x86/tests/sse5-basic.asm +EXTRA_DIST += modules/arch/x86/tests/sse5-basic.hex +EXTRA_DIST += modules/arch/x86/tests/sse5-err.asm +EXTRA_DIST += modules/arch/x86/tests/sse5-err.errwarn EXTRA_DIST += modules/arch/x86/tests/ssse3.asm EXTRA_DIST += modules/arch/x86/tests/ssse3.c EXTRA_DIST += modules/arch/x86/tests/ssse3.hex diff --git a/modules/arch/x86/tests/sse5-all.asm b/modules/arch/x86/tests/sse5-all.asm new file mode 100644 index 00000000..c4fdd6dd --- /dev/null +++ 
b/modules/arch/x86/tests/sse5-all.asm @@ -0,0 +1,509 @@ +; Instructions are ordered in SSE5 databook order +; BITS=16 to minimize output length +[bits 16] +compd xmm1, xmm4, xmm7, 5 ; 0F 25 2D 347 10 05 +compd xmm2, xmm5, [0], byte 5 ; 0F 25 2D 056 20 00 00 05 +compd xmm3, xmm6, dqword [0], 5 ; 0F 25 2D 066 30 00 00 05 + +comps xmm1, xmm4, xmm7, 5 ; 0F 25 2C 347 10 05 +comps xmm2, xmm5, [0], byte 5 ; 0F 25 2C 056 20 00 00 05 +comps xmm3, xmm6, dqword [0], 5 ; 0F 25 2C 066 30 00 00 05 + +comsd xmm1, xmm4, xmm7, 5 ; 0F 25 2F 347 10 05 +comsd xmm2, xmm5, [0], byte 5 ; 0F 25 2F 056 20 00 00 05 +comsd xmm3, xmm6, qword [0], 5 ; 0F 25 2F 066 30 00 00 05 + +comss xmm1, xmm4, xmm7, 5 ; 0F 25 2E 347 10 05 +comss xmm2, xmm5, [0], byte 5 ; 0F 25 2E 056 20 00 00 05 +comss xmm3, xmm6, dword [0], 5 ; 0F 25 2E 066 30 00 00 05 + +cvtph2ps xmm1, xmm4 ; 0F 7A 30 314 +cvtph2ps xmm2, [0] ; 0F 7A 30 026 00 00 +cvtph2ps xmm3, qword [0] ; 0F 7A 30 036 00 00 + +cvtps2ph xmm1, xmm4 ; 0F 7A 31 341 +cvtps2ph [0], xmm2 ; 0F 7A 31 026 00 00 +cvtps2ph qword [0], xmm3 ; 0F 7A 31 036 00 00 + +fmaddpd xmm1, xmm1, xmm2, xmm3 ; 0F 24 01 323 10 /or/ 0F 24 01 332 18 +fmaddpd xmm1, xmm1, xmm2, [0] ; 0F 24 01 026 10 00 00 +fmaddpd xmm1, xmm1, xmm2, dqword [0] ; 0F 24 01 026 10 00 00 +fmaddpd xmm1, xmm1, [0], xmm3 ; 0F 24 01 036 18 00 00 +fmaddpd xmm1, xmm1, dqword [0], xmm3 ; 0F 24 01 036 18 00 00 +fmaddpd xmm1, xmm2, xmm3, xmm1 ; 0F 24 05 323 10 /or/ 0F 24 05 332 18 +fmaddpd xmm1, xmm2, [0], xmm1 ; 0F 24 05 026 10 00 00 +fmaddpd xmm1, xmm2, dqword [0], xmm1 ; 0F 24 05 026 10 00 00 +fmaddpd xmm1, [0], xmm3, xmm1 ; 0F 24 05 036 18 00 00 +fmaddpd xmm1, dqword [0], xmm3, xmm1 ; 0F 24 05 036 18 00 00 + +fmaddps xmm1, xmm1, xmm2, xmm3 ; 0F 24 00 323 10 /or/ 0F 24 00 332 18 +fmaddps xmm1, xmm1, xmm2, [0] ; 0F 24 00 026 10 00 00 +fmaddps xmm1, xmm1, xmm2, dqword [0] ; 0F 24 00 026 10 00 00 +fmaddps xmm1, xmm1, [0], xmm3 ; 0F 24 00 036 18 00 00 +fmaddps xmm1, xmm1, dqword [0], xmm3 ; 0F 24 00 036 18 00 00 
+fmaddps xmm1, xmm2, xmm3, xmm1 ; 0F 24 04 323 10 /or/ 0F 24 04 332 18 +fmaddps xmm1, xmm2, [0], xmm1 ; 0F 24 04 026 10 00 00 +fmaddps xmm1, xmm2, dqword [0], xmm1 ; 0F 24 04 026 10 00 00 +fmaddps xmm1, [0], xmm3, xmm1 ; 0F 24 04 036 18 00 00 +fmaddps xmm1, dqword [0], xmm3, xmm1 ; 0F 24 04 036 18 00 00 + +fmaddsd xmm1, xmm1, xmm2, xmm3 ; 0F 24 03 323 10 /or/ 0F 24 03 332 18 +fmaddsd xmm1, xmm1, xmm2, [0] ; 0F 24 03 026 10 00 00 +fmaddsd xmm1, xmm1, xmm2, qword [0] ; 0F 24 03 026 10 00 00 +fmaddsd xmm1, xmm1, [0], xmm3 ; 0F 24 03 036 18 00 00 +fmaddsd xmm1, xmm1, qword [0], xmm3 ; 0F 24 03 036 18 00 00 +fmaddsd xmm1, xmm2, xmm3, xmm1 ; 0F 24 07 323 10 /or/ 0F 24 07 332 18 +fmaddsd xmm1, xmm2, [0], xmm1 ; 0F 24 07 026 10 00 00 +fmaddsd xmm1, xmm2, qword [0], xmm1 ; 0F 24 07 026 10 00 00 +fmaddsd xmm1, [0], xmm3, xmm1 ; 0F 24 07 036 18 00 00 +fmaddsd xmm1, qword [0], xmm3, xmm1 ; 0F 24 07 036 18 00 00 + +fmaddss xmm1, xmm1, xmm2, xmm3 ; 0F 24 02 323 10 /or/ 0F 24 02 332 18 +fmaddss xmm1, xmm1, xmm2, [0] ; 0F 24 02 026 10 00 00 +fmaddss xmm1, xmm1, xmm2, dword [0] ; 0F 24 02 026 10 00 00 +fmaddss xmm1, xmm1, [0], xmm3 ; 0F 24 02 036 18 00 00 +fmaddss xmm1, xmm1, dword [0], xmm3 ; 0F 24 02 036 18 00 00 +fmaddss xmm1, xmm2, xmm3, xmm1 ; 0F 24 06 323 10 /or/ 0F 24 06 332 18 +fmaddss xmm1, xmm2, [0], xmm1 ; 0F 24 06 026 10 00 00 +fmaddss xmm1, xmm2, dword [0], xmm1 ; 0F 24 06 026 10 00 00 +fmaddss xmm1, [0], xmm3, xmm1 ; 0F 24 06 036 18 00 00 +fmaddss xmm1, dword [0], xmm3, xmm1 ; 0F 24 06 036 18 00 00 + +fmsubpd xmm1, xmm1, xmm2, xmm3 ; 0F 24 09 323 10 /or/ 0F 24 09 332 18 +fmsubpd xmm1, xmm1, xmm2, [0] ; 0F 24 09 026 10 00 00 +fmsubpd xmm1, xmm1, xmm2, dqword [0] ; 0F 24 09 026 10 00 00 +fmsubpd xmm1, xmm1, [0], xmm3 ; 0F 24 09 036 18 00 00 +fmsubpd xmm1, xmm1, dqword [0], xmm3 ; 0F 24 09 036 18 00 00 +fmsubpd xmm1, xmm2, xmm3, xmm1 ; 0F 24 0D 323 10 /or/ 0F 24 0D 332 18 +fmsubpd xmm1, xmm2, [0], xmm1 ; 0F 24 0D 026 10 00 00 +fmsubpd xmm1, xmm2, dqword [0], xmm1 ; 0F 24 
0D 026 10 00 00 +fmsubpd xmm1, [0], xmm3, xmm1 ; 0F 24 0D 036 18 00 00 +fmsubpd xmm1, dqword [0], xmm3, xmm1 ; 0F 24 0D 036 18 00 00 + +fmsubps xmm1, xmm1, xmm2, xmm3 ; 0F 24 08 323 10 /or/ 0F 24 08 332 18 +fmsubps xmm1, xmm1, xmm2, [0] ; 0F 24 08 026 10 00 00 +fmsubps xmm1, xmm1, xmm2, dqword [0] ; 0F 24 08 026 10 00 00 +fmsubps xmm1, xmm1, [0], xmm3 ; 0F 24 08 036 18 00 00 +fmsubps xmm1, xmm1, dqword [0], xmm3 ; 0F 24 08 036 18 00 00 +fmsubps xmm1, xmm2, xmm3, xmm1 ; 0F 24 0C 323 10 /or/ 0F 24 0C 332 18 +fmsubps xmm1, xmm2, [0], xmm1 ; 0F 24 0C 026 10 00 00 +fmsubps xmm1, xmm2, dqword [0], xmm1 ; 0F 24 0C 026 10 00 00 +fmsubps xmm1, [0], xmm3, xmm1 ; 0F 24 0C 036 18 00 00 +fmsubps xmm1, dqword [0], xmm3, xmm1 ; 0F 24 0C 036 18 00 00 + +fmsubsd xmm1, xmm1, xmm2, xmm3 ; 0F 24 0B 323 10 /or/ 0F 24 0B 332 18 +fmsubsd xmm1, xmm1, xmm2, [0] ; 0F 24 0B 026 10 00 00 +fmsubsd xmm1, xmm1, xmm2, qword [0] ; 0F 24 0B 026 10 00 00 +fmsubsd xmm1, xmm1, [0], xmm3 ; 0F 24 0B 036 18 00 00 +fmsubsd xmm1, xmm1, qword [0], xmm3 ; 0F 24 0B 036 18 00 00 +fmsubsd xmm1, xmm2, xmm3, xmm1 ; 0F 24 0F 323 10 /or/ 0F 24 0F 332 18 +fmsubsd xmm1, xmm2, [0], xmm1 ; 0F 24 0F 026 10 00 00 +fmsubsd xmm1, xmm2, qword [0], xmm1 ; 0F 24 0F 026 10 00 00 +fmsubsd xmm1, [0], xmm3, xmm1 ; 0F 24 0F 036 18 00 00 +fmsubsd xmm1, qword [0], xmm3, xmm1 ; 0F 24 0F 036 18 00 00 + +fmsubss xmm1, xmm1, xmm2, xmm3 ; 0F 24 0A 323 10 /or/ 0F 24 0A 332 18 +fmsubss xmm1, xmm1, xmm2, [0] ; 0F 24 0A 026 10 00 00 +fmsubss xmm1, xmm1, xmm2, dword [0] ; 0F 24 0A 026 10 00 00 +fmsubss xmm1, xmm1, [0], xmm3 ; 0F 24 0A 036 18 00 00 +fmsubss xmm1, xmm1, dword [0], xmm3 ; 0F 24 0A 036 18 00 00 +fmsubss xmm1, xmm2, xmm3, xmm1 ; 0F 24 0E 323 10 /or/ 0F 24 0E 332 18 +fmsubss xmm1, xmm2, [0], xmm1 ; 0F 24 0E 026 10 00 00 +fmsubss xmm1, xmm2, dword [0], xmm1 ; 0F 24 0E 026 10 00 00 +fmsubss xmm1, [0], xmm3, xmm1 ; 0F 24 0E 036 18 00 00 +fmsubss xmm1, dword [0], xmm3, xmm1 ; 0F 24 0E 036 18 00 00 + +fnmaddpd xmm1, xmm1, xmm2, xmm3 ; 
0F 24 11 323 10 /or/ 0F 24 11 332 18 +fnmaddpd xmm1, xmm1, xmm2, [0] ; 0F 24 11 026 10 00 00 +fnmaddpd xmm1, xmm1, xmm2, dqword [0] ; 0F 24 11 026 10 00 00 +fnmaddpd xmm1, xmm1, [0], xmm3 ; 0F 24 11 036 18 00 00 +fnmaddpd xmm1, xmm1, dqword [0], xmm3 ; 0F 24 11 036 18 00 00 +fnmaddpd xmm1, xmm2, xmm3, xmm1 ; 0F 24 15 323 10 /or/ 0F 24 15 332 18 +fnmaddpd xmm1, xmm2, [0], xmm1 ; 0F 24 15 026 10 00 00 +fnmaddpd xmm1, xmm2, dqword [0], xmm1 ; 0F 24 15 026 10 00 00 +fnmaddpd xmm1, [0], xmm3, xmm1 ; 0F 24 15 036 18 00 00 +fnmaddpd xmm1, dqword [0], xmm3, xmm1 ; 0F 24 15 036 18 00 00 + +fnmaddps xmm1, xmm1, xmm2, xmm3 ; 0F 24 10 323 10 /or/ 0F 24 10 332 18 +fnmaddps xmm1, xmm1, xmm2, [0] ; 0F 24 10 026 10 00 00 +fnmaddps xmm1, xmm1, xmm2, dqword [0] ; 0F 24 10 026 10 00 00 +fnmaddps xmm1, xmm1, [0], xmm3 ; 0F 24 10 036 18 00 00 +fnmaddps xmm1, xmm1, dqword [0], xmm3 ; 0F 24 10 036 18 00 00 +fnmaddps xmm1, xmm2, xmm3, xmm1 ; 0F 24 14 323 10 /or/ 0F 24 14 332 18 +fnmaddps xmm1, xmm2, [0], xmm1 ; 0F 24 14 026 10 00 00 +fnmaddps xmm1, xmm2, dqword [0], xmm1 ; 0F 24 14 026 10 00 00 +fnmaddps xmm1, [0], xmm3, xmm1 ; 0F 24 14 036 18 00 00 +fnmaddps xmm1, dqword [0], xmm3, xmm1 ; 0F 24 14 036 18 00 00 + +fnmaddsd xmm1, xmm1, xmm2, xmm3 ; 0F 24 13 323 10 /or/ 0F 24 13 332 18 +fnmaddsd xmm1, xmm1, xmm2, [0] ; 0F 24 13 026 10 00 00 +fnmaddsd xmm1, xmm1, xmm2, qword [0] ; 0F 24 13 026 10 00 00 +fnmaddsd xmm1, xmm1, [0], xmm3 ; 0F 24 13 036 18 00 00 +fnmaddsd xmm1, xmm1, qword [0], xmm3 ; 0F 24 13 036 18 00 00 +fnmaddsd xmm1, xmm2, xmm3, xmm1 ; 0F 24 17 323 10 /or/ 0F 24 17 332 18 +fnmaddsd xmm1, xmm2, [0], xmm1 ; 0F 24 17 026 10 00 00 +fnmaddsd xmm1, xmm2, qword [0], xmm1 ; 0F 24 17 026 10 00 00 +fnmaddsd xmm1, [0], xmm3, xmm1 ; 0F 24 17 036 18 00 00 +fnmaddsd xmm1, qword [0], xmm3, xmm1 ; 0F 24 17 036 18 00 00 + +fnmaddss xmm1, xmm1, xmm2, xmm3 ; 0F 24 12 323 10 /or/ 0F 24 12 332 18 +fnmaddss xmm1, xmm1, xmm2, [0] ; 0F 24 12 026 10 00 00 +fnmaddss xmm1, xmm1, xmm2, dword [0] ; 0F 
24 12 026 10 00 00 +fnmaddss xmm1, xmm1, [0], xmm3 ; 0F 24 12 036 18 00 00 +fnmaddss xmm1, xmm1, dword [0], xmm3 ; 0F 24 12 036 18 00 00 +fnmaddss xmm1, xmm2, xmm3, xmm1 ; 0F 24 16 323 10 /or/ 0F 24 16 332 18 +fnmaddss xmm1, xmm2, [0], xmm1 ; 0F 24 16 026 10 00 00 +fnmaddss xmm1, xmm2, dword [0], xmm1 ; 0F 24 16 026 10 00 00 +fnmaddss xmm1, [0], xmm3, xmm1 ; 0F 24 16 036 18 00 00 +fnmaddss xmm1, dword [0], xmm3, xmm1 ; 0F 24 16 036 18 00 00 + +fnmsubpd xmm1, xmm1, xmm2, xmm3 ; 0F 24 19 323 10 /or/ 0F 24 19 332 18 +fnmsubpd xmm1, xmm1, xmm2, [0] ; 0F 24 19 026 10 00 00 +fnmsubpd xmm1, xmm1, xmm2, dqword [0] ; 0F 24 19 026 10 00 00 +fnmsubpd xmm1, xmm1, [0], xmm3 ; 0F 24 19 036 18 00 00 +fnmsubpd xmm1, xmm1, dqword [0], xmm3 ; 0F 24 19 036 18 00 00 +fnmsubpd xmm1, xmm2, xmm3, xmm1 ; 0F 24 1D 323 10 /or/ 0F 24 1D 332 18 +fnmsubpd xmm1, xmm2, [0], xmm1 ; 0F 24 1D 026 10 00 00 +fnmsubpd xmm1, xmm2, dqword [0], xmm1 ; 0F 24 1D 026 10 00 00 +fnmsubpd xmm1, [0], xmm3, xmm1 ; 0F 24 1D 036 18 00 00 +fnmsubpd xmm1, dqword [0], xmm3, xmm1 ; 0F 24 1D 036 18 00 00 + +fnmsubps xmm1, xmm1, xmm2, xmm3 ; 0F 24 18 323 10 /or/ 0F 24 18 332 18 +fnmsubps xmm1, xmm1, xmm2, [0] ; 0F 24 18 026 10 00 00 +fnmsubps xmm1, xmm1, xmm2, dqword [0] ; 0F 24 18 026 10 00 00 +fnmsubps xmm1, xmm1, [0], xmm3 ; 0F 24 18 036 18 00 00 +fnmsubps xmm1, xmm1, dqword [0], xmm3 ; 0F 24 18 036 18 00 00 +fnmsubps xmm1, xmm2, xmm3, xmm1 ; 0F 24 1C 323 10 /or/ 0F 24 1C 332 18 +fnmsubps xmm1, xmm2, [0], xmm1 ; 0F 24 1C 026 10 00 00 +fnmsubps xmm1, xmm2, dqword [0], xmm1 ; 0F 24 1C 026 10 00 00 +fnmsubps xmm1, [0], xmm3, xmm1 ; 0F 24 1C 036 18 00 00 +fnmsubps xmm1, dqword [0], xmm3, xmm1 ; 0F 24 1C 036 18 00 00 + +fnmsubsd xmm1, xmm1, xmm2, xmm3 ; 0F 24 1B 323 10 /or/ 0F 24 1B 332 18 +fnmsubsd xmm1, xmm1, xmm2, [0] ; 0F 24 1B 026 10 00 00 +fnmsubsd xmm1, xmm1, xmm2, qword [0] ; 0F 24 1B 026 10 00 00 +fnmsubsd xmm1, xmm1, [0], xmm3 ; 0F 24 1B 036 18 00 00 +fnmsubsd xmm1, xmm1, qword [0], xmm3 ; 0F 24 1B 036 18 00 00 
+fnmsubsd xmm1, xmm2, xmm3, xmm1 ; 0F 24 1F 323 10 /or/ 0F 24 1F 332 18 +fnmsubsd xmm1, xmm2, [0], xmm1 ; 0F 24 1F 026 10 00 00 +fnmsubsd xmm1, xmm2, qword [0], xmm1 ; 0F 24 1F 026 10 00 00 +fnmsubsd xmm1, [0], xmm3, xmm1 ; 0F 24 1F 036 18 00 00 +fnmsubsd xmm1, qword [0], xmm3, xmm1 ; 0F 24 1F 036 18 00 00 + +fnmsubss xmm1, xmm1, xmm2, xmm3 ; 0F 24 1A 323 10 /or/ 0F 24 1A 332 18 +fnmsubss xmm1, xmm1, xmm2, [0] ; 0F 24 1A 026 10 00 00 +fnmsubss xmm1, xmm1, xmm2, dword [0] ; 0F 24 1A 026 10 00 00 +fnmsubss xmm1, xmm1, [0], xmm3 ; 0F 24 1A 036 18 00 00 +fnmsubss xmm1, xmm1, dword [0], xmm3 ; 0F 24 1A 036 18 00 00 +fnmsubss xmm1, xmm2, xmm3, xmm1 ; 0F 24 1E 323 10 /or/ 0F 24 1E 332 18 +fnmsubss xmm1, xmm2, [0], xmm1 ; 0F 24 1E 026 10 00 00 +fnmsubss xmm1, xmm2, dword [0], xmm1 ; 0F 24 1E 026 10 00 00 +fnmsubss xmm1, [0], xmm3, xmm1 ; 0F 24 1E 036 18 00 00 +fnmsubss xmm1, dword [0], xmm3, xmm1 ; 0F 24 1E 036 18 00 00 + +frczpd xmm1, xmm2 ; 0F 7A 11 312 +frczpd xmm1, [0] ; 0F 7A 11 016 00 00 +frczpd xmm1, dqword [0] ; 0F 7A 11 016 00 00 + +frczps xmm1, xmm2 ; 0F 7A 10 312 +frczps xmm1, [0] ; 0F 7A 10 016 00 00 +frczps xmm1, dqword [0] ; 0F 7A 10 016 00 00 + +frczsd xmm1, xmm2 ; 0F 7A 13 312 +frczsd xmm1, [0] ; 0F 7A 13 016 00 00 +frczsd xmm1, qword [0] ; 0F 7A 13 016 00 00 + +frczss xmm1, xmm2 ; 0F 7A 12 312 +frczss xmm1, [0] ; 0F 7A 12 016 00 00 +frczss xmm1, dword [0] ; 0F 7A 12 016 00 00 + +pcmov xmm1, xmm1, xmm2, xmm3 ; 0F 24 22 323 10 /or/ 0F 24 22 332 18 +pcmov xmm1, xmm1, xmm2, [0] ; 0F 24 22 026 10 00 00 +pcmov xmm1, xmm1, xmm2, dqword [0] ; 0F 24 22 026 10 00 00 +pcmov xmm1, xmm1, [0], xmm3 ; 0F 24 22 036 18 00 00 +pcmov xmm1, xmm1, dqword [0], xmm3 ; 0F 24 22 036 18 00 00 +pcmov xmm1, xmm2, xmm3, xmm1 ; 0F 24 26 323 10 /or/ 0F 24 26 332 18 +pcmov xmm1, xmm2, [0], xmm1 ; 0F 24 26 026 10 00 00 +pcmov xmm1, xmm2, dqword [0], xmm1 ; 0F 24 26 026 10 00 00 +pcmov xmm1, [0], xmm3, xmm1 ; 0F 24 26 036 18 00 00 +pcmov xmm1, dqword [0], xmm3, xmm1 ; 0F 24 26 036 18 00 00 
+ +pcomb xmm1, xmm4, xmm7, 5 ; 0F 25 4C 347 10 05 +pcomb xmm2, xmm5, [0], byte 5 ; 0F 25 4C 056 20 00 00 05 +pcomb xmm3, xmm6, dqword [0], 5 ; 0F 25 4C 066 30 00 00 05 + +pcomd xmm1, xmm4, xmm7, 5 ; 0F 25 4E 347 10 05 +pcomd xmm2, xmm5, [0], byte 5 ; 0F 25 4E 056 20 00 00 05 +pcomd xmm3, xmm6, dqword [0], 5 ; 0F 25 4E 066 30 00 00 05 + +pcomq xmm1, xmm4, xmm7, 5 ; 0F 25 4F 347 10 05 +pcomq xmm2, xmm5, [0], byte 5 ; 0F 25 4F 056 20 00 00 05 +pcomq xmm3, xmm6, dqword [0], 5 ; 0F 25 4F 066 30 00 00 05 + +pcomub xmm1, xmm4, xmm7, 5 ; 0F 25 6C 347 10 05 +pcomub xmm2, xmm5, [0], byte 5 ; 0F 25 6C 056 20 00 00 05 +pcomub xmm3, xmm6, dqword [0], 5 ; 0F 25 6C 066 30 00 00 05 + +pcomud xmm1, xmm4, xmm7, 5 ; 0F 25 6E 347 10 05 +pcomud xmm2, xmm5, [0], byte 5 ; 0F 25 6E 056 20 00 00 05 +pcomud xmm3, xmm6, dqword [0], 5 ; 0F 25 6E 066 30 00 00 05 + +pcomuq xmm1, xmm4, xmm7, 5 ; 0F 25 6F 347 10 05 +pcomuq xmm2, xmm5, [0], byte 5 ; 0F 25 6F 056 20 00 00 05 +pcomuq xmm3, xmm6, dqword [0], 5 ; 0F 25 6F 066 30 00 00 05 + +pcomuw xmm1, xmm4, xmm7, 5 ; 0F 25 6D 347 10 05 +pcomuw xmm2, xmm5, [0], byte 5 ; 0F 25 6D 056 20 00 00 05 +pcomuw xmm3, xmm6, dqword [0], 5 ; 0F 25 6D 066 30 00 00 05 + +pcomw xmm1, xmm4, xmm7, 5 ; 0F 25 4D 347 10 05 +pcomw xmm2, xmm5, [0], byte 5 ; 0F 25 4D 056 20 00 00 05 +pcomw xmm3, xmm6, dqword [0], 5 ; 0F 25 4D 066 30 00 00 05 + +permpd xmm1, xmm1, xmm2, xmm3 ; 0F 24 21 323 10 /or/ 0F 24 21 332 18 +permpd xmm1, xmm1, xmm2, [0] ; 0F 24 21 026 10 00 00 +permpd xmm1, xmm1, xmm2, dqword [0] ; 0F 24 21 026 10 00 00 +permpd xmm1, xmm1, [0], xmm3 ; 0F 24 21 036 18 00 00 +permpd xmm1, xmm1, dqword [0], xmm3 ; 0F 24 21 036 18 00 00 +permpd xmm1, xmm2, xmm3, xmm1 ; 0F 24 25 323 10 /or/ 0F 24 25 332 18 +permpd xmm1, xmm2, [0], xmm1 ; 0F 24 25 026 10 00 00 +permpd xmm1, xmm2, dqword [0], xmm1 ; 0F 24 25 026 10 00 00 +permpd xmm1, [0], xmm3, xmm1 ; 0F 24 25 036 18 00 00 +permpd xmm1, dqword [0], xmm3, xmm1 ; 0F 24 25 036 18 00 00 + +permps xmm1, xmm1, xmm2, xmm3 ; 0F 24 
20 323 10 /or/ 0F 24 20 332 18 +permps xmm1, xmm1, xmm2, [0] ; 0F 24 20 026 10 00 00 +permps xmm1, xmm1, xmm2, dqword [0] ; 0F 24 20 026 10 00 00 +permps xmm1, xmm1, [0], xmm3 ; 0F 24 20 036 18 00 00 +permps xmm1, xmm1, dqword [0], xmm3 ; 0F 24 20 036 18 00 00 +permps xmm1, xmm2, xmm3, xmm1 ; 0F 24 24 323 10 /or/ 0F 24 24 332 18 +permps xmm1, xmm2, [0], xmm1 ; 0F 24 24 026 10 00 00 +permps xmm1, xmm2, dqword [0], xmm1 ; 0F 24 24 026 10 00 00 +permps xmm1, [0], xmm3, xmm1 ; 0F 24 24 036 18 00 00 +permps xmm1, dqword [0], xmm3, xmm1 ; 0F 24 24 036 18 00 00 + +phaddbd xmm1, xmm2 ; 0F 7A 42 312 +phaddbd xmm1, [0] ; 0F 7A 42 016 00 00 +phaddbd xmm1, dqword [0] ; 0F 7A 42 016 00 00 + +phaddbq xmm1, xmm2 ; 0F 7A 43 312 +phaddbq xmm1, [0] ; 0F 7A 43 016 00 00 +phaddbq xmm1, dqword [0] ; 0F 7A 43 016 00 00 + +phaddbw xmm1, xmm2 ; 0F 7A 41 312 +phaddbw xmm1, [0] ; 0F 7A 41 016 00 00 +phaddbw xmm1, dqword [0] ; 0F 7A 41 016 00 00 + +phadddq xmm1, xmm2 ; 0F 7A 4B 312 +phadddq xmm1, [0] ; 0F 7A 4B 016 00 00 +phadddq xmm1, dqword [0] ; 0F 7A 4B 016 00 00 + +phaddubd xmm1, xmm2 ; 0F 7A 52 312 +phaddubd xmm1, [0] ; 0F 7A 52 016 00 00 +phaddubd xmm1, dqword [0] ; 0F 7A 52 016 00 00 + +phaddubq xmm1, xmm2 ; 0F 7A 53 312 +phaddubq xmm1, [0] ; 0F 7A 53 016 00 00 +phaddubq xmm1, dqword [0] ; 0F 7A 53 016 00 00 + +phaddubw xmm1, xmm2 ; 0F 7A 51 312 +phaddubw xmm1, [0] ; 0F 7A 51 016 00 00 +phaddubw xmm1, dqword [0] ; 0F 7A 51 016 00 00 + +phaddudq xmm1, xmm2 ; 0F 7A 5B 312 +phaddudq xmm1, [0] ; 0F 7A 5B 016 00 00 +phaddudq xmm1, dqword [0] ; 0F 7A 5B 016 00 00 + +phadduwd xmm1, xmm2 ; 0F 7A 56 312 +phadduwd xmm1, [0] ; 0F 7A 56 016 00 00 +phadduwd xmm1, dqword [0] ; 0F 7A 56 016 00 00 + +phadduwq xmm1, xmm2 ; 0F 7A 57 312 +phadduwq xmm1, [0] ; 0F 7A 57 016 00 00 +phadduwq xmm1, dqword [0] ; 0F 7A 57 016 00 00 + +phaddwd xmm1, xmm2 ; 0F 7A 46 312 +phaddwd xmm1, [0] ; 0F 7A 46 016 00 00 +phaddwd xmm1, dqword [0] ; 0F 7A 46 016 00 00 + +phaddwq xmm1, xmm2 ; 0F 7A 47 312 +phaddwq xmm1, [0] 
; 0F 7A 47 016 00 00 +phaddwq xmm1, dqword [0] ; 0F 7A 47 016 00 00 + +phsubbw xmm1, xmm2 ; 0F 7A 61 312 +phsubbw xmm1, [0] ; 0F 7A 61 016 00 00 +phsubbw xmm1, dqword [0] ; 0F 7A 61 016 00 00 + +phsubdq xmm1, xmm2 ; 0F 7A 63 312 +phsubdq xmm1, [0] ; 0F 7A 63 016 00 00 +phsubdq xmm1, dqword [0] ; 0F 7A 63 016 00 00 + +phsubwd xmm1, xmm2 ; 0F 7A 62 312 +phsubwd xmm1, [0] ; 0F 7A 62 016 00 00 +phsubwd xmm1, dqword [0] ; 0F 7A 62 016 00 00 + +pmacsdd xmm1, xmm4, xmm7, xmm1 ; 0F 24 9E 347 10 +pmacsdd xmm2, xmm5, [0], xmm2 ; 0F 24 9E 056 20 00 00 +pmacsdd xmm3, xmm6, dqword [0], xmm3 ; 0F 24 9E 066 30 00 00 + +pmacsdqh xmm1, xmm4, xmm7, xmm1 ; 0F 24 9F 347 10 +pmacsdqh xmm2, xmm5, [0], xmm2 ; 0F 24 9F 056 20 00 00 +pmacsdqh xmm3, xmm6, dqword [0], xmm3 ; 0F 24 9F 066 30 00 00 + +pmacsdql xmm1, xmm4, xmm7, xmm1 ; 0F 24 97 347 10 +pmacsdql xmm2, xmm5, [0], xmm2 ; 0F 24 97 056 20 00 00 +pmacsdql xmm3, xmm6, dqword [0], xmm3 ; 0F 24 97 066 30 00 00 + +pmacssdd xmm1, xmm4, xmm7, xmm1 ; 0F 24 8E 347 10 +pmacssdd xmm2, xmm5, [0], xmm2 ; 0F 24 8E 056 20 00 00 +pmacssdd xmm3, xmm6, dqword [0], xmm3 ; 0F 24 8E 066 30 00 00 + +pmacssdqh xmm1, xmm4, xmm7, xmm1 ; 0F 24 8F 347 10 +pmacssdqh xmm2, xmm5, [0], xmm2 ; 0F 24 8F 056 20 00 00 +pmacssdqh xmm3, xmm6, dqword [0], xmm3 ; 0F 24 8F 066 30 00 00 + +pmacssdql xmm1, xmm4, xmm7, xmm1 ; 0F 24 87 347 10 +pmacssdql xmm2, xmm5, [0], xmm2 ; 0F 24 87 056 20 00 00 +pmacssdql xmm3, xmm6, dqword [0], xmm3 ; 0F 24 87 066 30 00 00 + +pmacsswd xmm1, xmm4, xmm7, xmm1 ; 0F 24 86 347 10 +pmacsswd xmm2, xmm5, [0], xmm2 ; 0F 24 86 056 20 00 00 +pmacsswd xmm3, xmm6, dqword [0], xmm3 ; 0F 24 86 066 30 00 00 + +pmacssww xmm1, xmm4, xmm7, xmm1 ; 0F 24 85 347 10 +pmacssww xmm2, xmm5, [0], xmm2 ; 0F 24 85 056 20 00 00 +pmacssww xmm3, xmm6, dqword [0], xmm3 ; 0F 24 85 066 30 00 00 + +pmacswd xmm1, xmm4, xmm7, xmm1 ; 0F 24 96 347 10 +pmacswd xmm2, xmm5, [0], xmm2 ; 0F 24 96 056 20 00 00 +pmacswd xmm3, xmm6, dqword [0], xmm3 ; 0F 24 96 066 30 00 00 + +pmacsww 
xmm1, xmm4, xmm7, xmm1 ; 0F 24 95 347 10 +pmacsww xmm2, xmm5, [0], xmm2 ; 0F 24 95 056 20 00 00 +pmacsww xmm3, xmm6, dqword [0], xmm3 ; 0F 24 95 066 30 00 00 + +pmadcsswd xmm1, xmm4, xmm7, xmm1 ; 0F 24 A6 347 10 +pmadcsswd xmm2, xmm5, [0], xmm2 ; 0F 24 A6 056 20 00 00 +pmadcsswd xmm3, xmm6, dqword [0], xmm3 ; 0F 24 A6 066 30 00 00 + +pmadcswd xmm1, xmm4, xmm7, xmm1 ; 0F 24 B6 347 10 +pmadcswd xmm2, xmm5, [0], xmm2 ; 0F 24 B6 056 20 00 00 +pmadcswd xmm3, xmm6, dqword [0], xmm3 ; 0F 24 B6 066 30 00 00 + +pperm xmm1, xmm1, xmm2, xmm3 ; 0F 24 23 323 10 /or/ 0F 24 23 332 18 +pperm xmm1, xmm1, xmm2, [0] ; 0F 24 23 026 10 00 00 +pperm xmm1, xmm1, xmm2, dqword [0] ; 0F 24 23 026 10 00 00 +pperm xmm1, xmm1, [0], xmm3 ; 0F 24 23 036 18 00 00 +pperm xmm1, xmm1, dqword [0], xmm3 ; 0F 24 23 036 18 00 00 +pperm xmm1, xmm2, xmm3, xmm1 ; 0F 24 27 323 10 /or/ 0F 24 27 332 18 +pperm xmm1, xmm2, [0], xmm1 ; 0F 24 27 026 10 00 00 +pperm xmm1, xmm2, dqword [0], xmm1 ; 0F 24 27 026 10 00 00 +pperm xmm1, [0], xmm3, xmm1 ; 0F 24 27 036 18 00 00 +pperm xmm1, dqword [0], xmm3, xmm1 ; 0F 24 27 036 18 00 00 + +protb xmm1, xmm2, xmm3 ; 0F 24 40 323 10 /or/ 0F 24 40 332 18 +protb xmm1, xmm2, [0] ; 0F 24 40 026 10 00 00 +protb xmm1, xmm2, dqword [0] ; 0F 24 40 026 10 00 00 +protb xmm1, [0], xmm3 ; 0F 24 40 036 18 00 00 +protb xmm1, dqword [0], xmm3 ; 0F 24 40 036 18 00 00 +protb xmm1, xmm2, byte 5 ; 0F 7B 40 312 05 +protb xmm1, [0], byte 5 ; 0F 7B 40 016 00 00 05 +protb xmm1, dqword [0], 5 ; 0F 7B 40 016 00 00 05 + +protd xmm1, xmm2, xmm3 ; 0F 24 42 323 10 /or/ 0F 24 42 332 18 +protd xmm1, xmm2, [0] ; 0F 24 42 026 10 00 00 +protd xmm1, xmm2, dqword [0] ; 0F 24 42 026 10 00 00 +protd xmm1, [0], xmm3 ; 0F 24 42 036 18 00 00 +protd xmm1, dqword [0], xmm3 ; 0F 24 42 036 18 00 00 +protd xmm1, xmm2, byte 5 ; 0F 7B 42 312 05 +protd xmm1, [0], byte 5 ; 0F 7B 42 016 00 00 05 +protd xmm1, dqword [0], 5 ; 0F 7B 42 016 00 00 05 + +protq xmm1, xmm2, xmm3 ; 0F 24 43 323 10 /or/ 0F 24 43 332 18 +protq xmm1, 
xmm2, [0] ; 0F 24 43 026 10 00 00 +protq xmm1, xmm2, dqword [0] ; 0F 24 43 026 10 00 00 +protq xmm1, [0], xmm3 ; 0F 24 43 036 18 00 00 +protq xmm1, dqword [0], xmm3 ; 0F 24 43 036 18 00 00 +protq xmm1, xmm2, byte 5 ; 0F 7B 43 312 05 +protq xmm1, [0], byte 5 ; 0F 7B 43 016 00 00 05 +protq xmm1, dqword [0], 5 ; 0F 7B 43 016 00 00 05 + +protw xmm1, xmm2, xmm3 ; 0F 24 41 323 10 /or/ 0F 24 41 332 18 +protw xmm1, xmm2, [0] ; 0F 24 41 026 10 00 00 +protw xmm1, xmm2, dqword [0] ; 0F 24 41 026 10 00 00 +protw xmm1, [0], xmm3 ; 0F 24 41 036 18 00 00 +protw xmm1, dqword [0], xmm3 ; 0F 24 41 036 18 00 00 +protw xmm1, xmm2, byte 5 ; 0F 7B 41 312 05 +protw xmm1, [0], byte 5 ; 0F 7B 41 016 00 00 05 +protw xmm1, dqword [0], 5 ; 0F 7B 41 016 00 00 05 + +pshab xmm1, xmm2, xmm3 ; 0F 24 48 323 10 /or/ 0F 24 48 332 18 +pshab xmm1, xmm2, [0] ; 0F 24 48 026 10 00 00 +pshab xmm1, xmm2, dqword [0] ; 0F 24 48 026 10 00 00 +pshab xmm1, [0], xmm3 ; 0F 24 48 036 18 00 00 +pshab xmm1, dqword [0], xmm3 ; 0F 24 48 036 18 00 00 + +pshad xmm1, xmm2, xmm3 ; 0F 24 4A 323 10 /or/ 0F 24 4A 332 18 +pshad xmm1, xmm2, [0] ; 0F 24 4A 026 10 00 00 +pshad xmm1, xmm2, dqword [0] ; 0F 24 4A 026 10 00 00 +pshad xmm1, [0], xmm3 ; 0F 24 4A 036 18 00 00 +pshad xmm1, dqword [0], xmm3 ; 0F 24 4A 036 18 00 00 + +pshaq xmm1, xmm2, xmm3 ; 0F 24 4B 323 10 /or/ 0F 24 4B 332 18 +pshaq xmm1, xmm2, [0] ; 0F 24 4B 026 10 00 00 +pshaq xmm1, xmm2, dqword [0] ; 0F 24 4B 026 10 00 00 +pshaq xmm1, [0], xmm3 ; 0F 24 4B 036 18 00 00 +pshaq xmm1, dqword [0], xmm3 ; 0F 24 4B 036 18 00 00 + +pshaw xmm1, xmm2, xmm3 ; 0F 24 49 323 10 /or/ 0F 24 49 332 18 +pshaw xmm1, xmm2, [0] ; 0F 24 49 026 10 00 00 +pshaw xmm1, xmm2, dqword [0] ; 0F 24 49 026 10 00 00 +pshaw xmm1, [0], xmm3 ; 0F 24 49 036 18 00 00 +pshaw xmm1, dqword [0], xmm3 ; 0F 24 49 036 18 00 00 + +pshlb xmm1, xmm2, xmm3 ; 0F 24 44 323 10 /or/ 0F 24 44 332 18 +pshlb xmm1, xmm2, [0] ; 0F 24 44 026 10 00 00 +pshlb xmm1, xmm2, dqword [0] ; 0F 24 44 026 10 00 00 +pshlb xmm1, [0], 
xmm3 ; 0F 24 44 036 18 00 00 +pshlb xmm1, dqword [0], xmm3 ; 0F 24 44 036 18 00 00 + +pshld xmm1, xmm2, xmm3 ; 0F 24 46 323 10 /or/ 0F 24 46 332 18 +pshld xmm1, xmm2, [0] ; 0F 24 46 026 10 00 00 +pshld xmm1, xmm2, dqword [0] ; 0F 24 46 026 10 00 00 +pshld xmm1, [0], xmm3 ; 0F 24 46 036 18 00 00 +pshld xmm1, dqword [0], xmm3 ; 0F 24 46 036 18 00 00 + +pshlq xmm1, xmm2, xmm3 ; 0F 24 47 323 10 /or/ 0F 24 47 332 18 +pshlq xmm1, xmm2, [0] ; 0F 24 47 026 10 00 00 +pshlq xmm1, xmm2, dqword [0] ; 0F 24 47 026 10 00 00 +pshlq xmm1, [0], xmm3 ; 0F 24 47 036 18 00 00 +pshlq xmm1, dqword [0], xmm3 ; 0F 24 47 036 18 00 00 + +pshlw xmm1, xmm2, xmm3 ; 0F 24 45 323 10 /or/ 0F 24 45 332 18 +pshlw xmm1, xmm2, [0] ; 0F 24 45 026 10 00 00 +pshlw xmm1, xmm2, dqword [0] ; 0F 24 45 026 10 00 00 +pshlw xmm1, [0], xmm3 ; 0F 24 45 036 18 00 00 +pshlw xmm1, dqword [0], xmm3 ; 0F 24 45 036 18 00 00 + +; SSE5 instructions that are also SSE4.1 instructions + +ptest xmm1, xmm2 ; 66 0F 38 17 312 +ptest xmm1, [0] ; 66 0F 38 17 016 00 00 +ptest xmm1, dqword [0] ; 66 0F 38 17 016 00 00 + +roundpd xmm1, xmm2, 5 ; 66 0F 3A 09 312 05 +roundpd xmm1, [0], byte 5 ; 66 0F 3A 09 016 00 00 05 +roundpd xmm1, dqword [0], 5 ; 66 0F 3A 09 016 00 00 05 + +roundps xmm1, xmm2, 5 ; 66 0F 3A 08 312 05 +roundps xmm1, [0], byte 5 ; 66 0F 3A 08 016 00 00 05 +roundps xmm1, dqword [0], 5 ; 66 0F 3A 08 016 00 00 05 + +roundsd xmm1, xmm2, 5 ; 66 0F 3A 0B 312 05 +roundsd xmm1, [0], byte 5 ; 66 0F 3A 0B 016 00 00 05 +roundsd xmm1, qword [0], 5 ; 66 0F 3A 0B 016 00 00 05 + +roundss xmm1, xmm2, 5 ; 66 0F 3A 0A 312 05 +roundss xmm1, [0], byte 5 ; 66 0F 3A 0A 016 00 00 05 +roundss xmm1, dword [0], 5 ; 66 0F 3A 0A 016 00 00 05 + diff --git a/modules/arch/x86/tests/sse5-all.hex b/modules/arch/x86/tests/sse5-all.hex new file mode 100644 index 00000000..1c9edacf --- /dev/null +++ b/modules/arch/x86/tests/sse5-all.hex @@ -0,0 +1,2727 @@ +0f +25 +2d +e7 +10 +05 +0f +25 +2d +2e +20 +00 +00 +05 +0f +25 +2d +36 +30 +00 +00 +05 +0f +25 +2c 
+e7 +10 +05 +0f +25 +2c +2e +20 +00 +00 +05 +0f +25 +2c +36 +30 +00 +00 +05 +0f +25 +2f +e7 +10 +05 +0f +25 +2f +2e +20 +00 +00 +05 +0f +25 +2f +36 +30 +00 +00 +05 +0f +25 +2e +e7 +10 +05 +0f +25 +2e +2e +20 +00 +00 +05 +0f +25 +2e +36 +30 +00 +00 +05 +0f +7a +30 +cc +0f +7a +30 +16 +00 +00 +0f +7a +30 +1e +00 +00 +0f +7a +31 +e1 +0f +7a +31 +16 +00 +00 +0f +7a +31 +1e +00 +00 +0f +24 +01 +d3 +10 +0f +24 +01 +16 +10 +00 +00 +0f +24 +01 +16 +10 +00 +00 +0f +24 +01 +1e +18 +00 +00 +0f +24 +01 +1e +18 +00 +00 +0f +24 +05 +d3 +10 +0f +24 +05 +16 +10 +00 +00 +0f +24 +05 +16 +10 +00 +00 +0f +24 +05 +1e +18 +00 +00 +0f +24 +05 +1e +18 +00 +00 +0f +24 +00 +d3 +10 +0f +24 +00 +16 +10 +00 +00 +0f +24 +00 +16 +10 +00 +00 +0f +24 +00 +1e +18 +00 +00 +0f +24 +00 +1e +18 +00 +00 +0f +24 +04 +d3 +10 +0f +24 +04 +16 +10 +00 +00 +0f +24 +04 +16 +10 +00 +00 +0f +24 +04 +1e +18 +00 +00 +0f +24 +04 +1e +18 +00 +00 +0f +24 +03 +d3 +10 +0f +24 +03 +16 +10 +00 +00 +0f +24 +03 +16 +10 +00 +00 +0f +24 +03 +1e +18 +00 +00 +0f +24 +03 +1e +18 +00 +00 +0f +24 +07 +d3 +10 +0f +24 +07 +16 +10 +00 +00 +0f +24 +07 +16 +10 +00 +00 +0f +24 +07 +1e +18 +00 +00 +0f +24 +07 +1e +18 +00 +00 +0f +24 +02 +d3 +10 +0f +24 +02 +16 +10 +00 +00 +0f +24 +02 +16 +10 +00 +00 +0f +24 +02 +1e +18 +00 +00 +0f +24 +02 +1e +18 +00 +00 +0f +24 +06 +d3 +10 +0f +24 +06 +16 +10 +00 +00 +0f +24 +06 +16 +10 +00 +00 +0f +24 +06 +1e +18 +00 +00 +0f +24 +06 +1e +18 +00 +00 +0f +24 +09 +d3 +10 +0f +24 +09 +16 +10 +00 +00 +0f +24 +09 +16 +10 +00 +00 +0f +24 +09 +1e +18 +00 +00 +0f +24 +09 +1e +18 +00 +00 +0f +24 +0d +d3 +10 +0f +24 +0d +16 +10 +00 +00 +0f +24 +0d +16 +10 +00 +00 +0f +24 +0d +1e +18 +00 +00 +0f +24 +0d +1e +18 +00 +00 +0f +24 +08 +d3 +10 +0f +24 +08 +16 +10 +00 +00 +0f +24 +08 +16 +10 +00 +00 +0f +24 +08 +1e +18 +00 +00 +0f +24 +08 +1e +18 +00 +00 +0f +24 +0c +d3 +10 +0f +24 +0c +16 +10 +00 +00 +0f +24 +0c +16 +10 +00 +00 +0f +24 +0c +1e +18 +00 +00 +0f +24 +0c +1e +18 +00 +00 +0f +24 +0b +d3 +10 +0f +24 +0b +16 
+10 +00 +00 +0f +24 +0b +16 +10 +00 +00 +0f +24 +0b +1e +18 +00 +00 +0f +24 +0b +1e +18 +00 +00 +0f +24 +0f +d3 +10 +0f +24 +0f +16 +10 +00 +00 +0f +24 +0f +16 +10 +00 +00 +0f +24 +0f +1e +18 +00 +00 +0f +24 +0f +1e +18 +00 +00 +0f +24 +0a +d3 +10 +0f +24 +0a +16 +10 +00 +00 +0f +24 +0a +16 +10 +00 +00 +0f +24 +0a +1e +18 +00 +00 +0f +24 +0a +1e +18 +00 +00 +0f +24 +0e +d3 +10 +0f +24 +0e +16 +10 +00 +00 +0f +24 +0e +16 +10 +00 +00 +0f +24 +0e +1e +18 +00 +00 +0f +24 +0e +1e +18 +00 +00 +0f +24 +11 +d3 +10 +0f +24 +11 +16 +10 +00 +00 +0f +24 +11 +16 +10 +00 +00 +0f +24 +11 +1e +18 +00 +00 +0f +24 +11 +1e +18 +00 +00 +0f +24 +15 +d3 +10 +0f +24 +15 +16 +10 +00 +00 +0f +24 +15 +16 +10 +00 +00 +0f +24 +15 +1e +18 +00 +00 +0f +24 +15 +1e +18 +00 +00 +0f +24 +10 +d3 +10 +0f +24 +10 +16 +10 +00 +00 +0f +24 +10 +16 +10 +00 +00 +0f +24 +10 +1e +18 +00 +00 +0f +24 +10 +1e +18 +00 +00 +0f +24 +14 +d3 +10 +0f +24 +14 +16 +10 +00 +00 +0f +24 +14 +16 +10 +00 +00 +0f +24 +14 +1e +18 +00 +00 +0f +24 +14 +1e +18 +00 +00 +0f +24 +13 +d3 +10 +0f +24 +13 +16 +10 +00 +00 +0f +24 +13 +16 +10 +00 +00 +0f +24 +13 +1e +18 +00 +00 +0f +24 +13 +1e +18 +00 +00 +0f +24 +17 +d3 +10 +0f +24 +17 +16 +10 +00 +00 +0f +24 +17 +16 +10 +00 +00 +0f +24 +17 +1e +18 +00 +00 +0f +24 +17 +1e +18 +00 +00 +0f +24 +12 +d3 +10 +0f +24 +12 +16 +10 +00 +00 +0f +24 +12 +16 +10 +00 +00 +0f +24 +12 +1e +18 +00 +00 +0f +24 +12 +1e +18 +00 +00 +0f +24 +16 +d3 +10 +0f +24 +16 +16 +10 +00 +00 +0f +24 +16 +16 +10 +00 +00 +0f +24 +16 +1e +18 +00 +00 +0f +24 +16 +1e +18 +00 +00 +0f +24 +19 +d3 +10 +0f +24 +19 +16 +10 +00 +00 +0f +24 +19 +16 +10 +00 +00 +0f +24 +19 +1e +18 +00 +00 +0f +24 +19 +1e +18 +00 +00 +0f +24 +1d +d3 +10 +0f +24 +1d +16 +10 +00 +00 +0f +24 +1d +16 +10 +00 +00 +0f +24 +1d +1e +18 +00 +00 +0f +24 +1d +1e +18 +00 +00 +0f +24 +18 +d3 +10 +0f +24 +18 +16 +10 +00 +00 +0f +24 +18 +16 +10 +00 +00 +0f +24 +18 +1e +18 +00 +00 +0f +24 +18 +1e +18 +00 +00 +0f +24 +1c +d3 +10 +0f +24 +1c +16 +10 +00 +00 +0f +24 
+1c +16 +10 +00 +00 +0f +24 +1c +1e +18 +00 +00 +0f +24 +1c +1e +18 +00 +00 +0f +24 +1b +d3 +10 +0f +24 +1b +16 +10 +00 +00 +0f +24 +1b +16 +10 +00 +00 +0f +24 +1b +1e +18 +00 +00 +0f +24 +1b +1e +18 +00 +00 +0f +24 +1f +d3 +10 +0f +24 +1f +16 +10 +00 +00 +0f +24 +1f +16 +10 +00 +00 +0f +24 +1f +1e +18 +00 +00 +0f +24 +1f +1e +18 +00 +00 +0f +24 +1a +d3 +10 +0f +24 +1a +16 +10 +00 +00 +0f +24 +1a +16 +10 +00 +00 +0f +24 +1a +1e +18 +00 +00 +0f +24 +1a +1e +18 +00 +00 +0f +24 +1e +d3 +10 +0f +24 +1e +16 +10 +00 +00 +0f +24 +1e +16 +10 +00 +00 +0f +24 +1e +1e +18 +00 +00 +0f +24 +1e +1e +18 +00 +00 +0f +7a +11 +ca +0f +7a +11 +0e +00 +00 +0f +7a +11 +0e +00 +00 +0f +7a +10 +ca +0f +7a +10 +0e +00 +00 +0f +7a +10 +0e +00 +00 +0f +7a +13 +ca +0f +7a +13 +0e +00 +00 +0f +7a +13 +0e +00 +00 +0f +7a +12 +ca +0f +7a +12 +0e +00 +00 +0f +7a +12 +0e +00 +00 +0f +24 +22 +d3 +10 +0f +24 +22 +16 +10 +00 +00 +0f +24 +22 +16 +10 +00 +00 +0f +24 +22 +1e +18 +00 +00 +0f +24 +22 +1e +18 +00 +00 +0f +24 +26 +d3 +10 +0f +24 +26 +16 +10 +00 +00 +0f +24 +26 +16 +10 +00 +00 +0f +24 +26 +1e +18 +00 +00 +0f +24 +26 +1e +18 +00 +00 +0f +25 +4c +e7 +10 +05 +0f +25 +4c +2e +20 +00 +00 +05 +0f +25 +4c +36 +30 +00 +00 +05 +0f +25 +4e +e7 +10 +05 +0f +25 +4e +2e +20 +00 +00 +05 +0f +25 +4e +36 +30 +00 +00 +05 +0f +25 +4f +e7 +10 +05 +0f +25 +4f +2e +20 +00 +00 +05 +0f +25 +4f +36 +30 +00 +00 +05 +0f +25 +6c +e7 +10 +05 +0f +25 +6c +2e +20 +00 +00 +05 +0f +25 +6c +36 +30 +00 +00 +05 +0f +25 +6e +e7 +10 +05 +0f +25 +6e +2e +20 +00 +00 +05 +0f +25 +6e +36 +30 +00 +00 +05 +0f +25 +6f +e7 +10 +05 +0f +25 +6f +2e +20 +00 +00 +05 +0f +25 +6f +36 +30 +00 +00 +05 +0f +25 +6d +e7 +10 +05 +0f +25 +6d +2e +20 +00 +00 +05 +0f +25 +6d +36 +30 +00 +00 +05 +0f +25 +4d +e7 +10 +05 +0f +25 +4d +2e +20 +00 +00 +05 +0f +25 +4d +36 +30 +00 +00 +05 +0f +24 +21 +d3 +10 +0f +24 +21 +16 +10 +00 +00 +0f +24 +21 +16 +10 +00 +00 +0f +24 +21 +1e +18 +00 +00 +0f +24 +21 +1e +18 +00 +00 +0f +24 +25 +d3 +10 +0f +24 +25 +16 +10 
+00 +00 +0f +24 +25 +16 +10 +00 +00 +0f +24 +25 +1e +18 +00 +00 +0f +24 +25 +1e +18 +00 +00 +0f +24 +20 +d3 +10 +0f +24 +20 +16 +10 +00 +00 +0f +24 +20 +16 +10 +00 +00 +0f +24 +20 +1e +18 +00 +00 +0f +24 +20 +1e +18 +00 +00 +0f +24 +24 +d3 +10 +0f +24 +24 +16 +10 +00 +00 +0f +24 +24 +16 +10 +00 +00 +0f +24 +24 +1e +18 +00 +00 +0f +24 +24 +1e +18 +00 +00 +0f +7a +42 +ca +0f +7a +42 +0e +00 +00 +0f +7a +42 +0e +00 +00 +0f +7a +43 +ca +0f +7a +43 +0e +00 +00 +0f +7a +43 +0e +00 +00 +0f +7a +41 +ca +0f +7a +41 +0e +00 +00 +0f +7a +41 +0e +00 +00 +0f +7a +4b +ca +0f +7a +4b +0e +00 +00 +0f +7a +4b +0e +00 +00 +0f +7a +52 +ca +0f +7a +52 +0e +00 +00 +0f +7a +52 +0e +00 +00 +0f +7a +53 +ca +0f +7a +53 +0e +00 +00 +0f +7a +53 +0e +00 +00 +0f +7a +51 +ca +0f +7a +51 +0e +00 +00 +0f +7a +51 +0e +00 +00 +0f +7a +5b +ca +0f +7a +5b +0e +00 +00 +0f +7a +5b +0e +00 +00 +0f +7a +56 +ca +0f +7a +56 +0e +00 +00 +0f +7a +56 +0e +00 +00 +0f +7a +57 +ca +0f +7a +57 +0e +00 +00 +0f +7a +57 +0e +00 +00 +0f +7a +46 +ca +0f +7a +46 +0e +00 +00 +0f +7a +46 +0e +00 +00 +0f +7a +47 +ca +0f +7a +47 +0e +00 +00 +0f +7a +47 +0e +00 +00 +0f +7a +61 +ca +0f +7a +61 +0e +00 +00 +0f +7a +61 +0e +00 +00 +0f +7a +63 +ca +0f +7a +63 +0e +00 +00 +0f +7a +63 +0e +00 +00 +0f +7a +62 +ca +0f +7a +62 +0e +00 +00 +0f +7a +62 +0e +00 +00 +0f +24 +9e +e7 +10 +0f +24 +9e +2e +20 +00 +00 +0f +24 +9e +36 +30 +00 +00 +0f +24 +9f +e7 +10 +0f +24 +9f +2e +20 +00 +00 +0f +24 +9f +36 +30 +00 +00 +0f +24 +97 +e7 +10 +0f +24 +97 +2e +20 +00 +00 +0f +24 +97 +36 +30 +00 +00 +0f +24 +8e +e7 +10 +0f +24 +8e +2e +20 +00 +00 +0f +24 +8e +36 +30 +00 +00 +0f +24 +8f +e7 +10 +0f +24 +8f +2e +20 +00 +00 +0f +24 +8f +36 +30 +00 +00 +0f +24 +87 +e7 +10 +0f +24 +87 +2e +20 +00 +00 +0f +24 +87 +36 +30 +00 +00 +0f +24 +86 +e7 +10 +0f +24 +86 +2e +20 +00 +00 +0f +24 +86 +36 +30 +00 +00 +0f +24 +85 +e7 +10 +0f +24 +85 +2e +20 +00 +00 +0f +24 +85 +36 +30 +00 +00 +0f +24 +96 +e7 +10 +0f +24 +96 +2e +20 +00 +00 +0f +24 +96 +36 +30 +00 +00 
+0f +24 +95 +e7 +10 +0f +24 +95 +2e +20 +00 +00 +0f +24 +95 +36 +30 +00 +00 +0f +24 +a6 +e7 +10 +0f +24 +a6 +2e +20 +00 +00 +0f +24 +a6 +36 +30 +00 +00 +0f +24 +b6 +e7 +10 +0f +24 +b6 +2e +20 +00 +00 +0f +24 +b6 +36 +30 +00 +00 +0f +24 +23 +d3 +10 +0f +24 +23 +16 +10 +00 +00 +0f +24 +23 +16 +10 +00 +00 +0f +24 +23 +1e +18 +00 +00 +0f +24 +23 +1e +18 +00 +00 +0f +24 +27 +d3 +10 +0f +24 +27 +16 +10 +00 +00 +0f +24 +27 +16 +10 +00 +00 +0f +24 +27 +1e +18 +00 +00 +0f +24 +27 +1e +18 +00 +00 +0f +24 +40 +d3 +10 +0f +24 +40 +16 +10 +00 +00 +0f +24 +40 +16 +10 +00 +00 +0f +24 +40 +1e +18 +00 +00 +0f +24 +40 +1e +18 +00 +00 +0f +7b +40 +ca +05 +0f +7b +40 +0e +00 +00 +05 +0f +7b +40 +0e +00 +00 +05 +0f +24 +42 +d3 +10 +0f +24 +42 +16 +10 +00 +00 +0f +24 +42 +16 +10 +00 +00 +0f +24 +42 +1e +18 +00 +00 +0f +24 +42 +1e +18 +00 +00 +0f +7b +42 +ca +05 +0f +7b +42 +0e +00 +00 +05 +0f +7b +42 +0e +00 +00 +05 +0f +24 +43 +d3 +10 +0f +24 +43 +16 +10 +00 +00 +0f +24 +43 +16 +10 +00 +00 +0f +24 +43 +1e +18 +00 +00 +0f +24 +43 +1e +18 +00 +00 +0f +7b +43 +ca +05 +0f +7b +43 +0e +00 +00 +05 +0f +7b +43 +0e +00 +00 +05 +0f +24 +41 +d3 +10 +0f +24 +41 +16 +10 +00 +00 +0f +24 +41 +16 +10 +00 +00 +0f +24 +41 +1e +18 +00 +00 +0f +24 +41 +1e +18 +00 +00 +0f +7b +41 +ca +05 +0f +7b +41 +0e +00 +00 +05 +0f +7b +41 +0e +00 +00 +05 +0f +24 +48 +d3 +10 +0f +24 +48 +16 +10 +00 +00 +0f +24 +48 +16 +10 +00 +00 +0f +24 +48 +1e +18 +00 +00 +0f +24 +48 +1e +18 +00 +00 +0f +24 +4a +d3 +10 +0f +24 +4a +16 +10 +00 +00 +0f +24 +4a +16 +10 +00 +00 +0f +24 +4a +1e +18 +00 +00 +0f +24 +4a +1e +18 +00 +00 +0f +24 +4b +d3 +10 +0f +24 +4b +16 +10 +00 +00 +0f +24 +4b +16 +10 +00 +00 +0f +24 +4b +1e +18 +00 +00 +0f +24 +4b +1e +18 +00 +00 +0f +24 +49 +d3 +10 +0f +24 +49 +16 +10 +00 +00 +0f +24 +49 +16 +10 +00 +00 +0f +24 +49 +1e +18 +00 +00 +0f +24 +49 +1e +18 +00 +00 +0f +24 +44 +d3 +10 +0f +24 +44 +16 +10 +00 +00 +0f +24 +44 +16 +10 +00 +00 +0f +24 +44 +1e +18 +00 +00 +0f +24 +44 +1e +18 +00 +00 +0f +24 +46 +d3 
+10 +0f +24 +46 +16 +10 +00 +00 +0f +24 +46 +16 +10 +00 +00 +0f +24 +46 +1e +18 +00 +00 +0f +24 +46 +1e +18 +00 +00 +0f +24 +47 +d3 +10 +0f +24 +47 +16 +10 +00 +00 +0f +24 +47 +16 +10 +00 +00 +0f +24 +47 +1e +18 +00 +00 +0f +24 +47 +1e +18 +00 +00 +0f +24 +45 +d3 +10 +0f +24 +45 +16 +10 +00 +00 +0f +24 +45 +16 +10 +00 +00 +0f +24 +45 +1e +18 +00 +00 +0f +24 +45 +1e +18 +00 +00 +66 +0f +38 +17 +ca +66 +0f +38 +17 +0e +00 +00 +66 +0f +38 +17 +0e +00 +00 +66 +0f +3a +09 +ca +05 +66 +0f +3a +09 +0e +00 +00 +05 +66 +0f +3a +09 +0e +00 +00 +05 +66 +0f +3a +08 +ca +05 +66 +0f +3a +08 +0e +00 +00 +05 +66 +0f +3a +08 +0e +00 +00 +05 +66 +0f +3a +0b +ca +05 +66 +0f +3a +0b +0e +00 +00 +05 +66 +0f +3a +0b +0e +00 +00 +05 +66 +0f +3a +0a +ca +05 +66 +0f +3a +0a +0e +00 +00 +05 +66 +0f +3a +0a +0e +00 +00 +05 diff --git a/modules/arch/x86/tests/sse5-basic.asm b/modules/arch/x86/tests/sse5-basic.asm new file mode 100644 index 00000000..ed79e77e --- /dev/null +++ b/modules/arch/x86/tests/sse5-basic.asm @@ -0,0 +1,12 @@ +[bits 32] +compd xmm1, xmm4, xmm7, 5 ; 0F 25 2D 347 10 05 +compd xmm2, xmm5, [0], byte 5 ; 0F 25 2D 055 20 00 00 00 00 05 +compd xmm3, xmm6, dqword [ebx+ecx*4], byte 5 ; 0F 25 2D 064 213 30 05 + +[bits 64] +compd xmm8, xmm11, xmm3, 5 ; 0F 25 2D 333 84 05 +compd xmm12, xmm4, xmm14, 5 ; 0F 25 2D 346 C1 05 +compd xmm9, xmm12, [0], byte 5 ; 0F 25 2D 044 045 94 00 00 00 00 05 +compd xmm9, xmm12, [r8], byte 5 ; 0F 25 2D 040 95 05 +compd xmm10, xmm13, dqword [rbx+r9*4], 5 ; 0F 25 2D 054 213 A6 05 + diff --git a/modules/arch/x86/tests/sse5-basic.hex b/modules/arch/x86/tests/sse5-basic.hex new file mode 100644 index 00000000..2d6c87b6 --- /dev/null +++ b/modules/arch/x86/tests/sse5-basic.hex @@ -0,0 +1,59 @@ +0f +25 +2d +e7 +10 +05 +0f +25 +2d +2d +20 +00 +00 +00 +00 +05 +0f +25 +2d +34 +8b +30 +05 +0f +25 +2d +db +84 +05 +0f +25 +2d +e6 +c1 +05 +0f +25 +2d +24 +25 +94 +00 +00 +00 +00 +05 +0f +25 +2d +20 +95 +05 +0f +25 +2d +2c +8b +a6 +05 diff --git 
a/modules/arch/x86/tests/sse5-err.asm b/modules/arch/x86/tests/sse5-err.asm new file mode 100644 index 00000000..93b474fa --- /dev/null +++ b/modules/arch/x86/tests/sse5-err.asm @@ -0,0 +1,116 @@ +fmaddpd xmm1, xmm2, xmm1, xmm3 ; illegal +fmaddpd xmm1, xmm2, xmm3, xmm3 ; illegal +fmaddpd xmm1, xmm2, xmm2, xmm3 ; illegal + +fmaddps xmm1, xmm2, xmm1, xmm3 ; illegal +fmaddps xmm1, xmm2, xmm3, xmm3 ; illegal +fmaddps xmm1, xmm2, xmm2, xmm3 ; illegal + +fmaddsd xmm1, xmm2, xmm1, xmm3 ; illegal +fmaddsd xmm1, xmm2, xmm3, xmm3 ; illegal +fmaddsd xmm1, xmm2, xmm2, xmm3 ; illegal + +fmaddss xmm1, xmm2, xmm1, xmm3 ; illegal +fmaddss xmm1, xmm2, xmm3, xmm3 ; illegal +fmaddss xmm1, xmm2, xmm2, xmm3 ; illegal + +fmsubpd xmm1, xmm2, xmm1, xmm3 ; illegal +fmsubpd xmm1, xmm2, xmm3, xmm3 ; illegal +fmsubpd xmm1, xmm2, xmm2, xmm3 ; illegal + +fmsubps xmm1, xmm2, xmm1, xmm3 ; illegal +fmsubps xmm1, xmm2, xmm3, xmm3 ; illegal +fmsubps xmm1, xmm2, xmm2, xmm3 ; illegal + +fmsubsd xmm1, xmm2, xmm1, xmm3 ; illegal +fmsubsd xmm1, xmm2, xmm3, xmm3 ; illegal +fmsubsd xmm1, xmm2, xmm2, xmm3 ; illegal + +fmsubss xmm1, xmm2, xmm1, xmm3 ; illegal +fmsubss xmm1, xmm2, xmm3, xmm3 ; illegal +fmsubss xmm1, xmm2, xmm2, xmm3 ; illegal + +fnmaddpd xmm1, xmm2, xmm1, xmm3 ; illegal +fnmaddpd xmm1, xmm2, xmm3, xmm3 ; illegal +fnmaddpd xmm1, xmm2, xmm2, xmm3 ; illegal + +fnmaddps xmm1, xmm2, xmm1, xmm3 ; illegal +fnmaddps xmm1, xmm2, xmm3, xmm3 ; illegal +fnmaddps xmm1, xmm2, xmm2, xmm3 ; illegal + +fnmaddsd xmm1, xmm2, xmm1, xmm3 ; illegal +fnmaddsd xmm1, xmm2, xmm3, xmm3 ; illegal +fnmaddsd xmm1, xmm2, xmm2, xmm3 ; illegal + +fnmaddss xmm1, xmm2, xmm1, xmm3 ; illegal +fnmaddss xmm1, xmm2, xmm3, xmm3 ; illegal +fnmaddss xmm1, xmm2, xmm2, xmm3 ; illegal + +fnmsubpd xmm1, xmm2, xmm1, xmm3 ; illegal +fnmsubpd xmm1, xmm2, xmm3, xmm3 ; illegal +fnmsubpd xmm1, xmm2, xmm2, xmm3 ; illegal + +fnmsubps xmm1, xmm2, xmm1, xmm3 ; illegal +fnmsubps xmm1, xmm2, xmm3, xmm3 ; illegal +fnmsubps xmm1, xmm2, xmm2, xmm3 ; 
illegal + +fnmsubsd xmm1, xmm2, xmm1, xmm3 ; illegal +fnmsubsd xmm1, xmm2, xmm3, xmm3 ; illegal +fnmsubsd xmm1, xmm2, xmm2, xmm3 ; illegal + +fnmsubss xmm1, xmm2, xmm1, xmm3 ; illegal +fnmsubss xmm1, xmm2, xmm3, xmm3 ; illegal +fnmsubss xmm1, xmm2, xmm2, xmm3 ; illegal + +pcmov xmm1, xmm2, xmm1, xmm3 ; illegal +pcmov xmm1, xmm2, xmm3, xmm3 ; illegal +pcmov xmm1, xmm2, xmm2, xmm3 ; illegal + +permpd xmm1, xmm2, xmm1, xmm3 ; illegal +permpd xmm1, xmm2, xmm3, xmm3 ; illegal +permpd xmm1, xmm2, xmm2, xmm3 ; illegal + +permps xmm1, xmm2, xmm1, xmm3 ; illegal +permps xmm1, xmm2, xmm3, xmm3 ; illegal +permps xmm1, xmm2, xmm2, xmm3 ; illegal + +pmacsdd xmm1, xmm2, xmm1, xmm3 ; illegal +pmacsdd xmm1, xmm1, xmm2, xmm3 ; illegal - better message? + +pmacsdqh xmm1, xmm2, xmm1, xmm3 ; illegal +pmacsdqh xmm1, xmm1, xmm2, xmm3 ; illegal - better message? + +pmacsdql xmm1, xmm2, xmm1, xmm3 ; illegal +pmacsdql xmm1, xmm1, xmm2, xmm3 ; illegal - better message? + +pmacssdd xmm1, xmm2, xmm1, xmm3 ; illegal +pmacssdd xmm1, xmm1, xmm2, xmm3 ; illegal - better message? + +pmacssdqh xmm1, xmm2, xmm1, xmm3 ; illegal +pmacssdqh xmm1, xmm1, xmm2, xmm3 ; illegal - better message? + +pmacssdql xmm1, xmm2, xmm1, xmm3 ; illegal +pmacssdql xmm1, xmm1, xmm2, xmm3 ; illegal - better message? + +pmacsswd xmm1, xmm2, xmm1, xmm3 ; illegal +pmacsswd xmm1, xmm1, xmm2, xmm3 ; illegal - better message? + +pmacssww xmm1, xmm2, xmm1, xmm3 ; illegal +pmacssww xmm1, xmm1, xmm2, xmm3 ; illegal - better message? + +pmacswd xmm1, xmm2, xmm1, xmm3 ; illegal +pmacswd xmm1, xmm1, xmm2, xmm3 ; illegal - better message? + +pmacsww xmm1, xmm2, xmm1, xmm3 ; illegal +pmacsww xmm1, xmm1, xmm2, xmm3 ; illegal - better message? + +pmadcsswd xmm1, xmm2, xmm1, xmm3 ; illegal +pmadcsswd xmm1, xmm1, xmm2, xmm3 ; illegal - better message? + +pmadcswd xmm1, xmm2, xmm1, xmm3 ; illegal +pmadcswd xmm1, xmm1, xmm2, xmm3 ; illegal - better message? 
+ +pperm xmm1, xmm2, xmm1, xmm3 ; illegal +pperm xmm1, xmm2, xmm3, xmm3 ; illegal +pperm xmm1, xmm2, xmm2, xmm3 ; illegal + diff --git a/modules/arch/x86/tests/sse5-err.errwarn b/modules/arch/x86/tests/sse5-err.errwarn new file mode 100644 index 00000000..19df6e09 --- /dev/null +++ b/modules/arch/x86/tests/sse5-err.errwarn @@ -0,0 +1,84 @@ +-:1: one of source operand 1 or 3 must match dest operand +-:2: one of source operand 1 or 3 must match dest operand +-:3: one of source operand 1 or 3 must match dest operand +-:5: one of source operand 1 or 3 must match dest operand +-:6: one of source operand 1 or 3 must match dest operand +-:7: one of source operand 1 or 3 must match dest operand +-:9: one of source operand 1 or 3 must match dest operand +-:10: one of source operand 1 or 3 must match dest operand +-:11: one of source operand 1 or 3 must match dest operand +-:13: one of source operand 1 or 3 must match dest operand +-:14: one of source operand 1 or 3 must match dest operand +-:15: one of source operand 1 or 3 must match dest operand +-:17: one of source operand 1 or 3 must match dest operand +-:18: one of source operand 1 or 3 must match dest operand +-:19: one of source operand 1 or 3 must match dest operand +-:21: one of source operand 1 or 3 must match dest operand +-:22: one of source operand 1 or 3 must match dest operand +-:23: one of source operand 1 or 3 must match dest operand +-:25: one of source operand 1 or 3 must match dest operand +-:26: one of source operand 1 or 3 must match dest operand +-:27: one of source operand 1 or 3 must match dest operand +-:29: one of source operand 1 or 3 must match dest operand +-:30: one of source operand 1 or 3 must match dest operand +-:31: one of source operand 1 or 3 must match dest operand +-:33: one of source operand 1 or 3 must match dest operand +-:34: one of source operand 1 or 3 must match dest operand +-:35: one of source operand 1 or 3 must match dest operand +-:37: one of source operand 1 or 3 must 
match dest operand +-:38: one of source operand 1 or 3 must match dest operand +-:39: one of source operand 1 or 3 must match dest operand +-:41: one of source operand 1 or 3 must match dest operand +-:42: one of source operand 1 or 3 must match dest operand +-:43: one of source operand 1 or 3 must match dest operand +-:45: one of source operand 1 or 3 must match dest operand +-:46: one of source operand 1 or 3 must match dest operand +-:47: one of source operand 1 or 3 must match dest operand +-:49: one of source operand 1 or 3 must match dest operand +-:50: one of source operand 1 or 3 must match dest operand +-:51: one of source operand 1 or 3 must match dest operand +-:53: one of source operand 1 or 3 must match dest operand +-:54: one of source operand 1 or 3 must match dest operand +-:55: one of source operand 1 or 3 must match dest operand +-:57: one of source operand 1 or 3 must match dest operand +-:58: one of source operand 1 or 3 must match dest operand +-:59: one of source operand 1 or 3 must match dest operand +-:61: one of source operand 1 or 3 must match dest operand +-:62: one of source operand 1 or 3 must match dest operand +-:63: one of source operand 1 or 3 must match dest operand +-:65: one of source operand 1 or 3 must match dest operand +-:66: one of source operand 1 or 3 must match dest operand +-:67: one of source operand 1 or 3 must match dest operand +-:69: one of source operand 1 or 3 must match dest operand +-:70: one of source operand 1 or 3 must match dest operand +-:71: one of source operand 1 or 3 must match dest operand +-:73: one of source operand 1 or 3 must match dest operand +-:74: one of source operand 1 or 3 must match dest operand +-:75: one of source operand 1 or 3 must match dest operand +-:77: one of source operand 1 or 3 must match dest operand +-:78: one of source operand 1 or 3 must match dest operand +-:80: one of source operand 1 or 3 must match dest operand +-:81: one of source operand 1 or 3 must match dest operand 
+-:83: one of source operand 1 or 3 must match dest operand +-:84: one of source operand 1 or 3 must match dest operand +-:86: one of source operand 1 or 3 must match dest operand +-:87: one of source operand 1 or 3 must match dest operand +-:89: one of source operand 1 or 3 must match dest operand +-:90: one of source operand 1 or 3 must match dest operand +-:92: one of source operand 1 or 3 must match dest operand +-:93: one of source operand 1 or 3 must match dest operand +-:95: one of source operand 1 or 3 must match dest operand +-:96: one of source operand 1 or 3 must match dest operand +-:98: one of source operand 1 or 3 must match dest operand +-:99: one of source operand 1 or 3 must match dest operand +-:101: one of source operand 1 or 3 must match dest operand +-:102: one of source operand 1 or 3 must match dest operand +-:104: one of source operand 1 or 3 must match dest operand +-:105: one of source operand 1 or 3 must match dest operand +-:107: one of source operand 1 or 3 must match dest operand +-:108: one of source operand 1 or 3 must match dest operand +-:110: one of source operand 1 or 3 must match dest operand +-:111: one of source operand 1 or 3 must match dest operand +-:113: one of source operand 1 or 3 must match dest operand +-:114: one of source operand 1 or 3 must match dest operand +-:115: one of source operand 1 or 3 must match dest operand diff --git a/modules/arch/x86/x86arch.h b/modules/arch/x86/x86arch.h index 5f5be1c0..705933c0 100644 --- a/modules/arch/x86/x86arch.h +++ b/modules/arch/x86/x86arch.h @@ -64,6 +64,7 @@ #define CPU_SSE41 30 /* Streaming SIMD extensions 4.1 required */ #define CPU_SSE42 31 /* Streaming SIMD extensions 4.2 required */ #define CPU_SSE4a 32 /* AMD Streaming SIMD extensions 4a required */ +#define CPU_SSE5 33 /* AMD Streaming SIMD extensions 5 required */ /* Technically not CPU capabilities, they do affect what instructions are * available. These are tested against BITS==64. 
@@ -142,9 +143,9 @@ typedef enum { * indicates bit of REX to use if REX is needed. Will not modify REX if not * in 64-bit mode or if it wasn't needed to express reg. */ -int yasm_x86__set_rex_from_reg(unsigned char *rex, unsigned char *low3, - uintptr_t reg, unsigned int bits, - x86_rex_bit_pos rexbit); +int yasm_x86__set_rex_from_reg(unsigned char *rex, unsigned char *drex, + unsigned char *low3, uintptr_t reg, + unsigned int bits, x86_rex_bit_pos rexbit); /* Effective address type */ typedef struct x86_effaddr { @@ -162,14 +163,19 @@ typedef struct x86_effaddr { unsigned char valid_sib; /* 1 if SIB byte currently valid, 0 if not */ unsigned char need_sib; /* 1 if SIB byte needed, 0 if not, 0xff if unknown */ + + unsigned char drex; /* DREX SSE5 extension byte */ + unsigned char need_drex; /* 1 if DREX byte needed, 0 if not */ } x86_effaddr; void yasm_x86__ea_init(x86_effaddr *x86_ea, unsigned int spare, + unsigned int drex, unsigned int need_drex, yasm_bytecode *precbc); void yasm_x86__ea_set_disponly(x86_effaddr *x86_ea); x86_effaddr *yasm_x86__ea_create_reg(x86_effaddr *x86_ea, unsigned long reg, - unsigned char *rex, unsigned int bits); + unsigned char *rex, unsigned char *drex, + unsigned int bits); x86_effaddr *yasm_x86__ea_create_imm (x86_effaddr *x86_ea, /*@keep@*/ yasm_expr *imm, unsigned int im_len); yasm_effaddr *yasm_x86__ea_create_expr(yasm_arch *arch, diff --git a/modules/arch/x86/x86bc.c b/modules/arch/x86/x86bc.c index a01031d9..4319318b 100644 --- a/modules/arch/x86/x86bc.c +++ b/modules/arch/x86/x86bc.c @@ -103,9 +103,9 @@ static const yasm_bytecode_callback x86_bc_callback_jmpfar = { }; int -yasm_x86__set_rex_from_reg(unsigned char *rex, unsigned char *low3, - uintptr_t reg, unsigned int bits, - x86_rex_bit_pos rexbit) +yasm_x86__set_rex_from_reg(unsigned char *rex, unsigned char *drex, + unsigned char *low3, uintptr_t reg, + unsigned int bits, x86_rex_bit_pos rexbit) { *low3 = (unsigned char)(reg&7); @@ -113,13 +113,17 @@ 
yasm_x86__set_rex_from_reg(unsigned char *rex, unsigned char *low3, x86_expritem_reg_size size = (x86_expritem_reg_size)(reg & ~0xFUL); if (size == X86_REG8X || (reg & 0xF) >= 8) { - /* Check to make sure we can set it */ - if (*rex == 0xff) { - yasm_error_set(YASM_ERROR_TYPE, - N_("cannot use A/B/C/DH with instruction needing REX")); - return 1; + if (drex) { + *drex |= ((reg & 8) >> 3) << rexbit; + } else { + /* Check to make sure we can set it */ + if (*rex == 0xff) { + yasm_error_set(YASM_ERROR_TYPE, + N_("cannot use A/B/C/DH with instruction needing REX")); + return 1; + } + *rex |= 0x40 | (((reg & 8) >> 3) << rexbit); } - *rex |= 0x40 | (((reg & 8) >> 3) << rexbit); } else if (size == X86_REG8 && (reg & 7) >= 4) { /* AH/BH/CH/DH, so no REX allowed */ if (*rex != 0 && *rex != 0xff) { @@ -153,14 +157,16 @@ yasm_x86__bc_transform_jmpfar(yasm_bytecode *bc, x86_jmpfar *jmpfar) } void -yasm_x86__ea_init(x86_effaddr *x86_ea, unsigned int spare, - yasm_bytecode *precbc) +yasm_x86__ea_init(x86_effaddr *x86_ea, unsigned int spare, unsigned int drex, + unsigned int need_drex, yasm_bytecode *precbc) { if (yasm_value_finalize(&x86_ea->ea.disp, precbc)) yasm_error_set(YASM_ERROR_TOO_COMPLEX, N_("effective address too complex")); x86_ea->modrm &= 0xC7; /* zero spare/reg bits */ x86_ea->modrm |= (spare << 3) & 0x38; /* plug in provided bits */ + x86_ea->drex = (unsigned char)drex; + x86_ea->need_drex = (unsigned char)need_drex; } void @@ -170,6 +176,7 @@ yasm_x86__ea_set_disponly(x86_effaddr *x86_ea) x86_ea->need_modrm = 0; x86_ea->valid_sib = 0; x86_ea->need_sib = 0; + x86_ea->need_drex = 0; } static x86_effaddr * @@ -189,17 +196,20 @@ ea_create(void) x86_ea->sib = 0; x86_ea->valid_sib = 0; x86_ea->need_sib = 0; + x86_ea->drex = 0; + x86_ea->need_drex = 0; return x86_ea; } x86_effaddr * yasm_x86__ea_create_reg(x86_effaddr *x86_ea, unsigned long reg, - unsigned char *rex, unsigned int bits) + unsigned char *rex, unsigned char *drex, + unsigned int bits) { unsigned char rm; - 
if (yasm_x86__set_rex_from_reg(rex, &rm, reg, bits, X86_REX_B)) + if (yasm_x86__set_rex_from_reg(rex, drex, &rm, reg, bits, X86_REX_B)) return NULL; if (!x86_ea) @@ -539,6 +549,7 @@ x86_bc_insn_calc_len(yasm_bytecode *bc, yasm_bc_add_span_func add_span, /* Compute length of ea and add to total */ bc->len += x86_ea->need_modrm + (x86_ea->need_sib ? 1:0); + bc->len += x86_ea->need_drex ? 1:0; bc->len += (x86_ea->ea.segreg != 0) ? 1 : 0; } @@ -807,6 +818,9 @@ x86_bc_insn_tobytes(yasm_bytecode *bc, unsigned char **bufp, void *d, YASM_WRITE_8(*bufp, x86_ea->sib); } + if (x86_ea->need_drex) + YASM_WRITE_8(*bufp, x86_ea->drex); + if (x86_ea->ea.need_disp) { unsigned int disp_len = x86_ea->ea.disp.size/8; diff --git a/modules/arch/x86/x86cpu.gperf b/modules/arch/x86/x86cpu.gperf index ed2b5232..3597b535 100644 --- a/modules/arch/x86/x86cpu.gperf +++ b/modules/arch/x86/x86cpu.gperf @@ -118,6 +118,7 @@ x86_cpu_ia64(wordptr cpu, unsigned int data) BitVector_Bit_On(cpu, CPU_086); } +#define PROC_bulldozer 11 #define PROC_k10 10 #define PROC_venice 9 #define PROC_hammer 8 @@ -133,6 +134,8 @@ x86_cpu_amd(wordptr cpu, unsigned int data) BitVector_Bit_On(cpu, CPU_Prot); BitVector_Bit_On(cpu, CPU_SMM); BitVector_Bit_On(cpu, CPU_3DNow); + if (data >= PROC_bulldozer) + BitVector_Bit_On(cpu, CPU_SSE5); if (data >= PROC_k10) BitVector_Bit_On(cpu, CPU_SSE4a); if (data >= PROC_venice) @@ -254,6 +257,7 @@ athlon64, x86_cpu_amd, PROC_hammer athlon-64, x86_cpu_amd, PROC_hammer venice, x86_cpu_amd, PROC_venice k10, x86_cpu_amd, PROC_k10 +bulldozer, x86_cpu_amd, PROC_bulldozer prescott, x86_cpu_intel, PROC_prescott conroe, x86_cpu_intel, PROC_conroe penryn, x86_cpu_intel, PROC_penryn @@ -318,6 +322,8 @@ sse4a, x86_cpu_set, CPU_SSE4a nosse4a, x86_cpu_clear, CPU_SSE4a sse4, x86_cpu_set_sse4, 0 nosse4, x86_cpu_clear_sse4, 0 +sse5, x86_cpu_set, CPU_SSE5 +nosse5, x86_cpu_clear, CPU_SSE5 %% void diff --git a/modules/arch/x86/x86expr.c b/modules/arch/x86/x86expr.c index 8d11b1ab..eb145e7a 100644 --- 
a/modules/arch/x86/x86expr.c +++ b/modules/arch/x86/x86expr.c @@ -549,6 +549,7 @@ yasm_x86__expr_checkea(x86_effaddr *x86_ea, unsigned char *addrsize, yasm_bytecode *bc) { int retval; + unsigned char *drex = x86_ea->need_drex ? &x86_ea->drex : NULL; if (*addrsize == 0) { /* we need to figure out the address size from what we know about: @@ -784,7 +785,7 @@ yasm_x86__expr_checkea(x86_effaddr *x86_ea, unsigned char *addrsize, * of register basereg is, as x86_set_rex_from_reg doesn't pay * much attention. */ - if (yasm_x86__set_rex_from_reg(rex, &low3, + if (yasm_x86__set_rex_from_reg(rex, drex, &low3, (unsigned int)(X86_REG64 | basereg), bits, X86_REX_B)) return 1; @@ -811,7 +812,7 @@ yasm_x86__expr_checkea(x86_effaddr *x86_ea, unsigned char *addrsize, if (basereg == REG3264_NONE) x86_ea->sib |= 5; else { - if (yasm_x86__set_rex_from_reg(rex, &low3, (unsigned int) + if (yasm_x86__set_rex_from_reg(rex, drex, &low3, (unsigned int) (X86_REG64 | basereg), bits, X86_REX_B)) return 1; @@ -823,7 +824,7 @@ yasm_x86__expr_checkea(x86_effaddr *x86_ea, unsigned char *addrsize, x86_ea->sib |= 040; /* Any scale field is valid, just leave at 0. 
*/ else { - if (yasm_x86__set_rex_from_reg(rex, &low3, (unsigned int) + if (yasm_x86__set_rex_from_reg(rex, drex, &low3, (unsigned int) (X86_REG64 | indexreg), bits, X86_REX_X)) return 1; diff --git a/modules/arch/x86/x86id.c b/modules/arch/x86/x86id.c index fa01e8ca..739c8679 100644 --- a/modules/arch/x86/x86id.c +++ b/modules/arch/x86/x86id.c @@ -103,7 +103,9 @@ enum x86_operand_type { */ OPT_MemrAX = 25, /* EAX memory operand only (EA) [special case for SVM skinit opcode] */ - OPT_MemEAX = 26 + OPT_MemEAX = 26, + /* SIMDReg with value equal to operand 0 SIMDReg */ + OPT_SIMDRegMatch0 = 27 }; enum x86_operand_size { @@ -150,7 +152,8 @@ enum x86_operand_action { /* far jump (outputs a farjmp instead of normal insn) */ OPA_JmpFar = 10, /* ea operand only sets address size (no actual ea field) */ - OPA_AdSizeEA = 11 + OPA_AdSizeEA = 11, + OPA_DREX = 12 /* operand data goes into DREX "dest" field */ }; enum x86_operand_post_action { @@ -244,6 +247,14 @@ typedef struct x86_insn_info { */ unsigned char special_prefix; + /* The DREX base byte value (almost). The only bit kept from this + * value is the OC0 bit (0x08). The MSB (0x80) of this value indicates + * if the DREX byte needs to be present in the instruction. 
+ */ +#define NEED_DREX_MASK 0x80 +#define DREX_OC0_MASK 0x08 + unsigned char drex_oc0; + /* The length of the basic opcode */ unsigned char opcode_len; @@ -565,7 +576,7 @@ x86_find_match(x86_id_insn *id_insn, yasm_insn_operand **ops, cpu1 = CPU_Any; if (cpu2 == CPU_64 || cpu2 == CPU_Not64) cpu2 = CPU_Any; - if (bypass != 7 && (!BitVector_bit_test(id_insn->cpu_enabled, cpu0) || + if (bypass != 8 && (!BitVector_bit_test(id_insn->cpu_enabled, cpu0) || !BitVector_bit_test(id_insn->cpu_enabled, cpu1) || !BitVector_bit_test(id_insn->cpu_enabled, cpu2))) continue; @@ -634,6 +645,7 @@ x86_find_match(x86_id_insn *id_insn, yasm_insn_operand **ops, if (op->type == YASM_INSN__OPERAND_MEMORY) break; /*@fallthrough@*/ + case OPT_SIMDRegMatch0: case OPT_SIMDReg: if (op->type != YASM_INSN__OPERAND_REG) mismatch = 1; @@ -647,6 +659,9 @@ x86_find_match(x86_id_insn *id_insn, yasm_insn_operand **ops, break; } } + if (!mismatch && info_ops[i].type == OPT_SIMDRegMatch0 && + bypass != 7 && op->data.reg != use_ops[0]->data.reg) + mismatch = 1; break; case OPT_SegReg: if (op->type != YASM_INSN__OPERAND_SEGREG) @@ -912,7 +927,7 @@ x86_match_error(x86_id_insn *id_insn, yasm_insn_operand **ops, return; } - for (bypass=1; bypass<8; bypass++) { + for (bypass=1; bypass<9; bypass++) { i = x86_find_match(id_insn, ops, rev_ops, size_lookup, bypass); if (i) break; @@ -935,6 +950,10 @@ x86_match_error(x86_id_insn *id_insn, yasm_insn_operand **ops, N_("invalid size for operand %d"), 3); break; case 7: + yasm_error_set(YASM_ERROR_TYPE, + N_("one of source operand 1 or 3 must match dest operand")); + break; + case 8: { unsigned int cpu0 = i->cpu0, cpu1 = i->cpu1, cpu2 = i->cpu2; yasm_error_set(YASM_ERROR_TYPE, @@ -961,6 +980,8 @@ x86_id_insn_finalize(yasm_bytecode *bc, yasm_bytecode *prev_bc) unsigned char im_len; unsigned char im_sign; unsigned char spare; + unsigned char drex; + unsigned char *pdrex; unsigned int i; unsigned int size_lookup[] = {0, 8, 16, 32, 64, 80, 128, 0}; unsigned long do_postop 
= 0; @@ -1052,10 +1073,12 @@ x86_id_insn_finalize(yasm_bytecode *bc, yasm_bytecode *prev_bc) insn->def_opersize_64 = info->def_opersize_64; insn->special_prefix = info->special_prefix; spare = info->spare; + drex = info->drex_oc0 & DREX_OC0_MASK; im_len = 0; im_sign = 0; insn->postop = X86_POSTOP_NONE; insn->rex = 0; + pdrex = (info->drex_oc0 & NEED_DREX_MASK) ? &drex : NULL; /* Apply modifiers */ for (i=0; i<NELEMS(info->modifiers); i++) { @@ -1137,7 +1160,7 @@ x86_id_insn_finalize(yasm_bytecode *bc, yasm_bytecode *prev_bc) insn->x86_ea = yasm_x86__ea_create_reg(insn->x86_ea, (unsigned long)op->data.reg, &insn->rex, - mode_bits); + pdrex, mode_bits); break; case YASM_INSN__OPERAND_SEGREG: yasm_internal_error( @@ -1175,8 +1198,8 @@ x86_id_insn_finalize(yasm_bytecode *bc, yasm_bytecode *prev_bc) if (op->type == YASM_INSN__OPERAND_SEGREG) spare = (unsigned char)(op->data.reg&7); else if (op->type == YASM_INSN__OPERAND_REG) { - if (yasm_x86__set_rex_from_reg(&insn->rex, &spare, - op->data.reg, mode_bits, X86_REX_R)) + if (yasm_x86__set_rex_from_reg(&insn->rex, pdrex, + &spare, op->data.reg, mode_bits, X86_REX_R)) return; } else yasm_internal_error(N_("invalid operand conversion")); @@ -1184,8 +1207,8 @@ x86_id_insn_finalize(yasm_bytecode *bc, yasm_bytecode *prev_bc) case OPA_Op0Add: if (op->type == YASM_INSN__OPERAND_REG) { unsigned char opadd; - if (yasm_x86__set_rex_from_reg(&insn->rex, &opadd, - op->data.reg, mode_bits, X86_REX_B)) + if (yasm_x86__set_rex_from_reg(&insn->rex, pdrex, + &opadd, op->data.reg, mode_bits, X86_REX_B)) return; insn->opcode.opcode[0] += opadd; } else @@ -1194,8 +1217,8 @@ x86_id_insn_finalize(yasm_bytecode *bc, yasm_bytecode *prev_bc) case OPA_Op1Add: if (op->type == YASM_INSN__OPERAND_REG) { unsigned char opadd; - if (yasm_x86__set_rex_from_reg(&insn->rex, &opadd, - op->data.reg, mode_bits, X86_REX_B)) + if (yasm_x86__set_rex_from_reg(&insn->rex, pdrex, + &opadd, op->data.reg, mode_bits, X86_REX_B)) return; insn->opcode.opcode[1] += opadd; } else @@ 
-1205,11 +1228,11 @@ x86_id_insn_finalize(yasm_bytecode *bc, yasm_bytecode *prev_bc) if (op->type == YASM_INSN__OPERAND_REG) { insn->x86_ea = yasm_x86__ea_create_reg(insn->x86_ea, - (unsigned long)op->data.reg, - &insn->rex, mode_bits); + (unsigned long)op->data.reg, &insn->rex, + pdrex, mode_bits); if (!insn->x86_ea || - yasm_x86__set_rex_from_reg(&insn->rex, &spare, - op->data.reg, mode_bits, X86_REX_R)) { + yasm_x86__set_rex_from_reg(&insn->rex, pdrex, + &spare, op->data.reg, mode_bits, X86_REX_R)) { if (insn->x86_ea) yasm_xfree(insn->x86_ea); yasm_xfree(insn); @@ -1242,6 +1265,10 @@ x86_id_insn_finalize(yasm_bytecode *bc, yasm_bytecode *prev_bc) yasm_x86__ea_destroy(op->data.ea); break; } + case OPA_DREX: + drex &= 0x0F; + drex |= (op->data.reg << 4) & 0xF0; + break; default: yasm_internal_error(N_("unknown operand action")); } @@ -1283,7 +1310,9 @@ x86_id_insn_finalize(yasm_bytecode *bc, yasm_bytecode *prev_bc) } if (insn->x86_ea) { - yasm_x86__ea_init(insn->x86_ea, spare, prev_bc); + yasm_x86__ea_init(insn->x86_ea, spare, drex, + (unsigned int)(info->drex_oc0 & NEED_DREX_MASK), + prev_bc); for (i=0; i<id_insn->insn.num_segregs; i++) yasm_ea_set_segreg(&insn->x86_ea->ea, id_insn->insn.segregs[i]); } else if (id_insn->insn.num_segregs > 0 && insn->special_prefix == 0) { @@ -1350,7 +1379,8 @@ x86_id_insn_finalize(yasm_bytecode *bc, yasm_bytecode *prev_bc) * opcode 0 being a mov instruction! */ insn->x86_ea = yasm_x86__ea_create_reg(insn->x86_ea, - (unsigned long)insn->opcode.opcode[0]-0xB8, &rex_temp, 64); + (unsigned long)insn->opcode.opcode[0]-0xB8, &rex_temp, + NULL, 64); /* Make the imm32s form permanent. */ insn->opcode.opcode[0] = insn->opcode.opcode[1];