/*---------------------------------------------------------------*/
/*--- begin                                 host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2015 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/
#include "libvex_basictypes.h"
|
|
#include "libvex_ir.h"
|
|
#include "libvex.h"
|
|
|
|
#include "ir_match.h"
|
|
#include "main_util.h"
|
|
#include "main_globals.h"
|
|
#include "host_generic_regs.h"
|
|
#include "host_generic_simd64.h"
|
|
#include "host_generic_simd128.h"
|
|
#include "host_generic_simd256.h"
|
|
#include "host_generic_maddf.h"
|
|
#include "host_amd64_defs.h"
|
|
|
|
|
|
/*---------------------------------------------------------*/
/*--- x87/SSE control word stuff                         ---*/
/*---------------------------------------------------------*/

/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, precision = 53 bits.  This
   corresponds to an FPU control word value of 0x027F.

   Similarly the SSE control word (%mxcsr) should be 0x1F80.

   %fpucw and %mxcsr should have these values on entry to
   Vex-generated code, and those values should be unchanged at exit.
*/

#define DEFAULT_FPUCW 0x027F

#define DEFAULT_MXCSR 0x1F80

/* debugging only, do not use */
/* define DEFAULT_FPUCW 0x037F */
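
/* For reference: in the x87 control word, bits 5:0 are the six
   exception mask bits, bits 9:8 the precision control (10b = 53-bit
   significand) and bits 11:10 the rounding control (00b = round to
   nearest), which is how 0x027F decomposes.  In %mxcsr the six
   exception mask bits sit at bits 12:7 and the rounding control at
   bits 14:13, so 0x1F80 likewise means all exceptions masked,
   round-to-nearest. */
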
/*---------------------------------------------------------*/
/*--- misc helpers                                       ---*/
/*---------------------------------------------------------*/

/* These are duplicated in guest-amd64/toIR.c */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}

static Bool isZeroU8 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U8
          && e->Iex.Const.con->Ico.U8 == 0;
}

/*---------------------------------------------------------*/
/*--- ISelEnv                                            ---*/
/*---------------------------------------------------------*/

/* This carries around:

   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
     might encounter.  This is computed before insn selection starts,
     and does not change.

   - A mapping from IRTemp to HReg.  This tells the insn selector
     which virtual register is associated with each IRTemp temporary.
     This is computed before insn selection starts, and does not
     change.  We expect this mapping to map precisely the same set of
     IRTemps as the type mapping does.

     - vregmap holds the primary register for the IRTemp.
     - vregmapHI is only used for 128-bit integer-typed IRTemps.  It
       holds the identity of a second 64-bit virtual HReg, which
       holds the high half of the value.

   - The host subarchitecture we are selecting insns for.  This is
     set at the start and does not change.

   - The code array, that is, the insns selected so far.

   - A counter, for generating new virtual registers.

   - A Bool for indicating whether we may generate chain-me
     instructions for control flow transfers, or whether we must use
     XAssisted.

   - The maximum guest address of any guest insn in this block.
     Actually, the address of the highest-addressed byte from any insn
     in this block.  It is set at the start and does not change.  This
     is used for detecting jumps which are definitely forward-edges
     from this block, and therefore can be made (chained) to the fast
     entry point of the destination, thereby avoiding the
     destination's event check.

   Note, this is all host-independent.  (JRS 20050201: well, kinda
   ... not completely.  Compare with ISelEnv for X86.)
*/

typedef
   struct {
      /* Constants -- set at the start; they do not change. */
      IRTypeEnv*   type_env;

      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      UInt         hwcaps;

      Bool         chainingAllowed;
      Addr64       max_ga;

      /* These are modified as we go along. */
      HInstrArray* code;
      Int          vreg_ctr;
   }
   ISelEnv;

static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   return env->vregmap[tmp];
}

static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
                               ISelEnv* env, IRTemp tmp )
{
   vassert(tmp >= 0);
   vassert(tmp < env->n_vregmap);
   vassert(! hregIsInvalid(env->vregmapHI[tmp]));
   *vrLO = env->vregmap[tmp];
   *vrHI = env->vregmapHI[tmp];
}

static void addInstr ( ISelEnv* env, AMD64Instr* instr )
{
   addHInstr(env->code, instr);
   if (vex_traceflags & VEX_TRACE_VCODE) {
      ppAMD64Instr(instr, True);
      vex_printf("\n");
   }
}

static HReg newVRegI ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcInt64, 0/*enc*/, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}

static HReg newVRegV ( ISelEnv* env )
{
   HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
   env->vreg_ctr++;
   return reg;
}

/*---------------------------------------------------------*/
/*--- ISEL: Forward declarations                         ---*/
/*---------------------------------------------------------*/

/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   checks that all returned registers are virtual.
*/
static AMD64RMI*     iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e );
static AMD64RMI*     iselIntExpr_RMI     ( ISelEnv* env, IRExpr* e );

static AMD64RI*      iselIntExpr_RI_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RI*      iselIntExpr_RI      ( ISelEnv* env, IRExpr* e );

static AMD64RM*      iselIntExpr_RM_wrk  ( ISelEnv* env, IRExpr* e );
static AMD64RM*      iselIntExpr_RM      ( ISelEnv* env, IRExpr* e );

static HReg          iselIntExpr_R_wrk   ( ISelEnv* env, IRExpr* e );
static HReg          iselIntExpr_R       ( ISelEnv* env, IRExpr* e );

static AMD64AMode*   iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e );
static AMD64AMode*   iselIntExpr_AMode     ( ISelEnv* env, IRExpr* e );

static void          iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
                                          ISelEnv* env, IRExpr* e );
static void          iselInt128Expr     ( /*OUT*/HReg* rHi, HReg* rLo,
                                          ISelEnv* env, IRExpr* e );

static AMD64CondCode iselCondCode_wrk   ( ISelEnv* env, IRExpr* e );
static AMD64CondCode iselCondCode       ( ISelEnv* env, IRExpr* e );

static HReg          iselDblExpr_wrk    ( ISelEnv* env, IRExpr* e );
static HReg          iselDblExpr        ( ISelEnv* env, IRExpr* e );

static HReg          iselFltExpr_wrk    ( ISelEnv* env, IRExpr* e );
static HReg          iselFltExpr        ( ISelEnv* env, IRExpr* e );

static HReg          iselVecExpr_wrk    ( ISelEnv* env, IRExpr* e );
static HReg          iselVecExpr        ( ISelEnv* env, IRExpr* e );

static void          iselDVecExpr_wrk ( /*OUT*/HReg* rHi, HReg* rLo,
                                        ISelEnv* env, IRExpr* e );
static void          iselDVecExpr     ( /*OUT*/HReg* rHi, HReg* rLo,
                                        ISelEnv* env, IRExpr* e );

/*---------------------------------------------------------*/
/*--- ISEL: Misc helpers                                 ---*/
/*---------------------------------------------------------*/

static Bool sane_AMode ( AMD64AMode* am )
{
   switch (am->tag) {
      case Aam_IR:
         return
            toBool( hregClass(am->Aam.IR.reg) == HRcInt64
                    && (hregIsVirtual(am->Aam.IR.reg)
                        || sameHReg(am->Aam.IR.reg, hregAMD64_RBP())) );
      case Aam_IRRS:
         return
            toBool( hregClass(am->Aam.IRRS.base) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.base)
                    && hregClass(am->Aam.IRRS.index) == HRcInt64
                    && hregIsVirtual(am->Aam.IRRS.index) );
      default:
         vpanic("sane_AMode: unknown amd64 amode tag");
   }
}

/* Can the lower 32 bits be signedly widened to produce the whole
   64-bit value?  In other words, are the top 33 bits either all 0 or
   all 1 ? */
static Bool fitsIn32Bits ( ULong x )
{
   Long y1;
   y1 = x << 32;
   y1 >>=/*s*/ 32;
   return toBool(x == y1);
}
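
/* For example, fitsIn32Bits(0xFFFFFFFF80000000ULL) is True (the top
   33 bits are all 1), whereas fitsIn32Bits(0x0000000080000000ULL) is
   False (bit 31 is set but the upper 32 bits are zero). */
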
/* Is this a 64-bit zero expression? */

static Bool isZeroU64 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U64
          && e->Iex.Const.con->Ico.U64 == 0ULL;
}

static Bool isZeroU32 ( IRExpr* e )
{
   return e->tag == Iex_Const
          && e->Iex.Const.con->tag == Ico_U32
          && e->Iex.Const.con->Ico.U32 == 0;
}

/* Make an int reg-reg move. */

static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcInt64);
   vassert(hregClass(dst) == HRcInt64);
   return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
}

/* Make a vector (128 bit) reg-reg move. */

static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
{
   vassert(hregClass(src) == HRcVec128);
   vassert(hregClass(dst) == HRcVec128);
   return AMD64Instr_SseReRg(Asse_MOV, src, dst);
}

/* Advance/retreat %rsp by n. */

static void add_to_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}

static void sub_from_rsp ( ISelEnv* env, Int n )
{
   vassert(n > 0 && n < 256 && (n%8) == 0);
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_SUB, AMD64RMI_Imm(n),
                              hregAMD64_RSP()));
}

/* Push 64-bit constants on the stack. */
static void push_uimm64( ISelEnv* env, ULong uimm64 )
{
   /* If uimm64 can be expressed as the sign extension of its
      lower 32 bits, we can do it the easy way. */
   Long simm64 = (Long)uimm64;
   if ( simm64 == ((Long)(uimm64 << 32) >> 32) ) {
      addInstr( env, AMD64Instr_Push(AMD64RMI_Imm( (UInt)uimm64 )) );
   } else {
      HReg tmp = newVRegI(env);
      addInstr( env, AMD64Instr_Imm64(uimm64, tmp) );
      addInstr( env, AMD64Instr_Push(AMD64RMI_Reg(tmp)) );
   }
}
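
/* For example, 0xFFFFFFFFFFFFFFFFULL takes the fast path, since
   pushq sign-extends its 32-bit immediate, whereas
   0x00000000FFFFFFFFULL is not such a sign extension and must go
   via a temporary register. */
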
/* Used only in doHelperCall.  If possible, produce a single
   instruction which computes 'e' into 'dst'.  If not possible, return
   NULL. */

static AMD64Instr* iselIntExpr_single_instruction ( ISelEnv* env,
                                                    HReg     dst,
                                                    IRExpr*  e )
{
   /* Per comments in doHelperCall below, appearance of
      Iex_VECRET implies ill-formed IR. */
   vassert(e->tag != Iex_VECRET);

   /* In this case we give out a copy of the BaseBlock pointer. */
   if (UNLIKELY(e->tag == Iex_BBPTR)) {
      return mk_iMOVsd_RR( hregAMD64_RBP(), dst );
   }

   vassert(typeOfIRExpr(env->type_env, e) == Ity_I64);

   if (e->tag == Iex_Const) {
      vassert(e->Iex.Const.con->tag == Ico_U64);
      if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
         return AMD64Instr_Alu64R(
                   Aalu_MOV,
                   AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64)),
                   dst
                );
      } else {
         return AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, dst);
      }
   }

   if (e->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      return mk_iMOVsd_RR(src, dst);
   }

   if (e->tag == Iex_Get) {
      vassert(e->Iex.Get.ty == Ity_I64);
      return AMD64Instr_Alu64R(
                Aalu_MOV,
                AMD64RMI_Mem(
                   AMD64AMode_IR(e->Iex.Get.offset,
                                 hregAMD64_RBP())),
                dst);
   }

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_32Uto64
       && e->Iex.Unop.arg->tag == Iex_RdTmp) {
      HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
      return AMD64Instr_MovxLQ(False, src, dst);
   }

   if (0) { ppIRExpr(e); vex_printf("\n"); }

   return NULL;
}

/* Do a complete function call.  |guard| is a Ity_Bit expression
   indicating whether or not the call happens.  If guard==NULL, the
   call is unconditional.  |retloc| is set to indicate where the
   return value is after the call.  The caller (of this fn) must
   generate code to add |stackAdjustAfterCall| to the stack pointer
   after the call is done. */

static
void doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
                    /*OUT*/RetLoc* retloc,
                    ISelEnv* env,
                    IRExpr* guard,
                    IRCallee* cee, IRType retTy, IRExpr** args )
{
   AMD64CondCode cc;
   HReg          argregs[6];
   HReg          tmpregs[6];
   AMD64Instr*   fastinstrs[6];
   UInt          n_args, i;

   /* Set default returns.  We'll update them later if needed. */
   *stackAdjustAfterCall = 0;
   *retloc               = mk_RetLoc_INVALID();

   /* These are used for cross-checking that IR-level constraints on
      the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
   UInt nVECRETs = 0;
   UInt nBBPTRs  = 0;

   /* Marshal args for a call and do the call.

      This function only deals with a tiny set of possibilities, which
      cover all helpers in practice.  The restrictions are that only
      arguments in registers are supported, hence only 6x64 integer
      bits in total can be passed.  In fact the only supported arg
      type is I64.

      The return type can be I{64,32,16,8} or V{128,256}.  In the
      latter two cases, it is expected that |args| will contain the
      special node IRExpr_VECRET(), in which case this routine
      generates code to allocate space on the stack for the vector
      return value.  Since we are not passing any scalars on the
      stack, it is enough to preallocate the return space before
      marshalling any arguments, in this case.

      |args| may also contain IRExpr_BBPTR(), in which case the
      value in %rbp is passed as the corresponding argument.

      Generating code which is both efficient and correct when
      parameters are to be passed in registers is difficult, for the
      reasons elaborated in detail in comments attached to
      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
      of the method described in those comments.

      The problem is split into two cases: the fast scheme and the
      slow scheme.  In the fast scheme, arguments are computed
      directly into the target (real) registers.  This is only safe
      when we can be sure that computation of each argument will not
      trash any real registers set by computation of any other
      argument.

      In the slow scheme, all args are first computed into vregs, and
      once they are all done, they are moved to the relevant real
      regs.  This always gives correct code, but it also gives a bunch
      of vreg-to-rreg moves which are usually redundant but are hard
      for the register allocator to get rid of.

      To decide which scheme to use, all argument expressions are
      first examined.  If they are all so simple that it is clear they
      will be evaluated without use of any fixed registers, use the
      fast scheme, else use the slow scheme.  Note also that only
      unconditional calls may use the fast scheme, since having to
      compute a condition expression could itself trash real
      registers.  Note that for simplicity, in the case where
      IRExpr_VECRET() is present, we use the slow scheme.  This is
      motivated by the desire to avoid any possible complexity
      w.r.t. nested calls.

      Note this requires being able to examine an expression and
      determine whether or not evaluation of it might use a fixed
      register.  That requires knowledge of how the rest of this insn
      selector works.  Currently just the following 3 are regarded as
      safe -- hopefully they cover the majority of arguments in
      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
   */

   /* Note that the cee->regparms field is meaningless on AMD64 host
      (since there is only one calling convention) and so we always
      ignore it. */
   n_args = 0;
   for (i = 0; args[i]; i++)
      n_args++;

   if (n_args > 6)
      vpanic("doHelperCall(AMD64): cannot currently handle > 6 args");

   argregs[0] = hregAMD64_RDI();
   argregs[1] = hregAMD64_RSI();
   argregs[2] = hregAMD64_RDX();
   argregs[3] = hregAMD64_RCX();
   argregs[4] = hregAMD64_R8();
   argregs[5] = hregAMD64_R9();

   tmpregs[0] = tmpregs[1] = tmpregs[2] =
   tmpregs[3] = tmpregs[4] = tmpregs[5] = INVALID_HREG;

   fastinstrs[0] = fastinstrs[1] = fastinstrs[2] =
   fastinstrs[3] = fastinstrs[4] = fastinstrs[5] = NULL;

   /* First decide which scheme (slow or fast) is to be used.  First
      assume the fast scheme, and select slow if any contraindications
      (wow) appear. */

   /* We'll need space on the stack for the return value.  Avoid
      possible complications with nested calls by using the slow
      scheme. */
   if (retTy == Ity_V128 || retTy == Ity_V256)
      goto slowscheme;

   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional */
      } else {
         /* Not manifestly unconditional -- be conservative. */
         goto slowscheme;
      }
   }

   /* Ok, let's try for the fast scheme.  If it doesn't pan out, we'll
      use the slow scheme.  Because this is tentative, we can't call
      addInstr (that is, commit to) any instructions until we've
      handled all the arguments.  So park the resulting instructions
      in a buffer and emit that if we're successful. */

   /* FAST SCHEME */
   /* In this loop, we process args that can be computed into the
      destination (real) register with a single instruction, without
      using any fixed regs.  That also includes IRExpr_BBPTR(), but
      not IRExpr_VECRET().  Indeed, if the IR is well-formed, we can
      never see IRExpr_VECRET() at this point, since the return-type
      check above should ensure all those cases use the slow scheme
      instead. */
   vassert(n_args >= 0 && n_args <= 6);
   for (i = 0; i < n_args; i++) {
      IRExpr* arg = args[i];
      if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg))) {
         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
      }
      fastinstrs[i]
         = iselIntExpr_single_instruction( env, argregs[i], args[i] );
      if (fastinstrs[i] == NULL)
         goto slowscheme;
   }

   /* Looks like we're in luck.  Emit the accumulated instructions and
      move on to doing the call itself. */
   for (i = 0; i < n_args; i++)
      addInstr(env, fastinstrs[i]);

   /* Fast scheme only applies for unconditional calls.  Hence: */
   cc = Acc_ALWAYS;

   goto handle_call;


   /* SLOW SCHEME; move via temporaries */
  slowscheme:
   {}
#  if 0 /* debug only */
   if (n_args > 0) {for (i = 0; args[i]; i++) {
   ppIRExpr(args[i]); vex_printf(" "); }
   vex_printf("\n");}
#  endif

   /* If we have a vector return type, allocate a place for it on the
      stack and record its address. */
   HReg r_vecRetAddr = INVALID_HREG;
   if (retTy == Ity_V128) {
      r_vecRetAddr = newVRegI(env);
      sub_from_rsp(env, 16);
      addInstr(env, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr ));
   }
   else if (retTy == Ity_V256) {
      r_vecRetAddr = newVRegI(env);
      sub_from_rsp(env, 32);
      addInstr(env, mk_iMOVsd_RR( hregAMD64_RSP(), r_vecRetAddr ));
   }

   vassert(n_args >= 0 && n_args <= 6);
   for (i = 0; i < n_args; i++) {
      IRExpr* arg = args[i];
      if (UNLIKELY(arg->tag == Iex_BBPTR)) {
         tmpregs[i] = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[i]));
         nBBPTRs++;
      }
      else if (UNLIKELY(arg->tag == Iex_VECRET)) {
         /* We stashed the address of the return slot earlier, so just
            retrieve it now. */
         vassert(!hregIsInvalid(r_vecRetAddr));
         tmpregs[i] = r_vecRetAddr;
         nVECRETs++;
      }
      else {
         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
         tmpregs[i] = iselIntExpr_R(env, args[i]);
      }
   }

   /* Now we can compute the condition.  We can't do it earlier
      because the argument computations could trash the condition
      codes.  Be a bit clever to handle the common case where the
      guard is 1:Bit. */
   cc = Acc_ALWAYS;
   if (guard) {
      if (guard->tag == Iex_Const
          && guard->Iex.Const.con->tag == Ico_U1
          && guard->Iex.Const.con->Ico.U1 == True) {
         /* unconditional -- do nothing */
      } else {
         cc = iselCondCode( env, guard );
      }
   }

   /* Move the args to their final destinations. */
   for (i = 0; i < n_args; i++) {
      /* None of these insns, including any spill code that might
         be generated, may alter the condition codes. */
      addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) );
   }


   /* Do final checks, set the return values, and generate the call
      instruction proper. */
  handle_call:

   if (retTy == Ity_V128 || retTy == Ity_V256) {
      vassert(nVECRETs == 1);
   } else {
      vassert(nVECRETs == 0);
   }

   vassert(nBBPTRs == 0 || nBBPTRs == 1);

   vassert(*stackAdjustAfterCall == 0);
   vassert(is_RetLoc_INVALID(*retloc));
   switch (retTy) {
      case Ity_INVALID:
         /* Function doesn't return a value. */
         *retloc = mk_RetLoc_simple(RLPri_None);
         break;
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         *retloc = mk_RetLoc_simple(RLPri_Int);
         break;
      case Ity_V128:
         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
         *stackAdjustAfterCall = 16;
         break;
      case Ity_V256:
         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
         *stackAdjustAfterCall = 32;
         break;
      default:
         /* IR can denote other possible return types, but we don't
            handle those here. */
         vassert(0);
   }

   /* Finally, generate the call itself.  This needs the *retloc value
      set in the switch above, which is why it's at the end. */
   addInstr(env,
            AMD64Instr_Call(cc, (Addr)cee->addr, n_args, *retloc));
}

/* Given a guest-state array descriptor, an index expression and a
   bias, generate an AMD64AMode holding the relevant guest state
   offset. */

static
AMD64AMode* genGuestArrayOffset ( ISelEnv* env, IRRegArray* descr,
                                  IRExpr* off, Int bias )
{
   HReg tmp, roff;
   Int  elemSz = sizeofIRType(descr->elemTy);
   Int  nElems = descr->nElems;

   /* Throw out any cases not generated by an amd64 front end.  In
      theory there might be a day where we need to handle them -- if
      we ever run non-amd64-guest on amd64 host. */

   if (nElems != 8 || (elemSz != 1 && elemSz != 8))
      vpanic("genGuestArrayOffset(amd64 host)");

   /* Compute off into a reg, %off.  Then return:

        movq %off, %tmp
        addq $bias, %tmp         (if bias != 0)
        andq $7, %tmp
        ... base(%rbp, %tmp, shift) ...
   */
   tmp  = newVRegI(env);
   roff = iselIntExpr_R(env, off);
   addInstr(env, mk_iMOVsd_RR(roff, tmp));
   if (bias != 0) {
      /* Make sure the bias is sane, in the sense that there are
         no significant bits above bit 30 in it. */
      vassert(-10000 < bias && bias < 10000);
      addInstr(env,
               AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(bias), tmp));
   }
   addInstr(env,
            AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(7), tmp));
   vassert(elemSz == 1 || elemSz == 8);
   return
      AMD64AMode_IRRS( descr->base, hregAMD64_RBP(), tmp,
                       elemSz==8 ? 3 : 0);
}
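
/* Worked example: for an 8-element array of I64 elements (such as
   the guest x87 register file), index expression ix and bias 1, this
   produces the amode  base(%rbp, %tmp, 8)  where %tmp = (ix + 1) & 7;
   the AND with 7 provides the mod-8 wraparound that stack-style
   indexing relies on. */
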
/* Set the SSE unit's rounding mode to default (%mxcsr = 0x1F80) */
static
void set_SSE_rounding_default ( ISelEnv* env )
{
   /* pushq $DEFAULT_MXCSR
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(DEFAULT_MXCSR)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}

/* Mess with the FPU's rounding mode: set to the default rounding mode
   (DEFAULT_FPUCW). */
static
void set_FPU_rounding_default ( ISelEnv* env )
{
   /* movq $DEFAULT_FPUCW, -8(%rsp)
      fldcw -8(%rsp)
   */
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64M(
                    Aalu_MOV, AMD64RI_Imm(DEFAULT_FPUCW), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}

/* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the SSE machinery to
   have the same rounding.
*/
static
void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   /* Note: this sequence only makes sense because DEFAULT_MXCSR has
      both rounding bits == 0.  If that wasn't the case, we couldn't
      create a new rounding field simply by ORing the new value into
      place. */

   /* movq $3, %reg
      andq [[mode]], %reg  -- shouldn't be needed; paranoia
      shlq $13, %reg
      orq $DEFAULT_MXCSR, %reg
      pushq %reg
      ldmxcsr 0(%rsp)
      addq $8, %rsp
   */
   HReg        reg      = newVRegI(env);
   AMD64AMode* zero_rsp = AMD64AMode_IR(0, hregAMD64_RSP());
   addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Imm(3), reg));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                   iselIntExpr_RMI(env, mode), reg));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 13, reg));
   addInstr(env, AMD64Instr_Alu64R(
                    Aalu_OR, AMD64RMI_Imm(DEFAULT_MXCSR), reg));
   addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(reg)));
   addInstr(env, AMD64Instr_LdMXCSR(zero_rsp));
   add_to_rsp(env, 8);
}
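
/* The shift by 13 places the 2-bit rounding-mode value in the RC
   field of %mxcsr (bits 14:13); set_FPU_rounding_mode below shifts by
   10 instead, targeting the x87 control word's RC field (bits 11:10).
   A plain shift suffices because the IRRoundingMode encoding
   (00 nearest, 01 -inf, 10 +inf, 11 zero) matches the hardware's. */
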
/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
   expression denoting a value in the range 0 .. 3, indicating a round
   mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
   the same rounding.
*/
static
void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
{
   HReg rrm  = iselIntExpr_R(env, mode);
   HReg rrm2 = newVRegI(env);
   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());

   /* movq  %rrm, %rrm2
      andq  $3, %rrm2   -- shouldn't be needed; paranoia
      shlq  $10, %rrm2
      orq   $DEFAULT_FPUCW, %rrm2
      movq  %rrm2, -8(%rsp)
      fldcw -8(%rsp)
   */
   addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(3), rrm2));
   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 10, rrm2));
   addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                   AMD64RMI_Imm(DEFAULT_FPUCW), rrm2));
   addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,
                                   AMD64RI_Reg(rrm2), m8_rsp));
   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
}

/* Generate all-zeroes into a new vector register.
*/
static HReg generate_zeroes_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst));
   return dst;
}

/* Generate all-ones into a new vector register.
*/
static HReg generate_ones_V128 ( ISelEnv* env )
{
   HReg dst = newVRegV(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, dst, dst));
   return dst;
}

/* Generate !src into a new vector register.  Amazing that there isn't
   a less crappy way to do this.
*/
static HReg do_sse_NotV128 ( ISelEnv* env, HReg src )
{
   HReg dst = generate_ones_V128(env);
   addInstr(env, AMD64Instr_SseReRg(Asse_XOR, src, dst));
   return dst;
}
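
/* These use two standard SSE idioms: XORing a register with itself
   gives all zeroes, and comparing a register for equality against
   itself (CMPEQ32) gives all ones, since every lane equals itself.
   NOT is then just (all ones) XOR src. */
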
/* Expand the given byte into a 64-bit word, by cloning each bit
   8 times. */
static ULong bitmask8_to_bytemask64 ( UShort w8 )
{
   vassert(w8 == (w8 & 0xFF));
   ULong w64 = 0;
   Int i;
   for (i = 0; i < 8; i++) {
      if (w8 & (1<<i))
         w64 |= (0xFFULL << (8 * i));
   }
   return w64;
}
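
/* For example, bitmask8_to_bytemask64(0xA5) == 0xFF00FF0000FF00FFULL:
   bits 0, 2, 5 and 7 of the input select bytes 0, 2, 5 and 7 of the
   result. */
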

/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (64/32/16/8 bit)         ---*/
/*---------------------------------------------------------*/

/* Select insns for an integer-typed expression, and add them to the
   code list.  Return a reg holding the result.  This reg will be a
   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
   want to modify it, ask for a new vreg, copy it in there, and modify
   the copy.  The register allocator will do its best to map both
   vregs to the same real register, so the copies will often disappear
   later in the game.

   This should handle expressions of 64, 32, 16 and 8-bit type.  All
   results are returned in a 64-bit register.  For 32-, 16- and 8-bit
   expressions, the upper 32/48/56 bits are arbitrary, so you should
   mask or sign extend partial values if necessary.
*/

static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselIntExpr_R_wrk(env, e);
   /* sanity checks ... */
#  if 0
   vex_printf("\niselIntExpr_R: "); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(r) == HRcInt64);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY ! */
static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
{
   /* Used for unary/binary SIMD64 ops. */
   HWord fn = 0;
   Bool second_is_UInt;

   MatchInfo mi;
   DECLARE_PATTERN(p_1Uto8_64to1);
   DECLARE_PATTERN(p_LDle8_then_8Uto64);
   DECLARE_PATTERN(p_LDle16_then_16Uto64);

   IRType ty = typeOfIRExpr(env->type_env,e);
   switch (ty) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: break;
      default: vassert(0);
   }

   switch (e->tag) {

   /* --------- TEMP --------- */
   case Iex_RdTmp: {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   /* --------- LOAD --------- */
   case Iex_Load: {
      HReg dst = newVRegI(env);
      AMD64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr );

      /* We can't handle big-endian loads, nor load-linked. */
      if (e->Iex.Load.end != Iend_LE)
         goto irreducible;

      if (ty == Ity_I64) {
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
                                         AMD64RMI_Mem(amode), dst) );
         return dst;
      }
      if (ty == Ity_I32) {
         addInstr(env, AMD64Instr_LoadEX(4,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I16) {
         addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
         return dst;
      }
      if (ty == Ity_I8) {
         addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
         return dst;
      }
      break;
   }

   /* --------- BINARY OP --------- */
   case Iex_Binop: {
      AMD64AluOp   aluOp;
      AMD64ShiftOp shOp;

      /* Pattern: Sub64(0,x) */
      /*     and: Sub32(0,x) */
      if ((e->Iex.Binop.op == Iop_Sub64 && isZeroU64(e->Iex.Binop.arg1))
          || (e->Iex.Binop.op == Iop_Sub32 && isZeroU32(e->Iex.Binop.arg1))) {
         HReg dst = newVRegI(env);
         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
         return dst;
      }

      /* Is it an addition or logical style op? */
      switch (e->Iex.Binop.op) {
         case Iop_Add8: case Iop_Add16: case Iop_Add32: case Iop_Add64:
            aluOp = Aalu_ADD; break;
         case Iop_Sub8: case Iop_Sub16: case Iop_Sub32: case Iop_Sub64:
            aluOp = Aalu_SUB; break;
         case Iop_And8: case Iop_And16: case Iop_And32: case Iop_And64:
            aluOp = Aalu_AND; break;
         case Iop_Or8:  case Iop_Or16:  case Iop_Or32:  case Iop_Or64:
            aluOp = Aalu_OR; break;
         case Iop_Xor8: case Iop_Xor16: case Iop_Xor32: case Iop_Xor64:
            aluOp = Aalu_XOR; break;
         case Iop_Mul16: case Iop_Mul32: case Iop_Mul64:
            aluOp = Aalu_MUL; break;
         default:
            aluOp = Aalu_INVALID; break;
      }
      /* For commutative ops we assume any literal
         values are on the second operand. */
      if (aluOp != Aalu_INVALID) {
         HReg dst      = newVRegI(env);
         HReg reg      = iselIntExpr_R(env, e->Iex.Binop.arg1);
         AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(reg,dst));
         addInstr(env, AMD64Instr_Alu64R(aluOp, rmi, dst));
         return dst;
      }

      /* Perhaps a shift op? */
      switch (e->Iex.Binop.op) {
         case Iop_Shl64: case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
            shOp = Ash_SHL; break;
         case Iop_Shr64: case Iop_Shr32: case Iop_Shr16: case Iop_Shr8:
            shOp = Ash_SHR; break;
         case Iop_Sar64: case Iop_Sar32: case Iop_Sar16: case Iop_Sar8:
            shOp = Ash_SAR; break;
         default:
            shOp = Ash_INVALID; break;
      }
      if (shOp != Ash_INVALID) {
         HReg dst = newVRegI(env);

         /* regL = the value to be shifted */
         HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(regL,dst));

         /* Do any necessary widening for 32/16/8 bit operands */
         switch (e->Iex.Binop.op) {
            case Iop_Shr64: case Iop_Shl64: case Iop_Sar64:
               break;
            case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
               break;
            case Iop_Shr8:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFF), dst));
               break;
            case Iop_Shr16:
               addInstr(env, AMD64Instr_Alu64R(
                                Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
               break;
            case Iop_Shr32:
               addInstr(env, AMD64Instr_MovxLQ(False, dst, dst));
               break;
            case Iop_Sar8:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
               break;
            case Iop_Sar16:
               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst));
               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
               break;
            case Iop_Sar32:
               addInstr(env, AMD64Instr_MovxLQ(True, dst, dst));
               break;
            default:
               ppIROp(e->Iex.Binop.op);
               vassert(0);
         }

         /* Now consider the shift amount.  If it's a literal, we
            can do a much better job than the general case. */
         if (e->Iex.Binop.arg2->tag == Iex_Const) {
            /* assert that the IR is well-typed */
            Int nshift;
            vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
            nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
            vassert(nshift >= 0);
            if (nshift > 0)
               /* Can't allow nshift==0 since that means %cl */
               addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
         } else {
            /* General case; we have to force the amount into %cl. */
            HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
            addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
         }
         return dst;
      }

      /* Deal with 64-bit SIMD binary ops */
      second_is_UInt = False;
      switch (e->Iex.Binop.op) {
         case Iop_Add8x8:
            fn = (HWord)h_generic_calc_Add8x8; break;
         case Iop_Add16x4:
            fn = (HWord)h_generic_calc_Add16x4; break;
         case Iop_Add32x2:
            fn = (HWord)h_generic_calc_Add32x2; break;

         case Iop_Avg8Ux8:
            fn = (HWord)h_generic_calc_Avg8Ux8; break;
         case Iop_Avg16Ux4:
            fn = (HWord)h_generic_calc_Avg16Ux4; break;

         case Iop_CmpEQ8x8:
            fn = (HWord)h_generic_calc_CmpEQ8x8; break;
         case Iop_CmpEQ16x4:
            fn = (HWord)h_generic_calc_CmpEQ16x4; break;
         case Iop_CmpEQ32x2:
            fn = (HWord)h_generic_calc_CmpEQ32x2; break;

         case Iop_CmpGT8Sx8:
            fn = (HWord)h_generic_calc_CmpGT8Sx8; break;
         case Iop_CmpGT16Sx4:
            fn = (HWord)h_generic_calc_CmpGT16Sx4; break;
         case Iop_CmpGT32Sx2:
            fn = (HWord)h_generic_calc_CmpGT32Sx2; break;

         case Iop_InterleaveHI8x8:
            fn = (HWord)h_generic_calc_InterleaveHI8x8; break;
         case Iop_InterleaveLO8x8:
            fn = (HWord)h_generic_calc_InterleaveLO8x8; break;
         case Iop_InterleaveHI16x4:
            fn = (HWord)h_generic_calc_InterleaveHI16x4; break;
         case Iop_InterleaveLO16x4:
            fn = (HWord)h_generic_calc_InterleaveLO16x4; break;
         case Iop_InterleaveHI32x2:
            fn = (HWord)h_generic_calc_InterleaveHI32x2; break;
         case Iop_InterleaveLO32x2:
            fn = (HWord)h_generic_calc_InterleaveLO32x2; break;
         case Iop_CatOddLanes16x4:
            fn = (HWord)h_generic_calc_CatOddLanes16x4; break;
         case Iop_CatEvenLanes16x4:
            fn = (HWord)h_generic_calc_CatEvenLanes16x4; break;
         case Iop_Perm8x8:
            fn = (HWord)h_generic_calc_Perm8x8; break;

         case Iop_Max8Ux8:
            fn = (HWord)h_generic_calc_Max8Ux8; break;
         case Iop_Max16Sx4:
            fn = (HWord)h_generic_calc_Max16Sx4; break;
         case Iop_Min8Ux8:
            fn = (HWord)h_generic_calc_Min8Ux8; break;
         case Iop_Min16Sx4:
            fn = (HWord)h_generic_calc_Min16Sx4; break;

         case Iop_Mul16x4:
            fn = (HWord)h_generic_calc_Mul16x4; break;
         case Iop_Mul32x2:
            fn = (HWord)h_generic_calc_Mul32x2; break;
         case Iop_MulHi16Sx4:
            fn = (HWord)h_generic_calc_MulHi16Sx4; break;
         case Iop_MulHi16Ux4:
            fn = (HWord)h_generic_calc_MulHi16Ux4; break;

         case Iop_QAdd8Sx8:
            fn = (HWord)h_generic_calc_QAdd8Sx8; break;
         case Iop_QAdd16Sx4:
            fn = (HWord)h_generic_calc_QAdd16Sx4; break;
         case Iop_QAdd8Ux8:
            fn = (HWord)h_generic_calc_QAdd8Ux8; break;
         case Iop_QAdd16Ux4:
            fn = (HWord)h_generic_calc_QAdd16Ux4; break;

         case Iop_QNarrowBin32Sto16Sx4:
            fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; break;
         case Iop_QNarrowBin16Sto8Sx8:
            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; break;
         case Iop_QNarrowBin16Sto8Ux8:
            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; break;
         case Iop_NarrowBin16to8x8:
            fn = (HWord)h_generic_calc_NarrowBin16to8x8; break;
         case Iop_NarrowBin32to16x4:
            fn = (HWord)h_generic_calc_NarrowBin32to16x4; break;

         case Iop_QSub8Sx8:
            fn = (HWord)h_generic_calc_QSub8Sx8; break;
         case Iop_QSub16Sx4:
            fn = (HWord)h_generic_calc_QSub16Sx4; break;
         case Iop_QSub8Ux8:
            fn = (HWord)h_generic_calc_QSub8Ux8; break;
         case Iop_QSub16Ux4:
            fn = (HWord)h_generic_calc_QSub16Ux4; break;

         case Iop_Sub8x8:
            fn = (HWord)h_generic_calc_Sub8x8; break;
         case Iop_Sub16x4:
            fn = (HWord)h_generic_calc_Sub16x4; break;
         case Iop_Sub32x2:
            fn = (HWord)h_generic_calc_Sub32x2; break;

         case Iop_ShlN32x2:
            fn = (HWord)h_generic_calc_ShlN32x2;
            second_is_UInt = True;
            break;
         case Iop_ShlN16x4:
            fn = (HWord)h_generic_calc_ShlN16x4;
            second_is_UInt = True;
            break;
         case Iop_ShlN8x8:
            fn = (HWord)h_generic_calc_ShlN8x8;
            second_is_UInt = True;
            break;
         case Iop_ShrN32x2:
            fn = (HWord)h_generic_calc_ShrN32x2;
            second_is_UInt = True;
            break;
         case Iop_ShrN16x4:
            fn = (HWord)h_generic_calc_ShrN16x4;
            second_is_UInt = True;
            break;
         case Iop_SarN32x2:
            fn = (HWord)h_generic_calc_SarN32x2;
            second_is_UInt = True;
            break;
         case Iop_SarN16x4:
            fn = (HWord)h_generic_calc_SarN16x4;
            second_is_UInt = True;
            break;
         case Iop_SarN8x8:
            fn = (HWord)h_generic_calc_SarN8x8;
            second_is_UInt = True;
            break;

         default:
            fn = (HWord)0; break;
      }
      if (fn != (HWord)0) {
         /* Note: the following assumes all helpers are of signature
               ULong fn ( ULong, ULong ), and they are
            not marked as regparm functions.
         */
         HReg dst  = newVRegI(env);
         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
         if (second_is_UInt)
            addInstr(env, AMD64Instr_MovxLQ(False, argR, argR));
         addInstr(env, mk_iMOVsd_RR(argL, hregAMD64_RDI()) );
         addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RSI()) );
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 2,
                                        mk_RetLoc_simple(RLPri_Int) ));
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
         return dst;
      }

      /* Handle misc other ops. */

      if (e->Iex.Binop.op == Iop_Max32U) {
         HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg dst  = newVRegI(env);
         HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(src1, dst));
         addInstr(env, AMD64Instr_Alu32R(Aalu_CMP, AMD64RMI_Reg(src2), dst));
         addInstr(env, AMD64Instr_CMov64(Acc_B, src2, dst));
         return dst;
      }

      if (e->Iex.Binop.op == Iop_DivModS64to32
          || e->Iex.Binop.op == Iop_DivModU64to32) {
         /* 64 x 32 -> (32(rem),32(div)) division */
         /* Get the 64-bit operand into edx:eax, and the other into
            any old R/M. */
         HReg rax = hregAMD64_RAX();
         HReg rdx = hregAMD64_RDX();
         HReg dst = newVRegI(env);
         Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS64to32);
         AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
         /* Compute the left operand into a reg, and then
            put the top half in edx and the bottom in eax. */
         HReg left64 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, mk_iMOVsd_RR(left64, rdx));
         addInstr(env, mk_iMOVsd_RR(left64, rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHR, 32, rdx));
         addInstr(env, AMD64Instr_Div(syned, 4, rmRight));
         addInstr(env, AMD64Instr_MovxLQ(False, rdx, rdx));
         addInstr(env, AMD64Instr_MovxLQ(False, rax, rax));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, rdx));
         addInstr(env, mk_iMOVsd_RR(rax, dst));
         addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(rdx), dst));
         return dst;
      }
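
      /* After the 32-bit divide, %rdx holds the remainder and %rax
         the quotient; the MovxLQ/shift/OR sequence above packs them
         as rem:quot into the single 64-bit result, matching the
         (32(rem),32(div)) layout noted at the top of the block. */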

      if (e->Iex.Binop.op == Iop_32HLto64) {
         HReg hi32  = newVRegI(env);
         HReg lo32  = newVRegI(env);
         HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi32s, hi32));
         addInstr(env, mk_iMOVsd_RR(lo32s, lo32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, hi32));
         addInstr(env, AMD64Instr_MovxLQ(False, lo32, lo32));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo32), hi32));
         return hi32;
      }

      if (e->Iex.Binop.op == Iop_16HLto32) {
         HReg hi16  = newVRegI(env);
         HReg lo16  = newVRegI(env);
         HReg hi16s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo16s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi16s, hi16));
         addInstr(env, mk_iMOVsd_RR(lo16s, lo16));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 16, hi16));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFFFF), lo16));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo16), hi16));
         return hi16;
      }

      if (e->Iex.Binop.op == Iop_8HLto16) {
         HReg hi8  = newVRegI(env);
         HReg lo8  = newVRegI(env);
         HReg hi8s = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg lo8s = iselIntExpr_R(env, e->Iex.Binop.arg2);
         addInstr(env, mk_iMOVsd_RR(hi8s, hi8));
         addInstr(env, mk_iMOVsd_RR(lo8s, lo8));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 8, hi8));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_AND, AMD64RMI_Imm(0xFF), lo8));
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_OR, AMD64RMI_Reg(lo8), hi8));
         return hi8;
      }

      if (e->Iex.Binop.op == Iop_MullS32
          || e->Iex.Binop.op == Iop_MullS16
          || e->Iex.Binop.op == Iop_MullS8
          || e->Iex.Binop.op == Iop_MullU32
          || e->Iex.Binop.op == Iop_MullU16
          || e->Iex.Binop.op == Iop_MullU8) {
         HReg a32   = newVRegI(env);
         HReg b32   = newVRegI(env);
         HReg a32s  = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg b32s  = iselIntExpr_R(env, e->Iex.Binop.arg2);
         Int          shift  = 0;
         AMD64ShiftOp shr_op = Ash_SHR;
         switch (e->Iex.Binop.op) {
            case Iop_MullS32: shr_op = Ash_SAR; shift = 32; break;
            case Iop_MullS16: shr_op = Ash_SAR; shift = 48; break;
            case Iop_MullS8:  shr_op = Ash_SAR; shift = 56; break;
            case Iop_MullU32: shr_op = Ash_SHR; shift = 32; break;
            case Iop_MullU16: shr_op = Ash_SHR; shift = 48; break;
            case Iop_MullU8:  shr_op = Ash_SHR; shift = 56; break;
            default: vassert(0);
         }

         addInstr(env, mk_iMOVsd_RR(a32s, a32));
         addInstr(env, mk_iMOVsd_RR(b32s, b32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, a32));
         addInstr(env, AMD64Instr_Sh64(Ash_SHL, shift, b32));
         addInstr(env, AMD64Instr_Sh64(shr_op,  shift, a32));
         addInstr(env, AMD64Instr_Sh64(shr_op,  shift, b32));
         addInstr(env, AMD64Instr_Alu64R(Aalu_MUL, AMD64RMI_Reg(a32), b32));
         return b32;
      }
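
      /* The SHL/SAR (or SHL/SHR) pairs are in-place sign or zero
         extensions of the narrow operands: e.g. for MullS8,
         (x << 56) >>s 56 leaves the sign-extended low byte, after
         which a single 64-bit multiply yields the full widened
         product in the low bits. */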

      if (e->Iex.Binop.op == Iop_CmpF64) {
         HReg fL  = iselDblExpr(env, e->Iex.Binop.arg1);
         HReg fR  = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_SseUComIS(8,fL,fR,dst));
         /* Mask out irrelevant parts of the result so as to conform
            to the CmpF64 definition. */
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(0x45), dst));
         return dst;
      }
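
      /* 0x45 keeps bits 0, 2 and 6 of the %rflags image written by
         ucomisd, i.e. CF, PF and ZF -- the three flags that encode
         the unordered/less-than/greater-than/equal outcome. */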

      if (e->Iex.Binop.op == Iop_F64toI32S
          || e->Iex.Binop.op == Iop_F64toI64S) {
         Int  szD = e->Iex.Binop.op==Iop_F64toI32S ? 4 : 8;
         HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg dst = newVRegI(env);
         set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
         addInstr(env, AMD64Instr_SseSF2SI( 8, szD, rf, dst ));
         set_SSE_rounding_default(env);
         return dst;
      }

      break;
   }

   /* --------- UNARY OP --------- */
   case Iex_Unop: {

      /* 1Uto8(64to1(expr64)) */
      {
         DEFINE_PATTERN( p_1Uto8_64to1,
                         unop(Iop_1Uto8, unop(Iop_64to1, bind(0))) );
         if (matchIRExpr(&mi,p_1Uto8_64to1,e)) {
            IRExpr* expr64 = mi.bindee[0];
            HReg    dst    = newVRegI(env);
            HReg    src    = iselIntExpr_R(env, expr64);
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm(1), dst));
            return dst;
         }
      }

      /* 8Uto64(LDle(expr64)) */
      {
         DEFINE_PATTERN(p_LDle8_then_8Uto64,
                        unop(Iop_8Uto64,
                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
         if (matchIRExpr(&mi,p_LDle8_then_8Uto64,e)) {
            HReg dst = newVRegI(env);
            AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
            addInstr(env, AMD64Instr_LoadEX(1,False,amode,dst));
            return dst;
         }
      }

      /* 16Uto64(LDle(expr64)) */
      {
         DEFINE_PATTERN(p_LDle16_then_16Uto64,
                        unop(Iop_16Uto64,
                             IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
         if (matchIRExpr(&mi,p_LDle16_then_16Uto64,e)) {
            HReg dst = newVRegI(env);
            AMD64AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
            addInstr(env, AMD64Instr_LoadEX(2,False,amode,dst));
            return dst;
         }
      }

      /* 32Uto64( Add32/Sub32/And32/Or32/Xor32(expr32, expr32) )
         Use 32 bit arithmetic and let the default zero-extend rule
         do the 32Uto64 for free. */
      if (e->Iex.Unop.op == Iop_32Uto64 && e->Iex.Unop.arg->tag == Iex_Binop) {
         IROp    opi  = e->Iex.Unop.arg->Iex.Binop.op; /* inner op */
         IRExpr* argL = e->Iex.Unop.arg->Iex.Binop.arg1;
         IRExpr* argR = e->Iex.Unop.arg->Iex.Binop.arg2;
         AMD64AluOp aluOp = Aalu_INVALID;
         switch (opi) {
            case Iop_Add32: aluOp = Aalu_ADD; break;
            case Iop_Sub32: aluOp = Aalu_SUB; break;
            case Iop_And32: aluOp = Aalu_AND; break;
            case Iop_Or32:  aluOp = Aalu_OR;  break;
            case Iop_Xor32: aluOp = Aalu_XOR; break;
            default: break;
         }
         if (aluOp != Aalu_INVALID) {
            /* For commutative ops we assume any literal values are on
               the second operand. */
            HReg dst      = newVRegI(env);
            HReg reg      = iselIntExpr_R(env, argL);
            AMD64RMI* rmi = iselIntExpr_RMI(env, argR);
            addInstr(env, mk_iMOVsd_RR(reg,dst));
            addInstr(env, AMD64Instr_Alu32R(aluOp, rmi, dst));
            return dst;
         }
         /* just fall through to normal handling for Iop_32Uto64 */
      }
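
      /* In the patterns above, bind(0) acts as a wildcard: whatever
         subexpression it matches comes back in mi.bindee[0], which is
         how the bound operand (e.g. a load address) is recovered. */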

      /* Fallback cases */
      switch (e->Iex.Unop.op) {
         case Iop_32Uto64:
         case Iop_32Sto64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_MovxLQ(e->Iex.Unop.op == Iop_32Sto64,
                                            src, dst) );
            return dst;
         }
         case Iop_128HIto64: {
            HReg rHi, rLo;
            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rHi; /* and abandon rLo */
         }
         case Iop_128to64: {
            HReg rHi, rLo;
            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
            return rLo; /* and abandon rHi */
         }
         case Iop_8Uto16:
         case Iop_8Uto32:
         case Iop_8Uto64:
         case Iop_16Uto64:
         case Iop_16Uto32: {
            HReg dst     = newVRegI(env);
            HReg src     = iselIntExpr_R(env, e->Iex.Unop.arg);
            Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Uto32
                                   || e->Iex.Unop.op==Iop_16Uto64 );
            UInt mask    = srcIs16 ? 0xFFFF : 0xFF;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                            AMD64RMI_Imm(mask), dst));
            return dst;
         }
         case Iop_8Sto16:
         case Iop_8Sto64:
         case Iop_8Sto32:
         case Iop_16Sto32:
         case Iop_16Sto64: {
            HReg dst     = newVRegI(env);
            HReg src     = iselIntExpr_R(env, e->Iex.Unop.arg);
            Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Sto32
                                   || e->Iex.Unop.op==Iop_16Sto64 );
            UInt amt     = srcIs16 ? 48 : 56;
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst));
            return dst;
         }
         case Iop_Not8:
         case Iop_Not16:
         case Iop_Not32:
         case Iop_Not64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Unary64(Aun_NOT,dst));
            return dst;
         }
         case Iop_16HIto8:
         case Iop_32HIto16:
         case Iop_64HIto32: {
            HReg dst  = newVRegI(env);
            HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
            Int shift = 0;
            switch (e->Iex.Unop.op) {
               case Iop_16HIto8:  shift = 8;  break;
               case Iop_32HIto16: shift = 16; break;
               case Iop_64HIto32: shift = 32; break;
               default: vassert(0);
            }
            addInstr(env, mk_iMOVsd_RR(src,dst) );
            addInstr(env, AMD64Instr_Sh64(Ash_SHR, shift, dst));
            return dst;
         }
         case Iop_1Uto64:
         case Iop_1Uto32:
         case Iop_1Uto8: {
            HReg dst           = newVRegI(env);
            AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Set64(cond,dst));
            return dst;
         }
         case Iop_1Sto8:
         case Iop_1Sto16:
         case Iop_1Sto32:
         case Iop_1Sto64: {
            /* could do better than this, but for now ... */
            HReg dst           = newVRegI(env);
            AMD64CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Set64(cond,dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SHL, 63, dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
            return dst;
         }
         case Iop_Ctz64: {
            /* Count trailing zeroes, implemented by amd64 'bsfq' */
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Bsfr64(True,src,dst));
            return dst;
         }
         case Iop_Clz64: {
            /* Count leading zeroes.  Do 'bsrq' to establish the index
               of the highest set bit, and subtract that value from
               63. */
            HReg tmp = newVRegI(env);
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Bsfr64(False,src,tmp));
            addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,
                                            AMD64RMI_Imm(63), dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_SUB,
                                            AMD64RMI_Reg(tmp), dst));
            return dst;
         }

         case Iop_CmpwNEZ64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src,dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                            AMD64RMI_Reg(src), dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
            return dst;
         }
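
         /* The CmpwNEZ trick: x and -x cannot both have a clear sign
            bit unless x == 0, so (x | -x) >>s 63 yields all zeroes
            for x == 0 and all ones otherwise.  CmpwNEZ32 below does
            the same after first zero-extending the 32-bit value. */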

         case Iop_CmpwNEZ32: {
            HReg src = newVRegI(env);
            HReg dst = newVRegI(env);
            HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(pre,src));
            addInstr(env, AMD64Instr_MovxLQ(False, src, src));
            addInstr(env, mk_iMOVsd_RR(src,dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG,dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
                                            AMD64RMI_Reg(src), dst));
            addInstr(env, AMD64Instr_Sh64(Ash_SAR, 63, dst));
            return dst;
         }

         case Iop_Left8:
         case Iop_Left16:
         case Iop_Left32:
         case Iop_Left64: {
            HReg dst = newVRegI(env);
            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
            addInstr(env, mk_iMOVsd_RR(src, dst));
            addInstr(env, AMD64Instr_Unary64(Aun_NEG, dst));
            addInstr(env, AMD64Instr_Alu64R(Aalu_OR, AMD64RMI_Reg(src), dst));
            return dst;
         }

         case Iop_V128to32: {
            HReg dst = newVRegI(env);
            HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
            AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp_m16));
            addInstr(env, AMD64Instr_LoadEX(4, False/*z-widen*/, rsp_m16, dst));
            return dst;
         }

         /* V128{HI}to64 */
         case Iop_V128HIto64:
         case Iop_V128to64: {
            HReg dst = newVRegI(env);
            Int  off = e->Iex.Unop.op==Iop_V128HIto64 ? -8 : -16;
            HReg rsp = hregAMD64_RSP();
            HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
            AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
            AMD64AMode* off_rsp = AMD64AMode_IR(off, rsp);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
                                             16, vec, m16_rsp));
            addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
                                             AMD64RMI_Mem(off_rsp), dst ));
            return dst;
         }

         case Iop_V256to64_0: case Iop_V256to64_1:
         case Iop_V256to64_2: case Iop_V256to64_3: {
            HReg vHi, vLo, vec;
            iselDVecExpr(&vHi, &vLo, env, e->Iex.Unop.arg);
            /* Do the first part of the selection by deciding which of
               the 128 bit registers to look at, and second part using
               the same scheme as for V128{HI}to64 above. */
            Int off = 0;
            switch (e->Iex.Unop.op) {
               case Iop_V256to64_0: vec = vLo; off = -16; break;
               case Iop_V256to64_1: vec = vLo; off =  -8; break;
               case Iop_V256to64_2: vec = vHi; off = -16; break;
               case Iop_V256to64_3: vec = vHi; off =  -8; break;
               default: vassert(0);
            }
            HReg dst = newVRegI(env);
            HReg rsp = hregAMD64_RSP();
            AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
            AMD64AMode* off_rsp = AMD64AMode_IR(off, rsp);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
                                             16, vec, m16_rsp));
            addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
                                             AMD64RMI_Mem(off_rsp), dst ));
            return dst;
         }

         /* ReinterpF64asI64(e) */
         /* Given an IEEE754 double, produce an I64 with the same bit
            pattern. */
         case Iop_ReinterpF64asI64: {
            AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
            HReg        dst    = newVRegI(env);
            HReg        src    = iselDblExpr(env, e->Iex.Unop.arg);
            /* paranoia */
            set_SSE_rounding_default(env);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, src, m8_rsp));
            addInstr(env, AMD64Instr_Alu64R(
                             Aalu_MOV, AMD64RMI_Mem(m8_rsp), dst));
            return dst;
         }

         /* ReinterpF32asI32(e) */
         /* Given an IEEE754 single, produce an I64 with the same bit
            pattern in the lower half. */
         case Iop_ReinterpF32asI32: {
            AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
            HReg        dst    = newVRegI(env);
            HReg        src    = iselFltExpr(env, e->Iex.Unop.arg);
            /* paranoia */
            set_SSE_rounding_default(env);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, src, m8_rsp));
            addInstr(env, AMD64Instr_LoadEX(4, False/*unsigned*/, m8_rsp, dst ));
            return dst;
         }
case Iop_16to8:
|
|
case Iop_32to8:
|
|
case Iop_64to8:
|
|
case Iop_32to16:
|
|
case Iop_64to16:
|
|
case Iop_64to32:
|
|
/* These are no-ops. */
            return iselIntExpr_R(env, e->Iex.Unop.arg);

         case Iop_GetMSBs8x8: {
            /* Note: the following assumes the helper is of
               signature
                  UInt fn ( ULong ), and is not a regparm fn.
            */
            HReg dst = newVRegI(env);
            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
            fn = (HWord)h_generic_calc_GetMSBs8x8;
            addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
            addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
                                           1, mk_RetLoc_simple(RLPri_Int) ));
            /* MovxLQ is not exactly the right thing here.  We just
               need to get the bottom 8 bits of RAX into dst, and zero
               out everything else.  Assuming that the helper returns
               a UInt with the top 24 bits zeroed out, it'll do,
               though. */
            addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
            return dst;
         }

         case Iop_GetMSBs8x16: {
            /* Note: the following assumes the helper is of signature
                  UInt fn ( ULong w64hi, ULong w64Lo ),
               and is not a regparm fn. */
            HReg dst = newVRegI(env);
            HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
            HReg rsp = hregAMD64_RSP();
            fn = (HWord)h_generic_calc_GetMSBs8x16;
            AMD64AMode* m8_rsp  = AMD64AMode_IR( -8, rsp);
            AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/,
                                             16, vec, m16_rsp));
            /* hi 64 bits into RDI -- the first arg */
            addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
                                             AMD64RMI_Mem(m8_rsp),
                                             hregAMD64_RDI() )); /* 1st arg */
            /* lo 64 bits into RSI -- the 2nd arg */
            addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
                                             AMD64RMI_Mem(m16_rsp),
                                             hregAMD64_RSI() )); /* 2nd arg */
            addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
                                           2, mk_RetLoc_simple(RLPri_Int) ));
            /* MovxLQ is not exactly the right thing here.  We just
               need to get the bottom 16 bits of RAX into dst, and zero
               out everything else.  Assuming that the helper returns
               a UInt with the top 16 bits zeroed out, it'll do,
               though. */
            addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));
            return dst;
         }

         default:
            break;
      }

      /* Deal with unary 64-bit SIMD ops. */
      switch (e->Iex.Unop.op) {
         case Iop_CmpNEZ32x2:
            fn = (HWord)h_generic_calc_CmpNEZ32x2; break;
         case Iop_CmpNEZ16x4:
            fn = (HWord)h_generic_calc_CmpNEZ16x4; break;
         case Iop_CmpNEZ8x8:
            fn = (HWord)h_generic_calc_CmpNEZ8x8; break;
         default:
            fn = (HWord)0; break;
      }
      if (fn != (HWord)0) {
         /* Note: the following assumes all helpers are of
            signature
               ULong fn ( ULong ), and they are
            not marked as regparm functions.
         */
         HReg dst = newVRegI(env);
         HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
         addInstr(env, mk_iMOVsd_RR(arg, hregAMD64_RDI()) );
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 1,
                                        mk_RetLoc_simple(RLPri_Int) ));
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
         return dst;
      }

      break;
   }

   /* --------- GET --------- */
   case Iex_Get: {
      if (ty == Ity_I64) {
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_Alu64R(
                          Aalu_MOV,
                          AMD64RMI_Mem(
                             AMD64AMode_IR(e->Iex.Get.offset,
                                           hregAMD64_RBP())),
                          dst));
         return dst;
      }
      if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
         HReg dst = newVRegI(env);
         addInstr(env, AMD64Instr_LoadEX(
                          toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
                          False,
                          AMD64AMode_IR(e->Iex.Get.offset,hregAMD64_RBP()),
                          dst));
         return dst;
      }
      break;
   }

   case Iex_GetI: {
      AMD64AMode* am
         = genGuestArrayOffset(
              env, e->Iex.GetI.descr,
                   e->Iex.GetI.ix, e->Iex.GetI.bias );
      HReg dst = newVRegI(env);
      if (ty == Ity_I8) {
         addInstr(env, AMD64Instr_LoadEX( 1, False, am, dst ));
         return dst;
      }
      if (ty == Ity_I64) {
         addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, AMD64RMI_Mem(am), dst ));
         return dst;
      }
      break;
   }

   /* --------- CCALL --------- */
   case Iex_CCall: {
      HReg dst = newVRegI(env);
      vassert(ty == e->Iex.CCall.retty);

      /* be very restrictive for now.  Only 64-bit ints allowed for
         args, and 64 or 32 bits for return type. */
      if (e->Iex.CCall.retty != Ity_I64 && e->Iex.CCall.retty != Ity_I32)
         goto irreducible;

      /* Marshal args, do the call. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
                    e->Iex.CCall.cee, e->Iex.CCall.retty, e->Iex.CCall.args );
      vassert(is_sane_RetLoc(rloc));
      vassert(rloc.pri == RLPri_Int);
      vassert(addToSp == 0);

      /* Move to dst, and zero out the top 32 bits if the result type is
         Ity_I32.  Probably overkill, but still .. */
      if (e->Iex.CCall.retty == Ity_I64)
         addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), dst));
      else
         addInstr(env, AMD64Instr_MovxLQ(False, hregAMD64_RAX(), dst));

      return dst;
   }

   /* --------- LITERAL --------- */
   /* 64/32/16/8-bit literals */
   case Iex_Const:
      if (ty == Ity_I64) {
         HReg r = newVRegI(env);
         addInstr(env, AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, r));
         return r;
      } else {
         AMD64RMI* rmi = iselIntExpr_RMI ( env, e );
         HReg      r   = newVRegI(env);
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, rmi, r));
         return r;
      }

   /* --------- MULTIPLEX --------- */
   case Iex_ITE: { // VFD
      if ((ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8)
          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
         HReg r1  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
         HReg r0  = iselIntExpr_R(env, e->Iex.ITE.iffalse);
         HReg dst = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r1,dst));
         AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
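         /* amd64 condition codes come in complementary pairs differing
            only in the bottom bit, so (cc ^ 1) is the negated condition:
            the CMov below writes r0 to dst when cond is false. */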
         addInstr(env, AMD64Instr_CMov64(cc ^ 1, r0, dst));
         return dst;
      }
      break;
   }

   /* --------- TERNARY OP --------- */
   case Iex_Triop: {
      IRTriop *triop = e->Iex.Triop.details;
      /* C3210 flags following FPU partial remainder (fprem), both
         IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
      if (triop->op == Iop_PRemC3210F64
          || triop->op == Iop_PRem1C3210F64) {
         AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
         HReg        arg1   = iselDblExpr(env, triop->arg2);
         HReg        arg2   = iselDblExpr(env, triop->arg3);
         HReg        dst    = newVRegI(env);
         addInstr(env, AMD64Instr_A87Free(2));

         /* one arg -> top of x87 stack */
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp));
         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));

         /* other arg -> top of x87 stack */
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp));
         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));

         switch (triop->op) {
            case Iop_PRemC3210F64:
               addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
               break;
            case Iop_PRem1C3210F64:
               addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
               break;
            default:
               vassert(0);
         }
         /* Ignore the result, and instead make off with the FPU's
            C3210 flags (in the status word). */
         addInstr(env, AMD64Instr_A87StSW(m8_rsp));
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Mem(m8_rsp),dst));
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0x4700),dst));
         return dst;
      }
      break;
   }

   default:
      break;
   } /* switch (e->tag) */

   /* We get here if no pattern matched. */
  irreducible:
   ppIRExpr(e);
   vpanic("iselIntExpr_R(amd64): cannot reduce tree");
}


/*---------------------------------------------------------*/
/*--- ISEL: Integer expression auxiliaries               ---*/
/*---------------------------------------------------------*/

/* --------------------- AMODEs --------------------- */

/* Return an AMode which computes the value of the specified
   expression, possibly also adding insns to the code list as a
   result.  The expression may only be a 64-bit one.
*/

static AMD64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e )
{
   AMD64AMode* am = iselIntExpr_AMode_wrk(env, e);
   vassert(sane_AMode(am));
   return am;
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e )
{
   MatchInfo mi;
   DECLARE_PATTERN(p_complex);
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64);

   /* Add64( Add64(expr1, Shl64(expr2, imm8)), simm32 ) */
   /*        bind0           bind1  bind2      bind3   */
   DEFINE_PATTERN(p_complex,
      binop( Iop_Add64,
             binop( Iop_Add64,
                    bind(0),
                    binop(Iop_Shl64, bind(1), bind(2))
                  ),
             bind(3)
           )
   );
   if (matchIRExpr(&mi, p_complex, e)) {
      IRExpr* expr1  = mi.bindee[0];
      IRExpr* expr2  = mi.bindee[1];
      IRExpr* imm8   = mi.bindee[2];
      IRExpr* simm32 = mi.bindee[3];
      if (imm8->tag == Iex_Const
          && imm8->Iex.Const.con->tag == Ico_U8
          && imm8->Iex.Const.con->Ico.U8 < 4
          /* imm8 is OK, now check simm32 */
          && simm32->tag == Iex_Const
          && simm32->Iex.Const.con->tag == Ico_U64
          && fitsIn32Bits(simm32->Iex.Const.con->Ico.U64)) {
         UInt shift  = imm8->Iex.Const.con->Ico.U8;
         UInt offset = toUInt(simm32->Iex.Const.con->Ico.U64);
         HReg r1 = iselIntExpr_R(env, expr1);
         HReg r2 = iselIntExpr_R(env, expr2);
         vassert(shift == 0 || shift == 1 || shift == 2 || shift == 3);
         return AMD64AMode_IRRS(offset, r1, r2, shift);
      }
   }
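
   /* For instance (a hypothetical example):
         Add64(Add64(t1, Shl64(t2, 0x3:I8)), 0x28:I64)
      matches p_complex and becomes the amode 0x28(t1,t2,8),
      since a shift of 3 selects a scale of 1<<3 == 8. */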

   /* Add64(expr1, Shl64(expr2, imm)) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64
       && e->Iex.Binop.arg2->tag == Iex_Binop
       && e->Iex.Binop.arg2->Iex.Binop.op == Iop_Shl64
       && e->Iex.Binop.arg2->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
      UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
      if (shift == 1 || shift == 2 || shift == 3) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
         return AMD64AMode_IRRS(0, r1, r2, shift);
      }
   }

   /* Add64(expr,i) */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_Add64
       && e->Iex.Binop.arg2->tag == Iex_Const
       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64
       && fitsIn32Bits(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)) {
      HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
      return AMD64AMode_IR(
                toUInt(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64),
                r1
             );
   }

   /* Doesn't match anything in particular.  Generate it into
      a register and use that. */
   {
      HReg r1 = iselIntExpr_R(env, e);
      return AMD64AMode_IR(0, r1);
   }
}


/* --------------------- RMIs --------------------- */

/* Similarly, calculate an expression into an AMD64RMI operand.  As
   with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
   bits. */

static AMD64RMI* iselIntExpr_RMI ( ISelEnv* env, IRExpr* e )
{
   AMD64RMI* rmi = iselIntExpr_RMI_wrk(env, e);
   /* sanity checks ... */
   switch (rmi->tag) {
      case Armi_Imm:
         return rmi;
      case Armi_Reg:
         vassert(hregClass(rmi->Armi.Reg.reg) == HRcInt64);
         vassert(hregIsVirtual(rmi->Armi.Reg.reg));
         return rmi;
      case Armi_Mem:
         vassert(sane_AMode(rmi->Armi.Mem.am));
         return rmi;
      default:
         vpanic("iselIntExpr_RMI: unknown amd64 RMI tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64RMI* iselIntExpr_RMI_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32
           || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate 64/32/16/8 */
   if (e->tag == Iex_Const) {
      switch (e->Iex.Const.con->tag) {
         case Ico_U64:
            if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
               return AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
            }
            break;
         case Ico_U32:
            return AMD64RMI_Imm(e->Iex.Const.con->Ico.U32);
         case Ico_U16:
            return AMD64RMI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
         case Ico_U8:
            return AMD64RMI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
         default:
            vpanic("iselIntExpr_RMI.Iex_Const(amd64)");
      }
   }

   /* special case: 64-bit GET */
   if (e->tag == Iex_Get && ty == Ity_I64) {
      return AMD64RMI_Mem(AMD64AMode_IR(e->Iex.Get.offset,
                                        hregAMD64_RBP()));
   }

   /* special case: 64-bit load from memory */
   if (e->tag == Iex_Load && ty == Ity_I64
       && e->Iex.Load.end == Iend_LE) {
      AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      return AMD64RMI_Mem(am);
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return AMD64RMI_Reg(r);
   }
}


/* --------------------- RIs --------------------- */

/* Calculate an expression into an AMD64RI operand.  As with
   iselIntExpr_R, the expression can have type 64, 32, 16 or 8
   bits. */

static AMD64RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
{
   AMD64RI* ri = iselIntExpr_RI_wrk(env, e);
   /* sanity checks ... */
   switch (ri->tag) {
      case Ari_Imm:
         return ri;
      case Ari_Reg:
         vassert(hregClass(ri->Ari.Reg.reg) == HRcInt64);
         vassert(hregIsVirtual(ri->Ari.Reg.reg));
         return ri;
      default:
         vpanic("iselIntExpr_RI: unknown amd64 RI tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32
           || ty == Ity_I16 || ty == Ity_I8);

   /* special case: immediate */
   if (e->tag == Iex_Const) {
      switch (e->Iex.Const.con->tag) {
         case Ico_U64:
            if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
               return AMD64RI_Imm(toUInt(e->Iex.Const.con->Ico.U64));
            }
            break;
         case Ico_U32:
            return AMD64RI_Imm(e->Iex.Const.con->Ico.U32);
         case Ico_U16:
            return AMD64RI_Imm(0xFFFF & e->Iex.Const.con->Ico.U16);
         case Ico_U8:
            return AMD64RI_Imm(0xFF & e->Iex.Const.con->Ico.U8);
         default:
            vpanic("iselIntExpr_RI.Iex_Const(amd64)");
      }
   }

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return AMD64RI_Reg(r);
   }
}


/* --------------------- RMs --------------------- */

/* Similarly, calculate an expression into an AMD64RM operand.  As
   with iselIntExpr_R, the expression can have type 64, 32, 16 or 8
   bits. */

static AMD64RM* iselIntExpr_RM ( ISelEnv* env, IRExpr* e )
{
   AMD64RM* rm = iselIntExpr_RM_wrk(env, e);
   /* sanity checks ... */
   switch (rm->tag) {
      case Arm_Reg:
         vassert(hregClass(rm->Arm.Reg.reg) == HRcInt64);
         vassert(hregIsVirtual(rm->Arm.Reg.reg));
         return rm;
      case Arm_Mem:
         vassert(sane_AMode(rm->Arm.Mem.am));
         return rm;
      default:
         vpanic("iselIntExpr_RM: unknown amd64 RM tag");
   }
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64RM* iselIntExpr_RM_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);

   /* special case: 64-bit GET */
   if (e->tag == Iex_Get && ty == Ity_I64) {
      return AMD64RM_Mem(AMD64AMode_IR(e->Iex.Get.offset,
                                       hregAMD64_RBP()));
   }

   /* special case: load from memory -- not handled here; such loads
      simply fall through to the default case below. */

   /* default case: calculate into a register and return that */
   {
      HReg r = iselIntExpr_R ( env, e );
      return AMD64RM_Reg(r);
   }
}


/* --------------------- CONDCODE --------------------- */

/* Generate code to evaluate a bit-typed expression, returning the
   condition code which holds when the expression notionally
   evaluates to 1. */

static AMD64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
{
   /* Uh, there's nothing we can sanity check here, unfortunately. */
   return iselCondCode_wrk(env,e);
}

/* DO NOT CALL THIS DIRECTLY ! */
static AMD64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
{
   MatchInfo mi;

   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);

   /* var */
   if (e->tag == Iex_RdTmp) {
      HReg r64 = lookupIRTemp(env, e->Iex.RdTmp.tmp);
      HReg dst = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR(r64,dst));
      addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(1),dst));
      return Acc_NZ;
   }

   /* Constant 1:Bit */
   if (e->tag == Iex_Const) {
      HReg r;
      vassert(e->Iex.Const.con->tag == Ico_U1);
      vassert(e->Iex.Const.con->Ico.U1 == True
              || e->Iex.Const.con->Ico.U1 == False);
      r = newVRegI(env);
      addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,AMD64RMI_Imm(0),r));
      addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,AMD64RMI_Reg(r),r));
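      /* The XOR of r with itself yields zero and hence sets the Z flag,
         so Acc_Z is an always-true condition here and Acc_NZ an
         always-false one. */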
      return e->Iex.Const.con->Ico.U1 ? Acc_Z : Acc_NZ;
   }

   /* Not1(...) */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
      /* Generate code for the arg, and negate the test condition */
      return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
   }

   /* --- patterns rooted at: 64to1 --- */

   /* 64to1 */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_64to1) {
      HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
      addInstr(env, AMD64Instr_Test64(1,reg));
      return Acc_NZ;
   }

   /* --- patterns rooted at: 32to1 --- */

   /* 32to1 */
   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_32to1) {
      HReg reg = iselIntExpr_R(env, e->Iex.Unop.arg);
      addInstr(env, AMD64Instr_Test64(1,reg));
      return Acc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ8 --- */

   /* CmpNEZ8(x) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ8) {
      HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
      addInstr(env, AMD64Instr_Test64(0xFF,r));
      return Acc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ16 --- */

   /* CmpNEZ16(x) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ16) {
      HReg r = iselIntExpr_R(env, e->Iex.Unop.arg);
      addInstr(env, AMD64Instr_Test64(0xFFFF,r));
      return Acc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ32 --- */

   /* CmpNEZ32(x) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ32) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
      AMD64RMI* rmi2 = AMD64RMI_Imm(0);
      addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1));
      return Acc_NZ;
   }

   /* --- patterns rooted at: CmpNEZ64 --- */

   /* CmpNEZ64(Or64(x,y)) */
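   /* A single ORQ both combines the operands and sets the Z flag iff
      (x|y) is zero, so no separate compare against zero is needed. */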
   {
      DECLARE_PATTERN(p_CmpNEZ64_Or64);
      DEFINE_PATTERN(p_CmpNEZ64_Or64,
                     unop(Iop_CmpNEZ64, binop(Iop_Or64, bind(0), bind(1))));
      if (matchIRExpr(&mi, p_CmpNEZ64_Or64, e)) {
         HReg      r0   = iselIntExpr_R(env, mi.bindee[0]);
         AMD64RMI* rmi1 = iselIntExpr_RMI(env, mi.bindee[1]);
         HReg      tmp  = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r0, tmp));
         addInstr(env, AMD64Instr_Alu64R(Aalu_OR,rmi1,tmp));
         return Acc_NZ;
      }
   }

   /* CmpNEZ64(x) */
   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_CmpNEZ64) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
      AMD64RMI* rmi2 = AMD64RMI_Imm(0);
      addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
      return Acc_NZ;
   }

   /* --- patterns rooted at: Cmp{EQ,NE}{8,16,32} --- */

   /* CmpEQ8 / CmpNE8 */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ8
           || e->Iex.Binop.op == Iop_CmpNE8
           || e->Iex.Binop.op == Iop_CasCmpEQ8
           || e->Iex.Binop.op == Iop_CasCmpNE8)) {
      if (isZeroU8(e->Iex.Binop.arg2)) {
         HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
         addInstr(env, AMD64Instr_Test64(0xFF,r1));
         switch (e->Iex.Binop.op) {
            case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z;
            case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ;
            default: vpanic("iselCondCode(amd64): CmpXX8(expr,0:I8)");
         }
      } else {
         HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
         AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         HReg      r    = newVRegI(env);
         addInstr(env, mk_iMOVsd_RR(r1,r));
         addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFF),r));
         switch (e->Iex.Binop.op) {
            case Iop_CmpEQ8: case Iop_CasCmpEQ8: return Acc_Z;
            case Iop_CmpNE8: case Iop_CasCmpNE8: return Acc_NZ;
            default: vpanic("iselCondCode(amd64): CmpXX8(expr,expr)");
         }
      }
   }

   /* CmpEQ16 / CmpNE16 */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ16
           || e->Iex.Binop.op == Iop_CmpNE16
           || e->Iex.Binop.op == Iop_CasCmpEQ16
           || e->Iex.Binop.op == Iop_CasCmpNE16)) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
      AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      HReg      r    = newVRegI(env);
      addInstr(env, mk_iMOVsd_RR(r1,r));
      addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
      addInstr(env, AMD64Instr_Alu64R(Aalu_AND,AMD64RMI_Imm(0xFFFF),r));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ16: case Iop_CasCmpEQ16: return Acc_Z;
         case Iop_CmpNE16: case Iop_CasCmpNE16: return Acc_NZ;
         default: vpanic("iselCondCode(amd64): CmpXX16");
      }
   }

   /* CmpNE64(ccall, 64-bit constant) (--smc-check=all optimisation).
      Saves a "movq %rax, %tmp" compared to the default route. */
   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_CmpNE64
       && e->Iex.Binop.arg1->tag == Iex_CCall
       && e->Iex.Binop.arg2->tag == Iex_Const) {
      IRExpr* cal = e->Iex.Binop.arg1;
      IRExpr* con = e->Iex.Binop.arg2;
      HReg    tmp = newVRegI(env);
      /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
      vassert(cal->Iex.CCall.retty == Ity_I64); /* else ill-typed IR */
      vassert(con->Iex.Const.con->tag == Ico_U64);
      /* Marshal args, do the call. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
                    cal->Iex.CCall.cee,
                    cal->Iex.CCall.retty, cal->Iex.CCall.args );
      vassert(is_sane_RetLoc(rloc));
      vassert(rloc.pri == RLPri_Int);
      vassert(addToSp == 0);
      /* Compare the call result (in RAX) against the constant. */
      addInstr(env, AMD64Instr_Imm64(con->Iex.Const.con->Ico.U64, tmp));
      addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,
                                      AMD64RMI_Reg(hregAMD64_RAX()), tmp));
      return Acc_NZ;
   }

   /* Cmp*64*(x,y) */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ64
           || e->Iex.Binop.op == Iop_CmpNE64
           || e->Iex.Binop.op == Iop_CmpLT64S
           || e->Iex.Binop.op == Iop_CmpLT64U
           || e->Iex.Binop.op == Iop_CmpLE64S
           || e->Iex.Binop.op == Iop_CmpLE64U
           || e->Iex.Binop.op == Iop_CasCmpEQ64
           || e->Iex.Binop.op == Iop_CasCmpNE64
           || e->Iex.Binop.op == Iop_ExpCmpNE64)) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
      AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,r1));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ64: case Iop_CasCmpEQ64: return Acc_Z;
         case Iop_CmpNE64:
         case Iop_CasCmpNE64: case Iop_ExpCmpNE64: return Acc_NZ;
         case Iop_CmpLT64S: return Acc_L;
         case Iop_CmpLT64U: return Acc_B;
         case Iop_CmpLE64S: return Acc_LE;
         case Iop_CmpLE64U: return Acc_BE;
         default: vpanic("iselCondCode(amd64): CmpXX64");
      }
   }

   /* Cmp*32*(x,y) */
   if (e->tag == Iex_Binop
       && (e->Iex.Binop.op == Iop_CmpEQ32
           || e->Iex.Binop.op == Iop_CmpNE32
           || e->Iex.Binop.op == Iop_CmpLT32S
           || e->Iex.Binop.op == Iop_CmpLT32U
           || e->Iex.Binop.op == Iop_CmpLE32S
           || e->Iex.Binop.op == Iop_CmpLE32U
           || e->Iex.Binop.op == Iop_CasCmpEQ32
           || e->Iex.Binop.op == Iop_CasCmpNE32
           || e->Iex.Binop.op == Iop_ExpCmpNE32)) {
      HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
      AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
      addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1));
      switch (e->Iex.Binop.op) {
         case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Acc_Z;
         case Iop_CmpNE32:
         case Iop_CasCmpNE32: case Iop_ExpCmpNE32: return Acc_NZ;
         case Iop_CmpLT32S: return Acc_L;
         case Iop_CmpLT32U: return Acc_B;
         case Iop_CmpLE32S: return Acc_LE;
         case Iop_CmpLE32U: return Acc_BE;
         default: vpanic("iselCondCode(amd64): CmpXX32");
      }
   }

   ppIRExpr(e);
   vpanic("iselCondCode(amd64)");
}


/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (128 bit)                ---*/
/*---------------------------------------------------------*/

/* Compute a 128-bit value into a register pair, which is returned as
   the first two parameters.  As with iselIntExpr_R, these may be
   either real or virtual regs; in any case they must not be changed
   by subsequent code emitted by the caller. */

static void iselInt128Expr ( HReg* rHi, HReg* rLo,
                             ISelEnv* env, IRExpr* e )
{
   iselInt128Expr_wrk(rHi, rLo, env, e);
# if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
# endif
   vassert(hregClass(*rHi) == HRcInt64);
   vassert(hregIsVirtual(*rHi));
   vassert(hregClass(*rLo) == HRcInt64);
   vassert(hregIsVirtual(*rLo));
}

/* DO NOT CALL THIS DIRECTLY ! */
static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
                                 ISelEnv* env, IRExpr* e )
{
   vassert(e);
   vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);

   /* read 128-bit IRTemp */
   if (e->tag == Iex_RdTmp) {
      lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
      return;
   }

   /* --------- BINARY ops --------- */
   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {
         /* 64 x 64 -> 128 multiply */
         case Iop_MullU64:
         case Iop_MullS64: {
            /* get one operand into %rax, and the other into a R/M.
               Need to make an educated guess about which is better in
               which. */
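            /* (One-operand MULQ/IMULQ implicitly multiplies by RAX and
               leaves the 128-bit product in RDX:RAX.) */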
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
            AMD64RM* rmLeft = iselIntExpr_RM(env, e->Iex.Binop.arg1);
            HReg rRight = iselIntExpr_R(env, e->Iex.Binop.arg2);
            addInstr(env, mk_iMOVsd_RR(rRight, hregAMD64_RAX()));
            addInstr(env, AMD64Instr_MulL(syned, rmLeft));
            /* Result is now in RDX:RAX.  Tell the caller. */
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* 128 x 64 -> (64(rem),64(div)) division */
         case Iop_DivModU128to64:
         case Iop_DivModS128to64: {
            /* Get the 128-bit operand into rdx:rax, and the other into
               any old R/M. */
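            /* (DIVQ/IDIVQ divides the 128-bit value in RDX:RAX by the
               operand, leaving the quotient in RAX and the remainder
               in RDX.) */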
            HReg sHi, sLo;
            HReg tLo = newVRegI(env);
            HReg tHi = newVRegI(env);
            Bool syned = toBool(e->Iex.Binop.op == Iop_DivModS128to64);
            AMD64RM* rmRight = iselIntExpr_RM(env, e->Iex.Binop.arg2);
            iselInt128Expr(&sHi,&sLo, env, e->Iex.Binop.arg1);
            addInstr(env, mk_iMOVsd_RR(sHi, hregAMD64_RDX()));
            addInstr(env, mk_iMOVsd_RR(sLo, hregAMD64_RAX()));
            addInstr(env, AMD64Instr_Div(syned, 8, rmRight));
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RDX(), tHi));
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(), tLo));
            *rHi = tHi;
            *rLo = tLo;
            return;
         }

         /* 64HLto128(e1,e2) */
         case Iop_64HLto128:
            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
            return;

         default:
            break;
      }
   } /* if (e->tag == Iex_Binop) */

   ppIRExpr(e);
   vpanic("iselInt128Expr");
}


/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (32 bit)          ---*/
/*---------------------------------------------------------*/

/* Nothing interesting here; really just wrappers for
   64-bit stuff. */

static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselFltExpr_wrk( env, e );
# if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
# endif
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_F32);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      AMD64AMode* am;
      HReg res = newVRegV(env);
      vassert(e->Iex.Load.ty == Ity_F32);
      am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, res, am));
      return res;
   }

   if (e->tag == Iex_Binop
       && e->Iex.Binop.op == Iop_F64toF32) {
      /* Although the result is still held in a standard SSE register,
         we need to round it to reflect the loss of accuracy/range
         entailed in casting it to a 32-bit float. */
      HReg dst = newVRegV(env);
      HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
      set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
      addInstr(env, AMD64Instr_SseSDSS(True/*D->S*/,src,dst));
      set_SSE_rounding_default( env );
      return dst;
   }

   if (e->tag == Iex_Get) {
      AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
                                      hregAMD64_RBP() );
      HReg res = newVRegV(env);
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, res, am ));
      return res;
   }

   if (e->tag == Iex_Unop
       && e->Iex.Unop.op == Iop_ReinterpI32asF32) {
      /* Given an I32, produce an IEEE754 float with the same bit
         pattern. */
      HReg        dst    = newVRegV(env);
      HReg        src    = iselIntExpr_R(env, e->Iex.Unop.arg);
      AMD64AMode* m4_rsp = AMD64AMode_IR(-4, hregAMD64_RSP());
      addInstr(env, AMD64Instr_Store(4, src, m4_rsp));
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 4, dst, m4_rsp ));
      return dst;
   }

   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
      AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
      HReg        arg    = iselFltExpr(env, e->Iex.Binop.arg2);
      HReg        dst    = newVRegV(env);

      /* arg now holds the value to be rounded.  The first thing to do
         is set the FPU's rounding mode accordingly. */

      /* Set host x87 rounding mode */
      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

      addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, arg, m8_rsp));
      addInstr(env, AMD64Instr_A87Free(1));
      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 4));
      addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 4));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, dst, m8_rsp));

      /* Restore default x87 rounding. */
      set_FPU_rounding_default( env );

      return dst;
   }

   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_NegF32) {
      /* Sigh ... very rough code.  Could do much better. */
      /* Get the 128-bit literal 00---0 10---0 into a register
         and xor it with the value to be negated. */
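      /* (Only bit 31 of that literal is set, so the XOR below flips
         just the sign bit of the F32 in the low lane.) */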
      HReg r1  = newVRegI(env);
      HReg dst = newVRegV(env);
      HReg tmp = newVRegV(env);
      HReg src = iselFltExpr(env, e->Iex.Unop.arg);
      AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
      addInstr(env, mk_vMOVsd_RR(src,tmp));
      addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
      addInstr(env, AMD64Instr_Imm64( 1ULL<<31, r1 ));
      addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1)));
      addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0));
      addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst));
      add_to_rsp(env, 16);
      return dst;
   }

   if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_MAddF32) {
      IRQop *qop = e->Iex.Qop.details;
      HReg dst  = newVRegV(env);
      HReg argX = iselFltExpr(env, qop->arg2);
      HReg argY = iselFltExpr(env, qop->arg3);
      HReg argZ = iselFltExpr(env, qop->arg4);
      /* XXXROUNDINGFIXME */
      /* set roundingmode here */
      /* subq $16, %rsp         -- make a space */
      sub_from_rsp(env, 16);
      /* Prepare 4 arg regs:
         leaq 0(%rsp), %rdi
         leaq 4(%rsp), %rsi
         leaq 8(%rsp), %rdx
         leaq 12(%rsp), %rcx
      */
      addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, hregAMD64_RSP()),
                                     hregAMD64_RDI()));
      addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(4, hregAMD64_RSP()),
                                     hregAMD64_RSI()));
      addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(8, hregAMD64_RSP()),
                                     hregAMD64_RDX()));
      addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(12, hregAMD64_RSP()),
                                     hregAMD64_RCX()));
      /* Store the three args, at (%rsi), (%rdx) and (%rcx):
         movss %argX, 0(%rsi)
         movss %argY, 0(%rdx)
         movss %argZ, 0(%rcx)
      */
      addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argX,
                                       AMD64AMode_IR(0, hregAMD64_RSI())));
      addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argY,
                                       AMD64AMode_IR(0, hregAMD64_RDX())));
      addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 4, argZ,
                                       AMD64AMode_IR(0, hregAMD64_RCX())));
      /* call the helper */
      addInstr(env, AMD64Instr_Call( Acc_ALWAYS,
                                     (ULong)(HWord)h_generic_calc_MAddF32,
                                     4, mk_RetLoc_simple(RLPri_None) ));
      /* fetch the result from memory, at 0(%rsp), where the helper
         wrote it via the result pointer passed in %rdi. */
      addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 4, dst,
                                       AMD64AMode_IR(0, hregAMD64_RSP())));
      /* and finally, clear the space */
      add_to_rsp(env, 16);
      return dst;
   }

   ppIRExpr(e);
   vpanic("iselFltExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Floating point expressions (64 bit)          ---*/
/*---------------------------------------------------------*/

/* Compute a 64-bit floating point value into the lower half of an xmm
   register, the identity of which is returned.  As with
   iselIntExpr_R, the returned reg will be virtual, and it must not be
   changed by subsequent code emitted by the caller.
*/

/* IEEE 754 formats.  From http://www.freesoft.org/CIE/RFC/1832/32.htm:

    Type                  S (1 bit)   E (11 bits)   F (52 bits)
    ----                  ---------   -----------   -----------
    signalling NaN        u           2047 (max)    .0uuuuu---u
                                                    (with at least
                                                     one 1 bit)
    quiet NaN             u           2047 (max)    .1uuuuu---u

    negative infinity     1           2047 (max)    .000000---0

    positive infinity     0           2047 (max)    .000000---0

    negative zero         1           0             .000000---0

    positive zero         0           0             .000000---0
*/

static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselDblExpr_wrk( env, e );
# if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
# endif
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
   return r;
}

/* DO NOT CALL THIS DIRECTLY */
static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_F64);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Const) {
      union { ULong u64; Double f64; } u;
      HReg res = newVRegV(env);
      HReg tmp = newVRegI(env);
      vassert(sizeof(u) == 8);
      vassert(sizeof(u.u64) == 8);
      vassert(sizeof(u.f64) == 8);

      if (e->Iex.Const.con->tag == Ico_F64) {
         u.f64 = e->Iex.Const.con->Ico.F64;
      }
      else if (e->Iex.Const.con->tag == Ico_F64i) {
         u.u64 = e->Iex.Const.con->Ico.F64i;
      }
      else
         vpanic("iselDblExpr(amd64): const");

      addInstr(env, AMD64Instr_Imm64(u.u64, tmp));
      addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(tmp)));
      addInstr(env, AMD64Instr_SseLdSt(
                       True/*load*/, 8, res,
                       AMD64AMode_IR(0, hregAMD64_RSP())
                    ));
      add_to_rsp(env, 8);
      return res;
   }

   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      AMD64AMode* am;
      HReg res = newVRegV(env);
      vassert(e->Iex.Load.ty == Ity_F64);
      am = iselIntExpr_AMode(env, e->Iex.Load.addr);
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
      return res;
   }

   if (e->tag == Iex_Get) {
      AMD64AMode* am = AMD64AMode_IR( e->Iex.Get.offset,
                                      hregAMD64_RBP() );
      HReg res = newVRegV(env);
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
      return res;
   }

   if (e->tag == Iex_GetI) {
      AMD64AMode* am
         = genGuestArrayOffset(
              env, e->Iex.GetI.descr,
                   e->Iex.GetI.ix, e->Iex.GetI.bias );
      HReg res = newVRegV(env);
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 8, res, am ));
      return res;
   }

   if (e->tag == Iex_Triop) {
      IRTriop *triop = e->Iex.Triop.details;
      AMD64SseOp op = Asse_INVALID;
      switch (triop->op) {
         case Iop_AddF64: op = Asse_ADDF; break;
         case Iop_SubF64: op = Asse_SUBF; break;
         case Iop_MulF64: op = Asse_MULF; break;
         case Iop_DivF64: op = Asse_DIVF; break;
         default: break;
      }
      if (op != Asse_INVALID) {
         HReg dst  = newVRegV(env);
         HReg argL = iselDblExpr(env, triop->arg2);
         HReg argR = iselDblExpr(env, triop->arg3);
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
         return dst;
      }
   }

   if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_MAddF64) {
      IRQop *qop = e->Iex.Qop.details;
      HReg dst  = newVRegV(env);
      HReg argX = iselDblExpr(env, qop->arg2);
      HReg argY = iselDblExpr(env, qop->arg3);
      HReg argZ = iselDblExpr(env, qop->arg4);
      /* XXXROUNDINGFIXME */
      /* set roundingmode here */
      /* subq $32, %rsp         -- make a space */
      sub_from_rsp(env, 32);
      /* Prepare 4 arg regs:
         leaq 0(%rsp), %rdi
         leaq 8(%rsp), %rsi
         leaq 16(%rsp), %rdx
         leaq 24(%rsp), %rcx
      */
      addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, hregAMD64_RSP()),
                                     hregAMD64_RDI()));
      addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(8, hregAMD64_RSP()),
                                     hregAMD64_RSI()));
      addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, hregAMD64_RSP()),
                                     hregAMD64_RDX()));
      addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(24, hregAMD64_RSP()),
                                     hregAMD64_RCX()));
      /* Store the three args, at (%rsi), (%rdx) and (%rcx):
         movsd %argX, 0(%rsi)
         movsd %argY, 0(%rdx)
         movsd %argZ, 0(%rcx)
      */
      addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argX,
                                       AMD64AMode_IR(0, hregAMD64_RSI())));
      addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argY,
                                       AMD64AMode_IR(0, hregAMD64_RDX())));
      addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 8, argZ,
                                       AMD64AMode_IR(0, hregAMD64_RCX())));
      /* call the helper */
      addInstr(env, AMD64Instr_Call( Acc_ALWAYS,
                                     (ULong)(HWord)h_generic_calc_MAddF64,
                                     4, mk_RetLoc_simple(RLPri_None) ));
      /* fetch the result from memory, at 0(%rsp), where the helper
         wrote it via the result pointer passed in %rdi. */
      addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 8, dst,
                                       AMD64AMode_IR(0, hregAMD64_RSP())));
      /* and finally, clear the space */
      add_to_rsp(env, 32);
      return dst;
   }

   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64toInt) {
      AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
      HReg        arg    = iselDblExpr(env, e->Iex.Binop.arg2);
      HReg        dst    = newVRegV(env);

      /* arg now holds the value to be rounded.  The first thing to do
         is set the FPU's rounding mode accordingly. */

      /* Set host x87 rounding mode */
      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );

      addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
      addInstr(env, AMD64Instr_A87Free(1));
      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
      addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));

      /* Restore default x87 rounding. */
      set_FPU_rounding_default( env );

      return dst;
   }

   IRTriop *triop = e->Iex.Triop.details;
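   /* NB: triop is only meaningful if e really is an Iex_Triop; that is
      checked immediately below, before triop is dereferenced. */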
   if (e->tag == Iex_Triop
       && (triop->op == Iop_ScaleF64
           || triop->op == Iop_AtanF64
           || triop->op == Iop_Yl2xF64
           || triop->op == Iop_Yl2xp1F64
           || triop->op == Iop_PRemF64
           || triop->op == Iop_PRem1F64)
      ) {
      AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
      HReg        arg1   = iselDblExpr(env, triop->arg2);
      HReg        arg2   = iselDblExpr(env, triop->arg3);
      HReg        dst    = newVRegV(env);
      Bool     arg2first = toBool(triop->op == Iop_ScaleF64
                                  || triop->op == Iop_PRemF64
                                  || triop->op == Iop_PRem1F64);
      addInstr(env, AMD64Instr_A87Free(2));

      /* one arg -> top of x87 stack */
      addInstr(env, AMD64Instr_SseLdSt(
                       False/*store*/, 8, arg2first ? arg2 : arg1, m8_rsp));
      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));

      /* other arg -> top of x87 stack */
      addInstr(env, AMD64Instr_SseLdSt(
                       False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp));
      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));

      /* do it */
      /* XXXROUNDINGFIXME */
      /* set roundingmode here */
      switch (triop->op) {
         case Iop_ScaleF64:
            addInstr(env, AMD64Instr_A87FpOp(Afp_SCALE));
            break;
         case Iop_AtanF64:
            addInstr(env, AMD64Instr_A87FpOp(Afp_ATAN));
            break;
         case Iop_Yl2xF64:
            addInstr(env, AMD64Instr_A87FpOp(Afp_YL2X));
            break;
         case Iop_Yl2xp1F64:
            addInstr(env, AMD64Instr_A87FpOp(Afp_YL2XP1));
            break;
         case Iop_PRemF64:
            addInstr(env, AMD64Instr_A87FpOp(Afp_PREM));
            break;
         case Iop_PRem1F64:
            addInstr(env, AMD64Instr_A87FpOp(Afp_PREM1));
            break;
         default:
            vassert(0);
      }

      /* save result */
      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
      return dst;
   }

   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64StoF64) {
      HReg dst = newVRegV(env);
      HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2);
      set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
      addInstr(env, AMD64Instr_SseSI2SF( 8, 8, src, dst ));
      set_SSE_rounding_default( env );
      return dst;
   }

   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_I32StoF64) {
      HReg dst = newVRegV(env);
      HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
      set_SSE_rounding_default( env );
      addInstr(env, AMD64Instr_SseSI2SF( 4, 8, src, dst ));
      return dst;
   }

   if (e->tag == Iex_Unop
       && (e->Iex.Unop.op == Iop_NegF64
           || e->Iex.Unop.op == Iop_AbsF64)) {
      /* Sigh ... very rough code.  Could do much better. */
      /* Get the 128-bit literal 00---0 10---0 into a register
         and xor/nand it with the value to be negated. */
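      /* (Only bit 63 of the literal is set: XORPD with it flips the
         sign bit, giving negation, while ANDNPD computes
         ~literal & value, clearing the sign bit, giving abs.) */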
      HReg r1  = newVRegI(env);
      HReg dst = newVRegV(env);
      HReg tmp = newVRegV(env);
      HReg src = iselDblExpr(env, e->Iex.Unop.arg);
      AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
      addInstr(env, mk_vMOVsd_RR(src,tmp));
      addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
      addInstr(env, AMD64Instr_Imm64( 1ULL<<63, r1 ));
      addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(r1)));
      addInstr(env, AMD64Instr_SseLdSt(True, 16, dst, rsp0));

      if (e->Iex.Unop.op == Iop_NegF64)
         addInstr(env, AMD64Instr_SseReRg(Asse_XOR, tmp, dst));
      else
         addInstr(env, AMD64Instr_SseReRg(Asse_ANDN, tmp, dst));

      add_to_rsp(env, 16);
      return dst;
   }

   if (e->tag == Iex_Binop) {
      A87FpOp fpop = Afp_INVALID;
      switch (e->Iex.Binop.op) {
         case Iop_SqrtF64: fpop = Afp_SQRT; break;
         case Iop_SinF64:  fpop = Afp_SIN;  break;
         case Iop_CosF64:  fpop = Afp_COS;  break;
         case Iop_TanF64:  fpop = Afp_TAN;  break;
         case Iop_2xm1F64: fpop = Afp_2XM1; break;
         default: break;
      }
      if (fpop != Afp_INVALID) {
         AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
         HReg        arg    = iselDblExpr(env, e->Iex.Binop.arg2);
         HReg        dst    = newVRegV(env);
         Int     nNeeded    = e->Iex.Binop.op==Iop_TanF64 ? 2 : 1;
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
         addInstr(env, AMD64Instr_A87Free(nNeeded));
         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         /* Note that AMD64Instr_A87FpOp(Afp_TAN) sets the condition
            codes.  I don't think that matters, since this insn
            selector never generates such an instruction intervening
            between a flag-setting instruction and a flag-using
            instruction. */
         addInstr(env, AMD64Instr_A87FpOp(fpop));
         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
         addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
         return dst;
      }
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {
//..          case Iop_I32toF64: {
//..             HReg dst = newVRegF(env);
//..             HReg ri  = iselIntExpr_R(env, e->Iex.Unop.arg);
//..             addInstr(env, X86Instr_Push(X86RMI_Reg(ri)));
//..             set_FPU_rounding_default(env);
//..             addInstr(env, X86Instr_FpLdStI(
//..                              True/*load*/, 4, dst,
//..                              X86AMode_IR(0, hregX86_ESP())));
//..             add_to_esp(env, 4);
//..             return dst;
//..          }
         case Iop_ReinterpI64asF64: {
            /* Given an I64, produce an IEEE754 double with the same
               bit pattern. */
            AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
            HReg        dst    = newVRegV(env);
            AMD64RI*    src    = iselIntExpr_RI(env, e->Iex.Unop.arg);
            /* paranoia */
            set_SSE_rounding_default(env);
            addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, src, m8_rsp));
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
            return dst;
         }
         case Iop_F32toF64: {
            HReg f32;
            HReg f64 = newVRegV(env);
            /* this shouldn't be necessary, but be paranoid ... */
            set_SSE_rounding_default(env);
            f32 = iselFltExpr(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_SseSDSS(False/*S->D*/, f32, f64));
            return f64;
         }
         default:
            break;
      }
   }

   /* --------- MULTIPLEX --------- */
   if (e->tag == Iex_ITE) { // VFD
      HReg r1, r0, dst;
      vassert(ty == Ity_F64);
      vassert(typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1);
      r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
      r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
      dst = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(r1,dst));
      AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0, dst));
      return dst;
   }

   ppIRExpr(e);
   vpanic("iselDblExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: SIMD (Vector) expressions, 128 bit.          ---*/
/*---------------------------------------------------------*/

static HReg iselVecExpr ( ISelEnv* env, IRExpr* e )
{
   HReg r = iselVecExpr_wrk( env, e );
# if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
# endif
   vassert(hregClass(r) == HRcVec128);
   vassert(hregIsVirtual(r));
   return r;
}


/* DO NOT CALL THIS DIRECTLY */
static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
{
   HWord      fn = 0; /* address of helper fn, if required */
   Bool       arg1isEReg = False;
   AMD64SseOp op = Asse_INVALID;
   IRType     ty = typeOfIRExpr(env->type_env,e);
   vassert(e);
   vassert(ty == Ity_V128);

   if (e->tag == Iex_RdTmp) {
      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
   }

   if (e->tag == Iex_Get) {
      HReg dst = newVRegV(env);
      addInstr(env, AMD64Instr_SseLdSt(
                       True/*load*/,
                       16,
                       dst,
                       AMD64AMode_IR(e->Iex.Get.offset, hregAMD64_RBP())
                    )
              );
      return dst;
   }

   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
      HReg        dst = newVRegV(env);
      AMD64AMode* am  = iselIntExpr_AMode(env, e->Iex.Load.addr);
      addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
      return dst;
   }

   if (e->tag == Iex_Const) {
      HReg dst = newVRegV(env);
      vassert(e->Iex.Const.con->tag == Ico_V128);
      switch (e->Iex.Const.con->Ico.V128) {
         case 0x0000:
            dst = generate_zeroes_V128(env);
            break;
         case 0xFFFF:
            dst = generate_ones_V128(env);
            break;
         default: {
            AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
            /* do push_uimm64 twice, first time for the high-order half. */
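            /* (Each of the 16 bits of an Ico_V128 constant stands for
               one byte of the vector; bitmask8_to_bytemask64 expands 8
               such bits into a 64-bit 0x00/0xFF byte mask.) */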
            push_uimm64(env, bitmask8_to_bytemask64(
                                (e->Iex.Const.con->Ico.V128 >> 8) & 0xFF
                       ));
            push_uimm64(env, bitmask8_to_bytemask64(
                                (e->Iex.Const.con->Ico.V128 >> 0) & 0xFF
                       ));
            addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 ));
            add_to_rsp(env, 16);
            break;
         }
      }
      return dst;
   }

   if (e->tag == Iex_Unop) {
      switch (e->Iex.Unop.op) {

         case Iop_NotV128: {
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            return do_sse_NotV128(env, arg);
         }

         case Iop_CmpNEZ64x2: {
            /* We can use SSE2 instructions for this. */
            /* Ideally, we want to do a 64Ix2 comparison against zero of
               the operand.  Problem is no such insn exists.  Solution
               therefore is to do a 32Ix4 comparison instead, and bitwise-
               negate (NOT) the result.  Let a,b,c,d be 32-bit lanes, and
               let the not'd result of this initial comparison be a:b:c:d.
               What we need to compute is (a|b):(a|b):(c|d):(c|d).  So, use
               pshufd to create a value b:a:d:c, and OR that with a:b:c:d,
               giving the required result.

               The required selection sequence is 2,3,0,1, which
               according to Intel's documentation means the pshufd
               literal value is 0xB1, that is,
               (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)
            */
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg tmp = generate_zeroes_V128(env);
            HReg dst = newVRegV(env);
            addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, arg, tmp));
            tmp = do_sse_NotV128(env, tmp);
            addInstr(env, AMD64Instr_SseShuf(0xB1, tmp, dst));
            addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmp, dst));
            return dst;
         }

         case Iop_CmpNEZ32x4: op = Asse_CMPEQ32; goto do_CmpNEZ_vector;
         case Iop_CmpNEZ16x8: op = Asse_CMPEQ16; goto do_CmpNEZ_vector;
         case Iop_CmpNEZ8x16: op = Asse_CMPEQ8;  goto do_CmpNEZ_vector;
         do_CmpNEZ_vector:
         {
            HReg arg  = iselVecExpr(env, e->Iex.Unop.arg);
            HReg tmp  = newVRegV(env);
            HReg zero = generate_zeroes_V128(env);
            HReg dst;
            addInstr(env, mk_vMOVsd_RR(arg, tmp));
            addInstr(env, AMD64Instr_SseReRg(op, zero, tmp));
            dst = do_sse_NotV128(env, tmp);
            return dst;
         }

         case Iop_RecipEst32Fx4: op = Asse_RCPF;   goto do_32Fx4_unary;
         case Iop_RSqrtEst32Fx4: op = Asse_RSQRTF; goto do_32Fx4_unary;
         do_32Fx4_unary:
         {
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegV(env);
            addInstr(env, AMD64Instr_Sse32Fx4(op, arg, dst));
            return dst;
         }

         case Iop_RecipEst32F0x4: op = Asse_RCPF;   goto do_32F0x4_unary;
         case Iop_RSqrtEst32F0x4: op = Asse_RSQRTF; goto do_32F0x4_unary;
         case Iop_Sqrt32F0x4:     op = Asse_SQRTF;  goto do_32F0x4_unary;
         do_32F0x4_unary:
         {
            /* A bit subtle.  We have to copy the arg to the result
               register first, because actually doing the SSE scalar insn
               leaves the upper 3/4 of the destination register
               unchanged.  Whereas the required semantics of these
               primops is that the upper 3/4 is simply copied in from the
               argument. */
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(arg, dst));
            addInstr(env, AMD64Instr_Sse32FLo(op, arg, dst));
            return dst;
         }

         case Iop_Sqrt64F0x2: op = Asse_SQRTF; goto do_64F0x2_unary;
         do_64F0x2_unary:
         {
            /* A bit subtle.  We have to copy the arg to the result
               register first, because actually doing the SSE scalar insn
               leaves the upper half of the destination register
               unchanged.  Whereas the required semantics of these
               primops is that the upper half is simply copied in from the
               argument. */
            HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
            HReg dst = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(arg, dst));
            addInstr(env, AMD64Instr_Sse64FLo(op, arg, dst));
            return dst;
         }

         case Iop_32UtoV128: {
            HReg        dst     = newVRegV(env);
            AMD64AMode* rsp_m32 = AMD64AMode_IR(-32, hregAMD64_RSP());
            AMD64RI*    ri      = iselIntExpr_RI(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, ri, rsp_m32));
            addInstr(env, AMD64Instr_SseLdzLO(4, dst, rsp_m32));
            return dst;
         }

         case Iop_64UtoV128: {
            HReg        dst  = newVRegV(env);
            AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
            AMD64RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Unop.arg);
            addInstr(env, AMD64Instr_Push(rmi));
            addInstr(env, AMD64Instr_SseLdzLO(8, dst, rsp0));
            add_to_rsp(env, 8);
            return dst;
         }

         case Iop_V256toV128_0:
         case Iop_V256toV128_1: {
            HReg vHi, vLo;
            iselDVecExpr(&vHi, &vLo, env, e->Iex.Unop.arg);
            return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo;
         }

         default:
            break;
      } /* switch (e->Iex.Unop.op) */
   } /* if (e->tag == Iex_Unop) */

   if (e->tag == Iex_Binop) {
      switch (e->Iex.Binop.op) {

         case Iop_Sqrt64Fx2:
         case Iop_Sqrt32Fx4: {
            /* :: (rmode, vec) -> vec */
            HReg arg = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst = newVRegV(env);
            /* XXXROUNDINGFIXME */
            /* set roundingmode here */
            addInstr(env, (e->Iex.Binop.op == Iop_Sqrt64Fx2
                              ? AMD64Instr_Sse64Fx2 : AMD64Instr_Sse32Fx4)
                          (Asse_SQRTF, arg, dst));
            return dst;
         }

         /* FIXME: could we generate MOVQ here? */
         case Iop_SetV128lo64: {
            HReg dst  = newVRegV(env);
            HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
            addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, AMD64RI_Reg(srcI), rsp_m16));
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
            return dst;
         }

         /* FIXME: could we generate MOVD here? */
         case Iop_SetV128lo32: {
            HReg dst  = newVRegV(env);
            HReg srcV = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
            AMD64AMode* rsp_m16 = AMD64AMode_IR(-16, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, srcV, rsp_m16));
            addInstr(env, AMD64Instr_Store(4, srcI, rsp_m16));
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp_m16));
            return dst;
         }

         case Iop_64HLtoV128: {
            HReg        rsp     = hregAMD64_RSP();
            AMD64AMode* m8_rsp  = AMD64AMode_IR(-8, rsp);
            AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
            AMD64RI*    qHi     = iselIntExpr_RI(env, e->Iex.Binop.arg1);
            AMD64RI*    qLo     = iselIntExpr_RI(env, e->Iex.Binop.arg2);
            addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, qHi, m8_rsp));
            addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, qLo, m16_rsp));
            HReg dst = newVRegV(env);
            /* One store-forwarding stall coming up, oh well :-( */
            addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, m16_rsp));
            return dst;
         }

         case Iop_CmpEQ32Fx4: op = Asse_CMPEQF; goto do_32Fx4;
         case Iop_CmpLT32Fx4: op = Asse_CMPLTF; goto do_32Fx4;
         case Iop_CmpLE32Fx4: op = Asse_CMPLEF; goto do_32Fx4;
         case Iop_CmpUN32Fx4: op = Asse_CMPUNF; goto do_32Fx4;
         case Iop_Max32Fx4:   op = Asse_MAXF;   goto do_32Fx4;
         case Iop_Min32Fx4:   op = Asse_MINF;   goto do_32Fx4;
         do_32Fx4:
         {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst  = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst));
            return dst;
         }

         case Iop_CmpEQ64Fx2: op = Asse_CMPEQF; goto do_64Fx2;
         case Iop_CmpLT64Fx2: op = Asse_CMPLTF; goto do_64Fx2;
         case Iop_CmpLE64Fx2: op = Asse_CMPLEF; goto do_64Fx2;
         case Iop_CmpUN64Fx2: op = Asse_CMPUNF; goto do_64Fx2;
         case Iop_Max64Fx2:   op = Asse_MAXF;   goto do_64Fx2;
         case Iop_Min64Fx2:   op = Asse_MINF;   goto do_64Fx2;
         do_64Fx2:
         {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst  = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst));
            return dst;
         }

         case Iop_CmpEQ32F0x4: op = Asse_CMPEQF; goto do_32F0x4;
         case Iop_CmpLT32F0x4: op = Asse_CMPLTF; goto do_32F0x4;
         case Iop_CmpLE32F0x4: op = Asse_CMPLEF; goto do_32F0x4;
         case Iop_CmpUN32F0x4: op = Asse_CMPUNF; goto do_32F0x4;
         case Iop_Add32F0x4:   op = Asse_ADDF;   goto do_32F0x4;
         case Iop_Div32F0x4:   op = Asse_DIVF;   goto do_32F0x4;
         case Iop_Max32F0x4:   op = Asse_MAXF;   goto do_32F0x4;
         case Iop_Min32F0x4:   op = Asse_MINF;   goto do_32F0x4;
         case Iop_Mul32F0x4:   op = Asse_MULF;   goto do_32F0x4;
         case Iop_Sub32F0x4:   op = Asse_SUBF;   goto do_32F0x4;
         do_32F0x4: {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst  = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, AMD64Instr_Sse32FLo(op, argR, dst));
            return dst;
         }

         case Iop_CmpEQ64F0x2: op = Asse_CMPEQF; goto do_64F0x2;
         case Iop_CmpLT64F0x2: op = Asse_CMPLTF; goto do_64F0x2;
         case Iop_CmpLE64F0x2: op = Asse_CMPLEF; goto do_64F0x2;
         case Iop_CmpUN64F0x2: op = Asse_CMPUNF; goto do_64F0x2;
         case Iop_Add64F0x2:   op = Asse_ADDF;   goto do_64F0x2;
         case Iop_Div64F0x2:   op = Asse_DIVF;   goto do_64F0x2;
         case Iop_Max64F0x2:   op = Asse_MAXF;   goto do_64F0x2;
         case Iop_Min64F0x2:   op = Asse_MINF;   goto do_64F0x2;
         case Iop_Mul64F0x2:   op = Asse_MULF;   goto do_64F0x2;
         case Iop_Sub64F0x2:   op = Asse_SUBF;   goto do_64F0x2;
         do_64F0x2: {
            HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
            HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
            HReg dst  = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(argL, dst));
            addInstr(env, AMD64Instr_Sse64FLo(op, argR, dst));
            return dst;
         }

         case Iop_QNarrowBin32Sto16Sx8:
            op = Asse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
         case Iop_QNarrowBin16Sto8Sx16:
            op = Asse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
         case Iop_QNarrowBin16Sto8Ux16:
            op = Asse_PACKUSW; arg1isEReg = True; goto do_SseReRg;

         case Iop_InterleaveHI8x16:
            op = Asse_UNPCKHB; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveHI16x8:
            op = Asse_UNPCKHW; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveHI32x4:
            op = Asse_UNPCKHD; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveHI64x2:
            op = Asse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg;

         case Iop_InterleaveLO8x16:
            op = Asse_UNPCKLB; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveLO16x8:
            op = Asse_UNPCKLW; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveLO32x4:
            op = Asse_UNPCKLD; arg1isEReg = True; goto do_SseReRg;
         case Iop_InterleaveLO64x2:
            op = Asse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;

         case Iop_AndV128:    op = Asse_AND;      goto do_SseReRg;
         case Iop_OrV128:     op = Asse_OR;       goto do_SseReRg;
         case Iop_XorV128:    op = Asse_XOR;      goto do_SseReRg;
         case Iop_Add8x16:    op = Asse_ADD8;     goto do_SseReRg;
         case Iop_Add16x8:    op = Asse_ADD16;    goto do_SseReRg;
         case Iop_Add32x4:    op = Asse_ADD32;    goto do_SseReRg;
         case Iop_Add64x2:    op = Asse_ADD64;    goto do_SseReRg;
         case Iop_QAdd8Sx16:  op = Asse_QADD8S;   goto do_SseReRg;
         case Iop_QAdd16Sx8:  op = Asse_QADD16S;  goto do_SseReRg;
         case Iop_QAdd8Ux16:  op = Asse_QADD8U;   goto do_SseReRg;
         case Iop_QAdd16Ux8:  op = Asse_QADD16U;  goto do_SseReRg;
         case Iop_Avg8Ux16:   op = Asse_AVG8U;    goto do_SseReRg;
         case Iop_Avg16Ux8:   op = Asse_AVG16U;   goto do_SseReRg;
         case Iop_CmpEQ8x16:  op = Asse_CMPEQ8;   goto do_SseReRg;
         case Iop_CmpEQ16x8:  op = Asse_CMPEQ16;  goto do_SseReRg;
         case Iop_CmpEQ32x4:  op = Asse_CMPEQ32;  goto do_SseReRg;
         case Iop_CmpGT8Sx16: op = Asse_CMPGT8S;  goto do_SseReRg;
         case Iop_CmpGT16Sx8: op = Asse_CMPGT16S; goto do_SseReRg;
         case Iop_CmpGT32Sx4: op = Asse_CMPGT32S; goto do_SseReRg;
         case Iop_Max16Sx8:   op = Asse_MAX16S;   goto do_SseReRg;
         case Iop_Max8Ux16:   op = Asse_MAX8U;    goto do_SseReRg;
         case Iop_Min16Sx8:   op = Asse_MIN16S;   goto do_SseReRg;
         case Iop_Min8Ux16:   op = Asse_MIN8U;    goto do_SseReRg;
         case Iop_MulHi16Ux8: op = Asse_MULHI16U; goto do_SseReRg;
         case Iop_MulHi16Sx8: op = Asse_MULHI16S; goto do_SseReRg;
         case Iop_Mul16x8:    op = Asse_MUL16;    goto do_SseReRg;
         case Iop_Sub8x16:    op = Asse_SUB8;     goto do_SseReRg;
         case Iop_Sub16x8:    op = Asse_SUB16;    goto do_SseReRg;
         case Iop_Sub32x4:    op = Asse_SUB32;    goto do_SseReRg;
         case Iop_Sub64x2:    op = Asse_SUB64;    goto do_SseReRg;
         case Iop_QSub8Sx16:  op = Asse_QSUB8S;   goto do_SseReRg;
         case Iop_QSub16Sx8:  op = Asse_QSUB16S;  goto do_SseReRg;
         case Iop_QSub8Ux16:  op = Asse_QSUB8U;   goto do_SseReRg;
         case Iop_QSub16Ux8:  op = Asse_QSUB16U;  goto do_SseReRg;
         do_SseReRg: {
|
|
HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
|
|
HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
|
|
HReg dst = newVRegV(env);
|
|
if (arg1isEReg) {
|
|
addInstr(env, mk_vMOVsd_RR(arg2, dst));
|
|
addInstr(env, AMD64Instr_SseReRg(op, arg1, dst));
|
|
} else {
|
|
addInstr(env, mk_vMOVsd_RR(arg1, dst));
|
|
addInstr(env, AMD64Instr_SseReRg(op, arg2, dst));
|
|
}
|
|
return dst;
|
|
}
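      /* Note: the pack/unpack cases above set arg1isEReg because for
         those instructions the E (source) and G (destination)
         operands supply different halves of the result, in the
         opposite order from the IR semantics.  Starting dst as a copy
         of arg2 and applying arg1 via the E position restores the
         required lane order.  All other ops use the natural
         dst = arg1 `op` arg2 arrangement. */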

      case Iop_ShlN16x8: op = Asse_SHL16; goto do_SseShift;
      case Iop_ShlN32x4: op = Asse_SHL32; goto do_SseShift;
      case Iop_ShlN64x2: op = Asse_SHL64; goto do_SseShift;
      case Iop_SarN16x8: op = Asse_SAR16; goto do_SseShift;
      case Iop_SarN32x4: op = Asse_SAR32; goto do_SseShift;
      case Iop_ShrN16x8: op = Asse_SHR16; goto do_SseShift;
      case Iop_ShrN32x4: op = Asse_SHR32; goto do_SseShift;
      case Iop_ShrN64x2: op = Asse_SHR64; goto do_SseShift;
      do_SseShift: {
         HReg        greg = iselVecExpr(env, e->Iex.Binop.arg1);
         AMD64RMI*   rmi  = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
         HReg        ereg = newVRegV(env);
         HReg        dst  = newVRegV(env);
         addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
         addInstr(env, AMD64Instr_Push(rmi));
         addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0));
         addInstr(env, mk_vMOVsd_RR(greg, dst));
         addInstr(env, AMD64Instr_SseReRg(op, ereg, dst));
         add_to_rsp(env, 16);
         return dst;
      }
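      /* Note: the two pushes build a 16-byte value at 0(%rsp) whose
         low quadword is the shift amount and whose high quadword is
         zero.  That is loaded into ereg because the SSE shift
         instructions take their count from the low 64 bits of an XMM
         operand; add_to_rsp(16) then discards the temporary. */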

      case Iop_Mul32x4:    fn = (HWord)h_generic_calc_Mul32x4;
                           goto do_SseAssistedBinary;
      case Iop_Max32Sx4:   fn = (HWord)h_generic_calc_Max32Sx4;
                           goto do_SseAssistedBinary;
      case Iop_Min32Sx4:   fn = (HWord)h_generic_calc_Min32Sx4;
                           goto do_SseAssistedBinary;
      case Iop_Max32Ux4:   fn = (HWord)h_generic_calc_Max32Ux4;
                           goto do_SseAssistedBinary;
      case Iop_Min32Ux4:   fn = (HWord)h_generic_calc_Min32Ux4;
                           goto do_SseAssistedBinary;
      case Iop_Max16Ux8:   fn = (HWord)h_generic_calc_Max16Ux8;
                           goto do_SseAssistedBinary;
      case Iop_Min16Ux8:   fn = (HWord)h_generic_calc_Min16Ux8;
                           goto do_SseAssistedBinary;
      case Iop_Max8Sx16:   fn = (HWord)h_generic_calc_Max8Sx16;
                           goto do_SseAssistedBinary;
      case Iop_Min8Sx16:   fn = (HWord)h_generic_calc_Min8Sx16;
                           goto do_SseAssistedBinary;
      case Iop_CmpEQ64x2:  fn = (HWord)h_generic_calc_CmpEQ64x2;
                           goto do_SseAssistedBinary;
      case Iop_CmpGT64Sx2: fn = (HWord)h_generic_calc_CmpGT64Sx2;
                           goto do_SseAssistedBinary;
      case Iop_Perm32x4:   fn = (HWord)h_generic_calc_Perm32x4;
                           goto do_SseAssistedBinary;
      case Iop_QNarrowBin32Sto16Ux8:
                           fn = (HWord)h_generic_calc_QNarrowBin32Sto16Ux8;
                           goto do_SseAssistedBinary;
      case Iop_NarrowBin16to8x16:
                           fn = (HWord)h_generic_calc_NarrowBin16to8x16;
                           goto do_SseAssistedBinary;
      case Iop_NarrowBin32to16x8:
                           fn = (HWord)h_generic_calc_NarrowBin32to16x8;
                           goto do_SseAssistedBinary;
      do_SseAssistedBinary: {
         /* RRRufff! RRRufff code is what we're generating here.  Oh
            well. */
         vassert(fn != 0);
         HReg dst = newVRegV(env);
         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
         HReg argp = newVRegI(env);
         /* subq $112, %rsp         -- make a space */
         sub_from_rsp(env, 112);
         /* leaq 48(%rsp), %r_argp  -- point into it */
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
                                        argp));
         /* andq $-16, %r_argp      -- 16-align the pointer */
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                         AMD64RMI_Imm( ~(UInt)15 ),
                                         argp));
         /* Prepare 3 arg regs:
               leaq 0(%r_argp), %rdi
               leaq 16(%r_argp), %rsi
               leaq 32(%r_argp), %rdx
         */
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
                                        hregAMD64_RDI()));
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
                                        hregAMD64_RSI()));
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp),
                                        hregAMD64_RDX()));
         /* Store the two args, at (%rsi) and (%rdx):
               movupd  %argL, 0(%rsi)
               movupd  %argR, 0(%rdx)
         */
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL,
                                          AMD64AMode_IR(0, hregAMD64_RSI())));
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argR,
                                          AMD64AMode_IR(0, hregAMD64_RDX())));
         /* call the helper */
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
                                        3, mk_RetLoc_simple(RLPri_None) ));
         /* fetch the result from memory, using %r_argp, which the
            register allocator will keep alive across the call. */
         addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst,
                                          AMD64AMode_IR(0, argp)));
         /* and finally, clear the space */
         add_to_rsp(env, 112);
         return dst;
      }
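      /* Note: these "assisted" ops have no direct SSE2 encoding, so
         they are farmed out to generic C helpers which (per the
         declarations in host_generic_simd128.h) take pointers to a
         result vector and the two argument vectors -- hence the
         three lea'd argument registers and the memory round-trip
         for the operands and the result. */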

      case Iop_SarN64x2: fn = (HWord)h_generic_calc_SarN64x2;
                         goto do_SseAssistedVectorAndScalar;
      case Iop_SarN8x16: fn = (HWord)h_generic_calc_SarN8x16;
                         goto do_SseAssistedVectorAndScalar;
      do_SseAssistedVectorAndScalar: {
         /* RRRufff! RRRufff code is what we're generating here.  Oh
            well. */
         vassert(fn != 0);
         HReg dst = newVRegV(env);
         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
         HReg argp = newVRegI(env);
         /* subq $112, %rsp         -- make a space */
         sub_from_rsp(env, 112);
         /* leaq 48(%rsp), %r_argp  -- point into it */
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
                                        argp));
         /* andq $-16, %r_argp      -- 16-align the pointer */
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                         AMD64RMI_Imm( ~(UInt)15 ),
                                         argp));
         /* Prepare 2 vector arg regs:
               leaq 0(%r_argp), %rdi
               leaq 16(%r_argp), %rsi
         */
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
                                        hregAMD64_RDI()));
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
                                        hregAMD64_RSI()));
         /* Store the vector arg, at (%rsi):
               movupd  %argL, 0(%rsi)
         */
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argL,
                                          AMD64AMode_IR(0, hregAMD64_RSI())));
         /* And get the scalar value into rdx */
         addInstr(env, mk_iMOVsd_RR(argR, hregAMD64_RDX()));

         /* call the helper */
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn,
                                        3, mk_RetLoc_simple(RLPri_None) ));
         /* fetch the result from memory, using %r_argp, which the
            register allocator will keep alive across the call. */
         addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dst,
                                          AMD64AMode_IR(0, argp)));
         /* and finally, clear the space */
         add_to_rsp(env, 112);
         return dst;
      }
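      /* Note: unlike do_SseAssistedBinary above, the scalar shift
         amount is passed by value in %rdx rather than through
         memory; only the vector operand and the result go via the
         aligned scratch area. */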

      default:
         break;
   } /* switch (e->Iex.Binop.op) */
   } /* if (e->tag == Iex_Binop) */

   if (e->tag == Iex_Triop) {
   IRTriop *triop = e->Iex.Triop.details;
   switch (triop->op) {

      case Iop_Add64Fx2: op = Asse_ADDF; goto do_64Fx2_w_rm;
      case Iop_Sub64Fx2: op = Asse_SUBF; goto do_64Fx2_w_rm;
      case Iop_Mul64Fx2: op = Asse_MULF; goto do_64Fx2_w_rm;
      case Iop_Div64Fx2: op = Asse_DIVF; goto do_64Fx2_w_rm;
      do_64Fx2_w_rm:
      {
         HReg argL = iselVecExpr(env, triop->arg2);
         HReg argR = iselVecExpr(env, triop->arg3);
         HReg dst = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst));
         return dst;
      }

      case Iop_Add32Fx4: op = Asse_ADDF; goto do_32Fx4_w_rm;
      case Iop_Sub32Fx4: op = Asse_SUBF; goto do_32Fx4_w_rm;
      case Iop_Mul32Fx4: op = Asse_MULF; goto do_32Fx4_w_rm;
      case Iop_Div32Fx4: op = Asse_DIVF; goto do_32Fx4_w_rm;
      do_32Fx4_w_rm:
      {
         HReg argL = iselVecExpr(env, triop->arg2);
         HReg argR = iselVecExpr(env, triop->arg3);
         HReg dst = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(argL, dst));
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst));
         return dst;
      }

      default:
         break;
   } /* switch (triop->op) */
   } /* if (e->tag == Iex_Triop) */

   if (e->tag == Iex_ITE) { // VFD
      HReg r1  = iselVecExpr(env, e->Iex.ITE.iftrue);
      HReg r0  = iselVecExpr(env, e->Iex.ITE.iffalse);
      HReg dst = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(r1,dst));
      AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0, dst));
      return dst;
   }
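   /* Note: the "cc ^ 1" above relies on the x86 condition-code
      encoding, in which flipping the bottom bit negates a condition
      (e.g. Acc_Z <-> Acc_NZ).  dst starts out as the if-true value
      and is conditionally overwritten with the if-false value when
      the guard does not hold. */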

   //vec_fail:
   vex_printf("iselVecExpr (amd64, subarch = %s): can't reduce\n",
              LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
   ppIRExpr(e);
   vpanic("iselVecExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: SIMD (V256) expressions, into 2 XMM regs.    --*/
/*---------------------------------------------------------*/

static void iselDVecExpr ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
                           ISelEnv* env, IRExpr* e )
{
   iselDVecExpr_wrk( rHi, rLo, env, e );
#  if 0
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   vassert(hregClass(*rHi) == HRcVec128);
   vassert(hregClass(*rLo) == HRcVec128);
   vassert(hregIsVirtual(*rHi));
   vassert(hregIsVirtual(*rLo));
}


/* DO NOT CALL THIS DIRECTLY */
static void iselDVecExpr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
                               ISelEnv* env, IRExpr* e )
{
   HWord fn = 0; /* address of helper fn, if required */
   vassert(e);
   IRType ty = typeOfIRExpr(env->type_env,e);
   vassert(ty == Ity_V256);

   AMD64SseOp op = Asse_INVALID;

   /* read 256-bit IRTemp */
   if (e->tag == Iex_RdTmp) {
      lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
      return;
   }

   if (e->tag == Iex_Get) {
      HReg        vHi  = newVRegV(env);
      HReg        vLo  = newVRegV(env);
      HReg        rbp  = hregAMD64_RBP();
      AMD64AMode* am0  = AMD64AMode_IR(e->Iex.Get.offset + 0,  rbp);
      AMD64AMode* am16 = AMD64AMode_IR(e->Iex.Get.offset + 16, rbp);
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, am0));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, am16));
      *rHi = vHi;
      *rLo = vLo;
      return;
   }

   if (e->tag == Iex_Load) {
      HReg        vHi  = newVRegV(env);
      HReg        vLo  = newVRegV(env);
      HReg        rA   = iselIntExpr_R(env, e->Iex.Load.addr);
      AMD64AMode* am0  = AMD64AMode_IR(0,  rA);
      AMD64AMode* am16 = AMD64AMode_IR(16, rA);
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, am0));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, am16));
      *rHi = vHi;
      *rLo = vLo;
      return;
   }

   if (e->tag == Iex_Const) {
      vassert(e->Iex.Const.con->tag == Ico_V256);
      switch (e->Iex.Const.con->Ico.V256) {
         case 0x00000000: {
            HReg vHi = generate_zeroes_V128(env);
            HReg vLo = newVRegV(env);
            addInstr(env, mk_vMOVsd_RR(vHi, vLo));
            *rHi = vHi;
            *rLo = vLo;
            return;
         }
         default:
            break; /* give up.  Until such time as is necessary. */
      }
   }

   if (e->tag == Iex_Unop) {
   switch (e->Iex.Unop.op) {

      case Iop_NotV256: {
         HReg argHi, argLo;
         iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
         *rHi = do_sse_NotV128(env, argHi);
         *rLo = do_sse_NotV128(env, argLo);
         return;
      }

      case Iop_RecipEst32Fx8: op = Asse_RCPF;   goto do_32Fx8_unary;
      case Iop_Sqrt32Fx8:     op = Asse_SQRTF;  goto do_32Fx8_unary;
      case Iop_RSqrtEst32Fx8: op = Asse_RSQRTF; goto do_32Fx8_unary;
      do_32Fx8_unary:
      {
         HReg argHi, argLo;
         iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
         HReg dstHi = newVRegV(env);
         HReg dstLo = newVRegV(env);
         addInstr(env, AMD64Instr_Sse32Fx4(op, argHi, dstHi));
         addInstr(env, AMD64Instr_Sse32Fx4(op, argLo, dstLo));
         *rHi = dstHi;
         *rLo = dstLo;
         return;
      }

      case Iop_Sqrt64Fx4: op = Asse_SQRTF; goto do_64Fx4_unary;
      do_64Fx4_unary:
      {
         HReg argHi, argLo;
         iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
         HReg dstHi = newVRegV(env);
         HReg dstLo = newVRegV(env);
         addInstr(env, AMD64Instr_Sse64Fx2(op, argHi, dstHi));
         addInstr(env, AMD64Instr_Sse64Fx2(op, argLo, dstLo));
         *rHi = dstHi;
         *rLo = dstLo;
         return;
      }

      case Iop_CmpNEZ64x4: {
         /* We can use SSE2 instructions for this. */
         /* Same scheme as Iop_CmpNEZ64x2, except twice as wide
            (obviously).  See comment on Iop_CmpNEZ64x2 for
            explanation of what's going on here. */
         HReg argHi, argLo;
         iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
         HReg tmpHi = generate_zeroes_V128(env);
         HReg tmpLo = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(tmpHi, tmpLo));
         HReg dstHi = newVRegV(env);
         HReg dstLo = newVRegV(env);
         addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, argHi, tmpHi));
         addInstr(env, AMD64Instr_SseReRg(Asse_CMPEQ32, argLo, tmpLo));
         tmpHi = do_sse_NotV128(env, tmpHi);
         tmpLo = do_sse_NotV128(env, tmpLo);
         addInstr(env, AMD64Instr_SseShuf(0xB1, tmpHi, dstHi));
         addInstr(env, AMD64Instr_SseShuf(0xB1, tmpLo, dstLo));
         addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmpHi, dstHi));
         addInstr(env, AMD64Instr_SseReRg(Asse_OR, tmpLo, dstLo));
         *rHi = dstHi;
         *rLo = dstLo;
         return;
      }
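      /* Note: the shuffle immediate 0xB1 (lane order 2,3,0,1) swaps
         the two 32-bit halves within each 64-bit lane; OR-ing the
         swapped copy back into the negated CMPEQ32 result smears
         "this half is nonzero" across the whole 64-bit lane, giving
         the CmpNEZ64xN semantics using only SSE2. */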

      case Iop_CmpNEZ32x8:  op = Asse_CMPEQ32; goto do_CmpNEZ_vector;
      case Iop_CmpNEZ16x16: op = Asse_CMPEQ16; goto do_CmpNEZ_vector;
      case Iop_CmpNEZ8x32:  op = Asse_CMPEQ8;  goto do_CmpNEZ_vector;
      do_CmpNEZ_vector:
      {
         HReg argHi, argLo;
         iselDVecExpr(&argHi, &argLo, env, e->Iex.Unop.arg);
         HReg tmpHi = newVRegV(env);
         HReg tmpLo = newVRegV(env);
         HReg zero  = generate_zeroes_V128(env);
         HReg dstHi, dstLo;
         addInstr(env, mk_vMOVsd_RR(argHi, tmpHi));
         addInstr(env, mk_vMOVsd_RR(argLo, tmpLo));
         addInstr(env, AMD64Instr_SseReRg(op, zero, tmpHi));
         addInstr(env, AMD64Instr_SseReRg(op, zero, tmpLo));
         dstHi = do_sse_NotV128(env, tmpHi);
         dstLo = do_sse_NotV128(env, tmpLo);
         *rHi = dstHi;
         *rLo = dstLo;
         return;
      }

      default:
         break;
   } /* switch (e->Iex.Unop.op) */
   } /* if (e->tag == Iex_Unop) */

   if (e->tag == Iex_Binop) {
   switch (e->Iex.Binop.op) {

      case Iop_Max64Fx4: op = Asse_MAXF; goto do_64Fx4;
      case Iop_Min64Fx4: op = Asse_MINF; goto do_64Fx4;
      do_64Fx4:
      {
         HReg argLhi, argLlo, argRhi, argRlo;
         iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
         iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
         HReg dstHi = newVRegV(env);
         HReg dstLo = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
         addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
         addInstr(env, AMD64Instr_Sse64Fx2(op, argRhi, dstHi));
         addInstr(env, AMD64Instr_Sse64Fx2(op, argRlo, dstLo));
         *rHi = dstHi;
         *rLo = dstLo;
         return;
      }

      case Iop_Max32Fx8: op = Asse_MAXF; goto do_32Fx8;
      case Iop_Min32Fx8: op = Asse_MINF; goto do_32Fx8;
      do_32Fx8:
      {
         HReg argLhi, argLlo, argRhi, argRlo;
         iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
         iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
         HReg dstHi = newVRegV(env);
         HReg dstLo = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
         addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
         addInstr(env, AMD64Instr_Sse32Fx4(op, argRhi, dstHi));
         addInstr(env, AMD64Instr_Sse32Fx4(op, argRlo, dstLo));
         *rHi = dstHi;
         *rLo = dstLo;
         return;
      }

      case Iop_AndV256:     op = Asse_AND;      goto do_SseReRg;
      case Iop_OrV256:      op = Asse_OR;       goto do_SseReRg;
      case Iop_XorV256:     op = Asse_XOR;      goto do_SseReRg;
      case Iop_Add8x32:     op = Asse_ADD8;     goto do_SseReRg;
      case Iop_Add16x16:    op = Asse_ADD16;    goto do_SseReRg;
      case Iop_Add32x8:     op = Asse_ADD32;    goto do_SseReRg;
      case Iop_Add64x4:     op = Asse_ADD64;    goto do_SseReRg;
      case Iop_QAdd8Sx32:   op = Asse_QADD8S;   goto do_SseReRg;
      case Iop_QAdd16Sx16:  op = Asse_QADD16S;  goto do_SseReRg;
      case Iop_QAdd8Ux32:   op = Asse_QADD8U;   goto do_SseReRg;
      case Iop_QAdd16Ux16:  op = Asse_QADD16U;  goto do_SseReRg;
      case Iop_Avg8Ux32:    op = Asse_AVG8U;    goto do_SseReRg;
      case Iop_Avg16Ux16:   op = Asse_AVG16U;   goto do_SseReRg;
      case Iop_CmpEQ8x32:   op = Asse_CMPEQ8;   goto do_SseReRg;
      case Iop_CmpEQ16x16:  op = Asse_CMPEQ16;  goto do_SseReRg;
      case Iop_CmpEQ32x8:   op = Asse_CMPEQ32;  goto do_SseReRg;
      case Iop_CmpGT8Sx32:  op = Asse_CMPGT8S;  goto do_SseReRg;
      case Iop_CmpGT16Sx16: op = Asse_CMPGT16S; goto do_SseReRg;
      case Iop_CmpGT32Sx8:  op = Asse_CMPGT32S; goto do_SseReRg;
      case Iop_Max16Sx16:   op = Asse_MAX16S;   goto do_SseReRg;
      case Iop_Max8Ux32:    op = Asse_MAX8U;    goto do_SseReRg;
      case Iop_Min16Sx16:   op = Asse_MIN16S;   goto do_SseReRg;
      case Iop_Min8Ux32:    op = Asse_MIN8U;    goto do_SseReRg;
      case Iop_MulHi16Ux16: op = Asse_MULHI16U; goto do_SseReRg;
      case Iop_MulHi16Sx16: op = Asse_MULHI16S; goto do_SseReRg;
      case Iop_Mul16x16:    op = Asse_MUL16;    goto do_SseReRg;
      case Iop_Sub8x32:     op = Asse_SUB8;     goto do_SseReRg;
      case Iop_Sub16x16:    op = Asse_SUB16;    goto do_SseReRg;
      case Iop_Sub32x8:     op = Asse_SUB32;    goto do_SseReRg;
      case Iop_Sub64x4:     op = Asse_SUB64;    goto do_SseReRg;
      case Iop_QSub8Sx32:   op = Asse_QSUB8S;   goto do_SseReRg;
      case Iop_QSub16Sx16:  op = Asse_QSUB16S;  goto do_SseReRg;
      case Iop_QSub8Ux32:   op = Asse_QSUB8U;   goto do_SseReRg;
      case Iop_QSub16Ux16:  op = Asse_QSUB16U;  goto do_SseReRg;
      do_SseReRg:
      {
         HReg argLhi, argLlo, argRhi, argRlo;
         iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
         iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
         HReg dstHi = newVRegV(env);
         HReg dstLo = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
         addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
         addInstr(env, AMD64Instr_SseReRg(op, argRhi, dstHi));
         addInstr(env, AMD64Instr_SseReRg(op, argRlo, dstLo));
         *rHi = dstHi;
         *rLo = dstLo;
         return;
      }

      case Iop_ShlN16x16: op = Asse_SHL16; goto do_SseShift;
      case Iop_ShlN32x8:  op = Asse_SHL32; goto do_SseShift;
      case Iop_ShlN64x4:  op = Asse_SHL64; goto do_SseShift;
      case Iop_SarN16x16: op = Asse_SAR16; goto do_SseShift;
      case Iop_SarN32x8:  op = Asse_SAR32; goto do_SseShift;
      case Iop_ShrN16x16: op = Asse_SHR16; goto do_SseShift;
      case Iop_ShrN32x8:  op = Asse_SHR32; goto do_SseShift;
      case Iop_ShrN64x4:  op = Asse_SHR64; goto do_SseShift;
      do_SseShift: {
         HReg        gregHi, gregLo;
         iselDVecExpr(&gregHi, &gregLo, env, e->Iex.Binop.arg1);
         AMD64RMI*   rmi   = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
         AMD64AMode* rsp0  = AMD64AMode_IR(0, hregAMD64_RSP());
         HReg        ereg  = newVRegV(env);
         HReg        dstHi = newVRegV(env);
         HReg        dstLo = newVRegV(env);
         addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(0)));
         addInstr(env, AMD64Instr_Push(rmi));
         addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, ereg, rsp0));
         addInstr(env, mk_vMOVsd_RR(gregHi, dstHi));
         addInstr(env, AMD64Instr_SseReRg(op, ereg, dstHi));
         addInstr(env, mk_vMOVsd_RR(gregLo, dstLo));
         addInstr(env, AMD64Instr_SseReRg(op, ereg, dstLo));
         add_to_rsp(env, 16);
         *rHi = dstHi;
         *rLo = dstLo;
         return;
      }

      case Iop_V128HLtoV256: {
         *rHi = iselVecExpr(env, e->Iex.Binop.arg1);
         *rLo = iselVecExpr(env, e->Iex.Binop.arg2);
         return;
      }

      case Iop_Mul32x8:    fn = (HWord)h_generic_calc_Mul32x4;
                           goto do_SseAssistedBinary;
      case Iop_Max32Sx8:   fn = (HWord)h_generic_calc_Max32Sx4;
                           goto do_SseAssistedBinary;
      case Iop_Min32Sx8:   fn = (HWord)h_generic_calc_Min32Sx4;
                           goto do_SseAssistedBinary;
      case Iop_Max32Ux8:   fn = (HWord)h_generic_calc_Max32Ux4;
                           goto do_SseAssistedBinary;
      case Iop_Min32Ux8:   fn = (HWord)h_generic_calc_Min32Ux4;
                           goto do_SseAssistedBinary;
      case Iop_Max16Ux16:  fn = (HWord)h_generic_calc_Max16Ux8;
                           goto do_SseAssistedBinary;
      case Iop_Min16Ux16:  fn = (HWord)h_generic_calc_Min16Ux8;
                           goto do_SseAssistedBinary;
      case Iop_Max8Sx32:   fn = (HWord)h_generic_calc_Max8Sx16;
                           goto do_SseAssistedBinary;
      case Iop_Min8Sx32:   fn = (HWord)h_generic_calc_Min8Sx16;
                           goto do_SseAssistedBinary;
      case Iop_CmpEQ64x4:  fn = (HWord)h_generic_calc_CmpEQ64x2;
                           goto do_SseAssistedBinary;
      case Iop_CmpGT64Sx4: fn = (HWord)h_generic_calc_CmpGT64Sx2;
                           goto do_SseAssistedBinary;
      do_SseAssistedBinary: {
         /* RRRufff! RRRufff code is what we're generating here.  Oh
            well. */
         vassert(fn != 0);
         HReg dstHi = newVRegV(env);
         HReg dstLo = newVRegV(env);
         HReg argLhi, argLlo, argRhi, argRlo;
         iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
         iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
         HReg argp = newVRegI(env);
         /* subq $160, %rsp         -- make a space */
         sub_from_rsp(env, 160);
         /* leaq 48(%rsp), %r_argp  -- point into it */
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
                                        argp));
         /* andq $-16, %r_argp      -- 16-align the pointer */
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                         AMD64RMI_Imm( ~(UInt)15 ),
                                         argp));
         /* Prepare 3 arg regs:
               leaq 0(%r_argp), %rdi
               leaq 16(%r_argp), %rsi
               leaq 32(%r_argp), %rdx
         */
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
                                        hregAMD64_RDI()));
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(16, argp),
                                        hregAMD64_RSI()));
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp),
                                        hregAMD64_RDX()));
         /* Store the two high args, at (%rsi) and (%rdx):
               movupd  %argLhi, 0(%rsi)
               movupd  %argRhi, 0(%rdx)
         */
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLhi,
                                          AMD64AMode_IR(0, hregAMD64_RSI())));
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRhi,
                                          AMD64AMode_IR(0, hregAMD64_RDX())));
         /* Store the two low args, at 48(%rsi) and 48(%rdx):
               movupd  %argLlo, 48(%rsi)
               movupd  %argRlo, 48(%rdx)
         */
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLlo,
                                          AMD64AMode_IR(48, hregAMD64_RSI())));
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRlo,
                                          AMD64AMode_IR(48, hregAMD64_RDX())));
         /* call the helper */
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3,
                                        mk_RetLoc_simple(RLPri_None) ));
         /* Prepare 3 arg regs:
               leaq 48(%r_argp), %rdi
               leaq 64(%r_argp), %rsi
               leaq 80(%r_argp), %rdx
         */
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, argp),
                                        hregAMD64_RDI()));
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(64, argp),
                                        hregAMD64_RSI()));
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(80, argp),
                                        hregAMD64_RDX()));
         /* call the helper */
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3,
                                        mk_RetLoc_simple(RLPri_None) ));
         /* fetch the result from memory, using %r_argp, which the
            register allocator will keep alive across the call. */
         addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstHi,
                                          AMD64AMode_IR(0, argp)));
         addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstLo,
                                          AMD64AMode_IR(48, argp)));
         /* and finally, clear the space */
         add_to_rsp(env, 160);
         *rHi = dstHi;
         *rLo = dstLo;
         return;
      }
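      /* Note: the 256-bit "assisted" ops reuse the 128-bit helpers by
         calling them twice, once per half, with disjoint slots in the
         scratch area (result/args at argp+0/+16/+32 for the high
         half, argp+48/+64/+80 for the low half).  The low-half
         operands are stored before the first call, while %rsi/%rdx
         still point at known offsets; the argument registers are then
         recomputed from argp for the second call. */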

      case Iop_Perm32x8:   fn = (HWord)h_generic_calc_Perm32x8;
                           goto do_SseAssistedBinary256;
      do_SseAssistedBinary256: {
         /* RRRufff! RRRufff code is what we're generating here.  Oh
            well. */
         vassert(fn != 0);
         HReg dstHi = newVRegV(env);
         HReg dstLo = newVRegV(env);
         HReg argLhi, argLlo, argRhi, argRlo;
         iselDVecExpr(&argLhi, &argLlo, env, e->Iex.Binop.arg1);
         iselDVecExpr(&argRhi, &argRlo, env, e->Iex.Binop.arg2);
         HReg argp = newVRegI(env);
         /* subq $160, %rsp         -- make a space */
         sub_from_rsp(env, 160);
         /* leaq 48(%rsp), %r_argp  -- point into it */
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(48, hregAMD64_RSP()),
                                        argp));
         /* andq $-16, %r_argp      -- 16-align the pointer */
         addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
                                         AMD64RMI_Imm( ~(UInt)15 ),
                                         argp));
         /* Prepare 3 arg regs:
               leaq 0(%r_argp), %rdi
               leaq 32(%r_argp), %rsi
               leaq 64(%r_argp), %rdx
         */
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(0, argp),
                                        hregAMD64_RDI()));
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(32, argp),
                                        hregAMD64_RSI()));
         addInstr(env, AMD64Instr_Lea64(AMD64AMode_IR(64, argp),
                                        hregAMD64_RDX()));
         /* Store the two args, at (%rsi) and (%rdx):
               movupd  %argLlo, 0(%rsi)
               movupd  %argLhi, 16(%rsi)
               movupd  %argRlo, 0(%rdx)
               movupd  %argRhi, 16(%rdx)
         */
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLlo,
                                          AMD64AMode_IR(0, hregAMD64_RSI())));
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argLhi,
                                          AMD64AMode_IR(16, hregAMD64_RSI())));
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRlo,
                                          AMD64AMode_IR(0, hregAMD64_RDX())));
         addInstr(env, AMD64Instr_SseLdSt(False/*!isLoad*/, 16, argRhi,
                                          AMD64AMode_IR(16, hregAMD64_RDX())));
         /* call the helper */
         addInstr(env, AMD64Instr_Call( Acc_ALWAYS, (ULong)fn, 3,
                                        mk_RetLoc_simple(RLPri_None) ));
         /* fetch the result from memory, using %r_argp, which the
            register allocator will keep alive across the call. */
         addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstLo,
                                          AMD64AMode_IR(0, argp)));
         addInstr(env, AMD64Instr_SseLdSt(True/*isLoad*/, 16, dstHi,
                                          AMD64AMode_IR(16, argp)));
         /* and finally, clear the space */
         add_to_rsp(env, 160);
         *rHi = dstHi;
         *rLo = dstLo;
         return;
      }
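      /* Note: unlike the per-half scheme above, Perm32x8 needs to see
         the whole 256-bit value at once, so each argument is laid out
         in memory as a full V256 (low half at +0, high half at +16)
         and a single helper call produces the full 256-bit result. */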

      default:
         break;
   } /* switch (e->Iex.Binop.op) */
   } /* if (e->tag == Iex_Binop) */

   if (e->tag == Iex_Triop) {
   IRTriop *triop = e->Iex.Triop.details;
   switch (triop->op) {

      case Iop_Add64Fx4: op = Asse_ADDF; goto do_64Fx4_w_rm;
      case Iop_Sub64Fx4: op = Asse_SUBF; goto do_64Fx4_w_rm;
      case Iop_Mul64Fx4: op = Asse_MULF; goto do_64Fx4_w_rm;
      case Iop_Div64Fx4: op = Asse_DIVF; goto do_64Fx4_w_rm;
      do_64Fx4_w_rm:
      {
         HReg argLhi, argLlo, argRhi, argRlo;
         iselDVecExpr(&argLhi, &argLlo, env, triop->arg2);
         iselDVecExpr(&argRhi, &argRlo, env, triop->arg3);
         HReg dstHi = newVRegV(env);
         HReg dstLo = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
         addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, AMD64Instr_Sse64Fx2(op, argRhi, dstHi));
         addInstr(env, AMD64Instr_Sse64Fx2(op, argRlo, dstLo));
         *rHi = dstHi;
         *rLo = dstLo;
         return;
      }

      case Iop_Add32Fx8: op = Asse_ADDF; goto do_32Fx8_w_rm;
      case Iop_Sub32Fx8: op = Asse_SUBF; goto do_32Fx8_w_rm;
      case Iop_Mul32Fx8: op = Asse_MULF; goto do_32Fx8_w_rm;
      case Iop_Div32Fx8: op = Asse_DIVF; goto do_32Fx8_w_rm;
      do_32Fx8_w_rm:
      {
         HReg argLhi, argLlo, argRhi, argRlo;
         iselDVecExpr(&argLhi, &argLlo, env, triop->arg2);
         iselDVecExpr(&argRhi, &argRlo, env, triop->arg3);
         HReg dstHi = newVRegV(env);
         HReg dstLo = newVRegV(env);
         addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
         addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
         /* XXXROUNDINGFIXME */
         /* set roundingmode here */
         addInstr(env, AMD64Instr_Sse32Fx4(op, argRhi, dstHi));
         addInstr(env, AMD64Instr_Sse32Fx4(op, argRlo, dstLo));
         *rHi = dstHi;
         *rLo = dstLo;
         return;
      }

      default:
         break;
   } /* switch (triop->op) */
   } /* if (e->tag == Iex_Triop) */


   if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_64x4toV256) {
      HReg        rsp     = hregAMD64_RSP();
      HReg        vHi     = newVRegV(env);
      HReg        vLo     = newVRegV(env);
      AMD64AMode* m8_rsp  = AMD64AMode_IR(-8, rsp);
      AMD64AMode* m16_rsp = AMD64AMode_IR(-16, rsp);
      /* arg1 is the most significant (Q3), arg4 the least (Q0) */
      /* Get all the args into regs, before messing with the stack. */
      AMD64RI* q3 = iselIntExpr_RI(env, e->Iex.Qop.details->arg1);
      AMD64RI* q2 = iselIntExpr_RI(env, e->Iex.Qop.details->arg2);
      AMD64RI* q1 = iselIntExpr_RI(env, e->Iex.Qop.details->arg3);
      AMD64RI* q0 = iselIntExpr_RI(env, e->Iex.Qop.details->arg4);
      /* less significant lane (Q2) at the lower address (-16(rsp)) */
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q3, m8_rsp));
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q2, m16_rsp));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vHi, m16_rsp));
      /* and then the lower half .. */
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q1, m8_rsp));
      addInstr(env, AMD64Instr_Alu64M(Aalu_MOV, q0, m16_rsp));
      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, vLo, m16_rsp));
      *rHi = vHi;
      *rLo = vLo;
      return;
   }
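   /* Note: this is the same two-stores-then-16-byte-load trick as
      Iop_64HLtoV128 in iselVecExpr_wrk, applied once per 128-bit
      half: Q3:Q2 assemble into vHi and Q1:Q0 into vLo. */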

   if (e->tag == Iex_ITE) {
      HReg r1Hi, r1Lo, r0Hi, r0Lo;
      iselDVecExpr(&r1Hi, &r1Lo, env, e->Iex.ITE.iftrue);
      iselDVecExpr(&r0Hi, &r0Lo, env, e->Iex.ITE.iffalse);
      HReg dstHi = newVRegV(env);
      HReg dstLo = newVRegV(env);
      addInstr(env, mk_vMOVsd_RR(r1Hi,dstHi));
      addInstr(env, mk_vMOVsd_RR(r1Lo,dstLo));
      AMD64CondCode cc = iselCondCode(env, e->Iex.ITE.cond);
      addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0Hi, dstHi));
      addInstr(env, AMD64Instr_SseCMov(cc ^ 1, r0Lo, dstLo));
      *rHi = dstHi;
      *rLo = dstLo;
      return;
   }

   //avx_fail:
   vex_printf("iselDVecExpr (amd64, subarch = %s): can't reduce\n",
              LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
   ppIRExpr(e);
   vpanic("iselDVecExpr_wrk");
}


/*---------------------------------------------------------*/
/*--- ISEL: Statements                                  ---*/
/*---------------------------------------------------------*/

static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }

   switch (stmt->tag) {

   /* --------- LOADG (guarded load) --------- */
   case Ist_LoadG: {
      IRLoadG* lg = stmt->Ist.LoadG.details;
      if (lg->end != Iend_LE)
         goto stmt_fail;

      UChar szB = 0; /* invalid */
      switch (lg->cvt) {
         case ILGop_Ident32:   szB = 4;  break;
         case ILGop_Ident64:   szB = 8;  break;
         case ILGop_IdentV128: szB = 16; break;
         default: break;
      }
      if (szB == 0)
         goto stmt_fail;

      AMD64AMode* amAddr
         = iselIntExpr_AMode(env, lg->addr);
      HReg rAlt
         = szB == 16 ? iselVecExpr(env, lg->alt)
                     : iselIntExpr_R(env, lg->alt);
      HReg rDst
         = lookupIRTemp(env, lg->dst);

      /* Get the alt value into the dst.  We'll do a conditional load
         which overwrites it -- or not -- with loaded data. */
      if (szB == 16) {
         addInstr(env, mk_vMOVsd_RR(rAlt, rDst));
      } else {
         addInstr(env, mk_iMOVsd_RR(rAlt, rDst));
      }
      AMD64CondCode cc = iselCondCode(env, lg->guard);
      if (szB == 16) {
         addInstr(env, AMD64Instr_SseCLoad(cc, amAddr, rDst));
      } else {
         addInstr(env, AMD64Instr_CLoad(cc, szB, amAddr, rDst));
      }
      return;
   }

   /* --------- STOREG (guarded store) --------- */
   case Ist_StoreG: {
      IRStoreG* sg = stmt->Ist.StoreG.details;
      if (sg->end != Iend_LE)
         goto stmt_fail;

      UChar szB = 0; /* invalid */
      switch (typeOfIRExpr(env->type_env, sg->data)) {
         case Ity_I32:  szB = 4;  break;
         case Ity_I64:  szB = 8;  break;
         case Ity_V128: szB = 16; break;
         default: break;
      }
      if (szB == 0)
         goto stmt_fail;

      AMD64AMode* amAddr
         = iselIntExpr_AMode(env, sg->addr);
      HReg rSrc
         = szB == 16 ? iselVecExpr(env, sg->data)
                     : iselIntExpr_R(env, sg->data);
      AMD64CondCode cc
         = iselCondCode(env, sg->guard);
      if (szB == 16) {
         addInstr(env, AMD64Instr_SseCStore(cc, rSrc, amAddr));
      } else {
         addInstr(env, AMD64Instr_CStore(cc, szB, rSrc, amAddr));
      }
      return;
   }

   /* --------- STORE --------- */
   case Ist_Store: {
      IRType    tya = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
      IRType    tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
      IREndness end = stmt->Ist.Store.end;

      if (tya != Ity_I64 || end != Iend_LE)
         goto stmt_fail;

      if (tyd == Ity_I64) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,ri,am));
         return;
      }
      if (tyd == Ity_I8 || tyd == Ity_I16 || tyd == Ity_I32) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselIntExpr_R(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_Store(
                          toUChar(tyd==Ity_I8 ? 1 : (tyd==Ity_I16 ? 2 : 4)),
                          r,am));
         return;
      }
      if (tyd == Ity_F64) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselDblExpr(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, r, am));
         return;
      }
      if (tyd == Ity_F32) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselFltExpr(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, r, am));
         return;
      }
      if (tyd == Ity_V128) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr);
         HReg r = iselVecExpr(env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, r, am));
         return;
      }
      if (tyd == Ity_V256) {
         HReg        rA   = iselIntExpr_R(env, stmt->Ist.Store.addr);
         AMD64AMode* am0  = AMD64AMode_IR(0, rA);
         AMD64AMode* am16 = AMD64AMode_IR(16, rA);
         HReg vHi, vLo;
         iselDVecExpr(&vHi, &vLo, env, stmt->Ist.Store.data);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vLo, am0));
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vHi, am16));
         return;
      }
      break;
   }
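   /* Note: V256 stores (and the matching Iex_Load/Iex_Get cases in
      iselDVecExpr_wrk) keep the convention that the low 128 bits live
      at the lower address, so the two halves go out at +0 and +16. */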

   /* --------- PUT --------- */
   case Ist_Put: {
      IRType ty = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
      if (ty == Ity_I64) {
         /* We're going to write to memory, so compute the RHS into an
            AMD64RI. */
         AMD64RI* ri = iselIntExpr_RI(env, stmt->Ist.Put.data);
         addInstr(env,
                  AMD64Instr_Alu64M(
                     Aalu_MOV,
                     ri,
                     AMD64AMode_IR(stmt->Ist.Put.offset,
                                   hregAMD64_RBP())
                 ));
         return;
      }
      if (ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32) {
         HReg r = iselIntExpr_R(env, stmt->Ist.Put.data);
         addInstr(env, AMD64Instr_Store(
                          toUChar(ty==Ity_I8 ? 1 : (ty==Ity_I16 ? 2 : 4)),
                          r,
                          AMD64AMode_IR(stmt->Ist.Put.offset,
                                        hregAMD64_RBP())));
         return;
      }
      if (ty == Ity_F32) {
         HReg f32 = iselFltExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset, hregAMD64_RBP());
         set_SSE_rounding_default(env); /* paranoia */
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 4, f32, am ));
         return;
      }
      if (ty == Ity_F64) {
         HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR( stmt->Ist.Put.offset,
                                         hregAMD64_RBP() );
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, f64, am ));
         return;
      }
      if (ty == Ity_V128) {
         HReg vec = iselVecExpr(env, stmt->Ist.Put.data);
         AMD64AMode* am = AMD64AMode_IR(stmt->Ist.Put.offset,
                                        hregAMD64_RBP());
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, am));
         return;
      }
      if (ty == Ity_V256) {
         HReg vHi, vLo;
         iselDVecExpr(&vHi, &vLo, env, stmt->Ist.Put.data);
         HReg        rbp  = hregAMD64_RBP();
         AMD64AMode* am0  = AMD64AMode_IR(stmt->Ist.Put.offset + 0,  rbp);
         AMD64AMode* am16 = AMD64AMode_IR(stmt->Ist.Put.offset + 16, rbp);
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vLo, am0));
         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vHi, am16));
         return;
      }
      break;
   }

   /* --------- Indexed PUT --------- */
   case Ist_PutI: {
      IRPutI *puti = stmt->Ist.PutI.details;

      AMD64AMode* am
         = genGuestArrayOffset(
              env, puti->descr,
                   puti->ix, puti->bias );

      IRType ty = typeOfIRExpr(env->type_env, puti->data);
      if (ty == Ity_F64) {
         HReg val = iselDblExpr(env, puti->data);
         addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, val, am ));
         return;
      }
      if (ty == Ity_I8) {
         HReg r = iselIntExpr_R(env, puti->data);
         addInstr(env, AMD64Instr_Store( 1, r, am ));
         return;
      }
      if (ty == Ity_I64) {
         AMD64RI* ri = iselIntExpr_RI(env, puti->data);
         addInstr(env, AMD64Instr_Alu64M( Aalu_MOV, ri, am ));
         return;
      }
      break;
   }

   /* --------- TMP --------- */
   case Ist_WrTmp: {
      IRTemp tmp = stmt->Ist.WrTmp.tmp;
      IRType ty = typeOfIRTemp(env->type_env, tmp);

      /* optimisation: if stmt->Ist.WrTmp.data is Add64(..,..),
         compute it into an AMode and then use LEA.  This usually
         produces fewer instructions, often because (for memcheck
         created IR) we get t = address-expression, (t is later used
         twice) and so doing this naturally turns address-expression
         back into an AMD64 amode. */
      if (ty == Ity_I64
          && stmt->Ist.WrTmp.data->tag == Iex_Binop
          && stmt->Ist.WrTmp.data->Iex.Binop.op == Iop_Add64) {
         AMD64AMode* am = iselIntExpr_AMode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         if (am->tag == Aam_IR && am->Aam.IR.imm == 0) {
            /* Hmm, iselIntExpr_AMode wimped out and just computed the
               value into a register.  Just emit a normal reg-reg move
               so reg-alloc can coalesce it away in the usual way. */
            HReg src = am->Aam.IR.reg;
            addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst));
         } else {
            addInstr(env, AMD64Instr_Lea64(am,dst));
         }
         return;
      }
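      /* For instance (illustrative): for IR like
            t3 = Add64(t1, Shl64(t2, 3))
         iselIntExpr_AMode can fold the whole RHS into one amode, so
         this path emits a single "leaq 0(%t1,%t2,8), %t3" instead of
         a shift-and-add sequence. */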

      if (ty == Ity_I64 || ty == Ity_I32
          || ty == Ity_I16 || ty == Ity_I8) {
         AMD64RMI* rmi = iselIntExpr_RMI(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, AMD64Instr_Alu64R(Aalu_MOV,rmi,dst));
         return;
      }
      if (ty == Ity_I128) {
         HReg rHi, rLo, dstHi, dstLo;
         iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
         lookupIRTempPair( &dstHi, &dstLo, env, tmp);
         addInstr(env, mk_iMOVsd_RR(rHi,dstHi) );
         addInstr(env, mk_iMOVsd_RR(rLo,dstLo) );
         return;
      }
      if (ty == Ity_I1) {
         AMD64CondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, AMD64Instr_Set64(cond, dst));
         return;
      }
      if (ty == Ity_F64) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_F32) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_V128) {
         HReg dst = lookupIRTemp(env, tmp);
         HReg src = iselVecExpr(env, stmt->Ist.WrTmp.data);
         addInstr(env, mk_vMOVsd_RR(src, dst));
         return;
      }
      if (ty == Ity_V256) {
         HReg rHi, rLo, dstHi, dstLo;
         iselDVecExpr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
         lookupIRTempPair( &dstHi, &dstLo, env, tmp);
         addInstr(env, mk_vMOVsd_RR(rHi,dstHi) );
         addInstr(env, mk_vMOVsd_RR(rLo,dstLo) );
         return;
      }
      break;
   }

   /* --------- Call to DIRTY helper --------- */
   case Ist_Dirty: {
      IRDirty* d = stmt->Ist.Dirty.details;

      /* Figure out the return type, if any. */
      IRType retty = Ity_INVALID;
      if (d->tmp != IRTemp_INVALID)
         retty = typeOfIRTemp(env->type_env, d->tmp);

      /* Throw out any return types we don't know about. */
      Bool retty_ok = False;
      switch (retty) {
         case Ity_INVALID: /* function doesn't return anything */
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         case Ity_V128: case Ity_V256:
            retty_ok = True; break;
         default:
            break;
      }
      if (!retty_ok)
         break; /* will go to stmt_fail: */

      /* Marshal args, do the call, and set the return value to
         0x555..555 if this is a conditional call that returns a value
         and the call is skipped. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
      vassert(is_sane_RetLoc(rloc));

      /* Now figure out what to do with the returned value, if any. */
      switch (retty) {
         case Ity_INVALID: {
            /* No return value.  Nothing to do. */
            vassert(d->tmp == IRTemp_INVALID);
            vassert(rloc.pri == RLPri_None);
            vassert(addToSp == 0);
            return;
         }
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
            /* The returned value is in %rax.  Park it in the register
               associated with tmp. */
            vassert(rloc.pri == RLPri_Int);
            vassert(addToSp == 0);
            HReg dst = lookupIRTemp(env, d->tmp);
            addInstr(env, mk_iMOVsd_RR(hregAMD64_RAX(),dst) );
            return;
         }
         case Ity_V128: {
            /* The returned value is on the stack, and rloc.spOff
               tells us where.  Fish it off the stack and then move
               the stack pointer upwards to clear it, as directed by
               doHelperCall. */
            vassert(rloc.pri == RLPri_V128SpRel);
            vassert(addToSp >= 16);
            HReg        dst = lookupIRTemp(env, d->tmp);
            AMD64AMode* am  = AMD64AMode_IR(rloc.spOff, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
            add_to_rsp(env, addToSp);
            return;
         }
         case Ity_V256: {
            /* See comments for Ity_V128. */
            vassert(rloc.pri == RLPri_V256SpRel);
            vassert(addToSp >= 32);
            HReg dstLo, dstHi;
            lookupIRTempPair(&dstHi, &dstLo, env, d->tmp);
            AMD64AMode* amLo = AMD64AMode_IR(rloc.spOff, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dstLo, amLo ));
            AMD64AMode* amHi = AMD64AMode_IR(rloc.spOff+16, hregAMD64_RSP());
            addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dstHi, amHi ));
            add_to_rsp(env, addToSp);
            return;
         }
         default:
            /*NOTREACHED*/
            vassert(0);
      }
      break;
   }
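   /* Note: the return-location discipline for dirty calls is decided
      by doHelperCall: small integer results come back in %rax, while
      V128/V256 results are written by the callee into a stack slot at
      rloc.spOff, which is why they are fished out with SSE loads and
      the slot then popped via add_to_rsp. */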

   /* --------- MEM FENCE --------- */
   case Ist_MBE:
      switch (stmt->Ist.MBE.event) {
         case Imbe_Fence:
            addInstr(env, AMD64Instr_MFence());
            return;
         default:
            break;
      }
      break;

   /* --------- ACAS --------- */
   case Ist_CAS:
      if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
         /* "normal" singleton CAS */
         UChar  sz;
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* get: cas->expd into %rax, and cas->data into %rbx */
         AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rData = iselIntExpr_R(env, cas->dataLo);
         HReg rExpd = iselIntExpr_R(env, cas->expdLo);
         HReg rOld  = lookupIRTemp(env, cas->oldLo);
         vassert(cas->expdHi == NULL);
         vassert(cas->dataHi == NULL);
         addInstr(env, mk_iMOVsd_RR(rExpd, rOld));
         addInstr(env, mk_iMOVsd_RR(rExpd, hregAMD64_RAX()));
         addInstr(env, mk_iMOVsd_RR(rData, hregAMD64_RBX()));
         switch (ty) {
            case Ity_I64: sz = 8; break;
            case Ity_I32: sz = 4; break;
            case Ity_I16: sz = 2; break;
            case Ity_I8:  sz = 1; break;
            default: goto unhandled_cas;
         }
         addInstr(env, AMD64Instr_ACAS(am, sz));
         addInstr(env, AMD64Instr_CMov64(Acc_NZ, hregAMD64_RAX(), rOld));
         return;
      } else {
         /* double CAS */
         UChar  sz;
         IRCAS* cas = stmt->Ist.CAS.details;
         IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
         /* only 32-bit and 64-bit allowed in this case */
         /* get: cas->expdLo into %rax, and cas->dataLo into %rbx */
         /* get: cas->expdHi into %rdx, and cas->dataHi into %rcx */
         AMD64AMode* am = iselIntExpr_AMode(env, cas->addr);
         HReg rDataHi = iselIntExpr_R(env, cas->dataHi);
         HReg rDataLo = iselIntExpr_R(env, cas->dataLo);
         HReg rExpdHi = iselIntExpr_R(env, cas->expdHi);
         HReg rExpdLo = iselIntExpr_R(env, cas->expdLo);
         HReg rOldHi  = lookupIRTemp(env, cas->oldHi);
         HReg rOldLo  = lookupIRTemp(env, cas->oldLo);
         switch (ty) {
            case Ity_I64:
               if (!(env->hwcaps & VEX_HWCAPS_AMD64_CX16))
                  goto unhandled_cas; /* we'd have to generate
                                         cmpxchg16b, but the host
                                         doesn't support that */
               sz = 8;
               break;
            case Ity_I32:
               sz = 4;
               break;
            default:
               goto unhandled_cas;
         }
         addInstr(env, mk_iMOVsd_RR(rExpdHi, rOldHi));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, rOldLo));
         addInstr(env, mk_iMOVsd_RR(rExpdHi, hregAMD64_RDX()));
         addInstr(env, mk_iMOVsd_RR(rExpdLo, hregAMD64_RAX()));
         addInstr(env, mk_iMOVsd_RR(rDataHi, hregAMD64_RCX()));
         addInstr(env, mk_iMOVsd_RR(rDataLo, hregAMD64_RBX()));
         addInstr(env, AMD64Instr_DACAS(am, sz));
         addInstr(env, AMD64Instr_CMov64(Acc_NZ, hregAMD64_RDX(), rOldHi));
         addInstr(env, AMD64Instr_CMov64(Acc_NZ, hregAMD64_RAX(), rOldLo));
         return;
      }
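      /* Note: ACAS/DACAS wrap "lock cmpxchg" / "lock cmpxchg8b/16b",
         which set ZF on success.  rOld(Hi/Lo) is preset to the
         expected value, so the CMov64(Acc_NZ, ...) instructions only
         need to patch in the actual old value (left in %rax, or
         %rdx:%rax) when the compare-and-swap failed. */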
   unhandled_cas:
      break;

   /* --------- INSTR MARK --------- */
   /* Doesn't generate any executable code ... */
   case Ist_IMark:
      return;

   /* --------- ABI HINT --------- */
   /* These have no meaning (denotation in the IR) and so we ignore
      them ... if any actually made it this far. */
   case Ist_AbiHint:
      return;

   /* --------- NO-OP --------- */
   case Ist_NoOp:
      return;

   /* --------- EXIT --------- */
   case Ist_Exit: {
      if (stmt->Ist.Exit.dst->tag != Ico_U64)
         vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value");

      AMD64CondCode cc    = iselCondCode(env, stmt->Ist.Exit.guard);
      AMD64AMode*   amRIP = AMD64AMode_IR(stmt->Ist.Exit.offsIP,
                                          hregAMD64_RBP());

      /* Case: boring transfer to known address */
      if (stmt->Ist.Exit.jk == Ijk_Boring) {
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "Y" : ",");
            addInstr(env, AMD64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
                                             amRIP, cc, toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, Ijk_Boring));
         }
         return;
      }

      /* Case: assisted transfer to arbitrary address */
      switch (stmt->Ist.Exit.jk) {
         /* Keep this list in sync with that in iselNext below */
         case Ijk_ClientReq:
         case Ijk_EmWarn:
         case Ijk_NoDecode:
         case Ijk_NoRedir:
         case Ijk_SigSEGV:
         case Ijk_SigTRAP:
         case Ijk_Sys_syscall:
         case Ijk_Sys_int210:
         case Ijk_InvalICache:
         case Ijk_Yield:
         {
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc,
                                               stmt->Ist.Exit.jk));
            return;
         }
         default:
            break;
      }

      /* Do we ever expect to see any other kind? */
      goto stmt_fail;
   }

   default: break;
   }
  stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt(amd64)");
}


/*---------------------------------------------------------*/
/*--- ISEL: Basic block terminators (Nexts)             ---*/
/*---------------------------------------------------------*/

static void iselNext ( ISelEnv* env,
                       IRExpr* next, IRJumpKind jk, Int offsIP )
{
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf( "\n-- PUT(%d) = ", offsIP);
      ppIRExpr( next );
      vex_printf( "; exit-");
      ppIRJumpKind(jk);
      vex_printf( "\n");
   }

   /* Case: boring transfer to known address */
   if (next->tag == Iex_Const) {
      IRConst* cdst = next->Iex.Const.con;
      vassert(cdst->tag == Ico_U64);
      if (jk == Ijk_Boring || jk == Ijk_Call) {
         /* Boring transfer to known address */
         AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr64)cdst->Ico.U64) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "X" : ".");
            addInstr(env, AMD64Instr_XDirect(cdst->Ico.U64,
                                             amRIP, Acc_ALWAYS,
                                             toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an indirect transfer,
               as that's the cheapest alternative that is
               allowable. */
            HReg r = iselIntExpr_R(env, next);
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS,
                                               Ijk_Boring));
         }
         return;
      }
   }

   /* Case: call/return (==boring) transfer to any address */
   switch (jk) {
      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
         HReg        r     = iselIntExpr_R(env, next);
         AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
         if (env->chainingAllowed) {
            addInstr(env, AMD64Instr_XIndir(r, amRIP, Acc_ALWAYS));
         } else {
            addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS,
                                               Ijk_Boring));
         }
         return;
      }
      default:
         break;
   }

   /* Case: assisted transfer to arbitrary address */
   switch (jk) {
      /* Keep this list in sync with that for Ist_Exit above */
      case Ijk_ClientReq:
      case Ijk_EmWarn:
      case Ijk_NoDecode:
      case Ijk_NoRedir:
      case Ijk_SigSEGV:
      case Ijk_SigTRAP:
      case Ijk_Sys_syscall:
      case Ijk_Sys_int210:
      case Ijk_InvalICache:
      case Ijk_Yield: {
         HReg        r     = iselIntExpr_R(env, next);
         AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
         addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS, jk));
         return;
      }
      default:
         break;
   }

   vex_printf( "\n-- PUT(%d) = ", offsIP);
   ppIRExpr( next );
   vex_printf( "; exit-");
   ppIRJumpKind(jk);
   vex_printf( "\n");
   vassert(0); // are we expecting any other kind?
}


/*---------------------------------------------------------*/
/*--- Insn selector top-level                           ---*/
/*---------------------------------------------------------*/

/* Translate an entire SB to amd64 code. */

HInstrArray* iselSB_AMD64 ( const IRSB* bb,
                            VexArch      arch_host,
                            const VexArchInfo* archinfo_host,
                            const VexAbiInfo*  vbi/*UNUSED*/,
                            Int offs_Host_EvC_Counter,
                            Int offs_Host_EvC_FailAddr,
                            Bool chainingAllowed,
                            Bool addProfInc,
                            Addr max_ga )
{
   Int        i, j;
   HReg       hreg, hregHI;
   ISelEnv*   env;
   UInt       hwcaps_host = archinfo_host->hwcaps;
   AMD64AMode *amCounter, *amFailAddr;

   /* sanity ... */
   vassert(arch_host == VexArchAMD64);
   vassert(0 == (hwcaps_host
                 & ~(VEX_HWCAPS_AMD64_SSE3
                     | VEX_HWCAPS_AMD64_CX16
                     | VEX_HWCAPS_AMD64_LZCNT
                     | VEX_HWCAPS_AMD64_AVX
                     | VEX_HWCAPS_AMD64_RDTSCP
                     | VEX_HWCAPS_AMD64_BMI
                     | VEX_HWCAPS_AMD64_AVX2)));

   /* Check that the host's endianness is as expected. */
   vassert(archinfo_host->endness == VexEndnessLE);

   /* Make up an initial environment to use. */
   env = LibVEX_Alloc_inline(sizeof(ISelEnv));
   env->vreg_ctr = 0;

   /* Set up output code array. */
   env->code = newHInstrArray();

   /* Copy BB's type env. */
   env->type_env = bb->tyenv;

   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
      change as we go along. */
   env->n_vregmap = bb->tyenv->types_used;
   env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
   env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));

   /* and finally ... */
   env->chainingAllowed = chainingAllowed;
   env->hwcaps          = hwcaps_host;
   env->max_ga          = max_ga;

   /* For each IR temporary, allocate a suitably-kinded virtual
      register. */
   j = 0;
   for (i = 0; i < env->n_vregmap; i++) {
      hregHI = hreg = INVALID_HREG;
      switch (bb->tyenv->types[i]) {
         case Ity_I1:
         case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
            hreg = mkHReg(True, HRcInt64, 0, j++);
            break;
         case Ity_I128:
            hreg   = mkHReg(True, HRcInt64, 0, j++);
            hregHI = mkHReg(True, HRcInt64, 0, j++);
            break;
         case Ity_F32:
         case Ity_F64:
         case Ity_V128:
            hreg = mkHReg(True, HRcVec128, 0, j++);
            break;
         case Ity_V256:
            hreg   = mkHReg(True, HRcVec128, 0, j++);
            hregHI = mkHReg(True, HRcVec128, 0, j++);
            break;
         default:
            ppIRType(bb->tyenv->types[i]);
            vpanic("iselBB(amd64): IRTemp type");
      }
      env->vregmap[i]   = hreg;
      env->vregmapHI[i] = hregHI;
   }
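   /* Note: types too wide for one host register get a pair of vregs:
      an Ity_I128 temp maps to two HRcInt64 vregs and an Ity_V256 temp
      to two HRcVec128 vregs, with the high half tracked in vregmapHI;
      lookupIRTempPair retrieves both halves later on. */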
   env->vreg_ctr = j;

   /* The very first instruction must be an event check. */
   amCounter  = AMD64AMode_IR(offs_Host_EvC_Counter,  hregAMD64_RBP());
   amFailAddr = AMD64AMode_IR(offs_Host_EvC_FailAddr, hregAMD64_RBP());
   addInstr(env, AMD64Instr_EvCheck(amCounter, amFailAddr));

   /* Possibly a block counter increment (for profiling).  At this
      point we don't know the address of the counter, so just pretend
      it is zero.  It will have to be patched later, but before this
      translation is used, by a call to LibVEX_patchProfCtr. */
   if (addProfInc) {
      addInstr(env, AMD64Instr_ProfInc());
   }

   /* Ok, finally we can iterate over the statements. */
   for (i = 0; i < bb->stmts_used; i++)
      if (bb->stmts[i])
         iselStmt(env, bb->stmts[i]);

   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);

   /* record the number of vregs we used. */
   env->code->n_vregs = env->vreg_ctr;
   return env->code;
}


/*---------------------------------------------------------------*/
/*--- end                                   host_amd64_isel.c ---*/
/*---------------------------------------------------------------*/