Template:SSE2 Instructions

From Chessprogramming wiki
Jump to: navigation, search

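x86 and x86-64 - SSE2 Instructions, with C-Intrinsic reference from the Intel Intrinsics Guide. Note: the type names are written below with a single leading underscore; in the actual compiler headers they are spelled __m128i and __int64 (two leading underscores).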

Mnemonic Description C-Intrinsic
bitwise logical return parameter
pand packed and, r := a & b _m128i _mm_and_si128 (_m128i a, _m128i b)
pandn packed and not, r := ~a & b _m128i _mm_andnot_si128 (_m128i a, _m128i b)
por packed or, r := a | b _m128i _mm_or_si128 (_m128i a, _m128i b)
pxor packed xor, r := a ^ b _m128i _mm_xor_si128 (_m128i a, _m128i b)
quad word shifts return parameter
psrlq packed shift right logical quad _m128i _mm_srl_epi64 (_m128i a, _m128i cnt)
immediate _m128i _mm_srli_epi64 (_m128i a, int cnt)
psllq packed shift left logical quad _m128i _mm_sll_epi64 (_m128i a, _m128i cnt)
immediate _m128i _mm_slli_epi64 (_m128i a, int cnt)
arithmetical return parameter
paddb packed add bytes _m128i _mm_add_epi8 (_m128i a, _m128i b)
psubb packed subtract bytes _m128i _mm_sub_epi8 (_m128i a, _m128i b)
psadbw packed sum of absolute differences
of bytes into a word
_m128i _mm_sad_epu8 (_m128i a, _m128i b)
pmaxsw packed maximum signed words _m128i _mm_max_epi16 (_m128i a, _m128i b)
pmaxub packed maximum unsigned bytes _m128i _mm_max_epu8 (_m128i a, _m128i b)
pminsw packed minimum signed words _m128i _mm_min_epi16 (_m128i a, _m128i b)
pminub packed minimum unsigned bytes _m128i _mm_min_epu8 (_m128i a, _m128i b)
pcmpeqb packed compare equal bytes _m128i _mm_cmpeq_epi8 (_m128i a, _m128i b)
pmullw packed multiply low signed (unsigned) word _m128i _mm_mullo_epi16 (_m128i a, _m128i b)
pmulhw packed multiply high signed word _m128i _mm_mulhi_epi16 (_m128i a, _m128i b)
pmulhuw packed multiply high unsigned word _m128i _mm_mulhi_epu16 (_m128i a, _m128i b)
pmaddwd packed multiply words and add doublewords _m128i _mm_madd_epi16 (_m128i a, _m128i b)
unpack, shuffle return parameter
punpcklbw unpack and interleave low bytes
hHgGfFeE:dDcCbBaA :=
xxxxxxxx:HGFEDCBA # xxxxxxxx:hgfedcba
_m128i _mm_unpacklo_epi8 (_m128i A, _m128i a)
punpckhbw unpack and interleave high bytes
hHgGfFeE:dDcCbBaA :=
HGFEDCBA:xxxxxxxx # hgfedcba:xxxxxxxx
_m128i _mm_unpackhi_epi8 (_m128i A, _m128i a)
punpcklwd unpack and interleave low words
dDcC:bBaA := xxxx:DCBA#xxxx:dcba
_m128i _mm_unpacklo_epi16 (_m128i A, _m128i a)
punpckhwd unpack and interleave high words
dDcC:bBaA := DCBA:xxxx#dcba:xxxx
_m128i _mm_unpackhi_epi16 (_m128i A, _m128i a)
punpckldq unpack and interleave low doublewords
bB:aA := xx:BA # xx:ba
_m128i _mm_unpacklo_epi32 (_m128i A, _m128i a)
punpckhdq unpack and interleave high doublewords
bB:aA := BA:xx # ba:xx
_m128i _mm_unpackhi_epi32 (_m128i A, _m128i a)
punpcklqdq unpack and interleave low quadwords
a:A := x:A # x:a
_m128i _mm_unpacklo_epi64 (_m128i A, _m128i a)
punpckhqdq unpack and interleave high quadwords
a:A := A:x # a:x
_m128i _mm_unpackhi_epi64 (_m128i A, _m128i a)
pshuflw packed shuffle low words _m128i _mm_shufflelo_epi16 (_m128i a, int imm)
pshufhw packed shuffle high words _m128i _mm_shufflehi_epi16 (_m128i a, int imm)
pshufd packed shuffle doublewords _m128i _mm_shuffle_epi32 (_m128i a, int imm)
load, store, moves return parameter
movdqa move aligned double quadword
xmm := *p
_m128i _mm_load_si128 (_m128i const *p)
movdqu move unaligned double quadword
xmm := *p
_m128i _mm_loadu_si128 (_m128i const *p)
movdqa move aligned double quadword
*p := xmm
void _mm_store_si128 (_m128i *p, _m128i a)
movdqu move unaligned double quadword
*p := xmm
void _mm_storeu_si128 (_m128i *p, _m128i a)
movq move quadword, xmm := gp64 _m128i _mm_cvtsi64_si128 (_int64 a)
movq move quadword, gp64 := xmm _int64 _mm_cvtsi128_si64 (_m128i a)
movd move double word or quadword
xmm := gp64
_m128i _mm_cvtsi64x_si128 (_int64 value)
movd move doubleword, xmm := gp32 _m128i _mm_cvtsi32_si128 (int a)
movd move doubleword, gp32 := xmm int _mm_cvtsi128_si32 (_m128i a)
pextrw extract packed word, gp16 := xmm[i] int _mm_extract_epi16 (_m128i a, int imm)
pinsrw packed insert word, xmm[i] := gp16 _m128i _mm_insert_epi16 (_m128i a, int b, int imm)
pmovmskb packed move mask byte,
gp32 := 16 sign-bits(xmm)
int _mm_movemask_epi8 (_m128i a)
cache support return parameter
prefetch void _mm_prefetch (char const* p, int i)