LCOV - code coverage report
Current view: top level - usr/lib/gcc/i586-linux-gnu/4.9/include - smmintrin.h (source / functions)
Test: PostgreSQL | Lines: 2 hit, 2 total, 100.0 % coverage
Date: 2017-09-29 15:12:54 | Functions: 0 hit, 0 total, - coverage
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* Copyright (C) 2007-2014 Free Software Foundation, Inc.
       2             : 
       3             :    This file is part of GCC.
       4             : 
       5             :    GCC is free software; you can redistribute it and/or modify
       6             :    it under the terms of the GNU General Public License as published by
       7             :    the Free Software Foundation; either version 3, or (at your option)
       8             :    any later version.
       9             : 
      10             :    GCC is distributed in the hope that it will be useful,
      11             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      12             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13             :    GNU General Public License for more details.
      14             : 
      15             :    Under Section 7 of GPL version 3, you are granted additional
      16             :    permissions described in the GCC Runtime Library Exception, version
      17             :    3.1, as published by the Free Software Foundation.
      18             : 
      19             :    You should have received a copy of the GNU General Public License and
      20             :    a copy of the GCC Runtime Library Exception along with this program;
      21             :    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      22             :    <http://www.gnu.org/licenses/>.  */
      23             : 
      24             : /* Implemented from the specification included in the Intel C++ Compiler
      25             :    User Guide and Reference, version 10.0.  */
      26             : 
      27             : #ifndef _SMMINTRIN_H_INCLUDED
      28             : #define _SMMINTRIN_H_INCLUDED
      29             : 
      30             : /* We need definitions from the SSSE3, SSE3, SSE2 and SSE header
      31             :    files.  */
      32             : #include <tmmintrin.h>
      33             : 
      34             : #ifndef __SSE4_1__
      35             : #pragma GCC push_options
      36             : #pragma GCC target("sse4.1")
      37             : #define __DISABLE_SSE4_1__
      38             : #endif /* __SSE4_1__ */
      39             : 
      40             : /* Rounding mode macros. */
      41             : #define _MM_FROUND_TO_NEAREST_INT   0x00
      42             : #define _MM_FROUND_TO_NEG_INF       0x01
      43             : #define _MM_FROUND_TO_POS_INF       0x02
      44             : #define _MM_FROUND_TO_ZERO      0x03
      45             : #define _MM_FROUND_CUR_DIRECTION    0x04
      46             : 
      47             : #define _MM_FROUND_RAISE_EXC        0x00
      48             : #define _MM_FROUND_NO_EXC       0x08
      49             : 
      50             : #define _MM_FROUND_NINT     \
      51             :   (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
      52             : #define _MM_FROUND_FLOOR    \
      53             :   (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
      54             : #define _MM_FROUND_CEIL     \
      55             :   (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
      56             : #define _MM_FROUND_TRUNC    \
      57             :   (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
      58             : #define _MM_FROUND_RINT     \
      59             :   (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
      60             : #define _MM_FROUND_NEARBYINT    \
      61             :   (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
      62             : 
      63             : /* Test Instruction */
      64             : /* Packed integer 128-bit bitwise comparison. Return 1 if
      65             :    (__V & __M) == 0.  */
      66             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      67             : _mm_testz_si128 (__m128i __M, __m128i __V)
      68             : {
      69             :   return __builtin_ia32_ptestz128 ((__v2di)__M, (__v2di)__V);
      70             : }
      71             : 
      72             : /* Packed integer 128-bit bitwise comparison. Return 1 if
      73             :    (__V & ~__M) == 0.  */
      74             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      75             : _mm_testc_si128 (__m128i __M, __m128i __V)
      76             : {
      77             :   return __builtin_ia32_ptestc128 ((__v2di)__M, (__v2di)__V);
      78             : }
      79             : 
      80             : /* Packed integer 128-bit bitwise comparison. Return 1 if
      81             :    (__V & __M) != 0 && (__V & ~__M) != 0.  */
      82             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      83             : _mm_testnzc_si128 (__m128i __M, __m128i __V)
      84             : {
      85             :   return __builtin_ia32_ptestnzc128 ((__v2di)__M, (__v2di)__V);
      86             : }
      87             : 
      88             : /* Macros for packed integer 128-bit comparison intrinsics.  */
      89             : #define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))
      90             : 
      91             : #define _mm_test_all_ones(V) \
      92             :   _mm_testc_si128 ((V), _mm_cmpeq_epi32 ((V), (V)))
      93             : 
      94             : #define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V))
      95             : 
      96             : /* Packed/scalar double precision floating point rounding.  */
      97             : 
      98             : #ifdef __OPTIMIZE__
      99             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     100             : _mm_round_pd (__m128d __V, const int __M)
     101             : {
     102             :   return (__m128d) __builtin_ia32_roundpd ((__v2df)__V, __M);
     103             : }
     104             : 
     105             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     106             : _mm_round_sd(__m128d __D, __m128d __V, const int __M)
     107             : {
     108             :   return (__m128d) __builtin_ia32_roundsd ((__v2df)__D,
     109             :                        (__v2df)__V,
     110             :                        __M);
     111             : }
     112             : #else
     113             : #define _mm_round_pd(V, M) \
     114             :   ((__m128d) __builtin_ia32_roundpd ((__v2df)(__m128d)(V), (int)(M)))
     115             : 
     116             : #define _mm_round_sd(D, V, M)                       \
     117             :   ((__m128d) __builtin_ia32_roundsd ((__v2df)(__m128d)(D),      \
     118             :                      (__v2df)(__m128d)(V), (int)(M)))
     119             : #endif
     120             : 
     121             : /* Packed/scalar single precision floating point rounding.  */
     122             : 
     123             : #ifdef __OPTIMIZE__
     124             : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     125             : _mm_round_ps (__m128 __V, const int __M)
     126             : {
     127             :   return (__m128) __builtin_ia32_roundps ((__v4sf)__V, __M);
     128             : }
     129             : 
     130             : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     131             : _mm_round_ss (__m128 __D, __m128 __V, const int __M)
     132             : {
     133             :   return (__m128) __builtin_ia32_roundss ((__v4sf)__D,
     134             :                       (__v4sf)__V,
     135             :                       __M);
     136             : }
     137             : #else
     138             : #define _mm_round_ps(V, M) \
     139             :   ((__m128) __builtin_ia32_roundps ((__v4sf)(__m128)(V), (int)(M)))
     140             : 
     141             : #define _mm_round_ss(D, V, M)                       \
     142             :   ((__m128) __builtin_ia32_roundss ((__v4sf)(__m128)(D),        \
     143             :                     (__v4sf)(__m128)(V), (int)(M)))
     144             : #endif
     145             : 
     146             : /* Macros for ceil/floor intrinsics.  */
     147             : #define _mm_ceil_pd(V)     _mm_round_pd ((V), _MM_FROUND_CEIL)
     148             : #define _mm_ceil_sd(D, V)  _mm_round_sd ((D), (V), _MM_FROUND_CEIL)
     149             : 
     150             : #define _mm_floor_pd(V)    _mm_round_pd((V), _MM_FROUND_FLOOR)
     151             : #define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR)
     152             : 
     153             : #define _mm_ceil_ps(V)     _mm_round_ps ((V), _MM_FROUND_CEIL)
     154             : #define _mm_ceil_ss(D, V)  _mm_round_ss ((D), (V), _MM_FROUND_CEIL)
     155             : 
     156             : #define _mm_floor_ps(V)    _mm_round_ps ((V), _MM_FROUND_FLOOR)
     157             : #define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR)
     158             : 
     159             : /* SSE4.1 */
     160             : 
     161             : /* Integer blend instructions - select data from 2 sources using
     162             :    constant/variable mask.  */
     163             : 
     164             : #ifdef __OPTIMIZE__
     165             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     166             : _mm_blend_epi16 (__m128i __X, __m128i __Y, const int __M)
     167             : {
     168             :   return (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__X,
     169             :                           (__v8hi)__Y,
     170             :                           __M);
     171             : }
     172             : #else
     173             : #define _mm_blend_epi16(X, Y, M)                    \
     174             :   ((__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(X),       \
     175             :                     (__v8hi)(__m128i)(Y), (int)(M)))
     176             : #endif
     177             : 
     178             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     179             : _mm_blendv_epi8 (__m128i __X, __m128i __Y, __m128i __M)
     180             : {
     181             :   return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__X,
     182             :                            (__v16qi)__Y,
     183             :                            (__v16qi)__M);
     184             : }
     185             : 
     186             : /* Single precision floating point blend instructions - select data
     187             :    from 2 sources using constant/variable mask.  */
     188             : 
     189             : #ifdef __OPTIMIZE__
     190             : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     191             : _mm_blend_ps (__m128 __X, __m128 __Y, const int __M)
     192             : {
     193             :   return (__m128) __builtin_ia32_blendps ((__v4sf)__X,
     194             :                       (__v4sf)__Y,
     195             :                       __M);
     196             : }
     197             : #else
     198             : #define _mm_blend_ps(X, Y, M)                       \
     199             :   ((__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(X),        \
     200             :                     (__v4sf)(__m128)(Y), (int)(M)))
     201             : #endif
     202             : 
     203             : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     204             : _mm_blendv_ps (__m128 __X, __m128 __Y, __m128 __M)
     205             : {
     206             :   return (__m128) __builtin_ia32_blendvps ((__v4sf)__X,
     207             :                        (__v4sf)__Y,
     208             :                        (__v4sf)__M);
     209             : }
     210             : 
     211             : /* Double precision floating point blend instructions - select data
     212             :    from 2 sources using constant/variable mask.  */
     213             : 
     214             : #ifdef __OPTIMIZE__
     215             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     216             : _mm_blend_pd (__m128d __X, __m128d __Y, const int __M)
     217             : {
     218             :   return (__m128d) __builtin_ia32_blendpd ((__v2df)__X,
     219             :                        (__v2df)__Y,
     220             :                        __M);
     221             : }
     222             : #else
     223             : #define _mm_blend_pd(X, Y, M)                       \
     224             :   ((__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(X),      \
     225             :                      (__v2df)(__m128d)(Y), (int)(M)))
     226             : #endif
     227             : 
     228             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     229             : _mm_blendv_pd (__m128d __X, __m128d __Y, __m128d __M)
     230             : {
     231             :   return (__m128d) __builtin_ia32_blendvpd ((__v2df)__X,
     232             :                         (__v2df)__Y,
     233             :                         (__v2df)__M);
     234             : }
     235             : 
     236             : /* Dot product instructions.  The mask M selects which element products
     237             :    are summed and which result elements receive that sum (the others are zeroed).  */
     238             : 
     239             : #ifdef __OPTIMIZE__
     240             : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     241             : _mm_dp_ps (__m128 __X, __m128 __Y, const int __M)
     242             : {
     243             :   return (__m128) __builtin_ia32_dpps ((__v4sf)__X,
     244             :                        (__v4sf)__Y,
     245             :                        __M);
     246             : }
     247             : 
     248             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     249             : _mm_dp_pd (__m128d __X, __m128d __Y, const int __M)
     250             : {
     251             :   return (__m128d) __builtin_ia32_dppd ((__v2df)__X,
     252             :                     (__v2df)__Y,
     253             :                     __M);
     254             : }
     255             : #else
     256             : #define _mm_dp_ps(X, Y, M)                      \
     257             :   ((__m128) __builtin_ia32_dpps ((__v4sf)(__m128)(X),           \
     258             :                  (__v4sf)(__m128)(Y), (int)(M)))
     259             : 
     260             : #define _mm_dp_pd(X, Y, M)                      \
     261             :   ((__m128d) __builtin_ia32_dppd ((__v2df)(__m128d)(X),         \
     262             :                   (__v2df)(__m128d)(Y), (int)(M)))
     263             : #endif
     264             : 
     265             : /* Packed 64-bit integer equality comparison.  Each 64-bit element of the
     266             :    result is all ones where __X and __Y are equal and zero otherwise.  */
     267             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     268             : _mm_cmpeq_epi64 (__m128i __X, __m128i __Y)
     269             : {
     270             :   return (__m128i) __builtin_ia32_pcmpeqq ((__v2di)__X, (__v2di)__Y);
     271             : }
     272             : 
     273             : /*  Min/max packed integer instructions.  */
     274             : 
     275             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     276             : _mm_min_epi8 (__m128i __X, __m128i __Y)
     277             : {
     278             :   return (__m128i) __builtin_ia32_pminsb128 ((__v16qi)__X, (__v16qi)__Y);
     279             : }
     280             : 
     281             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     282             : _mm_max_epi8 (__m128i __X, __m128i __Y)
     283             : {
     284             :   return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi)__X, (__v16qi)__Y);
     285             : }
     286             : 
     287             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     288             : _mm_min_epu16 (__m128i __X, __m128i __Y)
     289             : {
     290             :   return (__m128i) __builtin_ia32_pminuw128 ((__v8hi)__X, (__v8hi)__Y);
     291             : }
     292             : 
     293             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     294             : _mm_max_epu16 (__m128i __X, __m128i __Y)
     295             : {
     296             :   return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi)__X, (__v8hi)__Y);
     297             : }
     298             : 
     299             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     300             : _mm_min_epi32 (__m128i __X, __m128i __Y)
     301             : {
     302             :   return (__m128i) __builtin_ia32_pminsd128 ((__v4si)__X, (__v4si)__Y);
     303             : }
     304             : 
     305             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     306             : _mm_max_epi32 (__m128i __X, __m128i __Y)
     307             : {
     308             :   return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si)__X, (__v4si)__Y);
     309             : }
     310             : 
     311             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     312             : _mm_min_epu32 (__m128i __X, __m128i __Y)
     313             : {
     314             :   return (__m128i) __builtin_ia32_pminud128 ((__v4si)__X, (__v4si)__Y);
     315             : }
     316             : 
     317             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     318             : _mm_max_epu32 (__m128i __X, __m128i __Y)
     319             : {
     320             :   return (__m128i) __builtin_ia32_pmaxud128 ((__v4si)__X, (__v4si)__Y);
     321             : }
     322             : 
     323             : /* Packed 32-bit integer multiplication, keeping only the low 32 bits
     324             :    of each 64-bit product.  */
     325             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     326             : _mm_mullo_epi32 (__m128i __X, __m128i __Y)
     327             : {
     328             :   return (__m128i) __builtin_ia32_pmulld128 ((__v4si)__X, (__v4si)__Y);
     329             : }
     330             : 
     331             : /* Multiply the signed 32-bit integers in the low half of each 64-bit
     332             :    element of the two operands, producing two 64-bit results.  */
     333             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     334             : _mm_mul_epi32 (__m128i __X, __m128i __Y)
     335             : {
     336             :   return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__X, (__v4si)__Y);
     337             : }
     338             : 
     339             : /* Insert a single precision float into the packed single precision array
     340             :    element selected by index N.  Bits [7:6] of N define the S
     341             :    index, bits [5:4] define the D index, and bits [3:0] define the
     342             :    zeroing mask for D.  */
     343             : 
     344             : #ifdef __OPTIMIZE__
     345             : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     346             : _mm_insert_ps (__m128 __D, __m128 __S, const int __N)
     347             : {
     348             :   return (__m128) __builtin_ia32_insertps128 ((__v4sf)__D,
     349             :                           (__v4sf)__S,
     350             :                           __N);
     351             : }
     352             : #else
     353             : #define _mm_insert_ps(D, S, N)                      \
     354             :   ((__m128) __builtin_ia32_insertps128 ((__v4sf)(__m128)(D),        \
     355             :                     (__v4sf)(__m128)(S), (int)(N)))
     356             : #endif
     357             : 
     358             : /* Helper macro to create the N value for _mm_insert_ps.  */
     359             : #define _MM_MK_INSERTPS_NDX(S, D, M) (((S) << 6) | ((D) << 4) | (M))
     360             : 
     361             : /* Extract binary representation of single precision float from packed
     362             :    single precision array element of X selected by index N.  */
     363             : 
     364             : #ifdef __OPTIMIZE__
     365             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     366             : _mm_extract_ps (__m128 __X, const int __N)
     367             : {
     368             :   union { int i; float f; } __tmp;
     369             :   __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)__X, __N);
     370             :   return __tmp.i;
     371             : }
     372             : #else
     373             : #define _mm_extract_ps(X, N)                        \
     374             :   (__extension__                            \
     375             :    ({                                   \
     376             :      union { int i; float f; } __tmp;                   \
     377             :      __tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)(__m128)(X), (int)(N)); \
     378             :      __tmp.i;                               \
     379             :    }))
     380             : #endif
     381             : 
     382             : /* Extract the single precision float element of S selected by
     383             :    index N and assign it to D.  Unlike _mm_extract_ps, the value is
     384             :    assigned as a float, not reinterpreted as an integer bit pattern.  */
     385             : #define _MM_EXTRACT_FLOAT(D, S, N) \
     386             :   { (D) = __builtin_ia32_vec_ext_v4sf ((__v4sf)(S), (N)); }
     387             :   
     388             : /* Extract the single precision float element of X selected by N into
     389             :    the lowest element of an __m128, zeroing the other three elements.  */
     390             : #define _MM_PICK_OUT_PS(X, N)               \
     391             :   _mm_insert_ps (_mm_setzero_ps (), (X),        \
     392             :          _MM_MK_INSERTPS_NDX ((N), 0, 0x0e))
     393             : 
     394             : /* Insert integer, S, into packed integer array element of D
     395             :    selected by index N.  */
     396             : 
     397             : #ifdef __OPTIMIZE__
     398             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     399             : _mm_insert_epi8 (__m128i __D, int __S, const int __N)
     400             : {
     401             :   return (__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)__D,
     402             :                          __S, __N);
     403             : }
     404             : 
     405             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     406             : _mm_insert_epi32 (__m128i __D, int __S, const int __N)
     407             : {
     408             :   return (__m128i) __builtin_ia32_vec_set_v4si ((__v4si)__D,
     409             :                          __S, __N);
     410             : }
     411             : 
     412             : #ifdef __x86_64__
     413             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     414             : _mm_insert_epi64 (__m128i __D, long long __S, const int __N)
     415             : {
     416             :   return (__m128i) __builtin_ia32_vec_set_v2di ((__v2di)__D,
     417             :                          __S, __N);
     418             : }
     419             : #endif
     420             : #else
     421             : #define _mm_insert_epi8(D, S, N)                    \
     422             :   ((__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)(__m128i)(D),   \
     423             :                        (int)(S), (int)(N)))
     424             : 
     425             : #define _mm_insert_epi32(D, S, N)               \
     426             :   ((__m128i) __builtin_ia32_vec_set_v4si ((__v4si)(__m128i)(D), \
     427             :                       (int)(S), (int)(N)))
     428             : 
     429             : #ifdef __x86_64__
     430             : #define _mm_insert_epi64(D, S, N)                   \
     431             :   ((__m128i) __builtin_ia32_vec_set_v2di ((__v2di)(__m128i)(D),     \
     432             :                       (long long)(S), (int)(N)))
     433             : #endif
     434             : #endif
     435             : 
     436             : /* Extract integer from packed integer array element of X selected by
     437             :    index N.  */
     438             : 
     439             : #ifdef __OPTIMIZE__
     440             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     441             : _mm_extract_epi8 (__m128i __X, const int __N)
     442             : {
     443             :    return (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N);
     444             : }
     445             : 
     446             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     447             : _mm_extract_epi32 (__m128i __X, const int __N)
     448             : {
     449             :    return __builtin_ia32_vec_ext_v4si ((__v4si)__X, __N);
     450             : }
     451             : 
     452             : #ifdef __x86_64__
     453             : extern __inline long long  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     454             : _mm_extract_epi64 (__m128i __X, const int __N)
     455             : {
     456             :   return __builtin_ia32_vec_ext_v2di ((__v2di)__X, __N);
     457             : }
     458             : #endif
     459             : #else
     460             : #define _mm_extract_epi8(X, N) \
     461             :   ((int) (unsigned char) __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N)))
     462             : #define _mm_extract_epi32(X, N) \
     463             :   ((int) __builtin_ia32_vec_ext_v4si ((__v4si)(__m128i)(X), (int)(N)))
     464             : 
     465             : #ifdef __x86_64__
     466             : #define _mm_extract_epi64(X, N) \
     467             :   ((long long) __builtin_ia32_vec_ext_v2di ((__v2di)(__m128i)(X), (int)(N)))
     468             : #endif
     469             : #endif
     470             : 
     471             : /* Return horizontal packed word minimum and its index in bits [15:0]
     472             :    and bits [18:16] respectively.  */
     473             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     474             : _mm_minpos_epu16 (__m128i __X)
     475             : {
     476             :   return (__m128i) __builtin_ia32_phminposuw128 ((__v8hi)__X);
     477             : }
     478             : 
     479             : /* Packed integer sign-extension.  */
     480             : 
     481             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     482             : _mm_cvtepi8_epi32 (__m128i __X)
     483             : {
     484             :   return (__m128i) __builtin_ia32_pmovsxbd128 ((__v16qi)__X);
     485             : }
     486             : 
     487             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     488             : _mm_cvtepi16_epi32 (__m128i __X)
     489             : {
     490             :   return (__m128i) __builtin_ia32_pmovsxwd128 ((__v8hi)__X);
     491             : }
     492             : 
     493             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     494             : _mm_cvtepi8_epi64 (__m128i __X)
     495             : {
     496             :   return (__m128i) __builtin_ia32_pmovsxbq128 ((__v16qi)__X);
     497             : }
     498             : 
     499             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     500             : _mm_cvtepi32_epi64 (__m128i __X)
     501             : {
     502             :   return (__m128i) __builtin_ia32_pmovsxdq128 ((__v4si)__X);
     503             : }
     504             : 
     505             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     506             : _mm_cvtepi16_epi64 (__m128i __X)
     507             : {
     508             :   return (__m128i) __builtin_ia32_pmovsxwq128 ((__v8hi)__X);
     509             : }
     510             : 
     511             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     512             : _mm_cvtepi8_epi16 (__m128i __X)
     513             : {
     514             :   return (__m128i) __builtin_ia32_pmovsxbw128 ((__v16qi)__X);
     515             : }
     516             : 
     517             : /* Packed integer zero-extension. */
     518             : 
     519             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     520             : _mm_cvtepu8_epi32 (__m128i __X)
     521             : {
     522             :   return (__m128i) __builtin_ia32_pmovzxbd128 ((__v16qi)__X);
     523             : }
     524             : 
     525             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     526             : _mm_cvtepu16_epi32 (__m128i __X)
     527             : {
     528             :   return (__m128i) __builtin_ia32_pmovzxwd128 ((__v8hi)__X);
     529             : }
     530             : 
     531             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     532             : _mm_cvtepu8_epi64 (__m128i __X)
     533             : {
     534             :   return (__m128i) __builtin_ia32_pmovzxbq128 ((__v16qi)__X);
     535             : }
     536             : 
     537             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     538             : _mm_cvtepu32_epi64 (__m128i __X)
     539             : {
     540             :   return (__m128i) __builtin_ia32_pmovzxdq128 ((__v4si)__X);
     541             : }
     542             : 
     543             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     544             : _mm_cvtepu16_epi64 (__m128i __X)
     545             : {
     546             :   return (__m128i) __builtin_ia32_pmovzxwq128 ((__v8hi)__X);
     547             : }
     548             : 
     549             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     550             : _mm_cvtepu8_epi16 (__m128i __X) /* Zero-extending conversion; per the name, unsigned 8-bit -> 16-bit elements.  */
     551             : { /* NOTE(review): presumably only the low 8 bytes of __X feed the result -- confirm.  */
     552             :   return (__m128i) __builtin_ia32_pmovzxbw128 ((__v16qi)__X); /* __X is passed as __v16qi; the builtin result is cast back to __m128i.  */
     553             : }
     554             : 
     555             : /* Pack the 8 double words held in the 2 operands into the 8 words of
     556             :    the result, using unsigned saturation.  */
     557             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     558             : _mm_packus_epi32 (__m128i __X, __m128i __Y)
     559             : {
     560             :   return (__m128i) __builtin_ia32_packusdw128 ((__v4si)__X, (__v4si)__Y); /* Both operands are passed to the builtin as __v4si, in argument order.  */
     561             : }
     562             : 
     563             : /* Compute sums of absolute differences of 8-bit integers over adjacent
     564             :    groups of 4 bytes taken from the first 2 operands.  The starting offsets
     565             :    within the operands are selected by the 3rd (mask) operand.  */
     566             : 
     567             : #ifdef __OPTIMIZE__ /* Inline-function form when optimizing; macro form otherwise.  */
     568             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     569             : _mm_mpsadbw_epu8 (__m128i __X, __m128i __Y, const int __M)
     570             : {
     571             :   return (__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)__X,
     572             :                           (__v16qi)__Y, __M);
     573             : }
     574             : #else /* NOTE(review): the macro form presumably exists because the builtin needs M as a compile-time immediate -- confirm.  */
     575             : #define _mm_mpsadbw_epu8(X, Y, M)                   \
     576             :   ((__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)(__m128i)(X),      \
     577             :                     (__v16qi)(__m128i)(Y), (int)(M)))
     578             : #endif /* __OPTIMIZE__ */
     579             : 
     580             : /* Load double quadword using non-temporal aligned hint.  */
     581             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     582             : _mm_stream_load_si128 (__m128i *__X) /* NOTE(review): "aligned" suggests __X must be 16-byte aligned -- confirm.  */
     583             : {
     584             :   return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __X); /* The pointer is retyped to __v2di * for the builtin; the loaded value is cast back to __m128i.  */
     585             : }
     586             : 
     587             : #ifndef __SSE4_2__
     588             : #pragma GCC push_options
     589             : #pragma GCC target("sse4.2")
     590             : #define __DISABLE_SSE4_2__
     591             : #endif /* __SSE4_2__ */
     592             : 
     593             : /* These macros specify the source data format (values 0x00-0x03, i.e. bits 1:0).  */
     594             : #define _SIDD_UBYTE_OPS         0x00
     595             : #define _SIDD_UWORD_OPS         0x01
     596             : #define _SIDD_SBYTE_OPS         0x02
     597             : #define _SIDD_SWORD_OPS         0x03
     598             : 
     599             : /* These macros specify the comparison operation (multiples of 0x04, i.e. bits 3:2).  */
     600             : #define _SIDD_CMP_EQUAL_ANY     0x00
     601             : #define _SIDD_CMP_RANGES        0x04
     602             : #define _SIDD_CMP_EQUAL_EACH        0x08
     603             : #define _SIDD_CMP_EQUAL_ORDERED     0x0c
     604             : 
     605             : /* These macros specify the polarity (multiples of 0x10, i.e. bits 5:4).  */
     606             : #define _SIDD_POSITIVE_POLARITY     0x00
     607             : #define _SIDD_NEGATIVE_POLARITY     0x10
     608             : #define _SIDD_MASKED_POSITIVE_POLARITY  0x20
     609             : #define _SIDD_MASKED_NEGATIVE_POLARITY  0x30
     610             : 
     611             : /* These macros specify the output selection in _mm_cmpXstri () (bit 6, 0x40).  */
     612             : #define _SIDD_LEAST_SIGNIFICANT     0x00
     613             : #define _SIDD_MOST_SIGNIFICANT      0x40
     614             : 
     615             : /* These macros specify the output selection in _mm_cmpXstrm (); they reuse bit 6 (0x40), the same bit as the _mm_cmpXstri () selection above.  */
     616             : #define _SIDD_BIT_MASK          0x00
     617             : #define _SIDD_UNIT_MASK         0x40
     618             : 
     619             : /* Intrinsics for text/string processing.  Each one forwards its operands, viewed as __v16qi, plus the mode M to the matching pcmp{i,e}str{m,i}128 builtin.  */
     620             : 
     621             : #ifdef __OPTIMIZE__ /* Inline-function forms when optimizing; macro forms otherwise.  */
     622             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     623             : _mm_cmpistrm (__m128i __X, __m128i __Y, const int __M) /* "istr" form takes no length arguments; "m" form returns an __m128i result.  */
     624             : {
     625             :   return (__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)__X,
     626             :                         (__v16qi)__Y,
     627             :                         __M);
     628             : }
     629             : 
     630             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     631             : _mm_cmpistri (__m128i __X, __m128i __Y, const int __M) /* Same operands as _mm_cmpistrm; "i" form returns an int result.  */
     632             : {
     633             :   return __builtin_ia32_pcmpistri128 ((__v16qi)__X,
     634             :                       (__v16qi)__Y,
     635             :                       __M);
     636             : }
     637             : 
     638             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     639             : _mm_cmpestrm (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) /* "estr" form: __LX/__LY are presumably the explicit lengths of __X/__Y -- confirm.  */
     640             : {
     641             :   return (__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)__X, __LX,
     642             :                         (__v16qi)__Y, __LY,
     643             :                         __M);
     644             : }
     645             : 
     646             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     647             : _mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) /* Same operands as _mm_cmpestrm; returns an int result.  */
     648             : {
     649             :   return __builtin_ia32_pcmpestri128 ((__v16qi)__X, __LX,
     650             :                       (__v16qi)__Y, __LY,
     651             :                       __M);
     652             : }
     653             : #else /* Macro equivalents of the four intrinsics above, used when __OPTIMIZE__ is not defined.  */
     654             : #define _mm_cmpistrm(X, Y, M)                       \
     655             :   ((__m128i) __builtin_ia32_pcmpistrm128 ((__v16qi)(__m128i)(X),    \
     656             :                       (__v16qi)(__m128i)(Y), (int)(M)))
     657             : #define _mm_cmpistri(X, Y, M)                       \
     658             :   ((int) __builtin_ia32_pcmpistri128 ((__v16qi)(__m128i)(X),        \
     659             :                       (__v16qi)(__m128i)(Y), (int)(M)))
     660             : 
     661             : #define _mm_cmpestrm(X, LX, Y, LY, M)                   \
     662             :   ((__m128i) __builtin_ia32_pcmpestrm128 ((__v16qi)(__m128i)(X),    \
     663             :                       (int)(LX), (__v16qi)(__m128i)(Y), \
     664             :                       (int)(LY), (int)(M)))
     665             : #define _mm_cmpestri(X, LX, Y, LY, M)                   \
     666             :   ((int) __builtin_ia32_pcmpestri128 ((__v16qi)(__m128i)(X), (int)(LX), \
     667             :                       (__v16qi)(__m128i)(Y), (int)(LY), \
     668             :                       (int)(M)))
     669             : #endif /* __OPTIMIZE__ */
     670             : 
     671             : /* Intrinsics for text/string processing and reading values of
     672             :    EFlags.  Each returns an int; the a/c/o/s/z suffix picks the builtin that reports one flag condition.  */
     673             : 
     674             : #ifdef __OPTIMIZE__ /* Inline-function forms when optimizing; macro forms otherwise.  */
     675             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     676             : _mm_cmpistra (__m128i __X, __m128i __Y, const int __M) /* NOTE(review): "a" presumably means CF and ZF both clear ("above") -- confirm.  */
     677             : {
     678             :   return __builtin_ia32_pcmpistria128 ((__v16qi)__X,
     679             :                        (__v16qi)__Y,
     680             :                        __M);
     681             : }
     682             : 
     683             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     684             : _mm_cmpistrc (__m128i __X, __m128i __Y, const int __M) /* NOTE(review): "c" presumably reports the carry flag -- confirm.  */
     685             : {
     686             :   return __builtin_ia32_pcmpistric128 ((__v16qi)__X,
     687             :                        (__v16qi)__Y,
     688             :                        __M);
     689             : }
     690             : 
     691             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     692             : _mm_cmpistro (__m128i __X, __m128i __Y, const int __M) /* NOTE(review): "o" presumably reports the overflow flag -- confirm.  */
     693             : {
     694             :   return __builtin_ia32_pcmpistrio128 ((__v16qi)__X,
     695             :                        (__v16qi)__Y,
     696             :                        __M);
     697             : }
     698             : 
     699             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     700             : _mm_cmpistrs (__m128i __X, __m128i __Y, const int __M) /* NOTE(review): "s" presumably reports the sign flag -- confirm.  */
     701             : {
     702             :   return __builtin_ia32_pcmpistris128 ((__v16qi)__X,
     703             :                        (__v16qi)__Y,
     704             :                        __M);
     705             : }
     706             : 
     707             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     708             : _mm_cmpistrz (__m128i __X, __m128i __Y, const int __M) /* NOTE(review): "z" presumably reports the zero flag -- confirm.  */
     709             : {
     710             :   return __builtin_ia32_pcmpistriz128 ((__v16qi)__X,
     711             :                        (__v16qi)__Y,
     712             :                        __M);
     713             : }
     714             : 
     715             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     716             : _mm_cmpestra (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) /* "estr" counterpart of _mm_cmpistra; __LX/__LY are presumably explicit lengths -- confirm.  */
     717             : {
     718             :   return __builtin_ia32_pcmpestria128 ((__v16qi)__X, __LX,
     719             :                        (__v16qi)__Y, __LY,
     720             :                        __M);
     721             : }
     722             : 
     723             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     724             : _mm_cmpestrc (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) /* "estr" counterpart of _mm_cmpistrc.  */
     725             : {
     726             :   return __builtin_ia32_pcmpestric128 ((__v16qi)__X, __LX,
     727             :                        (__v16qi)__Y, __LY,
     728             :                        __M);
     729             : }
     730             : 
     731             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     732             : _mm_cmpestro (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) /* "estr" counterpart of _mm_cmpistro.  */
     733             : {
     734             :   return __builtin_ia32_pcmpestrio128 ((__v16qi)__X, __LX,
     735             :                        (__v16qi)__Y, __LY,
     736             :                        __M);
     737             : }
     738             : 
     739             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     740             : _mm_cmpestrs (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) /* "estr" counterpart of _mm_cmpistrs.  */
     741             : {
     742             :   return __builtin_ia32_pcmpestris128 ((__v16qi)__X, __LX,
     743             :                        (__v16qi)__Y, __LY,
     744             :                        __M);
     745             : }
     746             : 
     747             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     748             : _mm_cmpestrz (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) /* "estr" counterpart of _mm_cmpistrz.  */
     749             : {
     750             :   return __builtin_ia32_pcmpestriz128 ((__v16qi)__X, __LX,
     751             :                        (__v16qi)__Y, __LY,
     752             :                        __M);
     753             : }
     754             : #else /* Macro equivalents of the ten intrinsics above, used when __OPTIMIZE__ is not defined.  */
     755             : #define _mm_cmpistra(X, Y, M)                       \
     756             :   ((int) __builtin_ia32_pcmpistria128 ((__v16qi)(__m128i)(X),       \
     757             :                        (__v16qi)(__m128i)(Y), (int)(M)))
     758             : #define _mm_cmpistrc(X, Y, M)                       \
     759             :   ((int) __builtin_ia32_pcmpistric128 ((__v16qi)(__m128i)(X),       \
     760             :                        (__v16qi)(__m128i)(Y), (int)(M)))
     761             : #define _mm_cmpistro(X, Y, M)                       \
     762             :   ((int) __builtin_ia32_pcmpistrio128 ((__v16qi)(__m128i)(X),       \
     763             :                        (__v16qi)(__m128i)(Y), (int)(M)))
     764             : #define _mm_cmpistrs(X, Y, M)                       \
     765             :   ((int) __builtin_ia32_pcmpistris128 ((__v16qi)(__m128i)(X),       \
     766             :                        (__v16qi)(__m128i)(Y), (int)(M)))
     767             : #define _mm_cmpistrz(X, Y, M)                       \
     768             :   ((int) __builtin_ia32_pcmpistriz128 ((__v16qi)(__m128i)(X),       \
     769             :                        (__v16qi)(__m128i)(Y), (int)(M)))
     770             : 
     771             : #define _mm_cmpestra(X, LX, Y, LY, M)                   \
     772             :   ((int) __builtin_ia32_pcmpestria128 ((__v16qi)(__m128i)(X), (int)(LX), \
     773             :                        (__v16qi)(__m128i)(Y), (int)(LY), \
     774             :                        (int)(M)))
     775             : #define _mm_cmpestrc(X, LX, Y, LY, M)                   \
     776             :   ((int) __builtin_ia32_pcmpestric128 ((__v16qi)(__m128i)(X), (int)(LX), \
     777             :                        (__v16qi)(__m128i)(Y), (int)(LY), \
     778             :                        (int)(M)))
     779             : #define _mm_cmpestro(X, LX, Y, LY, M)                   \
     780             :   ((int) __builtin_ia32_pcmpestrio128 ((__v16qi)(__m128i)(X), (int)(LX), \
     781             :                        (__v16qi)(__m128i)(Y), (int)(LY), \
     782             :                        (int)(M)))
     783             : #define _mm_cmpestrs(X, LX, Y, LY, M)                   \
     784             :   ((int) __builtin_ia32_pcmpestris128 ((__v16qi)(__m128i)(X), (int)(LX), \
     785             :                        (__v16qi)(__m128i)(Y), (int)(LY), \
     786             :                        (int)(M)))
     787             : #define _mm_cmpestrz(X, LX, Y, LY, M)                   \
     788             :   ((int) __builtin_ia32_pcmpestriz128 ((__v16qi)(__m128i)(X), (int)(LX), \
     789             :                        (__v16qi)(__m128i)(Y), (int)(LY), \
     790             :                        (int)(M)))
     791             : #endif /* __OPTIMIZE__ */
     792             : 
     793             : /* Packed integer 64-bit comparison, zeroing or filling with ones
     794             :    corresponding parts of result.  */
     795             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     796             : _mm_cmpgt_epi64 (__m128i __X, __m128i __Y) /* Per the name, a "greater than" test of __X against __Y; NOTE(review): presumably signed -- confirm.  */
     797             : {
     798             :   return (__m128i) __builtin_ia32_pcmpgtq ((__v2di)__X, (__v2di)__Y); /* Both operands are passed to the builtin as __v2di.  */
     799             : }
     800             : 
     801             : #ifdef __DISABLE_SSE4_2__
     802             : #undef __DISABLE_SSE4_2__
     803             : #pragma GCC pop_options
     804             : #endif /* __DISABLE_SSE4_2__ */
     805             : 
     806             : #ifdef __DISABLE_SSE4_1__
     807             : #undef __DISABLE_SSE4_1__
     808             : #pragma GCC pop_options
     809             : #endif /* __DISABLE_SSE4_1__ */
     810             : 
     811             : #include <popcntintrin.h>
     812             : 
     813             : #ifndef __SSE4_1__
     814             : #pragma GCC push_options
     815             : #pragma GCC target("sse4.1")
     816             : #define __DISABLE_SSE4_1__
     817             : #endif /* __SSE4_1__ */
     818             : 
     819             : #ifndef __SSE4_2__
     820             : #pragma GCC push_options
     821             : #pragma GCC target("sse4.2")
     822             : #define __DISABLE_SSE4_2__
     823             : #endif /* __SSE4_2__ */
     824             : 
     825             : /* Accumulate CRC32 (polynomial 0x11EDC6F41) value.  __C is the running CRC and __V the next 8-bit input; the updated CRC is returned.  */
     826             : extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     827             : _mm_crc32_u8 (unsigned int __C, unsigned char __V)
     828             : {
     829     6965392 :   return __builtin_ia32_crc32qi (__C, __V);
     830             : }
     831             : 
     832             : extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     833             : _mm_crc32_u16 (unsigned int __C, unsigned short __V) /* CRC32 accumulation step over a 16-bit input: __C is the running value, __V the new data.  */
     834             : {
     835             :   return __builtin_ia32_crc32hi (__C, __V); /* Forwards both arguments unchanged to the crc32hi builtin.  */
     836             : }
     837             : 
     838             : extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     839             : _mm_crc32_u32 (unsigned int __C, unsigned int __V) /* CRC32 accumulation step over a 32-bit input: __C is the running value, __V the new data.  */
     840             : {
     841    42111892 :   return __builtin_ia32_crc32si (__C, __V);
     842             : }
     843             : 
     844             : #ifdef __x86_64__ /* The 64-bit variant is only defined when targeting x86-64.  */
     845             : extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     846             : _mm_crc32_u64 (unsigned long long __C, unsigned long long __V) /* CRC32 accumulation step over a 64-bit input; unlike the narrower variants, __C and the result are unsigned long long.  */
     847             : {
     848             :   return __builtin_ia32_crc32di (__C, __V); /* Forwards both arguments unchanged to the crc32di builtin.  */
     849             : }
     850             : #endif /* __x86_64__ */
     851             : 
     852             : #ifdef __DISABLE_SSE4_2__
     853             : #undef __DISABLE_SSE4_2__
     854             : #pragma GCC pop_options
     855             : #endif /* __DISABLE_SSE4_2__ */
     856             : 
     857             : #ifdef __DISABLE_SSE4_1__
     858             : #undef __DISABLE_SSE4_1__
     859             : #pragma GCC pop_options
     860             : #endif /* __DISABLE_SSE4_1__ */
     861             : 
     862             : #endif /* _SMMINTRIN_H_INCLUDED */

Generated by: LCOV version 1.11