1    	/* Copyright (C) 2014-2019 Free Software Foundation, Inc.
2    	
3    	   This file is part of GCC.
4    	
5    	   GCC is free software; you can redistribute it and/or modify
6    	   it under the terms of the GNU General Public License as published by
7    	   the Free Software Foundation; either version 3, or (at your option)
8    	   any later version.
9    	
10   	   GCC is distributed in the hope that it will be useful,
11   	   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   	   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   	   GNU General Public License for more details.
14   	
15   	   Under Section 7 of GPL version 3, you are granted additional
16   	   permissions described in the GCC Runtime Library Exception, version
17   	   3.1, as published by the Free Software Foundation.
18   	
19   	   You should have received a copy of the GNU General Public License and
20   	   a copy of the GCC Runtime Library Exception along with this program;
21   	   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22   	   <http://www.gnu.org/licenses/>.  */
23   	
24   	#ifndef _IMMINTRIN_H_INCLUDED
25   	#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
26   	#endif
27   	
28   	#ifndef _AVX512VLINTRIN_H_INCLUDED
29   	#define _AVX512VLINTRIN_H_INCLUDED
30   	
31   	#ifndef __AVX512VL__
32   	#pragma GCC push_options
33   	#pragma GCC target("avx512vl")
34   	#define __DISABLE_AVX512VL__
35   	#endif /* __AVX512VL__ */
36   	
/* Internal data types for implementing the intrinsics.  */
/* __mmask32 is an alias for unsigned int.  The wrappers visible in this
   part of the header take their masks as __mmask8, which is declared
   elsewhere.  NOTE(review): the name suggests a 32-bit wide mask; that
   relies on unsigned int being 32 bits on the target -- confirm.  */
typedef unsigned int __mmask32;
39   	
40   	extern __inline __m256d
41   	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
42   	_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
43   	{
44   	  return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
45   							  (__v4df) __W,
46   							  (__mmask8) __U);
47   	}
48   	
49   	extern __inline __m256d
50   	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
51   	_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
52   	{
53   	  return (__m256d) __builtin_ia32_movapd256_mask ((__v4df) __A,
54   							  (__v4df)
55   							  _mm256_setzero_pd (),
56   							  (__mmask8) __U);
57   	}
58   	
59   	extern __inline __m128d
60   	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
61   	_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
62   	{
63   	  return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
64   							  (__v2df) __W,
65   							  (__mmask8) __U);
66   	}
67   	
68   	extern __inline __m128d
69   	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
70   	_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
71   	{
72   	  return (__m128d) __builtin_ia32_movapd128_mask ((__v2df) __A,
73   							  (__v2df)
74   							  _mm_setzero_pd (),
75   							  (__mmask8) __U);
76   	}
77   	
78   	extern __inline __m256d
79   	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
80   	_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
81   	{
82   	  return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
83   							   (__v4df) __W,
84   							   (__mmask8) __U);
85   	}
86   	
87   	extern __inline __m256d
88   	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
89   	_mm256_maskz_load_pd (__mmask8 __U, void const *__P)
90   	{
91   	  return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
92   							   (__v4df)
93   							   _mm256_setzero_pd (),
94   							   (__mmask8) __U);
95   	}
96   	
97   	extern __inline __m128d
98   	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
99   	_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
100  	{
101  	  return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
102  							   (__v2df) __W,
103  							   (__mmask8) __U);
104  	}
105  	
106  	extern __inline __m128d
107  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
108  	_mm_maskz_load_pd (__mmask8 __U, void const *__P)
109  	{
110  	  return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
111  							   (__v2df)
112  							   _mm_setzero_pd (),
113  							   (__mmask8) __U);
114  	}
115  	
116  	extern __inline void
117  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
118  	_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
119  	{
120  	  __builtin_ia32_storeapd256_mask ((__v4df *) __P,
121  					   (__v4df) __A,
122  					   (__mmask8) __U);
123  	}
124  	
125  	extern __inline void
126  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
127  	_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
128  	{
129  	  __builtin_ia32_storeapd128_mask ((__v2df *) __P,
130  					   (__v2df) __A,
131  					   (__mmask8) __U);
132  	}
133  	
134  	extern __inline __m256
135  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136  	_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
137  	{
138  	  return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
139  							 (__v8sf) __W,
140  							 (__mmask8) __U);
141  	}
142  	
143  	extern __inline __m256
144  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
145  	_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
146  	{
147  	  return (__m256) __builtin_ia32_movaps256_mask ((__v8sf) __A,
148  							 (__v8sf)
149  							 _mm256_setzero_ps (),
150  							 (__mmask8) __U);
151  	}
152  	
153  	extern __inline __m128
154  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
155  	_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
156  	{
157  	  return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
158  							 (__v4sf) __W,
159  							 (__mmask8) __U);
160  	}
161  	
162  	extern __inline __m128
163  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
164  	_mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
165  	{
166  	  return (__m128) __builtin_ia32_movaps128_mask ((__v4sf) __A,
167  							 (__v4sf)
168  							 _mm_setzero_ps (),
169  							 (__mmask8) __U);
170  	}
171  	
172  	extern __inline __m256
173  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
174  	_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
175  	{
176  	  return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
177  							  (__v8sf) __W,
178  							  (__mmask8) __U);
179  	}
180  	
181  	extern __inline __m256
182  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
183  	_mm256_maskz_load_ps (__mmask8 __U, void const *__P)
184  	{
185  	  return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
186  							  (__v8sf)
187  							  _mm256_setzero_ps (),
188  							  (__mmask8) __U);
189  	}
190  	
191  	extern __inline __m128
192  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
193  	_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
194  	{
195  	  return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
196  							  (__v4sf) __W,
197  							  (__mmask8) __U);
198  	}
199  	
200  	extern __inline __m128
201  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
202  	_mm_maskz_load_ps (__mmask8 __U, void const *__P)
203  	{
204  	  return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
205  							  (__v4sf)
206  							  _mm_setzero_ps (),
207  							  (__mmask8) __U);
208  	}
209  	
210  	extern __inline void
211  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
212  	_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
213  	{
214  	  __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
215  					   (__v8sf) __A,
216  					   (__mmask8) __U);
217  	}
218  	
219  	extern __inline void
220  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
221  	_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
222  	{
223  	  __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
224  					   (__v4sf) __A,
225  					   (__mmask8) __U);
226  	}
227  	
228  	extern __inline __m256i
229  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
230  	_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
231  	{
232  	  return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
233  							     (__v4di) __W,
234  							     (__mmask8) __U);
235  	}
236  	
237  	extern __inline __m256i
238  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
239  	_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
240  	{
241  	  return (__m256i) __builtin_ia32_movdqa64_256_mask ((__v4di) __A,
242  							     (__v4di)
243  							     _mm256_setzero_si256 (),
244  							     (__mmask8) __U);
245  	}
246  	
247  	extern __inline __m128i
248  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
249  	_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
250  	{
251  	  return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
252  							     (__v2di) __W,
253  							     (__mmask8) __U);
254  	}
255  	
256  	extern __inline __m128i
257  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
258  	_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
259  	{
260  	  return (__m128i) __builtin_ia32_movdqa64_128_mask ((__v2di) __A,
261  							     (__v2di)
262  							     _mm_setzero_si128 (),
263  							     (__mmask8) __U);
264  	}
265  	
266  	extern __inline __m256i
267  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
268  	_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
269  	{
270  	  return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
271  								(__v4di) __W,
272  								(__mmask8)
273  								__U);
274  	}
275  	
276  	extern __inline __m256i
277  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
278  	_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
279  	{
280  	  return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
281  								(__v4di)
282  								_mm256_setzero_si256 (),
283  								(__mmask8)
284  								__U);
285  	}
286  	
287  	extern __inline __m128i
288  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
289  	_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
290  	{
291  	  return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
292  								(__v2di) __W,
293  								(__mmask8)
294  								__U);
295  	}
296  	
297  	extern __inline __m128i
298  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
299  	_mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
300  	{
301  	  return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
302  								(__v2di)
303  								_mm_setzero_si128 (),
304  								(__mmask8)
305  								__U);
306  	}
307  	
308  	extern __inline void
309  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
310  	_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
311  	{
312  	  __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
313  						(__v4di) __A,
314  						(__mmask8) __U);
315  	}
316  	
317  	extern __inline void
318  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
319  	_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
320  	{
321  	  __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
322  						(__v2di) __A,
323  						(__mmask8) __U);
324  	}
325  	
326  	extern __inline __m256i
327  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
328  	_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
329  	{
330  	  return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
331  							     (__v8si) __W,
332  							     (__mmask8) __U);
333  	}
334  	
335  	extern __inline __m256i
336  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
337  	_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
338  	{
339  	  return (__m256i) __builtin_ia32_movdqa32_256_mask ((__v8si) __A,
340  							     (__v8si)
341  							     _mm256_setzero_si256 (),
342  							     (__mmask8) __U);
343  	}
344  	
345  	extern __inline __m128i
346  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
347  	_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
348  	{
349  	  return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
350  							     (__v4si) __W,
351  							     (__mmask8) __U);
352  	}
353  	
354  	extern __inline __m128i
355  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
356  	_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
357  	{
358  	  return (__m128i) __builtin_ia32_movdqa32_128_mask ((__v4si) __A,
359  							     (__v4si)
360  							     _mm_setzero_si128 (),
361  							     (__mmask8) __U);
362  	}
363  	
364  	extern __inline __m256i
365  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
366  	_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
367  	{
368  	  return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
369  								(__v8si) __W,
370  								(__mmask8)
371  								__U);
372  	}
373  	
374  	extern __inline __m256i
375  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
376  	_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
377  	{
378  	  return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
379  								(__v8si)
380  								_mm256_setzero_si256 (),
381  								(__mmask8)
382  								__U);
383  	}
384  	
385  	extern __inline __m128i
386  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
387  	_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
388  	{
389  	  return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
390  								(__v4si) __W,
391  								(__mmask8)
392  								__U);
393  	}
394  	
395  	extern __inline __m128i
396  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
397  	_mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
398  	{
399  	  return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
400  								(__v4si)
401  								_mm_setzero_si128 (),
402  								(__mmask8)
403  								__U);
404  	}
405  	
406  	extern __inline void
407  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
408  	_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
409  	{
410  	  __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
411  						(__v8si) __A,
412  						(__mmask8) __U);
413  	}
414  	
415  	extern __inline void
416  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
417  	_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
418  	{
419  	  __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
420  						(__v4si) __A,
421  						(__mmask8) __U);
422  	}
423  	
424  	extern __inline __m128d
425  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
426  	_mm_mask_add_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
427  	{
428  	  return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
429  							 (__v2df) __B,
430  							 (__v2df) __W,
431  							 (__mmask8) __U);
432  	}
433  	
434  	extern __inline __m128d
435  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
436  	_mm_maskz_add_pd (__mmask8 __U, __m128d __A, __m128d __B)
437  	{
438  	  return (__m128d) __builtin_ia32_addpd128_mask ((__v2df) __A,
439  							 (__v2df) __B,
440  							 (__v2df)
441  							 _mm_setzero_pd (),
442  							 (__mmask8) __U);
443  	}
444  	
445  	extern __inline __m256d
446  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
447  	_mm256_mask_add_pd (__m256d __W, __mmask8 __U, __m256d __A,
448  			    __m256d __B)
449  	{
450  	  return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
451  							 (__v4df) __B,
452  							 (__v4df) __W,
453  							 (__mmask8) __U);
454  	}
455  	
456  	extern __inline __m256d
457  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
458  	_mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
459  	{
460  	  return (__m256d) __builtin_ia32_addpd256_mask ((__v4df) __A,
461  							 (__v4df) __B,
462  							 (__v4df)
463  							 _mm256_setzero_pd (),
464  							 (__mmask8) __U);
465  	}
466  	
467  	extern __inline __m128
468  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
469  	_mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
470  	{
471  	  return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
472  							(__v4sf) __B,
473  							(__v4sf) __W,
474  							(__mmask8) __U);
475  	}
476  	
477  	extern __inline __m128
478  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
479  	_mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B)
480  	{
481  	  return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
482  							(__v4sf) __B,
483  							(__v4sf)
484  							_mm_setzero_ps (),
485  							(__mmask8) __U);
486  	}
487  	
488  	extern __inline __m256
489  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
490  	_mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
491  	{
492  	  return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
493  							(__v8sf) __B,
494  							(__v8sf) __W,
495  							(__mmask8) __U);
496  	}
497  	
498  	extern __inline __m256
499  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
500  	_mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B)
501  	{
502  	  return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
503  							(__v8sf) __B,
504  							(__v8sf)
505  							_mm256_setzero_ps (),
506  							(__mmask8) __U);
507  	}
508  	
509  	extern __inline __m128d
510  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
511  	_mm_mask_sub_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
512  	{
513  	  return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
514  							 (__v2df) __B,
515  							 (__v2df) __W,
516  							 (__mmask8) __U);
517  	}
518  	
519  	extern __inline __m128d
520  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
521  	_mm_maskz_sub_pd (__mmask8 __U, __m128d __A, __m128d __B)
522  	{
523  	  return (__m128d) __builtin_ia32_subpd128_mask ((__v2df) __A,
524  							 (__v2df) __B,
525  							 (__v2df)
526  							 _mm_setzero_pd (),
527  							 (__mmask8) __U);
528  	}
529  	
530  	extern __inline __m256d
531  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
532  	_mm256_mask_sub_pd (__m256d __W, __mmask8 __U, __m256d __A,
533  			    __m256d __B)
534  	{
535  	  return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
536  							 (__v4df) __B,
537  							 (__v4df) __W,
538  							 (__mmask8) __U);
539  	}
540  	
541  	extern __inline __m256d
542  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
543  	_mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
544  	{
545  	  return (__m256d) __builtin_ia32_subpd256_mask ((__v4df) __A,
546  							 (__v4df) __B,
547  							 (__v4df)
548  							 _mm256_setzero_pd (),
549  							 (__mmask8) __U);
550  	}
551  	
552  	extern __inline __m128
553  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
554  	_mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
555  	{
556  	  return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
557  							(__v4sf) __B,
558  							(__v4sf) __W,
559  							(__mmask8) __U);
560  	}
561  	
562  	extern __inline __m128
563  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
564  	_mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B)
565  	{
566  	  return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
567  							(__v4sf) __B,
568  							(__v4sf)
569  							_mm_setzero_ps (),
570  							(__mmask8) __U);
571  	}
572  	
573  	extern __inline __m256
574  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
575  	_mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
576  	{
577  	  return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
578  							(__v8sf) __B,
579  							(__v8sf) __W,
580  							(__mmask8) __U);
581  	}
582  	
583  	extern __inline __m256
584  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
585  	_mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B)
586  	{
587  	  return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
588  							(__v8sf) __B,
589  							(__v8sf)
590  							_mm256_setzero_ps (),
591  							(__mmask8) __U);
592  	}
593  	
594  	extern __inline void
595  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
596  	_mm256_store_epi64 (void *__P, __m256i __A)
597  	{
598  	  *(__m256i *) __P = __A;
599  	}
600  	
601  	extern __inline void
602  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
603  	_mm_store_epi64 (void *__P, __m128i __A)
604  	{
605  	  *(__m128i *) __P = __A;
606  	}
607  	
608  	extern __inline __m256d
609  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
610  	_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
611  	{
612  	  return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
613  							   (__v4df) __W,
614  							   (__mmask8) __U);
615  	}
616  	
617  	extern __inline __m256d
618  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
619  	_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
620  	{
621  	  return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
622  							   (__v4df)
623  							   _mm256_setzero_pd (),
624  							   (__mmask8) __U);
625  	}
626  	
627  	extern __inline __m128d
628  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
629  	_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
630  	{
631  	  return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
632  							   (__v2df) __W,
633  							   (__mmask8) __U);
634  	}
635  	
636  	extern __inline __m128d
637  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
638  	_mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
639  	{
640  	  return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
641  							   (__v2df)
642  							   _mm_setzero_pd (),
643  							   (__mmask8) __U);
644  	}
645  	
646  	extern __inline void
647  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
648  	_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
649  	{
650  	  __builtin_ia32_storeupd256_mask ((double *) __P,
651  					   (__v4df) __A,
652  					   (__mmask8) __U);
653  	}
654  	
655  	extern __inline void
656  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
657  	_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
658  	{
659  	  __builtin_ia32_storeupd128_mask ((double *) __P,
660  					   (__v2df) __A,
661  					   (__mmask8) __U);
662  	}
663  	
664  	extern __inline __m256
665  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
666  	_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
667  	{
668  	  return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
669  							  (__v8sf) __W,
670  							  (__mmask8) __U);
671  	}
672  	
673  	extern __inline __m256
674  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
675  	_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
676  	{
677  	  return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
678  							  (__v8sf)
679  							  _mm256_setzero_ps (),
680  							  (__mmask8) __U);
681  	}
682  	
683  	extern __inline __m128
684  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
685  	_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
686  	{
687  	  return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
688  							  (__v4sf) __W,
689  							  (__mmask8) __U);
690  	}
691  	
692  	extern __inline __m128
693  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
694  	_mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
695  	{
696  	  return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
697  							  (__v4sf)
698  							  _mm_setzero_ps (),
699  							  (__mmask8) __U);
700  	}
701  	
702  	extern __inline void
703  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
704  	_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
705  	{
706  	  __builtin_ia32_storeups256_mask ((float *) __P,
707  					   (__v8sf) __A,
708  					   (__mmask8) __U);
709  	}
710  	
711  	extern __inline void
712  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
713  	_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
714  	{
715  	  __builtin_ia32_storeups128_mask ((float *) __P,
716  					   (__v4sf) __A,
717  					   (__mmask8) __U);
718  	}
719  	
720  	extern __inline __m256i
721  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
722  	_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
723  	{
724  	  return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
725  							     (__v4di) __W,
726  							     (__mmask8) __U);
727  	}
728  	
729  	extern __inline __m256i
730  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
731  	_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
732  	{
733  	  return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
734  							     (__v4di)
735  							     _mm256_setzero_si256 (),
736  							     (__mmask8) __U);
737  	}
738  	
739  	extern __inline __m128i
740  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
741  	_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
742  	{
743  	  return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
744  							     (__v2di) __W,
745  							     (__mmask8) __U);
746  	}
747  	
748  	extern __inline __m128i
749  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
750  	_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
751  	{
752  	  return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
753  							     (__v2di)
754  							     _mm_setzero_si128 (),
755  							     (__mmask8) __U);
756  	}
757  	
758  	extern __inline void
759  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
760  	_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
761  	{
762  	  __builtin_ia32_storedqudi256_mask ((long long *) __P,
763  					     (__v4di) __A,
764  					     (__mmask8) __U);
765  	}
766  	
767  	extern __inline void
768  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
769  	_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
770  	{
771  	  __builtin_ia32_storedqudi128_mask ((long long *) __P,
772  					     (__v2di) __A,
773  					     (__mmask8) __U);
774  	}
775  	
776  	extern __inline __m256i
777  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
778  	_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
779  	{
780  	  return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
781  							     (__v8si) __W,
782  							     (__mmask8) __U);
783  	}
784  	
785  	extern __inline __m256i
786  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
787  	_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
788  	{
789  	  return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
790  							     (__v8si)
791  							     _mm256_setzero_si256 (),
792  							     (__mmask8) __U);
793  	}
794  	
795  	extern __inline __m128i
796  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
797  	_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
798  	{
799  	  return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
800  							     (__v4si) __W,
801  							     (__mmask8) __U);
802  	}
803  	
804  	extern __inline __m128i
805  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
806  	_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
807  	{
808  	  return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
809  							     (__v4si)
810  							     _mm_setzero_si128 (),
811  							     (__mmask8) __U);
812  	}
813  	
814  	extern __inline void
815  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
816  	_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
817  	{
818  	  __builtin_ia32_storedqusi256_mask ((int *) __P,
819  					     (__v8si) __A,
820  					     (__mmask8) __U);
821  	}
822  	
823  	extern __inline void
824  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
825  	_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
826  	{
827  	  __builtin_ia32_storedqusi128_mask ((int *) __P,
828  					     (__v4si) __A,
829  					     (__mmask8) __U);
830  	}
831  	
832  	extern __inline __m256i
833  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
834  	_mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
835  	{
836  	  return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
837  							 (__v8si) __W,
838  							 (__mmask8) __U);
839  	}
840  	
841  	extern __inline __m256i
842  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
843  	_mm256_maskz_abs_epi32 (__mmask8 __U, __m256i __A)
844  	{
845  	  return (__m256i) __builtin_ia32_pabsd256_mask ((__v8si) __A,
846  							 (__v8si)
847  							 _mm256_setzero_si256 (),
848  							 (__mmask8) __U);
849  	}
850  	
851  	extern __inline __m128i
852  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
853  	_mm_mask_abs_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
854  	{
855  	  return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
856  							 (__v4si) __W,
857  							 (__mmask8) __U);
858  	}
859  	
860  	extern __inline __m128i
861  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
862  	_mm_maskz_abs_epi32 (__mmask8 __U, __m128i __A)
863  	{
864  	  return (__m128i) __builtin_ia32_pabsd128_mask ((__v4si) __A,
865  							 (__v4si)
866  							 _mm_setzero_si128 (),
867  							 (__mmask8) __U);
868  	}
869  	
870  	extern __inline __m256i
871  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
872  	_mm256_abs_epi64 (__m256i __A)
873  	{
874  	  return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
875  							 (__v4di)
876  							 _mm256_setzero_si256 (),
877  							 (__mmask8) -1);
878  	}
879  	
880  	extern __inline __m256i
881  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
882  	_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
883  	{
884  	  return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
885  							 (__v4di) __W,
886  							 (__mmask8) __U);
887  	}
888  	
889  	extern __inline __m256i
890  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
891  	_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A)
892  	{
893  	  return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
894  							 (__v4di)
895  							 _mm256_setzero_si256 (),
896  							 (__mmask8) __U);
897  	}
898  	
899  	extern __inline __m128i
900  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
901  	_mm_abs_epi64 (__m128i __A)
902  	{
903  	  return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
904  							 (__v2di)
905  							 _mm_setzero_si128 (),
906  							 (__mmask8) -1);
907  	}
908  	
909  	extern __inline __m128i
910  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
911  	_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
912  	{
913  	  return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
914  							 (__v2di) __W,
915  							 (__mmask8) __U);
916  	}
917  	
918  	extern __inline __m128i
919  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
920  	_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A)
921  	{
922  	  return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
923  							 (__v2di)
924  							 _mm_setzero_si128 (),
925  							 (__mmask8) __U);
926  	}
927  	
928  	extern __inline __m128i
929  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
930  	_mm256_cvtpd_epu32 (__m256d __A)
931  	{
932  	  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
933  							     (__v4si)
934  							     _mm_setzero_si128 (),
935  							     (__mmask8) -1);
936  	}
937  	
938  	extern __inline __m128i
939  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
940  	_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
941  	{
942  	  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
943  							     (__v4si) __W,
944  							     (__mmask8) __U);
945  	}
946  	
947  	extern __inline __m128i
948  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
949  	_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A)
950  	{
951  	  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
952  							     (__v4si)
953  							     _mm_setzero_si128 (),
954  							     (__mmask8) __U);
955  	}
956  	
957  	extern __inline __m128i
958  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
959  	_mm_cvtpd_epu32 (__m128d __A)
960  	{
961  	  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
962  							     (__v4si)
963  							     _mm_setzero_si128 (),
964  							     (__mmask8) -1);
965  	}
966  	
967  	extern __inline __m128i
968  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
969  	_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
970  	{
971  	  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
972  							     (__v4si) __W,
973  							     (__mmask8) __U);
974  	}
975  	
976  	extern __inline __m128i
977  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
978  	_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A)
979  	{
980  	  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
981  							     (__v4si)
982  							     _mm_setzero_si128 (),
983  							     (__mmask8) __U);
984  	}
985  	
986  	extern __inline __m256i
987  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
988  	_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
989  	{
990  	  return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
991  							     (__v8si) __W,
992  							     (__mmask8) __U);
993  	}
994  	
995  	extern __inline __m256i
996  	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
997  	_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A)
998  	{
999  	  return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
1000 							     (__v8si)
1001 							     _mm256_setzero_si256 (),
1002 							     (__mmask8) __U);
1003 	}
1004 	
1005 	extern __inline __m128i
1006 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1007 	_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
1008 	{
1009 	  return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1010 							     (__v4si) __W,
1011 							     (__mmask8) __U);
1012 	}
1013 	
1014 	extern __inline __m128i
1015 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1016 	_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A)
1017 	{
1018 	  return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
1019 							     (__v4si)
1020 							     _mm_setzero_si128 (),
1021 							     (__mmask8) __U);
1022 	}
1023 	
1024 	extern __inline __m256i
1025 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1026 	_mm256_cvttps_epu32 (__m256 __A)
1027 	{
1028 	  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1029 							      (__v8si)
1030 							      _mm256_setzero_si256 (),
1031 							      (__mmask8) -1);
1032 	}
1033 	
1034 	extern __inline __m256i
1035 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1036 	_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
1037 	{
1038 	  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1039 							      (__v8si) __W,
1040 							      (__mmask8) __U);
1041 	}
1042 	
1043 	extern __inline __m256i
1044 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1045 	_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A)
1046 	{
1047 	  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
1048 							      (__v8si)
1049 							      _mm256_setzero_si256 (),
1050 							      (__mmask8) __U);
1051 	}
1052 	
1053 	extern __inline __m128i
1054 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1055 	_mm_cvttps_epu32 (__m128 __A)
1056 	{
1057 	  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1058 							      (__v4si)
1059 							      _mm_setzero_si128 (),
1060 							      (__mmask8) -1);
1061 	}
1062 	
1063 	extern __inline __m128i
1064 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1065 	_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
1066 	{
1067 	  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1068 							      (__v4si) __W,
1069 							      (__mmask8) __U);
1070 	}
1071 	
1072 	extern __inline __m128i
1073 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1074 	_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A)
1075 	{
1076 	  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
1077 							      (__v4si)
1078 							      _mm_setzero_si128 (),
1079 							      (__mmask8) __U);
1080 	}
1081 	
1082 	extern __inline __m128i
1083 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1084 	_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1085 	{
1086 	  return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1087 							     (__v4si) __W,
1088 							     (__mmask8) __U);
1089 	}
1090 	
1091 	extern __inline __m128i
1092 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1093 	_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A)
1094 	{
1095 	  return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
1096 							     (__v4si)
1097 							     _mm_setzero_si128 (),
1098 							     (__mmask8) __U);
1099 	}
1100 	
1101 	extern __inline __m128i
1102 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1103 	_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1104 	{
1105 	  return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1106 							     (__v4si) __W,
1107 							     (__mmask8) __U);
1108 	}
1109 	
1110 	extern __inline __m128i
1111 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1112 	_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A)
1113 	{
1114 	  return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
1115 							     (__v4si)
1116 							     _mm_setzero_si128 (),
1117 							     (__mmask8) __U);
1118 	}
1119 	
1120 	extern __inline __m128i
1121 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1122 	_mm256_cvttpd_epu32 (__m256d __A)
1123 	{
1124 	  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1125 							      (__v4si)
1126 							      _mm_setzero_si128 (),
1127 							      (__mmask8) -1);
1128 	}
1129 	
1130 	extern __inline __m128i
1131 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1132 	_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A)
1133 	{
1134 	  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1135 							      (__v4si) __W,
1136 							      (__mmask8) __U);
1137 	}
1138 	
1139 	extern __inline __m128i
1140 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1141 	_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A)
1142 	{
1143 	  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
1144 							      (__v4si)
1145 							      _mm_setzero_si128 (),
1146 							      (__mmask8) __U);
1147 	}
1148 	
1149 	extern __inline __m128i
1150 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1151 	_mm_cvttpd_epu32 (__m128d __A)
1152 	{
1153 	  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1154 							      (__v4si)
1155 							      _mm_setzero_si128 (),
1156 							      (__mmask8) -1);
1157 	}
1158 	
1159 	extern __inline __m128i
1160 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1161 	_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A)
1162 	{
1163 	  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1164 							      (__v4si) __W,
1165 							      (__mmask8) __U);
1166 	}
1167 	
1168 	extern __inline __m128i
1169 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1170 	_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A)
1171 	{
1172 	  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
1173 							      (__v4si)
1174 							      _mm_setzero_si128 (),
1175 							      (__mmask8) __U);
1176 	}
1177 	
1178 	extern __inline __m128i
1179 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1180 	_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A)
1181 	{
1182 	  return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1183 							    (__v4si) __W,
1184 							    (__mmask8) __U);
1185 	}
1186 	
1187 	extern __inline __m128i
1188 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1189 	_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A)
1190 	{
1191 	  return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
1192 							    (__v4si)
1193 							    _mm_setzero_si128 (),
1194 							    (__mmask8) __U);
1195 	}
1196 	
1197 	extern __inline __m128i
1198 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1199 	_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A)
1200 	{
1201 	  return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1202 							    (__v4si) __W,
1203 							    (__mmask8) __U);
1204 	}
1205 	
1206 	extern __inline __m128i
1207 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1208 	_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A)
1209 	{
1210 	  return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1211 							    (__v4si)
1212 							    _mm_setzero_si128 (),
1213 							    (__mmask8) __U);
1214 	}
1215 	
1216 	extern __inline __m256d
1217 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1218 	_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1219 	{
1220 	  return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1221 							    (__v4df) __W,
1222 							    (__mmask8) __U);
1223 	}
1224 	
1225 	extern __inline __m256d
1226 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1227 	_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1228 	{
1229 	  return (__m256d) __builtin_ia32_cvtdq2pd256_mask ((__v4si) __A,
1230 							    (__v4df)
1231 							    _mm256_setzero_pd (),
1232 							    (__mmask8) __U);
1233 	}
1234 	
1235 	extern __inline __m128d
1236 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1237 	_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1238 	{
1239 	  return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1240 							    (__v2df) __W,
1241 							    (__mmask8) __U);
1242 	}
1243 	
1244 	extern __inline __m128d
1245 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1246 	_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A)
1247 	{
1248 	  return (__m128d) __builtin_ia32_cvtdq2pd128_mask ((__v4si) __A,
1249 							    (__v2df)
1250 							    _mm_setzero_pd (),
1251 							    (__mmask8) __U);
1252 	}
1253 	
1254 	extern __inline __m256d
1255 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1256 	_mm256_cvtepu32_pd (__m128i __A)
1257 	{
1258 	  return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1259 							     (__v4df)
1260 							     _mm256_setzero_pd (),
1261 							     (__mmask8) -1);
1262 	}
1263 	
1264 	extern __inline __m256d
1265 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1266 	_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A)
1267 	{
1268 	  return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1269 							     (__v4df) __W,
1270 							     (__mmask8) __U);
1271 	}
1272 	
1273 	extern __inline __m256d
1274 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1275 	_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1276 	{
1277 	  return (__m256d) __builtin_ia32_cvtudq2pd256_mask ((__v4si) __A,
1278 							     (__v4df)
1279 							     _mm256_setzero_pd (),
1280 							     (__mmask8) __U);
1281 	}
1282 	
1283 	extern __inline __m128d
1284 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1285 	_mm_cvtepu32_pd (__m128i __A)
1286 	{
1287 	  return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1288 							     (__v2df)
1289 							     _mm_setzero_pd (),
1290 							     (__mmask8) -1);
1291 	}
1292 	
1293 	extern __inline __m128d
1294 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1295 	_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A)
1296 	{
1297 	  return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1298 							     (__v2df) __W,
1299 							     (__mmask8) __U);
1300 	}
1301 	
1302 	extern __inline __m128d
1303 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1304 	_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A)
1305 	{
1306 	  return (__m128d) __builtin_ia32_cvtudq2pd128_mask ((__v4si) __A,
1307 							     (__v2df)
1308 							     _mm_setzero_pd (),
1309 							     (__mmask8) __U);
1310 	}
1311 	
1312 	extern __inline __m256
1313 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1314 	_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1315 	{
1316 	  return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1317 							   (__v8sf) __W,
1318 							   (__mmask8) __U);
1319 	}
1320 	
1321 	extern __inline __m256
1322 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1323 	_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A)
1324 	{
1325 	  return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
1326 							   (__v8sf)
1327 							   _mm256_setzero_ps (),
1328 							   (__mmask8) __U);
1329 	}
1330 	
1331 	extern __inline __m128
1332 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1333 	_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1334 	{
1335 	  return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1336 							   (__v4sf) __W,
1337 							   (__mmask8) __U);
1338 	}
1339 	
1340 	extern __inline __m128
1341 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1342 	_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A)
1343 	{
1344 	  return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
1345 							   (__v4sf)
1346 							   _mm_setzero_ps (),
1347 							   (__mmask8) __U);
1348 	}
1349 	
1350 	extern __inline __m256
1351 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1352 	_mm256_cvtepu32_ps (__m256i __A)
1353 	{
1354 	  return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1355 							    (__v8sf)
1356 							    _mm256_setzero_ps (),
1357 							    (__mmask8) -1);
1358 	}
1359 	
1360 	extern __inline __m256
1361 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1362 	_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A)
1363 	{
1364 	  return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1365 							    (__v8sf) __W,
1366 							    (__mmask8) __U);
1367 	}
1368 	
1369 	extern __inline __m256
1370 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1371 	_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A)
1372 	{
1373 	  return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
1374 							    (__v8sf)
1375 							    _mm256_setzero_ps (),
1376 							    (__mmask8) __U);
1377 	}
1378 	
1379 	extern __inline __m128
1380 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1381 	_mm_cvtepu32_ps (__m128i __A)
1382 	{
1383 	  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1384 							    (__v4sf)
1385 							    _mm_setzero_ps (),
1386 							    (__mmask8) -1);
1387 	}
1388 	
1389 	extern __inline __m128
1390 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1391 	_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A)
1392 	{
1393 	  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1394 							    (__v4sf) __W,
1395 							    (__mmask8) __U);
1396 	}
1397 	
1398 	extern __inline __m128
1399 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1400 	_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A)
1401 	{
1402 	  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
1403 							    (__v4sf)
1404 							    _mm_setzero_ps (),
1405 							    (__mmask8) __U);
1406 	}
1407 	
1408 	extern __inline __m256d
1409 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1410 	_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A)
1411 	{
1412 	  return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1413 							    (__v4df) __W,
1414 							    (__mmask8) __U);
1415 	}
1416 	
1417 	extern __inline __m256d
1418 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1419 	_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1420 	{
1421 	  return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
1422 							    (__v4df)
1423 							    _mm256_setzero_pd (),
1424 							    (__mmask8) __U);
1425 	}
1426 	
1427 	extern __inline __m128d
1428 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1429 	_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A)
1430 	{
1431 	  return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1432 							    (__v2df) __W,
1433 							    (__mmask8) __U);
1434 	}
1435 	
1436 	extern __inline __m128d
1437 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1438 	_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A)
1439 	{
1440 	  return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
1441 							    (__v2df)
1442 							    _mm_setzero_pd (),
1443 							    (__mmask8) __U);
1444 	}
1445 	
1446 	extern __inline __m128i
1447 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1448 	_mm_cvtepi32_epi8 (__m128i __A)
1449 	{
1450 	  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1451 							  (__v16qi)
1452 							  _mm_undefined_si128 (),
1453 							  (__mmask8) -1);
1454 	}
1455 	
1456 	extern __inline void
1457 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1458 	_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1459 	{
1460 	  __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1461 	}
1462 	
1463 	extern __inline __m128i
1464 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1465 	_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1466 	{
1467 	  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1468 							  (__v16qi) __O, __M);
1469 	}
1470 	
1471 	extern __inline __m128i
1472 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1473 	_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
1474 	{
1475 	  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
1476 							  (__v16qi)
1477 							  _mm_setzero_si128 (),
1478 							  __M);
1479 	}
1480 	
1481 	extern __inline __m128i
1482 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1483 	_mm256_cvtepi32_epi8 (__m256i __A)
1484 	{
1485 	  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1486 							  (__v16qi)
1487 							  _mm_undefined_si128 (),
1488 							  (__mmask8) -1);
1489 	}
1490 	
1491 	extern __inline __m128i
1492 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1493 	_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1494 	{
1495 	  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1496 							  (__v16qi) __O, __M);
1497 	}
1498 	
1499 	extern __inline void
1500 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1501 	_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1502 	{
1503 	  __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1504 	}
1505 	
1506 	extern __inline __m128i
1507 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1508 	_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
1509 	{
1510 	  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
1511 							  (__v16qi)
1512 							  _mm_setzero_si128 (),
1513 							  __M);
1514 	}
1515 	
1516 	extern __inline __m128i
1517 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1518 	_mm_cvtsepi32_epi8 (__m128i __A)
1519 	{
1520 	  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1521 							   (__v16qi)
1522 							   _mm_undefined_si128 (),
1523 							   (__mmask8) -1);
1524 	}
1525 	
1526 	extern __inline void
1527 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1528 	_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1529 	{
1530 	  __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1531 	}
1532 	
1533 	extern __inline __m128i
1534 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1535 	_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1536 	{
1537 	  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1538 							   (__v16qi) __O, __M);
1539 	}
1540 	
1541 	extern __inline __m128i
1542 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1543 	_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
1544 	{
1545 	  return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
1546 							   (__v16qi)
1547 							   _mm_setzero_si128 (),
1548 							   __M);
1549 	}
1550 	
1551 	extern __inline __m128i
1552 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1553 	_mm256_cvtsepi32_epi8 (__m256i __A)
1554 	{
1555 	  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1556 							   (__v16qi)
1557 							   _mm_undefined_si128 (),
1558 							   (__mmask8) -1);
1559 	}
1560 	
1561 	extern __inline void
1562 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1563 	_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1564 	{
1565 	  __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
1566 	}
1567 	
1568 	extern __inline __m128i
1569 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1570 	_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1571 	{
1572 	  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1573 							   (__v16qi) __O, __M);
1574 	}
1575 	
1576 	extern __inline __m128i
1577 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1578 	_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
1579 	{
1580 	  return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
1581 							   (__v16qi)
1582 							   _mm_setzero_si128 (),
1583 							   __M);
1584 	}
1585 	
1586 	extern __inline __m128i
1587 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1588 	_mm_cvtusepi32_epi8 (__m128i __A)
1589 	{
1590 	  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1591 							    (__v16qi)
1592 							    _mm_undefined_si128 (),
1593 							    (__mmask8) -1);
1594 	}
1595 	
1596 	extern __inline void
1597 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1598 	_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1599 	{
1600 	  __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
1601 	}
1602 	
1603 	extern __inline __m128i
1604 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1605 	_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1606 	{
1607 	  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1608 							    (__v16qi) __O,
1609 							    __M);
1610 	}
1611 	
1612 	extern __inline __m128i
1613 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1614 	_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
1615 	{
1616 	  return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
1617 							    (__v16qi)
1618 							    _mm_setzero_si128 (),
1619 							    __M);
1620 	}
1621 	
1622 	extern __inline __m128i
1623 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1624 	_mm256_cvtusepi32_epi8 (__m256i __A)
1625 	{
1626 	  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1627 							    (__v16qi)
1628 							    _mm_undefined_si128 (),
1629 							    (__mmask8) -1);
1630 	}
1631 	
1632 	extern __inline void
1633 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1634 	_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1635 	{
1636 	  __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
1637 	}
1638 	
1639 	extern __inline __m128i
1640 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1641 	_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1642 	{
1643 	  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1644 							    (__v16qi) __O,
1645 							    __M);
1646 	}
1647 	
1648 	extern __inline __m128i
1649 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1650 	_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
1651 	{
1652 	  return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
1653 							    (__v16qi)
1654 							    _mm_setzero_si128 (),
1655 							    __M);
1656 	}
1657 	
1658 	extern __inline __m128i
1659 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1660 	_mm_cvtepi32_epi16 (__m128i __A)
1661 	{
1662 	  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1663 							  (__v8hi)
1664 							  _mm_setzero_si128 (),
1665 							  (__mmask8) -1);
1666 	}
1667 	
1668 	extern __inline void
1669 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1670 	_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1671 	{
1672 	  __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1673 	}
1674 	
1675 	extern __inline __m128i
1676 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1677 	_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1678 	{
1679 	  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1680 							  (__v8hi) __O, __M);
1681 	}
1682 	
1683 	extern __inline __m128i
1684 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1685 	_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
1686 	{
1687 	  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
1688 							  (__v8hi)
1689 							  _mm_setzero_si128 (),
1690 							  __M);
1691 	}
1692 	
1693 	extern __inline __m128i
1694 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1695 	_mm256_cvtepi32_epi16 (__m256i __A)
1696 	{
1697 	  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1698 							  (__v8hi)
1699 							  _mm_setzero_si128 (),
1700 							  (__mmask8) -1);
1701 	}
1702 	
1703 	extern __inline void
1704 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1705 	_mm256_mask_cvtepi32_storeu_epi16 (void *  __P, __mmask8 __M, __m256i __A)
1706 	{
1707 	  __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1708 	}
1709 	
1710 	extern __inline __m128i
1711 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1712 	_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1713 	{
1714 	  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1715 							  (__v8hi) __O, __M);
1716 	}
1717 	
1718 	extern __inline __m128i
1719 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1720 	_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
1721 	{
1722 	  return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
1723 							  (__v8hi)
1724 							  _mm_setzero_si128 (),
1725 							  __M);
1726 	}
1727 	
1728 	extern __inline __m128i
1729 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1730 	_mm_cvtsepi32_epi16 (__m128i __A)
1731 	{
1732 	  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1733 							   (__v8hi)
1734 							   _mm_setzero_si128 (),
1735 							   (__mmask8) -1);
1736 	}
1737 	
1738 	extern __inline void
1739 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1740 	_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1741 	{
1742 	  __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1743 	}
1744 	
1745 	extern __inline __m128i
1746 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1747 	_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1748 	{
1749 	  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1750 							   (__v8hi)__O,
1751 							   __M);
1752 	}
1753 	
1754 	extern __inline __m128i
1755 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1756 	_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
1757 	{
1758 	  return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
1759 							   (__v8hi)
1760 							   _mm_setzero_si128 (),
1761 							   __M);
1762 	}
1763 	
1764 	extern __inline __m128i
1765 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1766 	_mm256_cvtsepi32_epi16 (__m256i __A)
1767 	{
1768 	  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1769 							   (__v8hi)
1770 							   _mm_undefined_si128 (),
1771 							   (__mmask8) -1);
1772 	}
1773 	
1774 	extern __inline void
1775 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1776 	_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1777 	{
1778 	  __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1779 	}
1780 	
1781 	extern __inline __m128i
1782 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1783 	_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1784 	{
1785 	  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1786 							   (__v8hi) __O, __M);
1787 	}
1788 	
1789 	extern __inline __m128i
1790 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1791 	_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
1792 	{
1793 	  return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
1794 							   (__v8hi)
1795 							   _mm_setzero_si128 (),
1796 							   __M);
1797 	}
1798 	
1799 	extern __inline __m128i
1800 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1801 	_mm_cvtusepi32_epi16 (__m128i __A)
1802 	{
1803 	  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1804 							    (__v8hi)
1805 							    _mm_undefined_si128 (),
1806 							    (__mmask8) -1);
1807 	}
1808 	
1809 	extern __inline void
1810 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1811 	_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
1812 	{
1813 	  __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
1814 	}
1815 	
1816 	extern __inline __m128i
1817 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1818 	_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
1819 	{
1820 	  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1821 							    (__v8hi) __O, __M);
1822 	}
1823 	
1824 	extern __inline __m128i
1825 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1826 	_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
1827 	{
1828 	  return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
1829 							    (__v8hi)
1830 							    _mm_setzero_si128 (),
1831 							    __M);
1832 	}
1833 	
1834 	extern __inline __m128i
1835 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1836 	_mm256_cvtusepi32_epi16 (__m256i __A)
1837 	{
1838 	  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1839 							    (__v8hi)
1840 							    _mm_undefined_si128 (),
1841 							    (__mmask8) -1);
1842 	}
1843 	
1844 	extern __inline void
1845 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1846 	_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
1847 	{
1848 	  __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
1849 	}
1850 	
1851 	extern __inline __m128i
1852 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1853 	_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
1854 	{
1855 	  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1856 							    (__v8hi) __O, __M);
1857 	}
1858 	
1859 	extern __inline __m128i
1860 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1861 	_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
1862 	{
1863 	  return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
1864 							    (__v8hi)
1865 							    _mm_setzero_si128 (),
1866 							    __M);
1867 	}
1868 	
1869 	extern __inline __m128i
1870 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1871 	_mm_cvtepi64_epi8 (__m128i __A)
1872 	{
1873 	  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1874 							  (__v16qi)
1875 							  _mm_undefined_si128 (),
1876 							  (__mmask8) -1);
1877 	}
1878 	
1879 	extern __inline void
1880 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1881 	_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1882 	{
1883 	  __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1884 	}
1885 	
1886 	extern __inline __m128i
1887 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1888 	_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1889 	{
1890 	  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1891 							  (__v16qi) __O, __M);
1892 	}
1893 	
1894 	extern __inline __m128i
1895 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1896 	_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
1897 	{
1898 	  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
1899 							  (__v16qi)
1900 							  _mm_setzero_si128 (),
1901 							  __M);
1902 	}
1903 	
1904 	extern __inline __m128i
1905 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1906 	_mm256_cvtepi64_epi8 (__m256i __A)
1907 	{
1908 	  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1909 							  (__v16qi)
1910 							  _mm_undefined_si128 (),
1911 							  (__mmask8) -1);
1912 	}
1913 	
1914 	extern __inline void
1915 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1916 	_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1917 	{
1918 	  __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1919 	}
1920 	
1921 	extern __inline __m128i
1922 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1923 	_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1924 	{
1925 	  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1926 							  (__v16qi) __O, __M);
1927 	}
1928 	
1929 	extern __inline __m128i
1930 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1931 	_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
1932 	{
1933 	  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
1934 							  (__v16qi)
1935 							  _mm_setzero_si128 (),
1936 							  __M);
1937 	}
1938 	
1939 	extern __inline __m128i
1940 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1941 	_mm_cvtsepi64_epi8 (__m128i __A)
1942 	{
1943 	  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1944 							   (__v16qi)
1945 							   _mm_undefined_si128 (),
1946 							   (__mmask8) -1);
1947 	}
1948 	
1949 	extern __inline void
1950 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1951 	_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
1952 	{
1953 	  __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
1954 	}
1955 	
1956 	extern __inline __m128i
1957 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1958 	_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
1959 	{
1960 	  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1961 							   (__v16qi) __O, __M);
1962 	}
1963 	
1964 	extern __inline __m128i
1965 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1966 	_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
1967 	{
1968 	  return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
1969 							   (__v16qi)
1970 							   _mm_setzero_si128 (),
1971 							   __M);
1972 	}
1973 	
1974 	extern __inline __m128i
1975 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1976 	_mm256_cvtsepi64_epi8 (__m256i __A)
1977 	{
1978 	  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
1979 							   (__v16qi)
1980 							   _mm_undefined_si128 (),
1981 							   (__mmask8) -1);
1982 	}
1983 	
1984 	extern __inline void
1985 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1986 	_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
1987 	{
1988 	  __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
1989 	}
1990 	
1991 	extern __inline __m128i
1992 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1993 	_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
1994 	{
1995 	  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
1996 							   (__v16qi) __O, __M);
1997 	}
1998 	
1999 	extern __inline __m128i
2000 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2001 	_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
2002 	{
2003 	  return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
2004 							   (__v16qi)
2005 							   _mm_setzero_si128 (),
2006 							   __M);
2007 	}
2008 	
2009 	extern __inline __m128i
2010 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2011 	_mm_cvtusepi64_epi8 (__m128i __A)
2012 	{
2013 	  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2014 							    (__v16qi)
2015 							    _mm_undefined_si128 (),
2016 							    (__mmask8) -1);
2017 	}
2018 	
2019 	extern __inline void
2020 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2021 	_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
2022 	{
2023 	  __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
2024 	}
2025 	
2026 	extern __inline __m128i
2027 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2028 	_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
2029 	{
2030 	  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2031 							    (__v16qi) __O,
2032 							    __M);
2033 	}
2034 	
2035 	extern __inline __m128i
2036 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2037 	_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
2038 	{
2039 	  return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
2040 							    (__v16qi)
2041 							    _mm_setzero_si128 (),
2042 							    __M);
2043 	}
2044 	
2045 	extern __inline __m128i
2046 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2047 	_mm256_cvtusepi64_epi8 (__m256i __A)
2048 	{
2049 	  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2050 							    (__v16qi)
2051 							    _mm_undefined_si128 (),
2052 							    (__mmask8) -1);
2053 	}
2054 	
2055 	extern __inline void
2056 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2057 	_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
2058 	{
2059 	  __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
2060 	}
2061 	
2062 	extern __inline __m128i
2063 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2064 	_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
2065 	{
2066 	  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2067 							    (__v16qi) __O,
2068 							    __M);
2069 	}
2070 	
2071 	extern __inline __m128i
2072 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2073 	_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
2074 	{
2075 	  return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
2076 							    (__v16qi)
2077 							    _mm_setzero_si128 (),
2078 							    __M);
2079 	}
2080 	
2081 	extern __inline __m128i
2082 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2083 	_mm_cvtepi64_epi16 (__m128i __A)
2084 	{
2085 	  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2086 							  (__v8hi)
2087 							  _mm_undefined_si128 (),
2088 							  (__mmask8) -1);
2089 	}
2090 	
2091 	extern __inline void
2092 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2093 	_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2094 	{
2095 	  __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2096 	}
2097 	
2098 	extern __inline __m128i
2099 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2100 	_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2101 	{
2102 	  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2103 							  (__v8hi)__O,
2104 							  __M);
2105 	}
2106 	
2107 	extern __inline __m128i
2108 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2109 	_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
2110 	{
2111 	  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
2112 							  (__v8hi)
2113 							  _mm_setzero_si128 (),
2114 							  __M);
2115 	}
2116 	
2117 	extern __inline __m128i
2118 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2119 	_mm256_cvtepi64_epi16 (__m256i __A)
2120 	{
2121 	  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2122 							  (__v8hi)
2123 							  _mm_undefined_si128 (),
2124 							  (__mmask8) -1);
2125 	}
2126 	
2127 	extern __inline void
2128 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2129 	_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2130 	{
2131 	  __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2132 	}
2133 	
2134 	extern __inline __m128i
2135 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2136 	_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2137 	{
2138 	  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2139 							  (__v8hi) __O, __M);
2140 	}
2141 	
2142 	extern __inline __m128i
2143 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2144 	_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
2145 	{
2146 	  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
2147 							  (__v8hi)
2148 							  _mm_setzero_si128 (),
2149 							  __M);
2150 	}
2151 	
2152 	extern __inline __m128i
2153 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2154 	_mm_cvtsepi64_epi16 (__m128i __A)
2155 	{
2156 	  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2157 							   (__v8hi)
2158 							   _mm_undefined_si128 (),
2159 							   (__mmask8) -1);
2160 	}
2161 	
2162 	extern __inline void
2163 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2164 	_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2165 	{
2166 	  __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2167 	}
2168 	
2169 	extern __inline __m128i
2170 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2171 	_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2172 	{
2173 	  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2174 							   (__v8hi) __O, __M);
2175 	}
2176 	
2177 	extern __inline __m128i
2178 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2179 	_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
2180 	{
2181 	  return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
2182 							   (__v8hi)
2183 							   _mm_setzero_si128 (),
2184 							   __M);
2185 	}
2186 	
2187 	extern __inline __m128i
2188 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2189 	_mm256_cvtsepi64_epi16 (__m256i __A)
2190 	{
2191 	  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2192 							   (__v8hi)
2193 							   _mm_undefined_si128 (),
2194 							   (__mmask8) -1);
2195 	}
2196 	
2197 	extern __inline void
2198 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2199 	_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2200 	{
2201 	  __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2202 	}
2203 	
2204 	extern __inline __m128i
2205 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2206 	_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2207 	{
2208 	  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2209 							   (__v8hi) __O, __M);
2210 	}
2211 	
2212 	extern __inline __m128i
2213 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2214 	_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
2215 	{
2216 	  return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
2217 							   (__v8hi)
2218 							   _mm_setzero_si128 (),
2219 							   __M);
2220 	}
2221 	
2222 	extern __inline __m128i
2223 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2224 	_mm_cvtusepi64_epi16 (__m128i __A)
2225 	{
2226 	  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2227 							    (__v8hi)
2228 							    _mm_undefined_si128 (),
2229 							    (__mmask8) -1);
2230 	}
2231 	
2232 	extern __inline void
2233 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2234 	_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
2235 	{
2236 	  __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
2237 	}
2238 	
2239 	extern __inline __m128i
2240 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2241 	_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
2242 	{
2243 	  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2244 							    (__v8hi) __O, __M);
2245 	}
2246 	
2247 	extern __inline __m128i
2248 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2249 	_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
2250 	{
2251 	  return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
2252 							    (__v8hi)
2253 							    _mm_setzero_si128 (),
2254 							    __M);
2255 	}
2256 	
2257 	extern __inline __m128i
2258 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2259 	_mm256_cvtusepi64_epi16 (__m256i __A)
2260 	{
2261 	  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2262 							    (__v8hi)
2263 							    _mm_undefined_si128 (),
2264 							    (__mmask8) -1);
2265 	}
2266 	
2267 	extern __inline void
2268 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2269 	_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
2270 	{
2271 	  __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
2272 	}
2273 	
2274 	extern __inline __m128i
2275 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2276 	_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
2277 	{
2278 	  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2279 							    (__v8hi) __O, __M);
2280 	}
2281 	
2282 	extern __inline __m128i
2283 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2284 	_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
2285 	{
2286 	  return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
2287 							    (__v8hi)
2288 							    _mm_setzero_si128 (),
2289 							    __M);
2290 	}
2291 	
2292 	extern __inline __m128i
2293 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2294 	_mm_cvtepi64_epi32 (__m128i __A)
2295 	{
2296 	  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2297 							  (__v4si)
2298 							  _mm_undefined_si128 (),
2299 							  (__mmask8) -1);
2300 	}
2301 	
2302 	extern __inline void
2303 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2304 	_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2305 	{
2306 	  __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2307 	}
2308 	
2309 	extern __inline __m128i
2310 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2311 	_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2312 	{
2313 	  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2314 							  (__v4si) __O, __M);
2315 	}
2316 	
2317 	extern __inline __m128i
2318 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2319 	_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
2320 	{
2321 	  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
2322 							  (__v4si)
2323 							  _mm_setzero_si128 (),
2324 							  __M);
2325 	}
2326 	
2327 	extern __inline __m128i
2328 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2329 	_mm256_cvtepi64_epi32 (__m256i __A)
2330 	{
2331 	  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2332 							  (__v4si)
2333 							  _mm_undefined_si128 (),
2334 							  (__mmask8) -1);
2335 	}
2336 	
2337 	extern __inline void
2338 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2339 	_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2340 	{
2341 	  __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2342 	}
2343 	
2344 	extern __inline __m128i
2345 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2346 	_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2347 	{
2348 	  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2349 							  (__v4si) __O, __M);
2350 	}
2351 	
2352 	extern __inline __m128i
2353 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2354 	_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
2355 	{
2356 	  return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A,
2357 							  (__v4si)
2358 							  _mm_setzero_si128 (),
2359 							  __M);
2360 	}
2361 	
2362 	extern __inline __m128i
2363 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2364 	_mm_cvtsepi64_epi32 (__m128i __A)
2365 	{
2366 	  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2367 							   (__v4si)
2368 							   _mm_undefined_si128 (),
2369 							   (__mmask8) -1);
2370 	}
2371 	
2372 	extern __inline void
2373 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2374 	_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2375 	{
2376 	  __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2377 	}
2378 	
2379 	extern __inline __m128i
2380 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2381 	_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2382 	{
2383 	  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2384 							   (__v4si) __O, __M);
2385 	}
2386 	
2387 	extern __inline __m128i
2388 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2389 	_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
2390 	{
2391 	  return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
2392 							   (__v4si)
2393 							   _mm_setzero_si128 (),
2394 							   __M);
2395 	}
2396 	
2397 	extern __inline __m128i
2398 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2399 	_mm256_cvtsepi64_epi32 (__m256i __A)
2400 	{
2401 	  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2402 							   (__v4si)
2403 							   _mm_undefined_si128 (),
2404 							   (__mmask8) -1);
2405 	}
2406 	
2407 	extern __inline void
2408 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2409 	_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2410 	{
2411 	  __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2412 	}
2413 	
2414 	extern __inline __m128i
2415 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2416 	_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2417 	{
2418 	  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2419 							   (__v4si)__O,
2420 							   __M);
2421 	}
2422 	
2423 	extern __inline __m128i
2424 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2425 	_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
2426 	{
2427 	  return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
2428 							   (__v4si)
2429 							   _mm_setzero_si128 (),
2430 							   __M);
2431 	}
2432 	
2433 	extern __inline __m128i
2434 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2435 	_mm_cvtusepi64_epi32 (__m128i __A)
2436 	{
2437 	  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2438 							    (__v4si)
2439 							    _mm_undefined_si128 (),
2440 							    (__mmask8) -1);
2441 	}
2442 	
2443 	extern __inline void
2444 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2445 	_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
2446 	{
2447 	  __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
2448 	}
2449 	
2450 	extern __inline __m128i
2451 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2452 	_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2453 	{
2454 	  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2455 							    (__v4si) __O, __M);
2456 	}
2457 	
2458 	extern __inline __m128i
2459 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2460 	_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
2461 	{
2462 	  return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
2463 							    (__v4si)
2464 							    _mm_setzero_si128 (),
2465 							    __M);
2466 	}
2467 	
2468 	extern __inline __m128i
2469 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2470 	_mm256_cvtusepi64_epi32 (__m256i __A)
2471 	{
2472 	  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2473 							    (__v4si)
2474 							    _mm_undefined_si128 (),
2475 							    (__mmask8) -1);
2476 	}
2477 	
2478 	extern __inline void
2479 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2480 	_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
2481 	{
2482 	  __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
2483 	}
2484 	
2485 	extern __inline __m128i
2486 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2487 	_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
2488 	{
2489 	  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2490 							    (__v4si) __O, __M);
2491 	}
2492 	
2493 	extern __inline __m128i
2494 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2495 	_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
2496 	{
2497 	  return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
2498 							    (__v4si)
2499 							    _mm_setzero_si128 (),
2500 							    __M);
2501 	}
2502 	
2503 	extern __inline __m256
2504 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2505 	_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
2506 	{
2507 	  return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2508 							      (__v8sf) __O,
2509 							      __M);
2510 	}
2511 	
2512 	extern __inline __m256
2513 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2514 	_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2515 	{
2516 	  return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
2517 							      (__v8sf)
2518 							      _mm256_setzero_ps (),
2519 							      __M);
2520 	}
2521 	
2522 	extern __inline __m128
2523 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2524 	_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
2525 	{
2526 	  return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2527 							      (__v4sf) __O,
2528 							      __M);
2529 	}
2530 	
2531 	extern __inline __m128
2532 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2533 	_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
2534 	{
2535 	  return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
2536 							      (__v4sf)
2537 							      _mm_setzero_ps (),
2538 							      __M);
2539 	}
2540 	
2541 	extern __inline __m256d
2542 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2543 	_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
2544 	{
2545 	  return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2546 							       (__v4df) __O,
2547 							       __M);
2548 	}
2549 	
2550 	extern __inline __m256d
2551 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2552 	_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
2553 	{
2554 	  return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
2555 							       (__v4df)
2556 							       _mm256_setzero_pd (),
2557 							       __M);
2558 	}
2559 	
2560 	extern __inline __m256i
2561 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2562 	_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
2563 	{
2564 	  return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2565 							       (__v8si) __O,
2566 							       __M);
2567 	}
2568 	
2569 	extern __inline __m256i
2570 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2571 	_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2572 	{
2573 	  return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A,
2574 							       (__v8si)
2575 							       _mm256_setzero_si256 (),
2576 							       __M);
2577 	}
2578 	
2579 	extern __inline __m256i
2580 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2581 	_mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A)
2582 	{
2583 	  return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O,
2584 								   __M);
2585 	}
2586 	
2587 	extern __inline __m256i
2588 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2589 	_mm256_maskz_set1_epi32 (__mmask8 __M, int __A)
2590 	{
2591 	  return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A,
2592 								   (__v8si)
2593 								   _mm256_setzero_si256 (),
2594 								   __M);
2595 	}
2596 	
2597 	extern __inline __m128i
2598 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2599 	_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
2600 	{
2601 	  return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2602 							       (__v4si) __O,
2603 							       __M);
2604 	}
2605 	
2606 	extern __inline __m128i
2607 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2608 	_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
2609 	{
2610 	  return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A,
2611 							       (__v4si)
2612 							       _mm_setzero_si128 (),
2613 							       __M);
2614 	}
2615 	
2616 	extern __inline __m128i
2617 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2618 	_mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A)
2619 	{
2620 	  return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O,
2621 								   __M);
2622 	}
2623 	
2624 	extern __inline __m128i
2625 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2626 	_mm_maskz_set1_epi32 (__mmask8 __M, int __A)
2627 	{
2628 	  return (__m128i)
2629 		 __builtin_ia32_pbroadcastd128_gpr_mask (__A,
2630 							 (__v4si) _mm_setzero_si128 (),
2631 							 __M);
2632 	}
2633 	
2634 	extern __inline __m256i
2635 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2636 	_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
2637 	{
2638 	  return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2639 							       (__v4di) __O,
2640 							       __M);
2641 	}
2642 	
2643 	extern __inline __m256i
2644 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2645 	_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2646 	{
2647 	  return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A,
2648 							       (__v4di)
2649 							       _mm256_setzero_si256 (),
2650 							       __M);
2651 	}
2652 	
2653 	extern __inline __m256i
2654 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2655 	_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
2656 	{
2657 	  return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A, (__v4di) __O,
2658 								   __M);
2659 	}
2660 	
2661 	extern __inline __m256i
2662 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2663 	_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
2664 	{
2665 	  return (__m256i) __builtin_ia32_pbroadcastq256_gpr_mask (__A,
2666 								   (__v4di)
2667 								   _mm256_setzero_si256 (),
2668 								   __M);
2669 	}
2670 	
2671 	extern __inline __m128i
2672 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2673 	_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
2674 	{
2675 	  return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2676 							       (__v2di) __O,
2677 							       __M);
2678 	}
2679 	
2680 	extern __inline __m128i
2681 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2682 	_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
2683 	{
2684 	  return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A,
2685 							       (__v2di)
2686 							       _mm_setzero_si128 (),
2687 							       __M);
2688 	}
2689 	
2690 	extern __inline __m128i
2691 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2692 	_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
2693 	{
2694 	  return (__m128i) __builtin_ia32_pbroadcastq128_gpr_mask (__A, (__v2di) __O,
2695 								   __M);
2696 	}
2697 	
2698 	extern __inline __m128i
2699 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2700 	_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
2701 	{
2702 	  return (__m128i)
2703 		 __builtin_ia32_pbroadcastq128_gpr_mask (__A,
2704 							 (__v2di) _mm_setzero_si128 (),
2705 							 __M);
2706 	}
2707 	
2708 	extern __inline __m256
2709 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2710 	_mm256_broadcast_f32x4 (__m128 __A)
2711 	{
2712 	  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2713 							          (__v8sf)_mm256_undefined_pd (),
2714 								  (__mmask8) -1);
2715 	}
2716 	
2717 	extern __inline __m256
2718 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2719 	_mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
2720 	{
2721 	  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2722 								  (__v8sf) __O,
2723 								  __M);
2724 	}
2725 	
2726 	extern __inline __m256
2727 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2728 	_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
2729 	{
2730 	  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
2731 								  (__v8sf)
2732 								  _mm256_setzero_ps (),
2733 								  __M);
2734 	}
2735 	
2736 	extern __inline __m256i
2737 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2738 	_mm256_broadcast_i32x4 (__m128i __A)
2739 	{
2740 	  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2741 								   __A,
2742 							           (__v8si)_mm256_undefined_si256 (),
2743 								   (__mmask8) -1);
2744 	}
2745 	
2746 	extern __inline __m256i
2747 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2748 	_mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
2749 	{
2750 	  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2751 								   __A,
2752 								   (__v8si)
2753 								   __O, __M);
2754 	}
2755 	
2756 	extern __inline __m256i
2757 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2758 	_mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
2759 	{
2760 	  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si)
2761 								   __A,
2762 								   (__v8si)
2763 								   _mm256_setzero_si256 (),
2764 								   __M);
2765 	}
2766 	
2767 	extern __inline __m256i
2768 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2769 	_mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2770 	{
2771 	  return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2772 							    (__v8si) __W,
2773 							    (__mmask8) __U);
2774 	}
2775 	
2776 	extern __inline __m256i
2777 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2778 	_mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2779 	{
2780 	  return (__m256i) __builtin_ia32_pmovsxbd256_mask ((__v16qi) __A,
2781 							    (__v8si)
2782 							    _mm256_setzero_si256 (),
2783 							    (__mmask8) __U);
2784 	}
2785 	
2786 	extern __inline __m128i
2787 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2788 	_mm_mask_cvtepi8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2789 	{
2790 	  return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2791 							    (__v4si) __W,
2792 							    (__mmask8) __U);
2793 	}
2794 	
2795 	extern __inline __m128i
2796 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2797 	_mm_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
2798 	{
2799 	  return (__m128i) __builtin_ia32_pmovsxbd128_mask ((__v16qi) __A,
2800 							    (__v4si)
2801 							    _mm_setzero_si128 (),
2802 							    (__mmask8) __U);
2803 	}
2804 	
2805 	extern __inline __m256i
2806 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2807 	_mm256_mask_cvtepi8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2808 	{
2809 	  return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2810 							    (__v4di) __W,
2811 							    (__mmask8) __U);
2812 	}
2813 	
2814 	extern __inline __m256i
2815 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2816 	_mm256_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2817 	{
2818 	  return (__m256i) __builtin_ia32_pmovsxbq256_mask ((__v16qi) __A,
2819 							    (__v4di)
2820 							    _mm256_setzero_si256 (),
2821 							    (__mmask8) __U);
2822 	}
2823 	
2824 	extern __inline __m128i
2825 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2826 	_mm_mask_cvtepi8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2827 	{
2828 	  return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2829 							    (__v2di) __W,
2830 							    (__mmask8) __U);
2831 	}
2832 	
2833 	extern __inline __m128i
2834 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2835 	_mm_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
2836 	{
2837 	  return (__m128i) __builtin_ia32_pmovsxbq128_mask ((__v16qi) __A,
2838 							    (__v2di)
2839 							    _mm_setzero_si128 (),
2840 							    (__mmask8) __U);
2841 	}
2842 	
2843 	extern __inline __m256i
2844 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2845 	_mm256_mask_cvtepi16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2846 	{
2847 	  return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2848 							    (__v8si) __W,
2849 							    (__mmask8) __U);
2850 	}
2851 	
2852 	extern __inline __m256i
2853 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2854 	_mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2855 	{
2856 	  return (__m256i) __builtin_ia32_pmovsxwd256_mask ((__v8hi) __A,
2857 							    (__v8si)
2858 							    _mm256_setzero_si256 (),
2859 							    (__mmask8) __U);
2860 	}
2861 	
2862 	extern __inline __m128i
2863 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2864 	_mm_mask_cvtepi16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2865 	{
2866 	  return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2867 							    (__v4si) __W,
2868 							    (__mmask8) __U);
2869 	}
2870 	
2871 	extern __inline __m128i
2872 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2873 	_mm_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
2874 	{
2875 	  return (__m128i) __builtin_ia32_pmovsxwd128_mask ((__v8hi) __A,
2876 							    (__v4si)
2877 							    _mm_setzero_si128 (),
2878 							    (__mmask8) __U);
2879 	}
2880 	
2881 	extern __inline __m256i
2882 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2883 	_mm256_mask_cvtepi16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2884 	{
2885 	  return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2886 							    (__v4di) __W,
2887 							    (__mmask8) __U);
2888 	}
2889 	
2890 	extern __inline __m256i
2891 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2892 	_mm256_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2893 	{
2894 	  return (__m256i) __builtin_ia32_pmovsxwq256_mask ((__v8hi) __A,
2895 							    (__v4di)
2896 							    _mm256_setzero_si256 (),
2897 							    (__mmask8) __U);
2898 	}
2899 	
2900 	extern __inline __m128i
2901 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2902 	_mm_mask_cvtepi16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
2903 	{
2904 	  return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2905 							    (__v2di) __W,
2906 							    (__mmask8) __U);
2907 	}
2908 	
2909 	extern __inline __m128i
2910 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2911 	_mm_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
2912 	{
2913 	  return (__m128i) __builtin_ia32_pmovsxwq128_mask ((__v8hi) __A,
2914 							    (__v2di)
2915 							    _mm_setzero_si128 (),
2916 							    (__mmask8) __U);
2917 	}
2918 	
2919 	extern __inline __m256i
2920 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2921 	_mm256_mask_cvtepi32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
2922 	{
2923 	  return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2924 							    (__v4di) __W,
2925 							    (__mmask8) __U);
2926 	}
2927 	
2928 	extern __inline __m256i
2929 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2930 	_mm256_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2931 	{
2932 	  return (__m256i) __builtin_ia32_pmovsxdq256_mask ((__v4si) __X,
2933 							    (__v4di)
2934 							    _mm256_setzero_si256 (),
2935 							    (__mmask8) __U);
2936 	}
2937 	
2938 	extern __inline __m128i
2939 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2940 	_mm_mask_cvtepi32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
2941 	{
2942 	  return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2943 							    (__v2di) __W,
2944 							    (__mmask8) __U);
2945 	}
2946 	
2947 	extern __inline __m128i
2948 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2949 	_mm_maskz_cvtepi32_epi64 (__mmask8 __U, __m128i __X)
2950 	{
2951 	  return (__m128i) __builtin_ia32_pmovsxdq128_mask ((__v4si) __X,
2952 							    (__v2di)
2953 							    _mm_setzero_si128 (),
2954 							    (__mmask8) __U);
2955 	}
2956 	
2957 	extern __inline __m256i
2958 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2959 	_mm256_mask_cvtepu8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
2960 	{
2961 	  return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2962 							    (__v8si) __W,
2963 							    (__mmask8) __U);
2964 	}
2965 	
2966 	extern __inline __m256i
2967 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2968 	_mm256_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2969 	{
2970 	  return (__m256i) __builtin_ia32_pmovzxbd256_mask ((__v16qi) __A,
2971 							    (__v8si)
2972 							    _mm256_setzero_si256 (),
2973 							    (__mmask8) __U);
2974 	}
2975 	
2976 	extern __inline __m128i
2977 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2978 	_mm_mask_cvtepu8_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
2979 	{
2980 	  return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
2981 							    (__v4si) __W,
2982 							    (__mmask8) __U);
2983 	}
2984 	
2985 	extern __inline __m128i
2986 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2987 	_mm_maskz_cvtepu8_epi32 (__mmask8 __U, __m128i __A)
2988 	{
2989 	  return (__m128i) __builtin_ia32_pmovzxbd128_mask ((__v16qi) __A,
2990 							    (__v4si)
2991 							    _mm_setzero_si128 (),
2992 							    (__mmask8) __U);
2993 	}
2994 	
2995 	extern __inline __m256i
2996 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2997 	_mm256_mask_cvtepu8_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
2998 	{
2999 	  return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
3000 							    (__v4di) __W,
3001 							    (__mmask8) __U);
3002 	}
3003 	
3004 	extern __inline __m256i
3005 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3006 	_mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3007 	{
3008 	  return (__m256i) __builtin_ia32_pmovzxbq256_mask ((__v16qi) __A,
3009 							    (__v4di)
3010 							    _mm256_setzero_si256 (),
3011 							    (__mmask8) __U);
3012 	}
3013 	
3014 	extern __inline __m128i
3015 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3016 	_mm_mask_cvtepu8_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3017 	{
3018 	  return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3019 							    (__v2di) __W,
3020 							    (__mmask8) __U);
3021 	}
3022 	
3023 	extern __inline __m128i
3024 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3025 	_mm_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
3026 	{
3027 	  return (__m128i) __builtin_ia32_pmovzxbq128_mask ((__v16qi) __A,
3028 							    (__v2di)
3029 							    _mm_setzero_si128 (),
3030 							    (__mmask8) __U);
3031 	}
3032 	
3033 	extern __inline __m256i
3034 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3035 	_mm256_mask_cvtepu16_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3036 	{
3037 	  return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3038 							    (__v8si) __W,
3039 							    (__mmask8) __U);
3040 	}
3041 	
3042 	extern __inline __m256i
3043 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3044 	_mm256_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3045 	{
3046 	  return (__m256i) __builtin_ia32_pmovzxwd256_mask ((__v8hi) __A,
3047 							    (__v8si)
3048 							    _mm256_setzero_si256 (),
3049 							    (__mmask8) __U);
3050 	}
3051 	
3052 	extern __inline __m128i
3053 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3054 	_mm_mask_cvtepu16_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
3055 	{
3056 	  return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3057 							    (__v4si) __W,
3058 							    (__mmask8) __U);
3059 	}
3060 	
3061 	extern __inline __m128i
3062 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3063 	_mm_maskz_cvtepu16_epi32 (__mmask8 __U, __m128i __A)
3064 	{
3065 	  return (__m128i) __builtin_ia32_pmovzxwd128_mask ((__v8hi) __A,
3066 							    (__v4si)
3067 							    _mm_setzero_si128 (),
3068 							    (__mmask8) __U);
3069 	}
3070 	
3071 	extern __inline __m256i
3072 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3073 	_mm256_mask_cvtepu16_epi64 (__m256i __W, __mmask8 __U, __m128i __A)
3074 	{
3075 	  return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3076 							    (__v4di) __W,
3077 							    (__mmask8) __U);
3078 	}
3079 	
3080 	extern __inline __m256i
3081 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3082 	_mm256_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3083 	{
3084 	  return (__m256i) __builtin_ia32_pmovzxwq256_mask ((__v8hi) __A,
3085 							    (__v4di)
3086 							    _mm256_setzero_si256 (),
3087 							    (__mmask8) __U);
3088 	}
3089 	
3090 	extern __inline __m128i
3091 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3092 	_mm_mask_cvtepu16_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
3093 	{
3094 	  return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3095 							    (__v2di) __W,
3096 							    (__mmask8) __U);
3097 	}
3098 	
3099 	extern __inline __m128i
3100 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3101 	_mm_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
3102 	{
3103 	  return (__m128i) __builtin_ia32_pmovzxwq128_mask ((__v8hi) __A,
3104 							    (__v2di)
3105 							    _mm_setzero_si128 (),
3106 							    (__mmask8) __U);
3107 	}
3108 	
3109 	extern __inline __m256i
3110 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3111 	_mm256_mask_cvtepu32_epi64 (__m256i __W, __mmask8 __U, __m128i __X)
3112 	{
3113 	  return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3114 							    (__v4di) __W,
3115 							    (__mmask8) __U);
3116 	}
3117 	
3118 	extern __inline __m256i
3119 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3120 	_mm256_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3121 	{
3122 	  return (__m256i) __builtin_ia32_pmovzxdq256_mask ((__v4si) __X,
3123 							    (__v4di)
3124 							    _mm256_setzero_si256 (),
3125 							    (__mmask8) __U);
3126 	}
3127 	
3128 	extern __inline __m128i
3129 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3130 	_mm_mask_cvtepu32_epi64 (__m128i __W, __mmask8 __U, __m128i __X)
3131 	{
3132 	  return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3133 							    (__v2di) __W,
3134 							    (__mmask8) __U);
3135 	}
3136 	
3137 	extern __inline __m128i
3138 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3139 	_mm_maskz_cvtepu32_epi64 (__mmask8 __U, __m128i __X)
3140 	{
3141 	  return (__m128i) __builtin_ia32_pmovzxdq128_mask ((__v4si) __X,
3142 							    (__v2di)
3143 							    _mm_setzero_si128 (),
3144 							    (__mmask8) __U);
3145 	}
3146 	
3147 	extern __inline __m256d
3148 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3149 	_mm256_rcp14_pd (__m256d __A)
3150 	{
3151 	  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3152 						      (__v4df)
3153 						      _mm256_setzero_pd (),
3154 						      (__mmask8) -1);
3155 	}
3156 	
3157 	extern __inline __m256d
3158 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3159 	_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3160 	{
3161 	  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3162 						      (__v4df) __W,
3163 						      (__mmask8) __U);
3164 	}
3165 	
3166 	extern __inline __m256d
3167 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3168 	_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
3169 	{
3170 	  return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
3171 						      (__v4df)
3172 						      _mm256_setzero_pd (),
3173 						      (__mmask8) __U);
3174 	}
3175 	
3176 	extern __inline __m128d
3177 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3178 	_mm_rcp14_pd (__m128d __A)
3179 	{
3180 	  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3181 						      (__v2df)
3182 						      _mm_setzero_pd (),
3183 						      (__mmask8) -1);
3184 	}
3185 	
3186 	extern __inline __m128d
3187 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3188 	_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3189 	{
3190 	  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3191 						      (__v2df) __W,
3192 						      (__mmask8) __U);
3193 	}
3194 	
3195 	extern __inline __m128d
3196 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3197 	_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
3198 	{
3199 	  return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
3200 						      (__v2df)
3201 						      _mm_setzero_pd (),
3202 						      (__mmask8) __U);
3203 	}
3204 	
3205 	extern __inline __m256
3206 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3207 	_mm256_rcp14_ps (__m256 __A)
3208 	{
3209 	  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3210 						     (__v8sf)
3211 						     _mm256_setzero_ps (),
3212 						     (__mmask8) -1);
3213 	}
3214 	
3215 	extern __inline __m256
3216 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3217 	_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3218 	{
3219 	  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3220 						     (__v8sf) __W,
3221 						     (__mmask8) __U);
3222 	}
3223 	
3224 	extern __inline __m256
3225 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3226 	_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
3227 	{
3228 	  return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
3229 						     (__v8sf)
3230 						     _mm256_setzero_ps (),
3231 						     (__mmask8) __U);
3232 	}
3233 	
3234 	extern __inline __m128
3235 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3236 	_mm_rcp14_ps (__m128 __A)
3237 	{
3238 	  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3239 						     (__v4sf)
3240 						     _mm_setzero_ps (),
3241 						     (__mmask8) -1);
3242 	}
3243 	
3244 	extern __inline __m128
3245 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3246 	_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3247 	{
3248 	  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3249 						     (__v4sf) __W,
3250 						     (__mmask8) __U);
3251 	}
3252 	
3253 	extern __inline __m128
3254 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3255 	_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
3256 	{
3257 	  return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
3258 						     (__v4sf)
3259 						     _mm_setzero_ps (),
3260 						     (__mmask8) __U);
3261 	}
3262 	
3263 	extern __inline __m256d
3264 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3265 	_mm256_rsqrt14_pd (__m256d __A)
3266 	{
3267 	  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3268 							     (__v4df)
3269 							     _mm256_setzero_pd (),
3270 							     (__mmask8) -1);
3271 	}
3272 	
3273 	extern __inline __m256d
3274 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3275 	_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
3276 	{
3277 	  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3278 							     (__v4df) __W,
3279 							     (__mmask8) __U);
3280 	}
3281 	
3282 	extern __inline __m256d
3283 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3284 	_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
3285 	{
3286 	  return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
3287 							     (__v4df)
3288 							     _mm256_setzero_pd (),
3289 							     (__mmask8) __U);
3290 	}
3291 	
3292 	extern __inline __m128d
3293 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3294 	_mm_rsqrt14_pd (__m128d __A)
3295 	{
3296 	  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3297 							     (__v2df)
3298 							     _mm_setzero_pd (),
3299 							     (__mmask8) -1);
3300 	}
3301 	
3302 	extern __inline __m128d
3303 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3304 	_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
3305 	{
3306 	  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3307 							     (__v2df) __W,
3308 							     (__mmask8) __U);
3309 	}
3310 	
3311 	extern __inline __m128d
3312 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3313 	_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
3314 	{
3315 	  return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
3316 							     (__v2df)
3317 							     _mm_setzero_pd (),
3318 							     (__mmask8) __U);
3319 	}
3320 	
3321 	extern __inline __m256
3322 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3323 	_mm256_rsqrt14_ps (__m256 __A)
3324 	{
3325 	  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3326 							    (__v8sf)
3327 							    _mm256_setzero_ps (),
3328 							    (__mmask8) -1);
3329 	}
3330 	
3331 	extern __inline __m256
3332 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3333 	_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
3334 	{
3335 	  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3336 							    (__v8sf) __W,
3337 							    (__mmask8) __U);
3338 	}
3339 	
3340 	extern __inline __m256
3341 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3342 	_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
3343 	{
3344 	  return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
3345 							    (__v8sf)
3346 							    _mm256_setzero_ps (),
3347 							    (__mmask8) __U);
3348 	}
3349 	
3350 	extern __inline __m128
3351 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3352 	_mm_rsqrt14_ps (__m128 __A)
3353 	{
3354 	  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3355 							    (__v4sf)
3356 							    _mm_setzero_ps (),
3357 							    (__mmask8) -1);
3358 	}
3359 	
3360 	extern __inline __m128
3361 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3362 	_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
3363 	{
3364 	  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3365 							    (__v4sf) __W,
3366 							    (__mmask8) __U);
3367 	}
3368 	
3369 	extern __inline __m128
3370 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3371 	_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
3372 	{
3373 	  return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
3374 							    (__v4sf)
3375 							    _mm_setzero_ps (),
3376 							    (__mmask8) __U);
3377 	}
3378 	
3379 	extern __inline __m256d
3380 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3381 	_mm256_mask_sqrt_pd (__m256d __W, __mmask8 __U, __m256d __A)
3382 	{
3383 	  return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3384 							  (__v4df) __W,
3385 							  (__mmask8) __U);
3386 	}
3387 	
3388 	extern __inline __m256d
3389 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3390 	_mm256_maskz_sqrt_pd (__mmask8 __U, __m256d __A)
3391 	{
3392 	  return (__m256d) __builtin_ia32_sqrtpd256_mask ((__v4df) __A,
3393 							  (__v4df)
3394 							  _mm256_setzero_pd (),
3395 							  (__mmask8) __U);
3396 	}
3397 	
3398 	extern __inline __m128d
3399 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3400 	_mm_mask_sqrt_pd (__m128d __W, __mmask8 __U, __m128d __A)
3401 	{
3402 	  return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3403 							  (__v2df) __W,
3404 							  (__mmask8) __U);
3405 	}
3406 	
3407 	extern __inline __m128d
3408 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3409 	_mm_maskz_sqrt_pd (__mmask8 __U, __m128d __A)
3410 	{
3411 	  return (__m128d) __builtin_ia32_sqrtpd128_mask ((__v2df) __A,
3412 							  (__v2df)
3413 							  _mm_setzero_pd (),
3414 							  (__mmask8) __U);
3415 	}
3416 	
3417 	extern __inline __m256
3418 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3419 	_mm256_mask_sqrt_ps (__m256 __W, __mmask8 __U, __m256 __A)
3420 	{
3421 	  return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3422 							 (__v8sf) __W,
3423 							 (__mmask8) __U);
3424 	}
3425 	
3426 	extern __inline __m256
3427 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3428 	_mm256_maskz_sqrt_ps (__mmask8 __U, __m256 __A)
3429 	{
3430 	  return (__m256) __builtin_ia32_sqrtps256_mask ((__v8sf) __A,
3431 							 (__v8sf)
3432 							 _mm256_setzero_ps (),
3433 							 (__mmask8) __U);
3434 	}
3435 	
3436 	extern __inline __m128
3437 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3438 	_mm_mask_sqrt_ps (__m128 __W, __mmask8 __U, __m128 __A)
3439 	{
3440 	  return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3441 							 (__v4sf) __W,
3442 							 (__mmask8) __U);
3443 	}
3444 	
3445 	extern __inline __m128
3446 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3447 	_mm_maskz_sqrt_ps (__mmask8 __U, __m128 __A)
3448 	{
3449 	  return (__m128) __builtin_ia32_sqrtps128_mask ((__v4sf) __A,
3450 							 (__v4sf)
3451 							 _mm_setzero_ps (),
3452 							 (__mmask8) __U);
3453 	}
3454 	
3455 	extern __inline __m256i
3456 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3457 	_mm256_mask_add_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3458 			       __m256i __B)
3459 	{
3460 	  return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3461 							 (__v8si) __B,
3462 							 (__v8si) __W,
3463 							 (__mmask8) __U);
3464 	}
3465 	
3466 	extern __inline __m256i
3467 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3468 	_mm256_maskz_add_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3469 	{
3470 	  return (__m256i) __builtin_ia32_paddd256_mask ((__v8si) __A,
3471 							 (__v8si) __B,
3472 							 (__v8si)
3473 							 _mm256_setzero_si256 (),
3474 							 (__mmask8) __U);
3475 	}
3476 	
3477 	extern __inline __m256i
3478 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3479 	_mm256_mask_add_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3480 			       __m256i __B)
3481 	{
3482 	  return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3483 							 (__v4di) __B,
3484 							 (__v4di) __W,
3485 							 (__mmask8) __U);
3486 	}
3487 	
3488 	extern __inline __m256i
3489 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3490 	_mm256_maskz_add_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3491 	{
3492 	  return (__m256i) __builtin_ia32_paddq256_mask ((__v4di) __A,
3493 							 (__v4di) __B,
3494 							 (__v4di)
3495 							 _mm256_setzero_si256 (),
3496 							 (__mmask8) __U);
3497 	}
3498 	
3499 	extern __inline __m256i
3500 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3501 	_mm256_mask_sub_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3502 			       __m256i __B)
3503 	{
3504 	  return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3505 							 (__v8si) __B,
3506 							 (__v8si) __W,
3507 							 (__mmask8) __U);
3508 	}
3509 	
3510 	extern __inline __m256i
3511 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3512 	_mm256_maskz_sub_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3513 	{
3514 	  return (__m256i) __builtin_ia32_psubd256_mask ((__v8si) __A,
3515 							 (__v8si) __B,
3516 							 (__v8si)
3517 							 _mm256_setzero_si256 (),
3518 							 (__mmask8) __U);
3519 	}
3520 	
3521 	extern __inline __m256i
3522 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3523 	_mm256_mask_sub_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3524 			       __m256i __B)
3525 	{
3526 	  return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3527 							 (__v4di) __B,
3528 							 (__v4di) __W,
3529 							 (__mmask8) __U);
3530 	}
3531 	
3532 	extern __inline __m256i
3533 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3534 	_mm256_maskz_sub_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
3535 	{
3536 	  return (__m256i) __builtin_ia32_psubq256_mask ((__v4di) __A,
3537 							 (__v4di) __B,
3538 							 (__v4di)
3539 							 _mm256_setzero_si256 (),
3540 							 (__mmask8) __U);
3541 	}
3542 	
3543 	extern __inline __m128i
3544 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3545 	_mm_mask_add_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3546 			    __m128i __B)
3547 	{
3548 	  return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3549 							 (__v4si) __B,
3550 							 (__v4si) __W,
3551 							 (__mmask8) __U);
3552 	}
3553 	
3554 	extern __inline __m128i
3555 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3556 	_mm_maskz_add_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3557 	{
3558 	  return (__m128i) __builtin_ia32_paddd128_mask ((__v4si) __A,
3559 							 (__v4si) __B,
3560 							 (__v4si)
3561 							 _mm_setzero_si128 (),
3562 							 (__mmask8) __U);
3563 	}
3564 	
3565 	extern __inline __m128i
3566 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3567 	_mm_mask_add_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3568 			    __m128i __B)
3569 	{
3570 	  return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3571 							 (__v2di) __B,
3572 							 (__v2di) __W,
3573 							 (__mmask8) __U);
3574 	}
3575 	
3576 	extern __inline __m128i
3577 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3578 	_mm_maskz_add_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3579 	{
3580 	  return (__m128i) __builtin_ia32_paddq128_mask ((__v2di) __A,
3581 							 (__v2di) __B,
3582 							 (__v2di)
3583 							 _mm_setzero_si128 (),
3584 							 (__mmask8) __U);
3585 	}
3586 	
3587 	extern __inline __m128i
3588 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3589 	_mm_mask_sub_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3590 			    __m128i __B)
3591 	{
3592 	  return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3593 							 (__v4si) __B,
3594 							 (__v4si) __W,
3595 							 (__mmask8) __U);
3596 	}
3597 	
3598 	extern __inline __m128i
3599 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3600 	_mm_maskz_sub_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3601 	{
3602 	  return (__m128i) __builtin_ia32_psubd128_mask ((__v4si) __A,
3603 							 (__v4si) __B,
3604 							 (__v4si)
3605 							 _mm_setzero_si128 (),
3606 							 (__mmask8) __U);
3607 	}
3608 	
3609 	extern __inline __m128i
3610 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3611 	_mm_mask_sub_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3612 			    __m128i __B)
3613 	{
3614 	  return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3615 							 (__v2di) __B,
3616 							 (__v2di) __W,
3617 							 (__mmask8) __U);
3618 	}
3619 	
3620 	extern __inline __m128i
3621 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3622 	_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3623 	{
3624 	  return (__m128i) __builtin_ia32_psubq128_mask ((__v2di) __A,
3625 							 (__v2di) __B,
3626 							 (__v2di)
3627 							 _mm_setzero_si128 (),
3628 							 (__mmask8) __U);
3629 	}
3630 	
3631 	extern __inline __m256
3632 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3633 	_mm256_getexp_ps (__m256 __A)
3634 	{
3635 	  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3636 							   (__v8sf)
3637 							   _mm256_setzero_ps (),
3638 							   (__mmask8) -1);
3639 	}
3640 	
3641 	extern __inline __m256
3642 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3643 	_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A)
3644 	{
3645 	  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3646 							   (__v8sf) __W,
3647 							   (__mmask8) __U);
3648 	}
3649 	
3650 	extern __inline __m256
3651 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3652 	_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A)
3653 	{
3654 	  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
3655 							   (__v8sf)
3656 							   _mm256_setzero_ps (),
3657 							   (__mmask8) __U);
3658 	}
3659 	
3660 	extern __inline __m256d
3661 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3662 	_mm256_getexp_pd (__m256d __A)
3663 	{
3664 	  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3665 							    (__v4df)
3666 							    _mm256_setzero_pd (),
3667 							    (__mmask8) -1);
3668 	}
3669 	
3670 	extern __inline __m256d
3671 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3672 	_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A)
3673 	{
3674 	  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3675 							    (__v4df) __W,
3676 							    (__mmask8) __U);
3677 	}
3678 	
3679 	extern __inline __m256d
3680 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3681 	_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A)
3682 	{
3683 	  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
3684 							    (__v4df)
3685 							    _mm256_setzero_pd (),
3686 							    (__mmask8) __U);
3687 	}
3688 	
3689 	extern __inline __m128
3690 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3691 	_mm_getexp_ps (__m128 __A)
3692 	{
3693 	  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3694 							   (__v4sf)
3695 							   _mm_setzero_ps (),
3696 							   (__mmask8) -1);
3697 	}
3698 	
3699 	extern __inline __m128
3700 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3701 	_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A)
3702 	{
3703 	  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3704 							   (__v4sf) __W,
3705 							   (__mmask8) __U);
3706 	}
3707 	
3708 	extern __inline __m128
3709 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3710 	_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A)
3711 	{
3712 	  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
3713 							   (__v4sf)
3714 							   _mm_setzero_ps (),
3715 							   (__mmask8) __U);
3716 	}
3717 	
3718 	extern __inline __m128d
3719 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3720 	_mm_getexp_pd (__m128d __A)
3721 	{
3722 	  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3723 							    (__v2df)
3724 							    _mm_setzero_pd (),
3725 							    (__mmask8) -1);
3726 	}
3727 	
3728 	extern __inline __m128d
3729 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3730 	_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A)
3731 	{
3732 	  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3733 							    (__v2df) __W,
3734 							    (__mmask8) __U);
3735 	}
3736 	
3737 	extern __inline __m128d
3738 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3739 	_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A)
3740 	{
3741 	  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
3742 							    (__v2df)
3743 							    _mm_setzero_pd (),
3744 							    (__mmask8) __U);
3745 	}
3746 	
3747 	extern __inline __m256i
3748 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3749 	_mm256_mask_srl_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3750 			       __m128i __B)
3751 	{
3752 	  return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3753 							 (__v4si) __B,
3754 							 (__v8si) __W,
3755 							 (__mmask8) __U);
3756 	}
3757 	
3758 	extern __inline __m256i
3759 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3760 	_mm256_maskz_srl_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
3761 	{
3762 	  return (__m256i) __builtin_ia32_psrld256_mask ((__v8si) __A,
3763 							 (__v4si) __B,
3764 							 (__v8si)
3765 							 _mm256_setzero_si256 (),
3766 							 (__mmask8) __U);
3767 	}
3768 	
3769 	extern __inline __m128i
3770 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3771 	_mm_mask_srl_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
3772 			    __m128i __B)
3773 	{
3774 	  return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3775 							 (__v4si) __B,
3776 							 (__v4si) __W,
3777 							 (__mmask8) __U);
3778 	}
3779 	
3780 	extern __inline __m128i
3781 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3782 	_mm_maskz_srl_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
3783 	{
3784 	  return (__m128i) __builtin_ia32_psrld128_mask ((__v4si) __A,
3785 							 (__v4si) __B,
3786 							 (__v4si)
3787 							 _mm_setzero_si128 (),
3788 							 (__mmask8) __U);
3789 	}
3790 	
3791 	extern __inline __m256i
3792 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3793 	_mm256_mask_srl_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
3794 			       __m128i __B)
3795 	{
3796 	  return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3797 							 (__v2di) __B,
3798 							 (__v4di) __W,
3799 							 (__mmask8) __U);
3800 	}
3801 	
3802 	extern __inline __m256i
3803 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3804 	_mm256_maskz_srl_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
3805 	{
3806 	  return (__m256i) __builtin_ia32_psrlq256_mask ((__v4di) __A,
3807 							 (__v2di) __B,
3808 							 (__v4di)
3809 							 _mm256_setzero_si256 (),
3810 							 (__mmask8) __U);
3811 	}
3812 	
3813 	extern __inline __m128i
3814 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3815 	_mm_mask_srl_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
3816 			    __m128i __B)
3817 	{
3818 	  return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3819 							 (__v2di) __B,
3820 							 (__v2di) __W,
3821 							 (__mmask8) __U);
3822 	}
3823 	
3824 	extern __inline __m128i
3825 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3826 	_mm_maskz_srl_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
3827 	{
3828 	  return (__m128i) __builtin_ia32_psrlq128_mask ((__v2di) __A,
3829 							 (__v2di) __B,
3830 							 (__v2di)
3831 							 _mm_setzero_si128 (),
3832 							 (__mmask8) __U);
3833 	}
3834 	
3835 	extern __inline __m256i
3836 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3837 	_mm256_mask_and_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
3838 			       __m256i __B)
3839 	{
3840 	  return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3841 							 (__v8si) __B,
3842 							 (__v8si) __W,
3843 							 (__mmask8) __U);
3844 	}
3845 	
3846 	extern __inline __m256i
3847 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3848 	_mm256_maskz_and_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
3849 	{
3850 	  return (__m256i) __builtin_ia32_pandd256_mask ((__v8si) __A,
3851 							 (__v8si) __B,
3852 							 (__v8si)
3853 							 _mm256_setzero_si256 (),
3854 							 (__mmask8) __U);
3855 	}
3856 	
3857 	extern __inline __m256d
3858 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3859 	_mm256_scalef_pd (__m256d __A, __m256d __B)
3860 	{
3861 	  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3862 							    (__v4df) __B,
3863 							    (__v4df)
3864 							    _mm256_setzero_pd (),
3865 							    (__mmask8) -1);
3866 	}
3867 	
3868 	extern __inline __m256d
3869 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3870 	_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3871 			       __m256d __B)
3872 	{
3873 	  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3874 							    (__v4df) __B,
3875 							    (__v4df) __W,
3876 							    (__mmask8) __U);
3877 	}
3878 	
3879 	extern __inline __m256d
3880 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3881 	_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B)
3882 	{
3883 	  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3884 							    (__v4df) __B,
3885 							    (__v4df)
3886 							    _mm256_setzero_pd (),
3887 							    (__mmask8) __U);
3888 	}
3889 	
3890 	extern __inline __m256
3891 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3892 	_mm256_scalef_ps (__m256 __A, __m256 __B)
3893 	{
3894 	  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3895 							   (__v8sf) __B,
3896 							   (__v8sf)
3897 							   _mm256_setzero_ps (),
3898 							   (__mmask8) -1);
3899 	}
3900 	
3901 	extern __inline __m256
3902 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3903 	_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3904 			       __m256 __B)
3905 	{
3906 	  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3907 							   (__v8sf) __B,
3908 							   (__v8sf) __W,
3909 							   (__mmask8) __U);
3910 	}
3911 	
3912 	extern __inline __m256
3913 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3914 	_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B)
3915 	{
3916 	  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3917 							   (__v8sf) __B,
3918 							   (__v8sf)
3919 							   _mm256_setzero_ps (),
3920 							   (__mmask8) __U);
3921 	}
3922 	
3923 	extern __inline __m128d
3924 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3925 	_mm_scalef_pd (__m128d __A, __m128d __B)
3926 	{
3927 	  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3928 							    (__v2df) __B,
3929 							    (__v2df)
3930 							    _mm_setzero_pd (),
3931 							    (__mmask8) -1);
3932 	}
3933 	
3934 	extern __inline __m128d
3935 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3936 	_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3937 			    __m128d __B)
3938 	{
3939 	  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3940 							    (__v2df) __B,
3941 							    (__v2df) __W,
3942 							    (__mmask8) __U);
3943 	}
3944 	
3945 	extern __inline __m128d
3946 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3947 	_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B)
3948 	{
3949 	  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3950 							    (__v2df) __B,
3951 							    (__v2df)
3952 							    _mm_setzero_pd (),
3953 							    (__mmask8) __U);
3954 	}
3955 	
3956 	extern __inline __m128
3957 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3958 	_mm_scalef_ps (__m128 __A, __m128 __B)
3959 	{
3960 	  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3961 							   (__v4sf) __B,
3962 							   (__v4sf)
3963 							   _mm_setzero_ps (),
3964 							   (__mmask8) -1);
3965 	}
3966 	
3967 	extern __inline __m128
3968 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3969 	_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
3970 	{
3971 	  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3972 							   (__v4sf) __B,
3973 							   (__v4sf) __W,
3974 							   (__mmask8) __U);
3975 	}
3976 	
3977 	extern __inline __m128
3978 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3979 	_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B)
3980 	{
3981 	  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3982 							   (__v4sf) __B,
3983 							   (__v4sf)
3984 							   _mm_setzero_ps (),
3985 							   (__mmask8) __U);
3986 	}
3987 	
3988 	extern __inline __m256d
3989 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3990 	_mm256_mask_fmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
3991 			      __m256d __C)
3992 	{
3993 	  return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
3994 							    (__v4df) __B,
3995 							    (__v4df) __C,
3996 							    (__mmask8) __U);
3997 	}
3998 	
3999 	extern __inline __m256d
4000 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4001 	_mm256_mask3_fmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4002 			       __mmask8 __U)
4003 	{
4004 	  return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
4005 							     (__v4df) __B,
4006 							     (__v4df) __C,
4007 							     (__mmask8) __U);
4008 	}
4009 	
4010 	extern __inline __m256d
4011 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4012 	_mm256_maskz_fmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4013 			       __m256d __C)
4014 	{
4015 	  return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
4016 							     (__v4df) __B,
4017 							     (__v4df) __C,
4018 							     (__mmask8) __U);
4019 	}
4020 	
4021 	extern __inline __m128d
4022 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4023 	_mm_mask_fmadd_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4024 	{
4025 	  return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
4026 							    (__v2df) __B,
4027 							    (__v2df) __C,
4028 							    (__mmask8) __U);
4029 	}
4030 	
4031 	extern __inline __m128d
4032 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4033 	_mm_mask3_fmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4034 			    __mmask8 __U)
4035 	{
4036 	  return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
4037 							     (__v2df) __B,
4038 							     (__v2df) __C,
4039 							     (__mmask8) __U);
4040 	}
4041 	
4042 	extern __inline __m128d
4043 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4044 	_mm_maskz_fmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4045 			    __m128d __C)
4046 	{
4047 	  return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
4048 							     (__v2df) __B,
4049 							     (__v2df) __C,
4050 							     (__mmask8) __U);
4051 	}
4052 	
4053 	extern __inline __m256
4054 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4055 	_mm256_mask_fmadd_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4056 	{
4057 	  return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
4058 							   (__v8sf) __B,
4059 							   (__v8sf) __C,
4060 							   (__mmask8) __U);
4061 	}
4062 	
4063 	extern __inline __m256
4064 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4065 	_mm256_mask3_fmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4066 			       __mmask8 __U)
4067 	{
4068 	  return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
4069 							    (__v8sf) __B,
4070 							    (__v8sf) __C,
4071 							    (__mmask8) __U);
4072 	}
4073 	
4074 	extern __inline __m256
4075 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4076 	_mm256_maskz_fmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4077 			       __m256 __C)
4078 	{
4079 	  return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
4080 							    (__v8sf) __B,
4081 							    (__v8sf) __C,
4082 							    (__mmask8) __U);
4083 	}
4084 	
4085 	extern __inline __m128
4086 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4087 	_mm_mask_fmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4088 	{
4089 	  return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
4090 							   (__v4sf) __B,
4091 							   (__v4sf) __C,
4092 							   (__mmask8) __U);
4093 	}
4094 	
4095 	extern __inline __m128
4096 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4097 	_mm_mask3_fmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4098 	{
4099 	  return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
4100 							    (__v4sf) __B,
4101 							    (__v4sf) __C,
4102 							    (__mmask8) __U);
4103 	}
4104 	
4105 	extern __inline __m128
4106 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4107 	_mm_maskz_fmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4108 	{
4109 	  return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
4110 							    (__v4sf) __B,
4111 							    (__v4sf) __C,
4112 							    (__mmask8) __U);
4113 	}
4114 	
4115 	extern __inline __m256d
4116 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4117 	_mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4118 			      __m256d __C)
4119 	{
4120 	  return (__m256d) __builtin_ia32_vfmsubpd256_mask ((__v4df) __A,
4121 							    (__v4df) __B,
4122 							    (__v4df) __C,
4123 							    (__mmask8) __U);
4124 	}
4125 	
4126 	extern __inline __m256d
4127 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4128 	_mm256_mask3_fmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4129 			       __mmask8 __U)
4130 	{
4131 	  return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
4132 							     (__v4df) __B,
4133 							     (__v4df) __C,
4134 							     (__mmask8) __U);
4135 	}
4136 	
4137 	extern __inline __m256d
4138 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4139 	_mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4140 			       __m256d __C)
4141 	{
4142 	  return (__m256d) __builtin_ia32_vfmsubpd256_maskz ((__v4df) __A,
4143 							     (__v4df) __B,
4144 							     (__v4df) __C,
4145 							     (__mmask8) __U);
4146 	}
4147 	
4148 	extern __inline __m128d
4149 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4150 	_mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
4151 	{
4152 	  return (__m128d) __builtin_ia32_vfmsubpd128_mask ((__v2df) __A,
4153 							    (__v2df) __B,
4154 							    (__v2df) __C,
4155 							    (__mmask8) __U);
4156 	}
4157 	
4158 	extern __inline __m128d
4159 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4160 	_mm_mask3_fmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4161 			    __mmask8 __U)
4162 	{
4163 	  return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
4164 							     (__v2df) __B,
4165 							     (__v2df) __C,
4166 							     (__mmask8) __U);
4167 	}
4168 	
4169 	extern __inline __m128d
4170 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4171 	_mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4172 			    __m128d __C)
4173 	{
4174 	  return (__m128d) __builtin_ia32_vfmsubpd128_maskz ((__v2df) __A,
4175 							     (__v2df) __B,
4176 							     (__v2df) __C,
4177 							     (__mmask8) __U);
4178 	}
4179 	
4180 	extern __inline __m256
4181 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4182 	_mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
4183 	{
(1) Event bad_cast: invalid type conversion
(2) Event caretline: ^
4184 	  return (__m256) __builtin_ia32_vfmsubps256_mask ((__v8sf) __A,
4185 							   (__v8sf) __B,
4186 							   (__v8sf) __C,
4187 							   (__mmask8) __U);
4188 	}
4189 	
4190 	extern __inline __m256
4191 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4192 	_mm256_mask3_fmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4193 			       __mmask8 __U)
4194 	{
4195 	  return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
4196 							    (__v8sf) __B,
4197 							    (__v8sf) __C,
4198 							    (__mmask8) __U);
4199 	}
4200 	
4201 	extern __inline __m256
4202 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4203 	_mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4204 			       __m256 __C)
4205 	{
4206 	  return (__m256) __builtin_ia32_vfmsubps256_maskz ((__v8sf) __A,
4207 							    (__v8sf) __B,
4208 							    (__v8sf) __C,
4209 							    (__mmask8) __U);
4210 	}
4211 	
4212 	extern __inline __m128
4213 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4214 	_mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4215 	{
4216 	  return (__m128) __builtin_ia32_vfmsubps128_mask ((__v4sf) __A,
4217 							   (__v4sf) __B,
4218 							   (__v4sf) __C,
4219 							   (__mmask8) __U);
4220 	}
4221 	
4222 	extern __inline __m128
4223 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4224 	_mm_mask3_fmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4225 	{
4226 	  return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
4227 							    (__v4sf) __B,
4228 							    (__v4sf) __C,
4229 							    (__mmask8) __U);
4230 	}
4231 	
4232 	extern __inline __m128
4233 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4234 	_mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4235 	{
4236 	  return (__m128) __builtin_ia32_vfmsubps128_maskz ((__v4sf) __A,
4237 							    (__v4sf) __B,
4238 							    (__v4sf) __C,
4239 							    (__mmask8) __U);
4240 	}
4241 	
4242 	extern __inline __m256d
4243 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4244 	_mm256_mask_fmaddsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4245 				 __m256d __C)
4246 	{
4247 	  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4248 							       (__v4df) __B,
4249 							       (__v4df) __C,
4250 							       (__mmask8) __U);
4251 	}
4252 	
4253 	extern __inline __m256d
4254 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4255 	_mm256_mask3_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C,
4256 				  __mmask8 __U)
4257 	{
4258 	  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
4259 								(__v4df) __B,
4260 								(__v4df) __C,
4261 								(__mmask8)
4262 								__U);
4263 	}
4264 	
4265 	extern __inline __m256d
4266 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4267 	_mm256_maskz_fmaddsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4268 				  __m256d __C)
4269 	{
4270 	  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4271 								(__v4df) __B,
4272 								(__v4df) __C,
4273 								(__mmask8)
4274 								__U);
4275 	}
4276 	
4277 	extern __inline __m128d
4278 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4279 	_mm_mask_fmaddsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4280 			      __m128d __C)
4281 	{
4282 	  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4283 							       (__v2df) __B,
4284 							       (__v2df) __C,
4285 							       (__mmask8) __U);
4286 	}
4287 	
4288 	extern __inline __m128d
4289 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4290 	_mm_mask3_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C,
4291 			       __mmask8 __U)
4292 	{
4293 	  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
4294 								(__v2df) __B,
4295 								(__v2df) __C,
4296 								(__mmask8)
4297 								__U);
4298 	}
4299 	
4300 	extern __inline __m128d
4301 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4302 	_mm_maskz_fmaddsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4303 			       __m128d __C)
4304 	{
4305 	  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4306 								(__v2df) __B,
4307 								(__v2df) __C,
4308 								(__mmask8)
4309 								__U);
4310 	}
4311 	
4312 	extern __inline __m256
4313 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4314 	_mm256_mask_fmaddsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4315 				 __m256 __C)
4316 	{
4317 	  return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4318 							      (__v8sf) __B,
4319 							      (__v8sf) __C,
4320 							      (__mmask8) __U);
4321 	}
4322 	
4323 	extern __inline __m256
4324 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4325 	_mm256_mask3_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C,
4326 				  __mmask8 __U)
4327 	{
4328 	  return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
4329 							       (__v8sf) __B,
4330 							       (__v8sf) __C,
4331 							       (__mmask8) __U);
4332 	}
4333 	
4334 	extern __inline __m256
4335 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4336 	_mm256_maskz_fmaddsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4337 				  __m256 __C)
4338 	{
4339 	  return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4340 							       (__v8sf) __B,
4341 							       (__v8sf) __C,
4342 							       (__mmask8) __U);
4343 	}
4344 	
4345 	extern __inline __m128
4346 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4347 	_mm_mask_fmaddsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4348 	{
4349 	  return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4350 							      (__v4sf) __B,
4351 							      (__v4sf) __C,
4352 							      (__mmask8) __U);
4353 	}
4354 	
4355 	extern __inline __m128
4356 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4357 	_mm_mask3_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C,
4358 			       __mmask8 __U)
4359 	{
4360 	  return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
4361 							       (__v4sf) __B,
4362 							       (__v4sf) __C,
4363 							       (__mmask8) __U);
4364 	}
4365 	
4366 	extern __inline __m128
4367 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4368 	_mm_maskz_fmaddsub_ps (__mmask8 __U, __m128 __A, __m128 __B,
4369 			       __m128 __C)
4370 	{
4371 	  return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4372 							       (__v4sf) __B,
4373 							       (__v4sf) __C,
4374 							       (__mmask8) __U);
4375 	}
4376 	
4377 	extern __inline __m256d
4378 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4379 	_mm256_mask_fmsubadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4380 				 __m256d __C)
4381 	{
4382 	  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
4383 							       (__v4df) __B,
4384 							       -(__v4df) __C,
4385 							       (__mmask8) __U);
4386 	}
4387 	
4388 	extern __inline __m256d
4389 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4390 	_mm256_mask3_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C,
4391 				  __mmask8 __U)
4392 	{
4393 	  return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
4394 								(__v4df) __B,
4395 								(__v4df) __C,
4396 								(__mmask8)
4397 								__U);
4398 	}
4399 	
4400 	extern __inline __m256d
4401 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4402 	_mm256_maskz_fmsubadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4403 				  __m256d __C)
4404 	{
4405 	  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
4406 								(__v4df) __B,
4407 								-(__v4df) __C,
4408 								(__mmask8)
4409 								__U);
4410 	}
4411 	
4412 	extern __inline __m128d
4413 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4414 	_mm_mask_fmsubadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4415 			      __m128d __C)
4416 	{
4417 	  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
4418 							       (__v2df) __B,
4419 							       -(__v2df) __C,
4420 							       (__mmask8) __U);
4421 	}
4422 	
4423 	extern __inline __m128d
4424 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4425 	_mm_mask3_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C,
4426 			       __mmask8 __U)
4427 	{
4428 	  return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
4429 								(__v2df) __B,
4430 								(__v2df) __C,
4431 								(__mmask8)
4432 								__U);
4433 	}
4434 	
4435 	extern __inline __m128d
4436 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4437 	_mm_maskz_fmsubadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4438 			       __m128d __C)
4439 	{
4440 	  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
4441 								(__v2df) __B,
4442 								-(__v2df) __C,
4443 								(__mmask8)
4444 								__U);
4445 	}
4446 	
4447 	extern __inline __m256
4448 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4449 	_mm256_mask_fmsubadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4450 				 __m256 __C)
4451 	{
4452 	  return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
4453 							      (__v8sf) __B,
4454 							      -(__v8sf) __C,
4455 							      (__mmask8) __U);
4456 	}
4457 	
4458 	extern __inline __m256
4459 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4460 	_mm256_mask3_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C,
4461 				  __mmask8 __U)
4462 	{
4463 	  return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
4464 							       (__v8sf) __B,
4465 							       (__v8sf) __C,
4466 							       (__mmask8) __U);
4467 	}
4468 	
4469 	extern __inline __m256
4470 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4471 	_mm256_maskz_fmsubadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4472 				  __m256 __C)
4473 	{
4474 	  return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
4475 							       (__v8sf) __B,
4476 							       -(__v8sf) __C,
4477 							       (__mmask8) __U);
4478 	}
4479 	
4480 	extern __inline __m128
4481 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4482 	_mm_mask_fmsubadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4483 	{
4484 	  return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
4485 							      (__v4sf) __B,
4486 							      -(__v4sf) __C,
4487 							      (__mmask8) __U);
4488 	}
4489 	
4490 	extern __inline __m128
4491 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4492 	_mm_mask3_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C,
4493 			       __mmask8 __U)
4494 	{
4495 	  return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
4496 							       (__v4sf) __B,
4497 							       (__v4sf) __C,
4498 							       (__mmask8) __U);
4499 	}
4500 	
4501 	extern __inline __m128
4502 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4503 	_mm_maskz_fmsubadd_ps (__mmask8 __U, __m128 __A, __m128 __B,
4504 			       __m128 __C)
4505 	{
4506 	  return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
4507 							       (__v4sf) __B,
4508 							       -(__v4sf) __C,
4509 							       (__mmask8) __U);
4510 	}
4511 	
4512 	extern __inline __m256d
4513 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4514 	_mm256_mask_fnmadd_pd (__m256d __A, __mmask8 __U, __m256d __B,
4515 			       __m256d __C)
4516 	{
4517 	  return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
4518 							     (__v4df) __B,
4519 							     (__v4df) __C,
4520 							     (__mmask8) __U);
4521 	}
4522 	
4523 	extern __inline __m256d
4524 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4525 	_mm256_mask3_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C,
4526 				__mmask8 __U)
4527 	{
4528 	  return (__m256d) __builtin_ia32_vfnmaddpd256_mask3 ((__v4df) __A,
4529 							      (__v4df) __B,
4530 							      (__v4df) __C,
4531 							      (__mmask8) __U);
4532 	}
4533 	
4534 	extern __inline __m256d
4535 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4536 	_mm256_maskz_fnmadd_pd (__mmask8 __U, __m256d __A, __m256d __B,
4537 				__m256d __C)
4538 	{
4539 	  return (__m256d) __builtin_ia32_vfnmaddpd256_maskz ((__v4df) __A,
4540 							      (__v4df) __B,
4541 							      (__v4df) __C,
4542 							      (__mmask8) __U);
4543 	}
4544 	
4545 	extern __inline __m128d
4546 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4547 	_mm_mask_fnmadd_pd (__m128d __A, __mmask8 __U, __m128d __B,
4548 			    __m128d __C)
4549 	{
4550 	  return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
4551 							     (__v2df) __B,
4552 							     (__v2df) __C,
4553 							     (__mmask8) __U);
4554 	}
4555 	
4556 	extern __inline __m128d
4557 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4558 	_mm_mask3_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C,
4559 			     __mmask8 __U)
4560 	{
4561 	  return (__m128d) __builtin_ia32_vfnmaddpd128_mask3 ((__v2df) __A,
4562 							      (__v2df) __B,
4563 							      (__v2df) __C,
4564 							      (__mmask8) __U);
4565 	}
4566 	
4567 	extern __inline __m128d
4568 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4569 	_mm_maskz_fnmadd_pd (__mmask8 __U, __m128d __A, __m128d __B,
4570 			     __m128d __C)
4571 	{
4572 	  return (__m128d) __builtin_ia32_vfnmaddpd128_maskz ((__v2df) __A,
4573 							      (__v2df) __B,
4574 							      (__v2df) __C,
4575 							      (__mmask8) __U);
4576 	}
4577 	
4578 	extern __inline __m256
4579 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4580 	_mm256_mask_fnmadd_ps (__m256 __A, __mmask8 __U, __m256 __B,
4581 			       __m256 __C)
4582 	{
4583 	  return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
4584 							    (__v8sf) __B,
4585 							    (__v8sf) __C,
4586 							    (__mmask8) __U);
4587 	}
4588 	
4589 	extern __inline __m256
4590 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4591 	_mm256_mask3_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C,
4592 				__mmask8 __U)
4593 	{
4594 	  return (__m256) __builtin_ia32_vfnmaddps256_mask3 ((__v8sf) __A,
4595 							     (__v8sf) __B,
4596 							     (__v8sf) __C,
4597 							     (__mmask8) __U);
4598 	}
4599 	
4600 	extern __inline __m256
4601 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4602 	_mm256_maskz_fnmadd_ps (__mmask8 __U, __m256 __A, __m256 __B,
4603 				__m256 __C)
4604 	{
4605 	  return (__m256) __builtin_ia32_vfnmaddps256_maskz ((__v8sf) __A,
4606 							     (__v8sf) __B,
4607 							     (__v8sf) __C,
4608 							     (__mmask8) __U);
4609 	}
4610 	
4611 	extern __inline __m128
4612 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4613 	_mm_mask_fnmadd_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4614 	{
4615 	  return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
4616 							    (__v4sf) __B,
4617 							    (__v4sf) __C,
4618 							    (__mmask8) __U);
4619 	}
4620 	
4621 	extern __inline __m128
4622 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4623 	_mm_mask3_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4624 	{
4625 	  return (__m128) __builtin_ia32_vfnmaddps128_mask3 ((__v4sf) __A,
4626 							     (__v4sf) __B,
4627 							     (__v4sf) __C,
4628 							     (__mmask8) __U);
4629 	}
4630 	
4631 	extern __inline __m128
4632 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4633 	_mm_maskz_fnmadd_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4634 	{
4635 	  return (__m128) __builtin_ia32_vfnmaddps128_maskz ((__v4sf) __A,
4636 							     (__v4sf) __B,
4637 							     (__v4sf) __C,
4638 							     (__mmask8) __U);
4639 	}
4640 	
4641 	extern __inline __m256d
4642 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4643 	_mm256_mask_fnmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
4644 			       __m256d __C)
4645 	{
4646 	  return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
4647 							     (__v4df) __B,
4648 							     (__v4df) __C,
4649 							     (__mmask8) __U);
4650 	}
4651 	
4652 	extern __inline __m256d
4653 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4654 	_mm256_mask3_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C,
4655 				__mmask8 __U)
4656 	{
4657 	  return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
4658 							      (__v4df) __B,
4659 							      (__v4df) __C,
4660 							      (__mmask8) __U);
4661 	}
4662 	
4663 	extern __inline __m256d
4664 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4665 	_mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
4666 				__m256d __C)
4667 	{
4668 	  return (__m256d) __builtin_ia32_vfnmsubpd256_maskz ((__v4df) __A,
4669 							      (__v4df) __B,
4670 							      (__v4df) __C,
4671 							      (__mmask8) __U);
4672 	}
4673 	
4674 	extern __inline __m128d
4675 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4676 	_mm_mask_fnmsub_pd (__m128d __A, __mmask8 __U, __m128d __B,
4677 			    __m128d __C)
4678 	{
4679 	  return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
4680 							     (__v2df) __B,
4681 							     (__v2df) __C,
4682 							     (__mmask8) __U);
4683 	}
4684 	
4685 	extern __inline __m128d
4686 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4687 	_mm_mask3_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C,
4688 			     __mmask8 __U)
4689 	{
4690 	  return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
4691 							      (__v2df) __B,
4692 							      (__v2df) __C,
4693 							      (__mmask8) __U);
4694 	}
4695 	
4696 	extern __inline __m128d
4697 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4698 	_mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
4699 			     __m128d __C)
4700 	{
4701 	  return (__m128d) __builtin_ia32_vfnmsubpd128_maskz ((__v2df) __A,
4702 							      (__v2df) __B,
4703 							      (__v2df) __C,
4704 							      (__mmask8) __U);
4705 	}
4706 	
4707 	extern __inline __m256
4708 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4709 	_mm256_mask_fnmsub_ps (__m256 __A, __mmask8 __U, __m256 __B,
4710 			       __m256 __C)
4711 	{
4712 	  return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
4713 							    (__v8sf) __B,
4714 							    (__v8sf) __C,
4715 							    (__mmask8) __U);
4716 	}
4717 	
4718 	extern __inline __m256
4719 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4720 	_mm256_mask3_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C,
4721 				__mmask8 __U)
4722 	{
4723 	  return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
4724 							     (__v8sf) __B,
4725 							     (__v8sf) __C,
4726 							     (__mmask8) __U);
4727 	}
4728 	
4729 	extern __inline __m256
4730 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4731 	_mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
4732 				__m256 __C)
4733 	{
4734 	  return (__m256) __builtin_ia32_vfnmsubps256_maskz ((__v8sf) __A,
4735 							     (__v8sf) __B,
4736 							     (__v8sf) __C,
4737 							     (__mmask8) __U);
4738 	}
4739 	
4740 	extern __inline __m128
4741 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4742 	_mm_mask_fnmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
4743 	{
4744 	  return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
4745 							    (__v4sf) __B,
4746 							    (__v4sf) __C,
4747 							    (__mmask8) __U);
4748 	}
4749 	
4750 	extern __inline __m128
4751 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4752 	_mm_mask3_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
4753 	{
4754 	  return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
4755 							     (__v4sf) __B,
4756 							     (__v4sf) __C,
4757 							     (__mmask8) __U);
4758 	}
4759 	
4760 	extern __inline __m128
4761 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4762 	_mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
4763 	{
4764 	  return (__m128) __builtin_ia32_vfnmsubps128_maskz ((__v4sf) __A,
4765 							     (__v4sf) __B,
4766 							     (__v4sf) __C,
4767 							     (__mmask8) __U);
4768 	}
4769 	
4770 	extern __inline __m128i
4771 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4772 	_mm_mask_and_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4773 			    __m128i __B)
4774 	{
4775 	  return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4776 							 (__v4si) __B,
4777 							 (__v4si) __W,
4778 							 (__mmask8) __U);
4779 	}
4780 	
4781 	extern __inline __m128i
4782 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4783 	_mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4784 	{
4785 	  return (__m128i) __builtin_ia32_pandd128_mask ((__v4si) __A,
4786 							 (__v4si) __B,
4787 							 (__v4si)
4788 							 _mm_setzero_si128 (),
4789 							 (__mmask8) __U);
4790 	}
4791 	
4792 	extern __inline __m256i
4793 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4794 	_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4795 				  __m256i __B)
4796 	{
4797 	  return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4798 							  (__v8si) __B,
4799 							  (__v8si) __W,
4800 							  (__mmask8) __U);
4801 	}
4802 	
4803 	extern __inline __m256i
4804 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4805 	_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4806 	{
4807 	  return (__m256i) __builtin_ia32_pandnd256_mask ((__v8si) __A,
4808 							  (__v8si) __B,
4809 							  (__v8si)
4810 							  _mm256_setzero_si256 (),
4811 							  (__mmask8) __U);
4812 	}
4813 	
4814 	extern __inline __m128i
4815 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4816 	_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4817 			       __m128i __B)
4818 	{
4819 	  return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4820 							  (__v4si) __B,
4821 							  (__v4si) __W,
4822 							  (__mmask8) __U);
4823 	}
4824 	
4825 	extern __inline __m128i
4826 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4827 	_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4828 	{
4829 	  return (__m128i) __builtin_ia32_pandnd128_mask ((__v4si) __A,
4830 							  (__v4si) __B,
4831 							  (__v4si)
4832 							  _mm_setzero_si128 (),
4833 							  (__mmask8) __U);
4834 	}
4835 	
4836 	extern __inline __m256i
4837 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4838 	_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4839 			      __m256i __B)
4840 	{
4841 	  return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4842 							(__v8si) __B,
4843 							(__v8si) __W,
4844 							(__mmask8) __U);
4845 	}
4846 	
4847 	extern __inline __m256i
4848 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4849 	_mm256_maskz_or_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4850 	{
4851 	  return (__m256i) __builtin_ia32_pord256_mask ((__v8si) __A,
4852 							(__v8si) __B,
4853 							(__v8si)
4854 							_mm256_setzero_si256 (),
4855 							(__mmask8) __U);
4856 	}
4857 	
4858 	extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4859 	_mm256_or_epi32 (__m256i __A, __m256i __B)
4860 	{
4861 	  return (__m256i) ((__v8su)__A | (__v8su)__B);
4862 	}
4863 	
4864 	extern __inline __m128i
4865 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4866 	_mm_mask_or_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4867 	{
4868 	  return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4869 							(__v4si) __B,
4870 							(__v4si) __W,
4871 							(__mmask8) __U);
4872 	}
4873 	
4874 	extern __inline __m128i
4875 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4876 	_mm_maskz_or_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4877 	{
4878 	  return (__m128i) __builtin_ia32_pord128_mask ((__v4si) __A,
4879 							(__v4si) __B,
4880 							(__v4si)
4881 							_mm_setzero_si128 (),
4882 							(__mmask8) __U);
4883 	}
4884 	
4885 	extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4886 	_mm_or_epi32 (__m128i __A, __m128i __B)
4887 	{
4888 	  return (__m128i) ((__v4su)__A | (__v4su)__B);
4889 	}
4890 	
4891 	extern __inline __m256i
4892 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4893 	_mm256_mask_xor_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
4894 			       __m256i __B)
4895 	{
4896 	  return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4897 							 (__v8si) __B,
4898 							 (__v8si) __W,
4899 							 (__mmask8) __U);
4900 	}
4901 	
4902 	extern __inline __m256i
4903 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4904 	_mm256_maskz_xor_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4905 	{
4906 	  return (__m256i) __builtin_ia32_pxord256_mask ((__v8si) __A,
4907 							 (__v8si) __B,
4908 							 (__v8si)
4909 							 _mm256_setzero_si256 (),
4910 							 (__mmask8) __U);
4911 	}
4912 	
4913 	extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4914 	_mm256_xor_epi32 (__m256i __A, __m256i __B)
4915 	{
4916 	  return (__m256i) ((__v8su)__A ^ (__v8su)__B);
4917 	}
4918 	
4919 	extern __inline __m128i
4920 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4921 	_mm_mask_xor_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
4922 			    __m128i __B)
4923 	{
4924 	  return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4925 							 (__v4si) __B,
4926 							 (__v4si) __W,
4927 							 (__mmask8) __U);
4928 	}
4929 	
4930 	extern __inline __m128i
4931 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4932 	_mm_maskz_xor_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4933 	{
4934 	  return (__m128i) __builtin_ia32_pxord128_mask ((__v4si) __A,
4935 							 (__v4si) __B,
4936 							 (__v4si)
4937 							 _mm_setzero_si128 (),
4938 							 (__mmask8) __U);
4939 	}
4940 	
4941 	extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
4942 	_mm_xor_epi32 (__m128i __A, __m128i __B)
4943 	{
4944 	  return (__m128i) ((__v4su)__A ^ (__v4su)__B);
4945 	}
4946 	
4947 	extern __inline __m128
4948 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4949 	_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A)
4950 	{
4951 	  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4952 							(__v4sf) __W,
4953 							(__mmask8) __U);
4954 	}
4955 	
4956 	extern __inline __m128
4957 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4958 	_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A)
4959 	{
4960 	  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
4961 							(__v4sf)
4962 							_mm_setzero_ps (),
4963 							(__mmask8) __U);
4964 	}
4965 	
4966 	extern __inline __m128
4967 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4968 	_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A)
4969 	{
4970 	  return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4971 							   (__v4sf) __W,
4972 							   (__mmask8) __U);
4973 	}
4974 	
4975 	extern __inline __m128
4976 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4977 	_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A)
4978 	{
4979 	  return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
4980 							   (__v4sf)
4981 							   _mm_setzero_ps (),
4982 							   (__mmask8) __U);
4983 	}
4984 	
4985 	extern __inline __m256i
4986 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4987 	_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A)
4988 	{
4989 	  return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4990 							    (__v8si) __W,
4991 							    (__mmask8) __U);
4992 	}
4993 	
4994 	extern __inline __m256i
4995 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4996 	_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A)
4997 	{
4998 	  return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
4999 							    (__v8si)
5000 							    _mm256_setzero_si256 (),
5001 							    (__mmask8) __U);
5002 	}
5003 	
5004 	extern __inline __m128i
5005 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5006 	_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A)
5007 	{
5008 	  return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
5009 							    (__v4si) __W,
5010 							    (__mmask8) __U);
5011 	}
5012 	
5013 	extern __inline __m128i
5014 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5015 	_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A)
5016 	{
5017 	  return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
5018 							    (__v4si)
5019 							    _mm_setzero_si128 (),
5020 							    (__mmask8) __U);
5021 	}
5022 	
5023 	extern __inline __m256i
5024 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5025 	_mm256_cvtps_epu32 (__m256 __A)
5026 	{
5027 	  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5028 							     (__v8si)
5029 							     _mm256_setzero_si256 (),
5030 							     (__mmask8) -1);
5031 	}
5032 	
5033 	extern __inline __m256i
5034 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5035 	_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A)
5036 	{
5037 	  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5038 							     (__v8si) __W,
5039 							     (__mmask8) __U);
5040 	}
5041 	
5042 	extern __inline __m256i
5043 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5044 	_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A)
5045 	{
5046 	  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
5047 							     (__v8si)
5048 							     _mm256_setzero_si256 (),
5049 							     (__mmask8) __U);
5050 	}
5051 	
5052 	extern __inline __m128i
5053 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5054 	_mm_cvtps_epu32 (__m128 __A)
5055 	{
5056 	  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5057 							     (__v4si)
5058 							     _mm_setzero_si128 (),
5059 							     (__mmask8) -1);
5060 	}
5061 	
5062 	extern __inline __m128i
5063 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5064 	_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A)
5065 	{
5066 	  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5067 							     (__v4si) __W,
5068 							     (__mmask8) __U);
5069 	}
5070 	
5071 	extern __inline __m128i
5072 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5073 	_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A)
5074 	{
5075 	  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
5076 							     (__v4si)
5077 							     _mm_setzero_si128 (),
5078 							     (__mmask8) __U);
5079 	}
5080 	
5081 	extern __inline __m256d
5082 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5083 	_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5084 	{
5085 	  return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5086 							   (__v4df) __W,
5087 							   (__mmask8) __U);
5088 	}
5089 	
5090 	extern __inline __m256d
5091 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5092 	_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
5093 	{
5094 	  return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
5095 							   (__v4df)
5096 							   _mm256_setzero_pd (),
5097 							   (__mmask8) __U);
5098 	}
5099 	
5100 	extern __inline __m128d
5101 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5102 	_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5103 	{
5104 	  return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5105 							   (__v2df) __W,
5106 							   (__mmask8) __U);
5107 	}
5108 	
5109 	extern __inline __m128d
5110 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5111 	_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
5112 	{
5113 	  return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
5114 							   (__v2df)
5115 							   _mm_setzero_pd (),
5116 							   (__mmask8) __U);
5117 	}
5118 	
5119 	extern __inline __m256
5120 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5121 	_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5122 	{
5123 	  return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5124 							   (__v8sf) __W,
5125 							   (__mmask8) __U);
5126 	}
5127 	
5128 	extern __inline __m256
5129 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5130 	_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
5131 	{
5132 	  return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
5133 							   (__v8sf)
5134 							   _mm256_setzero_ps (),
5135 							   (__mmask8) __U);
5136 	}
5137 	
5138 	extern __inline __m128
5139 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5140 	_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5141 	{
5142 	  return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5143 							   (__v4sf) __W,
5144 							   (__mmask8) __U);
5145 	}
5146 	
5147 	extern __inline __m128
5148 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5149 	_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
5150 	{
5151 	  return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
5152 							   (__v4sf)
5153 							   _mm_setzero_ps (),
5154 							   (__mmask8) __U);
5155 	}
5156 	
5157 	extern __inline __m256
5158 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5159 	_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
5160 	{
5161 	  return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5162 							   (__v8sf) __W,
5163 							   (__mmask8) __U);
5164 	}
5165 	
5166 	extern __inline __m256
5167 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5168 	_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
5169 	{
5170 	  return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
5171 							   (__v8sf)
5172 							   _mm256_setzero_ps (),
5173 							   (__mmask8) __U);
5174 	}
5175 	
5176 	extern __inline __m128
5177 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5178 	_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
5179 	{
5180 	  return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5181 							   (__v4sf) __W,
5182 							   (__mmask8) __U);
5183 	}
5184 	
5185 	extern __inline __m128
5186 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5187 	_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
5188 	{
5189 	  return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
5190 							   (__v4sf)
5191 							   _mm_setzero_ps (),
5192 							   (__mmask8) __U);
5193 	}
5194 	
5195 	extern __inline __m128i
5196 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5197 	_mm_mask_unpackhi_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5198 				 __m128i __B)
5199 	{
5200 	  return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5201 							     (__v4si) __B,
5202 							     (__v4si) __W,
5203 							     (__mmask8) __U);
5204 	}
5205 	
5206 	extern __inline __m128i
5207 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5208 	_mm_maskz_unpackhi_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5209 	{
5210 	  return (__m128i) __builtin_ia32_punpckhdq128_mask ((__v4si) __A,
5211 							     (__v4si) __B,
5212 							     (__v4si)
5213 							     _mm_setzero_si128 (),
5214 							     (__mmask8) __U);
5215 	}
5216 	
5217 	extern __inline __m256i
5218 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5219 	_mm256_mask_unpackhi_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5220 				    __m256i __B)
5221 	{
5222 	  return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5223 							     (__v8si) __B,
5224 							     (__v8si) __W,
5225 							     (__mmask8) __U);
5226 	}
5227 	
5228 	extern __inline __m256i
5229 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5230 	_mm256_maskz_unpackhi_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5231 	{
5232 	  return (__m256i) __builtin_ia32_punpckhdq256_mask ((__v8si) __A,
5233 							     (__v8si) __B,
5234 							     (__v8si)
5235 							     _mm256_setzero_si256 (),
5236 							     (__mmask8) __U);
5237 	}
5238 	
5239 	extern __inline __m128i
5240 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5241 	_mm_mask_unpackhi_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5242 				 __m128i __B)
5243 	{
5244 	  return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5245 							      (__v2di) __B,
5246 							      (__v2di) __W,
5247 							      (__mmask8) __U);
5248 	}
5249 	
5250 	extern __inline __m128i
5251 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5252 	_mm_maskz_unpackhi_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5253 	{
5254 	  return (__m128i) __builtin_ia32_punpckhqdq128_mask ((__v2di) __A,
5255 							      (__v2di) __B,
5256 							      (__v2di)
5257 							      _mm_setzero_si128 (),
5258 							      (__mmask8) __U);
5259 	}
5260 	
5261 	extern __inline __m256i
5262 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5263 	_mm256_mask_unpackhi_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5264 				    __m256i __B)
5265 	{
5266 	  return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5267 							      (__v4di) __B,
5268 							      (__v4di) __W,
5269 							      (__mmask8) __U);
5270 	}
5271 	
5272 	extern __inline __m256i
5273 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5274 	_mm256_maskz_unpackhi_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5275 	{
5276 	  return (__m256i) __builtin_ia32_punpckhqdq256_mask ((__v4di) __A,
5277 							      (__v4di) __B,
5278 							      (__v4di)
5279 							      _mm256_setzero_si256 (),
5280 							      (__mmask8) __U);
5281 	}
5282 	
5283 	extern __inline __m128i
5284 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5285 	_mm_mask_unpacklo_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
5286 				 __m128i __B)
5287 	{
5288 	  return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5289 							     (__v4si) __B,
5290 							     (__v4si) __W,
5291 							     (__mmask8) __U);
5292 	}
5293 	
5294 	extern __inline __m128i
5295 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5296 	_mm_maskz_unpacklo_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
5297 	{
5298 	  return (__m128i) __builtin_ia32_punpckldq128_mask ((__v4si) __A,
5299 							     (__v4si) __B,
5300 							     (__v4si)
5301 							     _mm_setzero_si128 (),
5302 							     (__mmask8) __U);
5303 	}
5304 	
5305 	extern __inline __m256i
5306 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5307 	_mm256_mask_unpacklo_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
5308 				    __m256i __B)
5309 	{
5310 	  return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5311 							     (__v8si) __B,
5312 							     (__v8si) __W,
5313 							     (__mmask8) __U);
5314 	}
5315 	
5316 	extern __inline __m256i
5317 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5318 	_mm256_maskz_unpacklo_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
5319 	{
5320 	  return (__m256i) __builtin_ia32_punpckldq256_mask ((__v8si) __A,
5321 							     (__v8si) __B,
5322 							     (__v8si)
5323 							     _mm256_setzero_si256 (),
5324 							     (__mmask8) __U);
5325 	}
5326 	
5327 	extern __inline __m128i
5328 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5329 	_mm_mask_unpacklo_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
5330 				 __m128i __B)
5331 	{
5332 	  return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5333 							      (__v2di) __B,
5334 							      (__v2di) __W,
5335 							      (__mmask8) __U);
5336 	}
5337 	
5338 	extern __inline __m128i
5339 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5340 	_mm_maskz_unpacklo_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
5341 	{
5342 	  return (__m128i) __builtin_ia32_punpcklqdq128_mask ((__v2di) __A,
5343 							      (__v2di) __B,
5344 							      (__v2di)
5345 							      _mm_setzero_si128 (),
5346 							      (__mmask8) __U);
5347 	}
5348 	
5349 	extern __inline __m256i
5350 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5351 	_mm256_mask_unpacklo_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
5352 				    __m256i __B)
5353 	{
5354 	  return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5355 							      (__v4di) __B,
5356 							      (__v4di) __W,
5357 							      (__mmask8) __U);
5358 	}
5359 	
5360 	extern __inline __m256i
5361 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5362 	_mm256_maskz_unpacklo_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
5363 	{
5364 	  return (__m256i) __builtin_ia32_punpcklqdq256_mask ((__v4di) __A,
5365 							      (__v4di) __B,
5366 							      (__v4di)
5367 							      _mm256_setzero_si256 (),
5368 							      (__mmask8) __U);
5369 	}
5370 	
5371 	extern __inline __mmask8
5372 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5373 	_mm_cmpeq_epu32_mask (__m128i __A, __m128i __B)
5374 	{
5375 	  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5376 							   (__v4si) __B, 0,
5377 							   (__mmask8) -1);
5378 	}
5379 	
5380 	extern __inline __mmask8
5381 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5382 	_mm_cmpeq_epi32_mask (__m128i __A, __m128i __B)
5383 	{
5384 	  return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5385 							    (__v4si) __B,
5386 							    (__mmask8) -1);
5387 	}
5388 	
5389 	extern __inline __mmask8
5390 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5391 	_mm_mask_cmpeq_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5392 	{
5393 	  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5394 							   (__v4si) __B, 0, __U);
5395 	}
5396 	
5397 	extern __inline __mmask8
5398 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5399 	_mm_mask_cmpeq_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5400 	{
5401 	  return (__mmask8) __builtin_ia32_pcmpeqd128_mask ((__v4si) __A,
5402 							    (__v4si) __B, __U);
5403 	}
5404 	
5405 	extern __inline __mmask8
5406 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5407 	_mm256_cmpeq_epu32_mask (__m256i __A, __m256i __B)
5408 	{
5409 	  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5410 							   (__v8si) __B, 0,
5411 							   (__mmask8) -1);
5412 	}
5413 	
5414 	extern __inline __mmask8
5415 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5416 	_mm256_cmpeq_epi32_mask (__m256i __A, __m256i __B)
5417 	{
5418 	  return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5419 							    (__v8si) __B,
5420 							    (__mmask8) -1);
5421 	}
5422 	
5423 	extern __inline __mmask8
5424 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5425 	_mm256_mask_cmpeq_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5426 	{
5427 	  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5428 							   (__v8si) __B, 0, __U);
5429 	}
5430 	
5431 	extern __inline __mmask8
5432 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5433 	_mm256_mask_cmpeq_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5434 	{
5435 	  return (__mmask8) __builtin_ia32_pcmpeqd256_mask ((__v8si) __A,
5436 							    (__v8si) __B, __U);
5437 	}
5438 	
5439 	extern __inline __mmask8
5440 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5441 	_mm_cmpeq_epu64_mask (__m128i __A, __m128i __B)
5442 	{
5443 	  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5444 							   (__v2di) __B, 0,
5445 							   (__mmask8) -1);
5446 	}
5447 	
5448 	extern __inline __mmask8
5449 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5450 	_mm_cmpeq_epi64_mask (__m128i __A, __m128i __B)
5451 	{
5452 	  return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5453 							    (__v2di) __B,
5454 							    (__mmask8) -1);
5455 	}
5456 	
5457 	extern __inline __mmask8
5458 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5459 	_mm_mask_cmpeq_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5460 	{
5461 	  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5462 							   (__v2di) __B, 0, __U);
5463 	}
5464 	
5465 	extern __inline __mmask8
5466 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5467 	_mm_mask_cmpeq_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5468 	{
5469 	  return (__mmask8) __builtin_ia32_pcmpeqq128_mask ((__v2di) __A,
5470 							    (__v2di) __B, __U);
5471 	}
5472 	
5473 	extern __inline __mmask8
5474 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5475 	_mm256_cmpeq_epu64_mask (__m256i __A, __m256i __B)
5476 	{
5477 	  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5478 							   (__v4di) __B, 0,
5479 							   (__mmask8) -1);
5480 	}
5481 	
5482 	extern __inline __mmask8
5483 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5484 	_mm256_cmpeq_epi64_mask (__m256i __A, __m256i __B)
5485 	{
5486 	  return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5487 							    (__v4di) __B,
5488 							    (__mmask8) -1);
5489 	}
5490 	
5491 	extern __inline __mmask8
5492 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5493 	_mm256_mask_cmpeq_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5494 	{
5495 	  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5496 							   (__v4di) __B, 0, __U);
5497 	}
5498 	
5499 	extern __inline __mmask8
5500 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5501 	_mm256_mask_cmpeq_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5502 	{
5503 	  return (__mmask8) __builtin_ia32_pcmpeqq256_mask ((__v4di) __A,
5504 							    (__v4di) __B, __U);
5505 	}
5506 	
5507 	extern __inline __mmask8
5508 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5509 	_mm_cmpgt_epu32_mask (__m128i __A, __m128i __B)
5510 	{
5511 	  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5512 							   (__v4si) __B, 6,
5513 							   (__mmask8) -1);
5514 	}
5515 	
5516 	extern __inline __mmask8
5517 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5518 	_mm_cmpgt_epi32_mask (__m128i __A, __m128i __B)
5519 	{
5520 	  return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5521 							    (__v4si) __B,
5522 							    (__mmask8) -1);
5523 	}
5524 	
5525 	extern __inline __mmask8
5526 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5527 	_mm_mask_cmpgt_epu32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5528 	{
5529 	  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __A,
5530 							   (__v4si) __B, 6, __U);
5531 	}
5532 	
5533 	extern __inline __mmask8
5534 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5535 	_mm_mask_cmpgt_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5536 	{
5537 	  return (__mmask8) __builtin_ia32_pcmpgtd128_mask ((__v4si) __A,
5538 							    (__v4si) __B, __U);
5539 	}
5540 	
5541 	extern __inline __mmask8
5542 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5543 	_mm256_cmpgt_epu32_mask (__m256i __A, __m256i __B)
5544 	{
5545 	  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5546 							   (__v8si) __B, 6,
5547 							   (__mmask8) -1);
5548 	}
5549 	
5550 	extern __inline __mmask8
5551 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5552 	_mm256_cmpgt_epi32_mask (__m256i __A, __m256i __B)
5553 	{
5554 	  return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5555 							    (__v8si) __B,
5556 							    (__mmask8) -1);
5557 	}
5558 	
5559 	extern __inline __mmask8
5560 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5561 	_mm256_mask_cmpgt_epu32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5562 	{
5563 	  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __A,
5564 							   (__v8si) __B, 6, __U);
5565 	}
5566 	
5567 	extern __inline __mmask8
5568 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5569 	_mm256_mask_cmpgt_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5570 	{
5571 	  return (__mmask8) __builtin_ia32_pcmpgtd256_mask ((__v8si) __A,
5572 							    (__v8si) __B, __U);
5573 	}
5574 	
5575 	extern __inline __mmask8
5576 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5577 	_mm_cmpgt_epu64_mask (__m128i __A, __m128i __B)
5578 	{
5579 	  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5580 							   (__v2di) __B, 6,
5581 							   (__mmask8) -1);
5582 	}
5583 	
5584 	extern __inline __mmask8
5585 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5586 	_mm_cmpgt_epi64_mask (__m128i __A, __m128i __B)
5587 	{
5588 	  return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5589 							    (__v2di) __B,
5590 							    (__mmask8) -1);
5591 	}
5592 	
5593 	extern __inline __mmask8
5594 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5595 	_mm_mask_cmpgt_epu64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5596 	{
5597 	  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __A,
5598 							   (__v2di) __B, 6, __U);
5599 	}
5600 	
5601 	extern __inline __mmask8
5602 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5603 	_mm_mask_cmpgt_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5604 	{
5605 	  return (__mmask8) __builtin_ia32_pcmpgtq128_mask ((__v2di) __A,
5606 							    (__v2di) __B, __U);
5607 	}
5608 	
5609 	extern __inline __mmask8
5610 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5611 	_mm256_cmpgt_epu64_mask (__m256i __A, __m256i __B)
5612 	{
5613 	  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5614 							   (__v4di) __B, 6,
5615 							   (__mmask8) -1);
5616 	}
5617 	
5618 	extern __inline __mmask8
5619 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5620 	_mm256_cmpgt_epi64_mask (__m256i __A, __m256i __B)
5621 	{
5622 	  return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5623 							    (__v4di) __B,
5624 							    (__mmask8) -1);
5625 	}
5626 	
5627 	extern __inline __mmask8
5628 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5629 	_mm256_mask_cmpgt_epu64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5630 	{
5631 	  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __A,
5632 							   (__v4di) __B, 6, __U);
5633 	}
5634 	
5635 	extern __inline __mmask8
5636 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5637 	_mm256_mask_cmpgt_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5638 	{
5639 	  return (__mmask8) __builtin_ia32_pcmpgtq256_mask ((__v4di) __A,
5640 							    (__v4di) __B, __U);
5641 	}
5642 	
5643 	extern __inline __mmask8
5644 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5645 	_mm_test_epi32_mask (__m128i __A, __m128i __B)
5646 	{
5647 	  return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5648 						       (__v4si) __B,
5649 						       (__mmask8) -1);
5650 	}
5651 	
5652 	extern __inline __mmask8
5653 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5654 	_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5655 	{
5656 	  return (__mmask8) __builtin_ia32_ptestmd128 ((__v4si) __A,
5657 						       (__v4si) __B, __U);
5658 	}
5659 	
5660 	extern __inline __mmask8
5661 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5662 	_mm256_test_epi32_mask (__m256i __A, __m256i __B)
5663 	{
5664 	  return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5665 						       (__v8si) __B,
5666 						       (__mmask8) -1);
5667 	}
5668 	
5669 	extern __inline __mmask8
5670 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5671 	_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5672 	{
5673 	  return (__mmask8) __builtin_ia32_ptestmd256 ((__v8si) __A,
5674 						       (__v8si) __B, __U);
5675 	}
5676 	
5677 	extern __inline __mmask8
5678 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5679 	_mm_test_epi64_mask (__m128i __A, __m128i __B)
5680 	{
5681 	  return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5682 						       (__v2di) __B,
5683 						       (__mmask8) -1);
5684 	}
5685 	
5686 	extern __inline __mmask8
5687 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5688 	_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5689 	{
5690 	  return (__mmask8) __builtin_ia32_ptestmq128 ((__v2di) __A,
5691 						       (__v2di) __B, __U);
5692 	}
5693 	
5694 	extern __inline __mmask8
5695 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5696 	_mm256_test_epi64_mask (__m256i __A, __m256i __B)
5697 	{
5698 	  return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5699 						       (__v4di) __B,
5700 						       (__mmask8) -1);
5701 	}
5702 	
5703 	extern __inline __mmask8
5704 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5705 	_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5706 	{
5707 	  return (__mmask8) __builtin_ia32_ptestmq256 ((__v4di) __A,
5708 						       (__v4di) __B, __U);
5709 	}
5710 	
5711 	extern __inline __mmask8
5712 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5713 	_mm_testn_epi32_mask (__m128i __A, __m128i __B)
5714 	{
5715 	  return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5716 							(__v4si) __B,
5717 							(__mmask8) -1);
5718 	}
5719 	
5720 	extern __inline __mmask8
5721 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5722 	_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
5723 	{
5724 	  return (__mmask8) __builtin_ia32_ptestnmd128 ((__v4si) __A,
5725 							(__v4si) __B, __U);
5726 	}
5727 	
5728 	extern __inline __mmask8
5729 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5730 	_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
5731 	{
5732 	  return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5733 							(__v8si) __B,
5734 							(__mmask8) -1);
5735 	}
5736 	
5737 	extern __inline __mmask8
5738 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5739 	_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
5740 	{
5741 	  return (__mmask8) __builtin_ia32_ptestnmd256 ((__v8si) __A,
5742 							(__v8si) __B, __U);
5743 	}
5744 	
5745 	extern __inline __mmask8
5746 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5747 	_mm_testn_epi64_mask (__m128i __A, __m128i __B)
5748 	{
5749 	  return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5750 							(__v2di) __B,
5751 							(__mmask8) -1);
5752 	}
5753 	
5754 	extern __inline __mmask8
5755 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5756 	_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
5757 	{
5758 	  return (__mmask8) __builtin_ia32_ptestnmq128 ((__v2di) __A,
5759 							(__v2di) __B, __U);
5760 	}
5761 	
5762 	extern __inline __mmask8
5763 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5764 	_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
5765 	{
5766 	  return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5767 							(__v4di) __B,
5768 							(__mmask8) -1);
5769 	}
5770 	
5771 	extern __inline __mmask8
5772 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5773 	_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
5774 	{
5775 	  return (__mmask8) __builtin_ia32_ptestnmq256 ((__v4di) __A,
5776 							(__v4di) __B, __U);
5777 	}
5778 	
5779 	extern __inline __m256d
5780 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5781 	_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A)
5782 	{
5783 	  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5784 							      (__v4df) __W,
5785 							      (__mmask8) __U);
5786 	}
5787 	
5788 	extern __inline __m256d
5789 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5790 	_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A)
5791 	{
5792 	  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
5793 							      (__v4df)
5794 							      _mm256_setzero_pd (),
5795 							      (__mmask8) __U);
5796 	}
5797 	
5798 	extern __inline void
5799 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5800 	_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A)
5801 	{
5802 	  __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
5803 						  (__v4df) __A,
5804 						  (__mmask8) __U);
5805 	}
5806 	
5807 	extern __inline __m128d
5808 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5809 	_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A)
5810 	{
5811 	  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5812 							      (__v2df) __W,
5813 							      (__mmask8) __U);
5814 	}
5815 	
5816 	extern __inline __m128d
5817 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5818 	_mm_maskz_compress_pd (__mmask8 __U, __m128d __A)
5819 	{
5820 	  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
5821 							      (__v2df)
5822 							      _mm_setzero_pd (),
5823 							      (__mmask8) __U);
5824 	}
5825 	
5826 	extern __inline void
5827 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5828 	_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A)
5829 	{
5830 	  __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
5831 						  (__v2df) __A,
5832 						  (__mmask8) __U);
5833 	}
5834 	
5835 	extern __inline __m256
5836 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5837 	_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A)
5838 	{
5839 	  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5840 							     (__v8sf) __W,
5841 							     (__mmask8) __U);
5842 	}
5843 	
5844 	extern __inline __m256
5845 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5846 	_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A)
5847 	{
5848 	  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
5849 							     (__v8sf)
5850 							     _mm256_setzero_ps (),
5851 							     (__mmask8) __U);
5852 	}
5853 	
5854 	extern __inline void
5855 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5856 	_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A)
5857 	{
5858 	  __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
5859 						  (__v8sf) __A,
5860 						  (__mmask8) __U);
5861 	}
5862 	
5863 	extern __inline __m128
5864 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5865 	_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A)
5866 	{
5867 	  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5868 							     (__v4sf) __W,
5869 							     (__mmask8) __U);
5870 	}
5871 	
5872 	extern __inline __m128
5873 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5874 	_mm_maskz_compress_ps (__mmask8 __U, __m128 __A)
5875 	{
5876 	  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
5877 							     (__v4sf)
5878 							     _mm_setzero_ps (),
5879 							     (__mmask8) __U);
5880 	}
5881 	
5882 	extern __inline void
5883 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5884 	_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A)
5885 	{
5886 	  __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
5887 						  (__v4sf) __A,
5888 						  (__mmask8) __U);
5889 	}
5890 	
5891 	extern __inline __m256i
5892 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5893 	_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5894 	{
5895 	  return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5896 							      (__v4di) __W,
5897 							      (__mmask8) __U);
5898 	}
5899 	
5900 	extern __inline __m256i
5901 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5902 	_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A)
5903 	{
5904 	  return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
5905 							      (__v4di)
5906 							      _mm256_setzero_si256 (),
5907 							      (__mmask8) __U);
5908 	}
5909 	
5910 	extern __inline void
5911 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5912 	_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5913 	{
5914 	  __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
5915 						  (__v4di) __A,
5916 						  (__mmask8) __U);
5917 	}
5918 	
5919 	extern __inline __m128i
5920 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5921 	_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5922 	{
5923 	  return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5924 							      (__v2di) __W,
5925 							      (__mmask8) __U);
5926 	}
5927 	
5928 	extern __inline __m128i
5929 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5930 	_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A)
5931 	{
5932 	  return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
5933 							      (__v2di)
5934 							      _mm_setzero_si128 (),
5935 							      (__mmask8) __U);
5936 	}
5937 	
5938 	extern __inline void
5939 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5940 	_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5941 	{
5942 	  __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
5943 						  (__v2di) __A,
5944 						  (__mmask8) __U);
5945 	}
5946 	
5947 	extern __inline __m256i
5948 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5949 	_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5950 	{
5951 	  return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5952 							      (__v8si) __W,
5953 							      (__mmask8) __U);
5954 	}
5955 	
5956 	extern __inline __m256i
5957 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5958 	_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A)
5959 	{
5960 	  return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
5961 							      (__v8si)
5962 							      _mm256_setzero_si256 (),
5963 							      (__mmask8) __U);
5964 	}
5965 	
5966 	extern __inline void
5967 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5968 	_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5969 	{
5970 	  __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
5971 						  (__v8si) __A,
5972 						  (__mmask8) __U);
5973 	}
5974 	
5975 	extern __inline __m128i
5976 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5977 	_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5978 	{
5979 	  return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5980 							      (__v4si) __W,
5981 							      (__mmask8) __U);
5982 	}
5983 	
5984 	extern __inline __m128i
5985 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5986 	_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A)
5987 	{
5988 	  return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
5989 							      (__v4si)
5990 							      _mm_setzero_si128 (),
5991 							      (__mmask8) __U);
5992 	}
5993 	
5994 	extern __inline void
5995 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5996 	_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5997 	{
5998 	  __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
5999 						  (__v4si) __A,
6000 						  (__mmask8) __U);
6001 	}
6002 	
6003 	extern __inline __m256d
6004 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6005 	_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A)
6006 	{
6007 	  return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
6008 							    (__v4df) __W,
6009 							    (__mmask8) __U);
6010 	}
6011 	
6012 	extern __inline __m256d
6013 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6014 	_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A)
6015 	{
6016 	  return (__m256d) __builtin_ia32_expanddf256_maskz ((__v4df) __A,
6017 							     (__v4df)
6018 							     _mm256_setzero_pd (),
6019 							     (__mmask8) __U);
6020 	}
6021 	
6022 	extern __inline __m256d
6023 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6024 	_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P)
6025 	{
6026 	  return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
6027 								(__v4df) __W,
6028 								(__mmask8)
6029 								__U);
6030 	}
6031 	
6032 	extern __inline __m256d
6033 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6034 	_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6035 	{
6036 	  return (__m256d) __builtin_ia32_expandloaddf256_maskz ((__v4df *) __P,
6037 								 (__v4df)
6038 								 _mm256_setzero_pd (),
6039 								 (__mmask8)
6040 								 __U);
6041 	}
6042 	
6043 	extern __inline __m128d
6044 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6045 	_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A)
6046 	{
6047 	  return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
6048 							    (__v2df) __W,
6049 							    (__mmask8) __U);
6050 	}
6051 	
6052 	extern __inline __m128d
6053 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6054 	_mm_maskz_expand_pd (__mmask8 __U, __m128d __A)
6055 	{
6056 	  return (__m128d) __builtin_ia32_expanddf128_maskz ((__v2df) __A,
6057 							     (__v2df)
6058 							     _mm_setzero_pd (),
6059 							     (__mmask8) __U);
6060 	}
6061 	
6062 	extern __inline __m128d
6063 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6064 	_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P)
6065 	{
6066 	  return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
6067 								(__v2df) __W,
6068 								(__mmask8)
6069 								__U);
6070 	}
6071 	
6072 	extern __inline __m128d
6073 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6074 	_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
6075 	{
6076 	  return (__m128d) __builtin_ia32_expandloaddf128_maskz ((__v2df *) __P,
6077 								 (__v2df)
6078 								 _mm_setzero_pd (),
6079 								 (__mmask8)
6080 								 __U);
6081 	}
6082 	
6083 	extern __inline __m256
6084 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6085 	_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A)
6086 	{
6087 	  return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
6088 							   (__v8sf) __W,
6089 							   (__mmask8) __U);
6090 	}
6091 	
6092 	extern __inline __m256
6093 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6094 	_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A)
6095 	{
6096 	  return (__m256) __builtin_ia32_expandsf256_maskz ((__v8sf) __A,
6097 							    (__v8sf)
6098 							    _mm256_setzero_ps (),
6099 							    (__mmask8) __U);
6100 	}
6101 	
6102 	extern __inline __m256
6103 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6104 	_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P)
6105 	{
6106 	  return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
6107 							       (__v8sf) __W,
6108 							       (__mmask8) __U);
6109 	}
6110 	
6111 	extern __inline __m256
6112 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6113 	_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6114 	{
6115 	  return (__m256) __builtin_ia32_expandloadsf256_maskz ((__v8sf *) __P,
6116 								(__v8sf)
6117 								_mm256_setzero_ps (),
6118 								(__mmask8)
6119 								__U);
6120 	}
6121 	
6122 	extern __inline __m128
6123 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6124 	_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A)
6125 	{
6126 	  return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
6127 							   (__v4sf) __W,
6128 							   (__mmask8) __U);
6129 	}
6130 	
6131 	extern __inline __m128
6132 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6133 	_mm_maskz_expand_ps (__mmask8 __U, __m128 __A)
6134 	{
6135 	  return (__m128) __builtin_ia32_expandsf128_maskz ((__v4sf) __A,
6136 							    (__v4sf)
6137 							    _mm_setzero_ps (),
6138 							    (__mmask8) __U);
6139 	}
6140 	
6141 	extern __inline __m128
6142 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6143 	_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P)
6144 	{
6145 	  return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
6146 							       (__v4sf) __W,
6147 							       (__mmask8) __U);
6148 	}
6149 	
6150 	extern __inline __m128
6151 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6152 	_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P)
6153 	{
6154 	  return (__m128) __builtin_ia32_expandloadsf128_maskz ((__v4sf *) __P,
6155 								(__v4sf)
6156 								_mm_setzero_ps (),
6157 								(__mmask8)
6158 								__U);
6159 	}
6160 	
6161 	extern __inline __m256i
6162 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6163 	_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
6164 	{
6165 	  return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
6166 							    (__v4di) __W,
6167 							    (__mmask8) __U);
6168 	}
6169 	
6170 	extern __inline __m256i
6171 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6172 	_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A)
6173 	{
6174 	  return (__m256i) __builtin_ia32_expanddi256_maskz ((__v4di) __A,
6175 							     (__v4di)
6176 							     _mm256_setzero_si256 (),
6177 							     (__mmask8) __U);
6178 	}
6179 	
6180 	extern __inline __m256i
6181 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6182 	_mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
6183 				       void const *__P)
6184 	{
6185 	  return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
6186 								(__v4di) __W,
6187 								(__mmask8)
6188 								__U);
6189 	}
6190 	
6191 	extern __inline __m256i
6192 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6193 	_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6194 	{
6195 	  return (__m256i) __builtin_ia32_expandloaddi256_maskz ((__v4di *) __P,
6196 								 (__v4di)
6197 								 _mm256_setzero_si256 (),
6198 								 (__mmask8)
6199 								 __U);
6200 	}
6201 	
6202 	extern __inline __m128i
6203 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6204 	_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
6205 	{
6206 	  return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
6207 							    (__v2di) __W,
6208 							    (__mmask8) __U);
6209 	}
6210 	
6211 	extern __inline __m128i
6212 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6213 	_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A)
6214 	{
6215 	  return (__m128i) __builtin_ia32_expanddi128_maskz ((__v2di) __A,
6216 							     (__v2di)
6217 							     _mm_setzero_si128 (),
6218 							     (__mmask8) __U);
6219 	}
6220 	
6221 	extern __inline __m128i
6222 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6223 	_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
6224 	{
6225 	  return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
6226 								(__v2di) __W,
6227 								(__mmask8)
6228 								__U);
6229 	}
6230 	
6231 	extern __inline __m128i
6232 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6233 	_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
6234 	{
6235 	  return (__m128i) __builtin_ia32_expandloaddi128_maskz ((__v2di *) __P,
6236 								 (__v2di)
6237 								 _mm_setzero_si128 (),
6238 								 (__mmask8)
6239 								 __U);
6240 	}
6241 	
6242 	extern __inline __m256i
6243 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6244 	_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
6245 	{
6246 	  return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
6247 							    (__v8si) __W,
6248 							    (__mmask8) __U);
6249 	}
6250 	
6251 	extern __inline __m256i
6252 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6253 	_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A)
6254 	{
6255 	  return (__m256i) __builtin_ia32_expandsi256_maskz ((__v8si) __A,
6256 							     (__v8si)
6257 							     _mm256_setzero_si256 (),
6258 							     (__mmask8) __U);
6259 	}
6260 	
6261 	extern __inline __m256i
6262 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6263 	_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
6264 				       void const *__P)
6265 	{
6266 	  return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
6267 								(__v8si) __W,
6268 								(__mmask8)
6269 								__U);
6270 	}
6271 	
6272 	extern __inline __m256i
6273 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6274 	_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6275 	{
6276 	  return (__m256i) __builtin_ia32_expandloadsi256_maskz ((__v8si *) __P,
6277 								 (__v8si)
6278 								 _mm256_setzero_si256 (),
6279 								 (__mmask8)
6280 								 __U);
6281 	}
6282 	
6283 	extern __inline __m128i
6284 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6285 	_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
6286 	{
6287 	  return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
6288 							    (__v4si) __W,
6289 							    (__mmask8) __U);
6290 	}
6291 	
6292 	extern __inline __m128i
6293 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6294 	_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A)
6295 	{
6296 	  return (__m128i) __builtin_ia32_expandsi128_maskz ((__v4si) __A,
6297 							     (__v4si)
6298 							     _mm_setzero_si128 (),
6299 							     (__mmask8) __U);
6300 	}
6301 	
6302 	extern __inline __m128i
6303 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6304 	_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
6305 	{
6306 	  return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
6307 								(__v4si) __W,
6308 								(__mmask8)
6309 								__U);
6310 	}
6311 	
6312 	extern __inline __m128i
6313 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6314 	_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P)
6315 	{
6316 	  return (__m128i) __builtin_ia32_expandloadsi128_maskz ((__v4si *) __P,
6317 								 (__v4si)
6318 								 _mm_setzero_si128 (),
6319 								 (__mmask8)
6320 								 __U);
6321 	}
6322 	
6323 	extern __inline __m256d
6324 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6325 	_mm256_permutex2var_pd (__m256d __A, __m256i __I, __m256d __B)
6326 	{
6327 	  return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6328 								/* idx */ ,
6329 								(__v4df) __A,
6330 								(__v4df) __B,
6331 								(__mmask8) -1);
6332 	}
6333 	
6334 	extern __inline __m256d
6335 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6336 	_mm256_mask_permutex2var_pd (__m256d __A, __mmask8 __U, __m256i __I,
6337 				     __m256d __B)
6338 	{
6339 	  return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
6340 								/* idx */ ,
6341 								(__v4df) __A,
6342 								(__v4df) __B,
6343 								(__mmask8)
6344 								__U);
6345 	}
6346 	
6347 	extern __inline __m256d
6348 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6349 	_mm256_mask2_permutex2var_pd (__m256d __A, __m256i __I, __mmask8 __U,
6350 				      __m256d __B)
6351 	{
6352 	  return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
6353 								(__v4di) __I
6354 								/* idx */ ,
6355 								(__v4df) __B,
6356 								(__mmask8)
6357 								__U);
6358 	}
6359 	
6360 	extern __inline __m256d
6361 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6362 	_mm256_maskz_permutex2var_pd (__mmask8 __U, __m256d __A, __m256i __I,
6363 				      __m256d __B)
6364 	{
6365 	  return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
6366 								 /* idx */ ,
6367 								 (__v4df) __A,
6368 								 (__v4df) __B,
6369 								 (__mmask8)
6370 								 __U);
6371 	}
6372 	
6373 	extern __inline __m256
6374 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6375 	_mm256_permutex2var_ps (__m256 __A, __m256i __I, __m256 __B)
6376 	{
6377 	  return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6378 							       /* idx */ ,
6379 							       (__v8sf) __A,
6380 							       (__v8sf) __B,
6381 							       (__mmask8) -1);
6382 	}
6383 	
6384 	extern __inline __m256
6385 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6386 	_mm256_mask_permutex2var_ps (__m256 __A, __mmask8 __U, __m256i __I,
6387 				     __m256 __B)
6388 	{
6389 	  return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
6390 							       /* idx */ ,
6391 							       (__v8sf) __A,
6392 							       (__v8sf) __B,
6393 							       (__mmask8) __U);
6394 	}
6395 	
6396 	extern __inline __m256
6397 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6398 	_mm256_mask2_permutex2var_ps (__m256 __A, __m256i __I, __mmask8 __U,
6399 				      __m256 __B)
6400 	{
6401 	  return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
6402 							       (__v8si) __I
6403 							       /* idx */ ,
6404 							       (__v8sf) __B,
6405 							       (__mmask8) __U);
6406 	}
6407 	
6408 	extern __inline __m256
6409 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6410 	_mm256_maskz_permutex2var_ps (__mmask8 __U, __m256 __A, __m256i __I,
6411 				      __m256 __B)
6412 	{
6413 	  return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
6414 								/* idx */ ,
6415 								(__v8sf) __A,
6416 								(__v8sf) __B,
6417 								(__mmask8)
6418 								__U);
6419 	}
6420 	
6421 	extern __inline __m128i
6422 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6423 	_mm_permutex2var_epi64 (__m128i __A, __m128i __I, __m128i __B)
6424 	{
6425 	  return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6426 							       /* idx */ ,
6427 							       (__v2di) __A,
6428 							       (__v2di) __B,
6429 							       (__mmask8) -1);
6430 	}
6431 	
6432 	extern __inline __m128i
6433 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6434 	_mm_mask_permutex2var_epi64 (__m128i __A, __mmask8 __U, __m128i __I,
6435 				     __m128i __B)
6436 	{
6437 	  return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
6438 							       /* idx */ ,
6439 							       (__v2di) __A,
6440 							       (__v2di) __B,
6441 							       (__mmask8) __U);
6442 	}
6443 	
6444 	extern __inline __m128i
6445 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6446 	_mm_mask2_permutex2var_epi64 (__m128i __A, __m128i __I, __mmask8 __U,
6447 				      __m128i __B)
6448 	{
6449 	  return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
6450 							       (__v2di) __I
6451 							       /* idx */ ,
6452 							       (__v2di) __B,
6453 							       (__mmask8) __U);
6454 	}
6455 	
6456 	extern __inline __m128i
6457 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6458 	_mm_maskz_permutex2var_epi64 (__mmask8 __U, __m128i __A, __m128i __I,
6459 				      __m128i __B)
6460 	{
6461 	  return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
6462 								/* idx */ ,
6463 								(__v2di) __A,
6464 								(__v2di) __B,
6465 								(__mmask8)
6466 								__U);
6467 	}
6468 	
6469 	extern __inline __m128i
6470 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6471 	_mm_permutex2var_epi32 (__m128i __A, __m128i __I, __m128i __B)
6472 	{
6473 	  return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6474 							       /* idx */ ,
6475 							       (__v4si) __A,
6476 							       (__v4si) __B,
6477 							       (__mmask8) -1);
6478 	}
6479 	
6480 	extern __inline __m128i
6481 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6482 	_mm_mask_permutex2var_epi32 (__m128i __A, __mmask8 __U, __m128i __I,
6483 				     __m128i __B)
6484 	{
6485 	  return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
6486 							       /* idx */ ,
6487 							       (__v4si) __A,
6488 							       (__v4si) __B,
6489 							       (__mmask8) __U);
6490 	}
6491 	
6492 	extern __inline __m128i
6493 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6494 	_mm_mask2_permutex2var_epi32 (__m128i __A, __m128i __I, __mmask8 __U,
6495 				      __m128i __B)
6496 	{
6497 	  return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
6498 							       (__v4si) __I
6499 							       /* idx */ ,
6500 							       (__v4si) __B,
6501 							       (__mmask8) __U);
6502 	}
6503 	
6504 	extern __inline __m128i
6505 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6506 	_mm_maskz_permutex2var_epi32 (__mmask8 __U, __m128i __A, __m128i __I,
6507 				      __m128i __B)
6508 	{
6509 	  return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
6510 								/* idx */ ,
6511 								(__v4si) __A,
6512 								(__v4si) __B,
6513 								(__mmask8)
6514 								__U);
6515 	}
6516 	
6517 	extern __inline __m256i
6518 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6519 	_mm256_permutex2var_epi64 (__m256i __A, __m256i __I, __m256i __B)
6520 	{
6521 	  return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6522 							       /* idx */ ,
6523 							       (__v4di) __A,
6524 							       (__v4di) __B,
6525 							       (__mmask8) -1);
6526 	}
6527 	
6528 	extern __inline __m256i
6529 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6530 	_mm256_mask_permutex2var_epi64 (__m256i __A, __mmask8 __U, __m256i __I,
6531 					__m256i __B)
6532 	{
6533 	  return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
6534 							       /* idx */ ,
6535 							       (__v4di) __A,
6536 							       (__v4di) __B,
6537 							       (__mmask8) __U);
6538 	}
6539 	
6540 	extern __inline __m256i
6541 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6542 	_mm256_mask2_permutex2var_epi64 (__m256i __A, __m256i __I,
6543 					 __mmask8 __U, __m256i __B)
6544 	{
6545 	  return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
6546 							       (__v4di) __I
6547 							       /* idx */ ,
6548 							       (__v4di) __B,
6549 							       (__mmask8) __U);
6550 	}
6551 	
6552 	extern __inline __m256i
6553 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6554 	_mm256_maskz_permutex2var_epi64 (__mmask8 __U, __m256i __A,
6555 					 __m256i __I, __m256i __B)
6556 	{
6557 	  return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
6558 								/* idx */ ,
6559 								(__v4di) __A,
6560 								(__v4di) __B,
6561 								(__mmask8)
6562 								__U);
6563 	}
6564 	
6565 	extern __inline __m256i
6566 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6567 	_mm256_permutex2var_epi32 (__m256i __A, __m256i __I, __m256i __B)
6568 	{
6569 	  return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6570 							       /* idx */ ,
6571 							       (__v8si) __A,
6572 							       (__v8si) __B,
6573 							       (__mmask8) -1);
6574 	}
6575 	
6576 	extern __inline __m256i
6577 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6578 	_mm256_mask_permutex2var_epi32 (__m256i __A, __mmask8 __U, __m256i __I,
6579 					__m256i __B)
6580 	{
6581 	  return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
6582 							       /* idx */ ,
6583 							       (__v8si) __A,
6584 							       (__v8si) __B,
6585 							       (__mmask8) __U);
6586 	}
6587 	
6588 	extern __inline __m256i
6589 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6590 	_mm256_mask2_permutex2var_epi32 (__m256i __A, __m256i __I,
6591 					 __mmask8 __U, __m256i __B)
6592 	{
6593 	  return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
6594 							       (__v8si) __I
6595 							       /* idx */ ,
6596 							       (__v8si) __B,
6597 							       (__mmask8) __U);
6598 	}
6599 	
6600 	extern __inline __m256i
6601 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6602 	_mm256_maskz_permutex2var_epi32 (__mmask8 __U, __m256i __A,
6603 					 __m256i __I, __m256i __B)
6604 	{
6605 	  return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
6606 								/* idx */ ,
6607 								(__v8si) __A,
6608 								(__v8si) __B,
6609 								(__mmask8)
6610 								__U);
6611 	}
6612 	
6613 	extern __inline __m128d
6614 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6615 	_mm_permutex2var_pd (__m128d __A, __m128i __I, __m128d __B)
6616 	{
6617 	  return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6618 								/* idx */ ,
6619 								(__v2df) __A,
6620 								(__v2df) __B,
6621 								(__mmask8) -1);
6622 	}
6623 	
6624 	extern __inline __m128d
6625 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6626 	_mm_mask_permutex2var_pd (__m128d __A, __mmask8 __U, __m128i __I,
6627 				  __m128d __B)
6628 	{
6629 	  return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
6630 								/* idx */ ,
6631 								(__v2df) __A,
6632 								(__v2df) __B,
6633 								(__mmask8)
6634 								__U);
6635 	}
6636 	
6637 	extern __inline __m128d
6638 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6639 	_mm_mask2_permutex2var_pd (__m128d __A, __m128i __I, __mmask8 __U,
6640 				   __m128d __B)
6641 	{
6642 	  return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
6643 								(__v2di) __I
6644 								/* idx */ ,
6645 								(__v2df) __B,
6646 								(__mmask8)
6647 								__U);
6648 	}
6649 	
6650 	extern __inline __m128d
6651 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6652 	_mm_maskz_permutex2var_pd (__mmask8 __U, __m128d __A, __m128i __I,
6653 				   __m128d __B)
6654 	{
6655 	  return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
6656 								 /* idx */ ,
6657 								 (__v2df) __A,
6658 								 (__v2df) __B,
6659 								 (__mmask8)
6660 								 __U);
6661 	}
6662 	
6663 	extern __inline __m128
6664 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6665 	_mm_permutex2var_ps (__m128 __A, __m128i __I, __m128 __B)
6666 	{
6667 	  return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6668 							       /* idx */ ,
6669 							       (__v4sf) __A,
6670 							       (__v4sf) __B,
6671 							       (__mmask8) -1);
6672 	}
6673 	
6674 	extern __inline __m128
6675 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6676 	_mm_mask_permutex2var_ps (__m128 __A, __mmask8 __U, __m128i __I,
6677 				  __m128 __B)
6678 	{
6679 	  return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
6680 							       /* idx */ ,
6681 							       (__v4sf) __A,
6682 							       (__v4sf) __B,
6683 							       (__mmask8) __U);
6684 	}
6685 	
6686 	extern __inline __m128
6687 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6688 	_mm_mask2_permutex2var_ps (__m128 __A, __m128i __I, __mmask8 __U,
6689 				   __m128 __B)
6690 	{
6691 	  return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
6692 							       (__v4si) __I
6693 							       /* idx */ ,
6694 							       (__v4sf) __B,
6695 							       (__mmask8) __U);
6696 	}
6697 	
6698 	extern __inline __m128
6699 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6700 	_mm_maskz_permutex2var_ps (__mmask8 __U, __m128 __A, __m128i __I,
6701 				   __m128 __B)
6702 	{
6703 	  return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
6704 								/* idx */ ,
6705 								(__v4sf) __A,
6706 								(__v4sf) __B,
6707 								(__mmask8)
6708 								__U);
6709 	}
6710 	
6711 	extern __inline __m128i
6712 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6713 	_mm_srav_epi64 (__m128i __X, __m128i __Y)
6714 	{
6715 	  return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6716 							  (__v2di) __Y,
6717 							  (__v2di)
6718 							  _mm_setzero_si128 (),
6719 							  (__mmask8) -1);
6720 	}
6721 	
6722 	extern __inline __m128i
6723 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6724 	_mm_mask_srav_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6725 			     __m128i __Y)
6726 	{
6727 	  return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6728 							  (__v2di) __Y,
6729 							  (__v2di) __W,
6730 							  (__mmask8) __U);
6731 	}
6732 	
6733 	extern __inline __m128i
6734 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6735 	_mm_maskz_srav_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6736 	{
6737 	  return (__m128i) __builtin_ia32_psravq128_mask ((__v2di) __X,
6738 							  (__v2di) __Y,
6739 							  (__v2di)
6740 							  _mm_setzero_si128 (),
6741 							  (__mmask8) __U);
6742 	}
6743 	
6744 	extern __inline __m256i
6745 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6746 	_mm256_mask_sllv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6747 				__m256i __Y)
6748 	{
6749 	  return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6750 							 (__v8si) __Y,
6751 							 (__v8si) __W,
6752 							 (__mmask8) __U);
6753 	}
6754 	
6755 	extern __inline __m256i
6756 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6757 	_mm256_maskz_sllv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6758 	{
6759 	  return (__m256i) __builtin_ia32_psllv8si_mask ((__v8si) __X,
6760 							 (__v8si) __Y,
6761 							 (__v8si)
6762 							 _mm256_setzero_si256 (),
6763 							 (__mmask8) __U);
6764 	}
6765 	
6766 	extern __inline __m128i
6767 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6768 	_mm_mask_sllv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6769 			     __m128i __Y)
6770 	{
6771 	  return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6772 							 (__v4si) __Y,
6773 							 (__v4si) __W,
6774 							 (__mmask8) __U);
6775 	}
6776 	
6777 	extern __inline __m128i
6778 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6779 	_mm_maskz_sllv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6780 	{
6781 	  return (__m128i) __builtin_ia32_psllv4si_mask ((__v4si) __X,
6782 							 (__v4si) __Y,
6783 							 (__v4si)
6784 							 _mm_setzero_si128 (),
6785 							 (__mmask8) __U);
6786 	}
6787 	
6788 	extern __inline __m256i
6789 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6790 	_mm256_mask_sllv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6791 				__m256i __Y)
6792 	{
6793 	  return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6794 							 (__v4di) __Y,
6795 							 (__v4di) __W,
6796 							 (__mmask8) __U);
6797 	}
6798 	
6799 	extern __inline __m256i
6800 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6801 	_mm256_maskz_sllv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6802 	{
6803 	  return (__m256i) __builtin_ia32_psllv4di_mask ((__v4di) __X,
6804 							 (__v4di) __Y,
6805 							 (__v4di)
6806 							 _mm256_setzero_si256 (),
6807 							 (__mmask8) __U);
6808 	}
6809 	
6810 	extern __inline __m128i
6811 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6812 	_mm_mask_sllv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6813 			     __m128i __Y)
6814 	{
6815 	  return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6816 							 (__v2di) __Y,
6817 							 (__v2di) __W,
6818 							 (__mmask8) __U);
6819 	}
6820 	
6821 	extern __inline __m128i
6822 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6823 	_mm_maskz_sllv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6824 	{
6825 	  return (__m128i) __builtin_ia32_psllv2di_mask ((__v2di) __X,
6826 							 (__v2di) __Y,
6827 							 (__v2di)
6828 							 _mm_setzero_si128 (),
6829 							 (__mmask8) __U);
6830 	}
6831 	
6832 	extern __inline __m256i
6833 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6834 	_mm256_mask_srav_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6835 				__m256i __Y)
6836 	{
6837 	  return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6838 							 (__v8si) __Y,
6839 							 (__v8si) __W,
6840 							 (__mmask8) __U);
6841 	}
6842 	
6843 	extern __inline __m256i
6844 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6845 	_mm256_maskz_srav_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6846 	{
6847 	  return (__m256i) __builtin_ia32_psrav8si_mask ((__v8si) __X,
6848 							 (__v8si) __Y,
6849 							 (__v8si)
6850 							 _mm256_setzero_si256 (),
6851 							 (__mmask8) __U);
6852 	}
6853 	
6854 	extern __inline __m128i
6855 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6856 	_mm_mask_srav_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6857 			     __m128i __Y)
6858 	{
6859 	  return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6860 							 (__v4si) __Y,
6861 							 (__v4si) __W,
6862 							 (__mmask8) __U);
6863 	}
6864 	
6865 	extern __inline __m128i
6866 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6867 	_mm_maskz_srav_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6868 	{
6869 	  return (__m128i) __builtin_ia32_psrav4si_mask ((__v4si) __X,
6870 							 (__v4si) __Y,
6871 							 (__v4si)
6872 							 _mm_setzero_si128 (),
6873 							 (__mmask8) __U);
6874 	}
6875 	
6876 	extern __inline __m256i
6877 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6878 	_mm256_mask_srlv_epi32 (__m256i __W, __mmask8 __U, __m256i __X,
6879 				__m256i __Y)
6880 	{
6881 	  return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6882 							 (__v8si) __Y,
6883 							 (__v8si) __W,
6884 							 (__mmask8) __U);
6885 	}
6886 	
6887 	extern __inline __m256i
6888 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6889 	_mm256_maskz_srlv_epi32 (__mmask8 __U, __m256i __X, __m256i __Y)
6890 	{
6891 	  return (__m256i) __builtin_ia32_psrlv8si_mask ((__v8si) __X,
6892 							 (__v8si) __Y,
6893 							 (__v8si)
6894 							 _mm256_setzero_si256 (),
6895 							 (__mmask8) __U);
6896 	}
6897 	
6898 	extern __inline __m128i
6899 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6900 	_mm_mask_srlv_epi32 (__m128i __W, __mmask8 __U, __m128i __X,
6901 			     __m128i __Y)
6902 	{
6903 	  return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6904 							 (__v4si) __Y,
6905 							 (__v4si) __W,
6906 							 (__mmask8) __U);
6907 	}
6908 	
6909 	extern __inline __m128i
6910 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6911 	_mm_maskz_srlv_epi32 (__mmask8 __U, __m128i __X, __m128i __Y)
6912 	{
6913 	  return (__m128i) __builtin_ia32_psrlv4si_mask ((__v4si) __X,
6914 							 (__v4si) __Y,
6915 							 (__v4si)
6916 							 _mm_setzero_si128 (),
6917 							 (__mmask8) __U);
6918 	}
6919 	
6920 	extern __inline __m256i
6921 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6922 	_mm256_mask_srlv_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
6923 				__m256i __Y)
6924 	{
6925 	  return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6926 							 (__v4di) __Y,
6927 							 (__v4di) __W,
6928 							 (__mmask8) __U);
6929 	}
6930 	
6931 	extern __inline __m256i
6932 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6933 	_mm256_maskz_srlv_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
6934 	{
6935 	  return (__m256i) __builtin_ia32_psrlv4di_mask ((__v4di) __X,
6936 							 (__v4di) __Y,
6937 							 (__v4di)
6938 							 _mm256_setzero_si256 (),
6939 							 (__mmask8) __U);
6940 	}
6941 	
6942 	extern __inline __m128i
6943 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6944 	_mm_mask_srlv_epi64 (__m128i __W, __mmask8 __U, __m128i __X,
6945 			     __m128i __Y)
6946 	{
6947 	  return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6948 							 (__v2di) __Y,
6949 							 (__v2di) __W,
6950 							 (__mmask8) __U);
6951 	}
6952 	
6953 	extern __inline __m128i
6954 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6955 	_mm_maskz_srlv_epi64 (__mmask8 __U, __m128i __X, __m128i __Y)
6956 	{
6957 	  return (__m128i) __builtin_ia32_psrlv2di_mask ((__v2di) __X,
6958 							 (__v2di) __Y,
6959 							 (__v2di)
6960 							 _mm_setzero_si128 (),
6961 							 (__mmask8) __U);
6962 	}
6963 	
6964 	extern __inline __m256i
6965 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6966 	_mm256_rolv_epi32 (__m256i __A, __m256i __B)
6967 	{
6968 	  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6969 							  (__v8si) __B,
6970 							  (__v8si)
6971 							  _mm256_setzero_si256 (),
6972 							  (__mmask8) -1);
6973 	}
6974 	
6975 	extern __inline __m256i
6976 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6977 	_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
6978 				__m256i __B)
6979 	{
6980 	  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6981 							  (__v8si) __B,
6982 							  (__v8si) __W,
6983 							  (__mmask8) __U);
6984 	}
6985 	
6986 	extern __inline __m256i
6987 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6988 	_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
6989 	{
6990 	  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
6991 							  (__v8si) __B,
6992 							  (__v8si)
6993 							  _mm256_setzero_si256 (),
6994 							  (__mmask8) __U);
6995 	}
6996 	
6997 	extern __inline __m128i
6998 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6999 	_mm_rolv_epi32 (__m128i __A, __m128i __B)
7000 	{
7001 	  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7002 							  (__v4si) __B,
7003 							  (__v4si)
7004 							  _mm_setzero_si128 (),
7005 							  (__mmask8) -1);
7006 	}
7007 	
7008 	extern __inline __m128i
7009 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7010 	_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7011 			     __m128i __B)
7012 	{
7013 	  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7014 							  (__v4si) __B,
7015 							  (__v4si) __W,
7016 							  (__mmask8) __U);
7017 	}
7018 	
7019 	extern __inline __m128i
7020 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7021 	_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7022 	{
7023 	  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
7024 							  (__v4si) __B,
7025 							  (__v4si)
7026 							  _mm_setzero_si128 (),
7027 							  (__mmask8) __U);
7028 	}
7029 	
7030 	extern __inline __m256i
7031 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7032 	_mm256_rorv_epi32 (__m256i __A, __m256i __B)
7033 	{
7034 	  return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7035 							  (__v8si) __B,
7036 							  (__v8si)
7037 							  _mm256_setzero_si256 (),
7038 							  (__mmask8) -1);
7039 	}
7040 	
7041 	extern __inline __m256i
7042 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7043 	_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
7044 				__m256i __B)
7045 	{
7046 	  return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7047 							  (__v8si) __B,
7048 							  (__v8si) __W,
7049 							  (__mmask8) __U);
7050 	}
7051 	
7052 	extern __inline __m256i
7053 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7054 	_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
7055 	{
7056 	  return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A,
7057 							  (__v8si) __B,
7058 							  (__v8si)
7059 							  _mm256_setzero_si256 (),
7060 							  (__mmask8) __U);
7061 	}
7062 	
7063 	extern __inline __m128i
7064 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7065 	_mm_rorv_epi32 (__m128i __A, __m128i __B)
7066 	{
7067 	  return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7068 							  (__v4si) __B,
7069 							  (__v4si)
7070 							  _mm_setzero_si128 (),
7071 							  (__mmask8) -1);
7072 	}
7073 	
7074 	extern __inline __m128i
7075 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7076 	_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
7077 			     __m128i __B)
7078 	{
7079 	  return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7080 							  (__v4si) __B,
7081 							  (__v4si) __W,
7082 							  (__mmask8) __U);
7083 	}
7084 	
7085 	extern __inline __m128i
7086 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7087 	_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
7088 	{
7089 	  return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A,
7090 							  (__v4si) __B,
7091 							  (__v4si)
7092 							  _mm_setzero_si128 (),
7093 							  (__mmask8) __U);
7094 	}
7095 	
7096 	extern __inline __m256i
7097 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7098 	_mm256_rolv_epi64 (__m256i __A, __m256i __B)
7099 	{
7100 	  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7101 							  (__v4di) __B,
7102 							  (__v4di)
7103 							  _mm256_setzero_si256 (),
7104 							  (__mmask8) -1);
7105 	}
7106 	
7107 	extern __inline __m256i
7108 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7109 	_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7110 				__m256i __B)
7111 	{
7112 	  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7113 							  (__v4di) __B,
7114 							  (__v4di) __W,
7115 							  (__mmask8) __U);
7116 	}
7117 	
7118 	extern __inline __m256i
7119 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7120 	_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7121 	{
7122 	  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
7123 							  (__v4di) __B,
7124 							  (__v4di)
7125 							  _mm256_setzero_si256 (),
7126 							  (__mmask8) __U);
7127 	}
7128 	
7129 	extern __inline __m128i
7130 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7131 	_mm_rolv_epi64 (__m128i __A, __m128i __B)
7132 	{
7133 	  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7134 							  (__v2di) __B,
7135 							  (__v2di)
7136 							  _mm_setzero_si128 (),
7137 							  (__mmask8) -1);
7138 	}
7139 	
7140 	extern __inline __m128i
7141 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7142 	_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7143 			     __m128i __B)
7144 	{
7145 	  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7146 							  (__v2di) __B,
7147 							  (__v2di) __W,
7148 							  (__mmask8) __U);
7149 	}
7150 	
7151 	extern __inline __m128i
7152 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7153 	_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7154 	{
7155 	  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
7156 							  (__v2di) __B,
7157 							  (__v2di)
7158 							  _mm_setzero_si128 (),
7159 							  (__mmask8) __U);
7160 	}
7161 	
7162 	extern __inline __m256i
7163 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7164 	_mm256_rorv_epi64 (__m256i __A, __m256i __B)
7165 	{
7166 	  return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7167 							  (__v4di) __B,
7168 							  (__v4di)
7169 							  _mm256_setzero_si256 (),
7170 							  (__mmask8) -1);
7171 	}
7172 	
7173 	extern __inline __m256i
7174 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7175 	_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7176 				__m256i __B)
7177 	{
7178 	  return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7179 							  (__v4di) __B,
7180 							  (__v4di) __W,
7181 							  (__mmask8) __U);
7182 	}
7183 	
7184 	extern __inline __m256i
7185 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7186 	_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7187 	{
7188 	  return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A,
7189 							  (__v4di) __B,
7190 							  (__v4di)
7191 							  _mm256_setzero_si256 (),
7192 							  (__mmask8) __U);
7193 	}
7194 	
7195 	extern __inline __m128i
7196 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7197 	_mm_rorv_epi64 (__m128i __A, __m128i __B)
7198 	{
7199 	  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7200 							  (__v2di) __B,
7201 							  (__v2di)
7202 							  _mm_setzero_si128 (),
7203 							  (__mmask8) -1);
7204 	}
7205 	
7206 	extern __inline __m128i
7207 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7208 	_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7209 			     __m128i __B)
7210 	{
7211 	  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7212 							  (__v2di) __B,
7213 							  (__v2di) __W,
7214 							  (__mmask8) __U);
7215 	}
7216 	
7217 	extern __inline __m128i
7218 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7219 	_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7220 	{
7221 	  return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A,
7222 							  (__v2di) __B,
7223 							  (__v2di)
7224 							  _mm_setzero_si128 (),
7225 							  (__mmask8) __U);
7226 	}
7227 	
7228 	extern __inline __m256i
7229 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7230 	_mm256_srav_epi64 (__m256i __X, __m256i __Y)
7231 	{
7232 	  return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7233 							  (__v4di) __Y,
7234 							  (__v4di)
7235 							  _mm256_setzero_si256 (),
7236 							  (__mmask8) -1);
7237 	}
7238 	
7239 	extern __inline __m256i
7240 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7241 	_mm256_mask_srav_epi64 (__m256i __W, __mmask8 __U, __m256i __X,
7242 				__m256i __Y)
7243 	{
7244 	  return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7245 							  (__v4di) __Y,
7246 							  (__v4di) __W,
7247 							  (__mmask8) __U);
7248 	}
7249 	
7250 	extern __inline __m256i
7251 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7252 	_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
7253 	{
7254 	  return (__m256i) __builtin_ia32_psravq256_mask ((__v4di) __X,
7255 							  (__v4di) __Y,
7256 							  (__v4di)
7257 							  _mm256_setzero_si256 (),
7258 							  (__mmask8) __U);
7259 	}
7260 	
7261 	extern __inline __m256i
7262 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7263 	_mm256_mask_and_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7264 			       __m256i __B)
7265 	{
7266 	  return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7267 							 (__v4di) __B,
7268 							 (__v4di) __W, __U);
7269 	}
7270 	
7271 	extern __inline __m256i
7272 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7273 	_mm256_maskz_and_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7274 	{
7275 	  return (__m256i) __builtin_ia32_pandq256_mask ((__v4di) __A,
7276 							 (__v4di) __B,
7277 							 (__v4di)
7278 							 _mm256_setzero_pd (),
7279 							 __U);
7280 	}
7281 	
7282 	extern __inline __m128i
7283 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7284 	_mm_mask_and_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7285 			    __m128i __B)
7286 	{
7287 	  return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7288 							 (__v2di) __B,
7289 							 (__v2di) __W, __U);
7290 	}
7291 	
7292 	extern __inline __m128i
7293 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7294 	_mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7295 	{
7296 	  return (__m128i) __builtin_ia32_pandq128_mask ((__v2di) __A,
7297 							 (__v2di) __B,
7298 							 (__v2di)
7299 							 _mm_setzero_pd (),
7300 							 __U);
7301 	}
7302 	
7303 	extern __inline __m256i
7304 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7305 	_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7306 				  __m256i __B)
7307 	{
7308 	  return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7309 							  (__v4di) __B,
7310 							  (__v4di) __W, __U);
7311 	}
7312 	
7313 	extern __inline __m256i
7314 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7315 	_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7316 	{
7317 	  return (__m256i) __builtin_ia32_pandnq256_mask ((__v4di) __A,
7318 							  (__v4di) __B,
7319 							  (__v4di)
7320 							  _mm256_setzero_pd (),
7321 							  __U);
7322 	}
7323 	
7324 	extern __inline __m128i
7325 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7326 	_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7327 			       __m128i __B)
7328 	{
7329 	  return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7330 							  (__v2di) __B,
7331 							  (__v2di) __W, __U);
7332 	}
7333 	
7334 	extern __inline __m128i
7335 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7336 	_mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7337 	{
7338 	  return (__m128i) __builtin_ia32_pandnq128_mask ((__v2di) __A,
7339 							  (__v2di) __B,
7340 							  (__v2di)
7341 							  _mm_setzero_pd (),
7342 							  __U);
7343 	}
7344 	
7345 	extern __inline __m256i
7346 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7347 	_mm256_mask_or_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7348 			      __m256i __B)
7349 	{
7350 	  return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7351 							(__v4di) __B,
7352 							(__v4di) __W,
7353 							(__mmask8) __U);
7354 	}
7355 	
7356 	extern __inline __m256i
7357 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7358 	_mm256_maskz_or_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7359 	{
7360 	  return (__m256i) __builtin_ia32_porq256_mask ((__v4di) __A,
7361 							(__v4di) __B,
7362 							(__v4di)
7363 							_mm256_setzero_si256 (),
7364 							(__mmask8) __U);
7365 	}
7366 	
7367 	extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7368 	_mm256_or_epi64 (__m256i __A, __m256i __B)
7369 	{
7370 	  return (__m256i) ((__v4du)__A | (__v4du)__B);
7371 	}
7372 	
7373 	extern __inline __m128i
7374 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7375 	_mm_mask_or_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
7376 	{
7377 	  return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7378 							(__v2di) __B,
7379 							(__v2di) __W,
7380 							(__mmask8) __U);
7381 	}
7382 	
7383 	extern __inline __m128i
7384 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7385 	_mm_maskz_or_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7386 	{
7387 	  return (__m128i) __builtin_ia32_porq128_mask ((__v2di) __A,
7388 							(__v2di) __B,
7389 							(__v2di)
7390 							_mm_setzero_si128 (),
7391 							(__mmask8) __U);
7392 	}
7393 	
7394 	extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7395 	_mm_or_epi64 (__m128i __A, __m128i __B)
7396 	{
7397 	  return (__m128i) ((__v2du)__A | (__v2du)__B);
7398 	}
7399 	
7400 	extern __inline __m256i
7401 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7402 	_mm256_mask_xor_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
7403 			       __m256i __B)
7404 	{
7405 	  return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7406 							 (__v4di) __B,
7407 							 (__v4di) __W,
7408 							 (__mmask8) __U);
7409 	}
7410 	
7411 	extern __inline __m256i
7412 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7413 	_mm256_maskz_xor_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
7414 	{
7415 	  return (__m256i) __builtin_ia32_pxorq256_mask ((__v4di) __A,
7416 							 (__v4di) __B,
7417 							 (__v4di)
7418 							 _mm256_setzero_si256 (),
7419 							 (__mmask8) __U);
7420 	}
7421 	
7422 	extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7423 	_mm256_xor_epi64 (__m256i __A, __m256i __B)
7424 	{
7425 	  return (__m256i) ((__v4du)__A ^ (__v4du)__B);
7426 	}
7427 	
7428 	extern __inline __m128i
7429 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7430 	_mm_mask_xor_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
7431 			    __m128i __B)
7432 	{
7433 	  return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7434 							 (__v2di) __B,
7435 							 (__v2di) __W,
7436 							 (__mmask8) __U);
7437 	}
7438 	
7439 	extern __inline __m128i
7440 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7441 	_mm_maskz_xor_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
7442 	{
7443 	  return (__m128i) __builtin_ia32_pxorq128_mask ((__v2di) __A,
7444 							 (__v2di) __B,
7445 							 (__v2di)
7446 							 _mm_setzero_si128 (),
7447 							 (__mmask8) __U);
7448 	}
7449 	
7450 	extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
7451 	_mm_xor_epi64 (__m128i __A, __m128i __B)
7452 	{
7453 	  return (__m128i) ((__v2du)__A ^ (__v2du)__B);
7454 	}
7455 	
7456 	extern __inline __m256d
7457 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7458 	_mm256_mask_max_pd (__m256d __W, __mmask8 __U, __m256d __A,
7459 			    __m256d __B)
7460 	{
7461 	  return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7462 							 (__v4df) __B,
7463 							 (__v4df) __W,
7464 							 (__mmask8) __U);
7465 	}
7466 	
7467 	extern __inline __m256d
7468 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7469 	_mm256_maskz_max_pd (__mmask8 __U, __m256d __A, __m256d __B)
7470 	{
7471 	  return (__m256d) __builtin_ia32_maxpd256_mask ((__v4df) __A,
7472 							 (__v4df) __B,
7473 							 (__v4df)
7474 							 _mm256_setzero_pd (),
7475 							 (__mmask8) __U);
7476 	}
7477 	
7478 	extern __inline __m256
7479 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7480 	_mm256_mask_max_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7481 	{
7482 	  return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7483 							(__v8sf) __B,
7484 							(__v8sf) __W,
7485 							(__mmask8) __U);
7486 	}
7487 	
7488 	extern __inline __m256
7489 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7490 	_mm256_maskz_max_ps (__mmask8 __U, __m256 __A, __m256 __B)
7491 	{
7492 	  return (__m256) __builtin_ia32_maxps256_mask ((__v8sf) __A,
7493 							(__v8sf) __B,
7494 							(__v8sf)
7495 							_mm256_setzero_ps (),
7496 							(__mmask8) __U);
7497 	}
7498 	
7499 	extern __inline __m128
7500 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7501 	_mm_mask_div_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7502 	{
7503 	  return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7504 						     (__v4sf) __B,
7505 						     (__v4sf) __W,
7506 						     (__mmask8) __U);
7507 	}
7508 	
7509 	extern __inline __m128
7510 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7511 	_mm_maskz_div_ps (__mmask8 __U, __m128 __A, __m128 __B)
7512 	{
7513 	  return (__m128) __builtin_ia32_divps_mask ((__v4sf) __A,
7514 						     (__v4sf) __B,
7515 						     (__v4sf)
7516 						     _mm_setzero_ps (),
7517 						     (__mmask8) __U);
7518 	}
7519 	
7520 	extern __inline __m128d
7521 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7522 	_mm_mask_div_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7523 	{
7524 	  return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7525 						      (__v2df) __B,
7526 						      (__v2df) __W,
7527 						      (__mmask8) __U);
7528 	}
7529 	
7530 	extern __inline __m128d
7531 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7532 	_mm_maskz_div_pd (__mmask8 __U, __m128d __A, __m128d __B)
7533 	{
7534 	  return (__m128d) __builtin_ia32_divpd_mask ((__v2df) __A,
7535 						      (__v2df) __B,
7536 						      (__v2df)
7537 						      _mm_setzero_pd (),
7538 						      (__mmask8) __U);
7539 	}
7540 	
7541 	extern __inline __m256d
7542 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7543 	_mm256_mask_min_pd (__m256d __W, __mmask8 __U, __m256d __A,
7544 			    __m256d __B)
7545 	{
7546 	  return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7547 							 (__v4df) __B,
7548 							 (__v4df) __W,
7549 							 (__mmask8) __U);
7550 	}
7551 	
7552 	extern __inline __m256d
7553 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7554 	_mm256_mask_div_pd (__m256d __W, __mmask8 __U, __m256d __A,
7555 			    __m256d __B)
7556 	{
7557 	  return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7558 							 (__v4df) __B,
7559 							 (__v4df) __W,
7560 							 (__mmask8) __U);
7561 	}
7562 	
7563 	extern __inline __m256d
7564 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7565 	_mm256_maskz_min_pd (__mmask8 __U, __m256d __A, __m256d __B)
7566 	{
7567 	  return (__m256d) __builtin_ia32_minpd256_mask ((__v4df) __A,
7568 							 (__v4df) __B,
7569 							 (__v4df)
7570 							 _mm256_setzero_pd (),
7571 							 (__mmask8) __U);
7572 	}
7573 	
7574 	extern __inline __m256
7575 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7576 	_mm256_mask_min_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7577 	{
7578 	  return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7579 							(__v8sf) __B,
7580 							(__v8sf) __W,
7581 							(__mmask8) __U);
7582 	}
7583 	
7584 	extern __inline __m256d
7585 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7586 	_mm256_maskz_div_pd (__mmask8 __U, __m256d __A, __m256d __B)
7587 	{
7588 	  return (__m256d) __builtin_ia32_divpd256_mask ((__v4df) __A,
7589 							 (__v4df) __B,
7590 							 (__v4df)
7591 							 _mm256_setzero_pd (),
7592 							 (__mmask8) __U);
7593 	}
7594 	
7595 	extern __inline __m256
7596 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7597 	_mm256_mask_div_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7598 	{
7599 	  return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7600 							(__v8sf) __B,
7601 							(__v8sf) __W,
7602 							(__mmask8) __U);
7603 	}
7604 	
7605 	extern __inline __m256
7606 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7607 	_mm256_maskz_min_ps (__mmask8 __U, __m256 __A, __m256 __B)
7608 	{
7609 	  return (__m256) __builtin_ia32_minps256_mask ((__v8sf) __A,
7610 							(__v8sf) __B,
7611 							(__v8sf)
7612 							_mm256_setzero_ps (),
7613 							(__mmask8) __U);
7614 	}
7615 	
7616 	extern __inline __m256
7617 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7618 	_mm256_maskz_div_ps (__mmask8 __U, __m256 __A, __m256 __B)
7619 	{
7620 	  return (__m256) __builtin_ia32_divps256_mask ((__v8sf) __A,
7621 							(__v8sf) __B,
7622 							(__v8sf)
7623 							_mm256_setzero_ps (),
7624 							(__mmask8) __U);
7625 	}
7626 	
7627 	extern __inline __m128
7628 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7629 	_mm_mask_min_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7630 	{
7631 	  return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7632 						     (__v4sf) __B,
7633 						     (__v4sf) __W,
7634 						     (__mmask8) __U);
7635 	}
7636 	
7637 	extern __inline __m128
7638 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7639 	_mm_mask_mul_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7640 	{
7641 	  return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7642 						     (__v4sf) __B,
7643 						     (__v4sf) __W,
7644 						     (__mmask8) __U);
7645 	}
7646 	
7647 	extern __inline __m128
7648 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7649 	_mm_maskz_min_ps (__mmask8 __U, __m128 __A, __m128 __B)
7650 	{
7651 	  return (__m128) __builtin_ia32_minps_mask ((__v4sf) __A,
7652 						     (__v4sf) __B,
7653 						     (__v4sf)
7654 						     _mm_setzero_ps (),
7655 						     (__mmask8) __U);
7656 	}
7657 	
7658 	extern __inline __m128
7659 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7660 	_mm_maskz_mul_ps (__mmask8 __U, __m128 __A, __m128 __B)
7661 	{
7662 	  return (__m128) __builtin_ia32_mulps_mask ((__v4sf) __A,
7663 						     (__v4sf) __B,
7664 						     (__v4sf)
7665 						     _mm_setzero_ps (),
7666 						     (__mmask8) __U);
7667 	}
7668 	
7669 	extern __inline __m128
7670 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7671 	_mm_mask_max_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7672 	{
7673 	  return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7674 						     (__v4sf) __B,
7675 						     (__v4sf) __W,
7676 						     (__mmask8) __U);
7677 	}
7678 	
7679 	extern __inline __m128
7680 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7681 	_mm_maskz_max_ps (__mmask8 __U, __m128 __A, __m128 __B)
7682 	{
7683 	  return (__m128) __builtin_ia32_maxps_mask ((__v4sf) __A,
7684 						     (__v4sf) __B,
7685 						     (__v4sf)
7686 						     _mm_setzero_ps (),
7687 						     (__mmask8) __U);
7688 	}
7689 	
7690 	extern __inline __m128d
7691 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7692 	_mm_mask_min_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7693 	{
7694 	  return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7695 						      (__v2df) __B,
7696 						      (__v2df) __W,
7697 						      (__mmask8) __U);
7698 	}
7699 	
7700 	extern __inline __m128d
7701 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7702 	_mm_maskz_min_pd (__mmask8 __U, __m128d __A, __m128d __B)
7703 	{
7704 	  return (__m128d) __builtin_ia32_minpd_mask ((__v2df) __A,
7705 						      (__v2df) __B,
7706 						      (__v2df)
7707 						      _mm_setzero_pd (),
7708 						      (__mmask8) __U);
7709 	}
7710 	
7711 	extern __inline __m128d
7712 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7713 	_mm_mask_max_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7714 	{
7715 	  return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7716 						      (__v2df) __B,
7717 						      (__v2df) __W,
7718 						      (__mmask8) __U);
7719 	}
7720 	
7721 	extern __inline __m128d
7722 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7723 	_mm_maskz_max_pd (__mmask8 __U, __m128d __A, __m128d __B)
7724 	{
7725 	  return (__m128d) __builtin_ia32_maxpd_mask ((__v2df) __A,
7726 						      (__v2df) __B,
7727 						      (__v2df)
7728 						      _mm_setzero_pd (),
7729 						      (__mmask8) __U);
7730 	}
7731 	
7732 	extern __inline __m128d
7733 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7734 	_mm_mask_mul_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
7735 	{
7736 	  return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7737 						      (__v2df) __B,
7738 						      (__v2df) __W,
7739 						      (__mmask8) __U);
7740 	}
7741 	
7742 	extern __inline __m128d
7743 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7744 	_mm_maskz_mul_pd (__mmask8 __U, __m128d __A, __m128d __B)
7745 	{
7746 	  return (__m128d) __builtin_ia32_mulpd_mask ((__v2df) __A,
7747 						      (__v2df) __B,
7748 						      (__v2df)
7749 						      _mm_setzero_pd (),
7750 						      (__mmask8) __U);
7751 	}
7752 	
7753 	extern __inline __m256
7754 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7755 	_mm256_mask_mul_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
7756 	{
7757 	  return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7758 							(__v8sf) __B,
7759 							(__v8sf) __W,
7760 							(__mmask8) __U);
7761 	}
7762 	
7763 	extern __inline __m256
7764 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7765 	_mm256_maskz_mul_ps (__mmask8 __U, __m256 __A, __m256 __B)
7766 	{
7767 	  return (__m256) __builtin_ia32_mulps256_mask ((__v8sf) __A,
7768 							(__v8sf) __B,
7769 							(__v8sf)
7770 							_mm256_setzero_ps (),
7771 							(__mmask8) __U);
7772 	}
7773 	
7774 	extern __inline __m256d
7775 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7776 	_mm256_mask_mul_pd (__m256d __W, __mmask8 __U, __m256d __A,
7777 			    __m256d __B)
7778 	{
7779 	  return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7780 							 (__v4df) __B,
7781 							 (__v4df) __W,
7782 							 (__mmask8) __U);
7783 	}
7784 	
7785 	extern __inline __m256d
7786 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7787 	_mm256_maskz_mul_pd (__mmask8 __U, __m256d __A, __m256d __B)
7788 	{
7789 	  return (__m256d) __builtin_ia32_mulpd256_mask ((__v4df) __A,
7790 							 (__v4df) __B,
7791 							 (__v4df)
7792 							 _mm256_setzero_pd (),
7793 							 (__mmask8) __U);
7794 	}
7795 	
7796 	extern __inline __m256i
7797 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7798 	_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7799 	{
7800 	  return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7801 							  (__v4di) __B,
7802 							  (__v4di)
7803 							  _mm256_setzero_si256 (),
7804 							  __M);
7805 	}
7806 	
7807 	extern __inline __m256i
7808 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7809 	_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7810 			       __m256i __B)
7811 	{
7812 	  return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7813 							  (__v4di) __B,
7814 							  (__v4di) __W, __M);
7815 	}
7816 	
7817 	extern __inline __m256i
7818 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7819 	_mm256_min_epi64 (__m256i __A, __m256i __B)
7820 	{
7821 	  return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7822 							  (__v4di) __B,
7823 							  (__v4di)
7824 							  _mm256_setzero_si256 (),
7825 							  (__mmask8) -1);
7826 	}
7827 	
7828 	extern __inline __m256i
7829 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7830 	_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A,
7831 			       __m256i __B)
7832 	{
7833 	  return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7834 							  (__v4di) __B,
7835 							  (__v4di) __W, __M);
7836 	}
7837 	
7838 	extern __inline __m256i
7839 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7840 	_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B)
7841 	{
7842 	  return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
7843 							  (__v4di) __B,
7844 							  (__v4di)
7845 							  _mm256_setzero_si256 (),
7846 							  __M);
7847 	}
7848 	
7849 	extern __inline __m256i
7850 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7851 	_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7852 	{
7853 	  return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7854 							  (__v4di) __B,
7855 							  (__v4di)
7856 							  _mm256_setzero_si256 (),
7857 							  __M);
7858 	}
7859 	
7860 	extern __inline __m256i
7861 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7862 	_mm256_max_epi64 (__m256i __A, __m256i __B)
7863 	{
7864 	  return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
7865 							  (__v4di) __B,
7866 							  (__v4di)
7867 							  _mm256_setzero_si256 (),
7868 							  (__mmask8) -1);
7869 	}
7870 	
7871 	extern __inline __m256i
7872 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7873 	_mm256_max_epu64 (__m256i __A, __m256i __B)
7874 	{
7875 	  return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7876 							  (__v4di) __B,
7877 							  (__v4di)
7878 							  _mm256_setzero_si256 (),
7879 							  (__mmask8) -1);
7880 	}
7881 	
7882 	extern __inline __m256i
7883 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7884 	_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7885 			       __m256i __B)
7886 	{
7887 	  return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
7888 							  (__v4di) __B,
7889 							  (__v4di) __W, __M);
7890 	}
7891 	
7892 	extern __inline __m256i
7893 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7894 	_mm256_min_epu64 (__m256i __A, __m256i __B)
7895 	{
7896 	  return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7897 							  (__v4di) __B,
7898 							  (__v4di)
7899 							  _mm256_setzero_si256 (),
7900 							  (__mmask8) -1);
7901 	}
7902 	
7903 	extern __inline __m256i
7904 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7905 	_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A,
7906 			       __m256i __B)
7907 	{
7908 	  return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7909 							  (__v4di) __B,
7910 							  (__v4di) __W, __M);
7911 	}
7912 	
7913 	extern __inline __m256i
7914 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7915 	_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B)
7916 	{
7917 	  return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
7918 							  (__v4di) __B,
7919 							  (__v4di)
7920 							  _mm256_setzero_si256 (),
7921 							  __M);
7922 	}
7923 	
7924 	extern __inline __m256i
7925 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7926 	_mm256_maskz_max_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7927 	{
7928 	  return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7929 							  (__v8si) __B,
7930 							  (__v8si)
7931 							  _mm256_setzero_si256 (),
7932 							  __M);
7933 	}
7934 	
7935 	extern __inline __m256i
7936 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7937 	_mm256_mask_max_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7938 			       __m256i __B)
7939 	{
7940 	  return (__m256i) __builtin_ia32_pmaxsd256_mask ((__v8si) __A,
7941 							  (__v8si) __B,
7942 							  (__v8si) __W, __M);
7943 	}
7944 	
7945 	extern __inline __m256i
7946 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7947 	_mm256_maskz_min_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
7948 	{
7949 	  return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7950 							  (__v8si) __B,
7951 							  (__v8si)
7952 							  _mm256_setzero_si256 (),
7953 							  __M);
7954 	}
7955 	
7956 	extern __inline __m256i
7957 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7958 	_mm256_mask_min_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
7959 			       __m256i __B)
7960 	{
7961 	  return (__m256i) __builtin_ia32_pminsd256_mask ((__v8si) __A,
7962 							  (__v8si) __B,
7963 							  (__v8si) __W, __M);
7964 	}
7965 	
7966 	extern __inline __m256i
7967 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7968 	_mm256_maskz_max_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7969 	{
7970 	  return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7971 							  (__v8si) __B,
7972 							  (__v8si)
7973 							  _mm256_setzero_si256 (),
7974 							  __M);
7975 	}
7976 	
7977 	extern __inline __m256i
7978 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7979 	_mm256_mask_max_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
7980 			       __m256i __B)
7981 	{
7982 	  return (__m256i) __builtin_ia32_pmaxud256_mask ((__v8si) __A,
7983 							  (__v8si) __B,
7984 							  (__v8si) __W, __M);
7985 	}
7986 	
7987 	extern __inline __m256i
7988 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7989 	_mm256_maskz_min_epu32 (__mmask8 __M, __m256i __A, __m256i __B)
7990 	{
7991 	  return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
7992 							  (__v8si) __B,
7993 							  (__v8si)
7994 							  _mm256_setzero_si256 (),
7995 							  __M);
7996 	}
7997 	
7998 	extern __inline __m256i
7999 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8000 	_mm256_mask_min_epu32 (__m256i __W, __mmask8 __M, __m256i __A,
8001 			       __m256i __B)
8002 	{
8003 	  return (__m256i) __builtin_ia32_pminud256_mask ((__v8si) __A,
8004 							  (__v8si) __B,
8005 							  (__v8si) __W, __M);
8006 	}
8007 	
8008 	extern __inline __m128i
8009 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8010 	_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
8011 	{
8012 	  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8013 							  (__v2di) __B,
8014 							  (__v2di)
8015 							  _mm_setzero_si128 (),
8016 							  __M);
8017 	}
8018 	
8019 	extern __inline __m128i
8020 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8021 	_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
8022 			    __m128i __B)
8023 	{
8024 	  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8025 							  (__v2di) __B,
8026 							  (__v2di) __W, __M);
8027 	}
8028 	
8029 	extern __inline __m128i
8030 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8031 	_mm_min_epi64 (__m128i __A, __m128i __B)
8032 	{
8033 	  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8034 							  (__v2di) __B,
8035 							  (__v2di)
8036 							  _mm_setzero_si128 (),
8037 							  (__mmask8) -1);
8038 	}
8039 	
8040 	extern __inline __m128i
8041 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8042 	_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A,
8043 			    __m128i __B)
8044 	{
8045 	  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8046 							  (__v2di) __B,
8047 							  (__v2di) __W, __M);
8048 	}
8049 	
8050 	extern __inline __m128i
8051 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8052 	_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B)
8053 	{
8054 	  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
8055 							  (__v2di) __B,
8056 							  (__v2di)
8057 							  _mm_setzero_si128 (),
8058 							  __M);
8059 	}
8060 	
8061 	extern __inline __m128i
8062 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8063 	_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8064 	{
8065 	  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8066 							  (__v2di) __B,
8067 							  (__v2di)
8068 							  _mm_setzero_si128 (),
8069 							  __M);
8070 	}
8071 	
8072 	extern __inline __m128i
8073 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8074 	_mm_max_epi64 (__m128i __A, __m128i __B)
8075 	{
8076 	  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
8077 							  (__v2di) __B,
8078 							  (__v2di)
8079 							  _mm_setzero_si128 (),
8080 							  (__mmask8) -1);
8081 	}
8082 	
8083 	extern __inline __m128i
8084 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8085 	_mm_max_epu64 (__m128i __A, __m128i __B)
8086 	{
8087 	  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8088 							  (__v2di) __B,
8089 							  (__v2di)
8090 							  _mm_setzero_si128 (),
8091 							  (__mmask8) -1);
8092 	}
8093 	
8094 	extern __inline __m128i
8095 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8096 	_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8097 			    __m128i __B)
8098 	{
8099 	  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
8100 							  (__v2di) __B,
8101 							  (__v2di) __W, __M);
8102 	}
8103 	
8104 	extern __inline __m128i
8105 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8106 	_mm_min_epu64 (__m128i __A, __m128i __B)
8107 	{
8108 	  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8109 							  (__v2di) __B,
8110 							  (__v2di)
8111 							  _mm_setzero_si128 (),
8112 							  (__mmask8) -1);
8113 	}
8114 	
8115 	extern __inline __m128i
8116 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8117 	_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A,
8118 			    __m128i __B)
8119 	{
8120 	  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8121 							  (__v2di) __B,
8122 							  (__v2di) __W, __M);
8123 	}
8124 	
8125 	extern __inline __m128i
8126 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8127 	_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B)
8128 	{
8129 	  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
8130 							  (__v2di) __B,
8131 							  (__v2di)
8132 							  _mm_setzero_si128 (),
8133 							  __M);
8134 	}
8135 	
8136 	extern __inline __m128i
8137 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8138 	_mm_maskz_max_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8139 	{
8140 	  return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8141 							  (__v4si) __B,
8142 							  (__v4si)
8143 							  _mm_setzero_si128 (),
8144 							  __M);
8145 	}
8146 	
8147 	extern __inline __m128i
8148 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8149 	_mm_mask_max_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8150 			    __m128i __B)
8151 	{
8152 	  return (__m128i) __builtin_ia32_pmaxsd128_mask ((__v4si) __A,
8153 							  (__v4si) __B,
8154 							  (__v4si) __W, __M);
8155 	}
8156 	
8157 	extern __inline __m128i
8158 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8159 	_mm_maskz_min_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
8160 	{
8161 	  return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8162 							  (__v4si) __B,
8163 							  (__v4si)
8164 							  _mm_setzero_si128 (),
8165 							  __M);
8166 	}
8167 	
8168 	extern __inline __m128i
8169 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8170 	_mm_mask_min_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
8171 			    __m128i __B)
8172 	{
8173 	  return (__m128i) __builtin_ia32_pminsd128_mask ((__v4si) __A,
8174 							  (__v4si) __B,
8175 							  (__v4si) __W, __M);
8176 	}
8177 	
8178 	extern __inline __m128i
8179 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8180 	_mm_maskz_max_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8181 	{
8182 	  return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8183 							  (__v4si) __B,
8184 							  (__v4si)
8185 							  _mm_setzero_si128 (),
8186 							  __M);
8187 	}
8188 	
8189 	extern __inline __m128i
8190 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8191 	_mm_mask_max_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8192 			    __m128i __B)
8193 	{
8194 	  return (__m128i) __builtin_ia32_pmaxud128_mask ((__v4si) __A,
8195 							  (__v4si) __B,
8196 							  (__v4si) __W, __M);
8197 	}
8198 	
8199 	extern __inline __m128i
8200 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8201 	_mm_maskz_min_epu32 (__mmask8 __M, __m128i __A, __m128i __B)
8202 	{
8203 	  return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8204 							  (__v4si) __B,
8205 							  (__v4si)
8206 							  _mm_setzero_si128 (),
8207 							  __M);
8208 	}
8209 	
8210 	extern __inline __m128i
8211 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8212 	_mm_mask_min_epu32 (__m128i __W, __mmask8 __M, __m128i __A,
8213 			    __m128i __B)
8214 	{
8215 	  return (__m128i) __builtin_ia32_pminud128_mask ((__v4si) __A,
8216 							  (__v4si) __B,
8217 							  (__v4si) __W, __M);
8218 	}
8219 	
/* When __AVX512CD__ is not defined, save the current target options and
   switch the target to "avx512vl,avx512cd" for the definitions that
   follow.  __DISABLE_AVX512VLCD__ records that a push_options was issued
   here.  */
#ifndef __AVX512CD__
#pragma GCC push_options
#pragma GCC target("avx512vl,avx512cd")
#define __DISABLE_AVX512VLCD__
#endif
8225 	
8226 	extern __inline __m128i
8227 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8228 	_mm_broadcastmb_epi64 (__mmask8 __A)
8229 	{
8230 	  return (__m128i) __builtin_ia32_broadcastmb128 (__A);
8231 	}
8232 	
8233 	extern __inline __m256i
8234 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8235 	_mm256_broadcastmb_epi64 (__mmask8 __A)
8236 	{
8237 	  return (__m256i) __builtin_ia32_broadcastmb256 (__A);
8238 	}
8239 	
8240 	extern __inline __m128i
8241 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8242 	_mm_broadcastmw_epi32 (__mmask16 __A)
8243 	{
8244 	  return (__m128i) __builtin_ia32_broadcastmw128 (__A);
8245 	}
8246 	
8247 	extern __inline __m256i
8248 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8249 	_mm256_broadcastmw_epi32 (__mmask16 __A)
8250 	{
8251 	  return (__m256i) __builtin_ia32_broadcastmw256 (__A);
8252 	}
8253 	
8254 	extern __inline __m256i
8255 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8256 	_mm256_lzcnt_epi32 (__m256i __A)
8257 	{
8258 	  return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8259 							     (__v8si)
8260 							     _mm256_setzero_si256 (),
8261 							     (__mmask8) -1);
8262 	}
8263 	
8264 	extern __inline __m256i
8265 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8266 	_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8267 	{
8268 	  return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8269 							     (__v8si) __W,
8270 							     (__mmask8) __U);
8271 	}
8272 	
8273 	extern __inline __m256i
8274 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8275 	_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
8276 	{
8277 	  return (__m256i) __builtin_ia32_vplzcntd_256_mask ((__v8si) __A,
8278 							     (__v8si)
8279 							     _mm256_setzero_si256 (),
8280 							     (__mmask8) __U);
8281 	}
8282 	
8283 	extern __inline __m256i
8284 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8285 	_mm256_lzcnt_epi64 (__m256i __A)
8286 	{
8287 	  return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8288 							     (__v4di)
8289 							     _mm256_setzero_si256 (),
8290 							     (__mmask8) -1);
8291 	}
8292 	
8293 	extern __inline __m256i
8294 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8295 	_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8296 	{
8297 	  return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8298 							     (__v4di) __W,
8299 							     (__mmask8) __U);
8300 	}
8301 	
8302 	extern __inline __m256i
8303 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8304 	_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
8305 	{
8306 	  return (__m256i) __builtin_ia32_vplzcntq_256_mask ((__v4di) __A,
8307 							     (__v4di)
8308 							     _mm256_setzero_si256 (),
8309 							     (__mmask8) __U);
8310 	}
8311 	
8312 	extern __inline __m256i
8313 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8314 	_mm256_conflict_epi64 (__m256i __A)
8315 	{
8316 	  return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8317 								 (__v4di)
8318 								 _mm256_setzero_si256 (),
8319 								 (__mmask8) -1);
8320 	}
8321 	
8322 	extern __inline __m256i
8323 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8324 	_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
8325 	{
8326 	  return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8327 								 (__v4di) __W,
8328 								 (__mmask8)
8329 								 __U);
8330 	}
8331 	
8332 	extern __inline __m256i
8333 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8334 	_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
8335 	{
8336 	  return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
8337 								 (__v4di)
8338 								 _mm256_setzero_si256 (),
8339 								 (__mmask8)
8340 								 __U);
8341 	}
8342 	
8343 	extern __inline __m256i
8344 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8345 	_mm256_conflict_epi32 (__m256i __A)
8346 	{
8347 	  return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8348 								 (__v8si)
8349 								 _mm256_setzero_si256 (),
8350 								 (__mmask8) -1);
8351 	}
8352 	
8353 	extern __inline __m256i
8354 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8355 	_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
8356 	{
8357 	  return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8358 								 (__v8si) __W,
8359 								 (__mmask8)
8360 								 __U);
8361 	}
8362 	
8363 	extern __inline __m256i
8364 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8365 	_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
8366 	{
8367 	  return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
8368 								 (__v8si)
8369 								 _mm256_setzero_si256 (),
8370 								 (__mmask8)
8371 								 __U);
8372 	}
8373 	
8374 	extern __inline __m128i
8375 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8376 	_mm_lzcnt_epi32 (__m128i __A)
8377 	{
8378 	  return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8379 							     (__v4si)
8380 							     _mm_setzero_si128 (),
8381 							     (__mmask8) -1);
8382 	}
8383 	
8384 	extern __inline __m128i
8385 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8386 	_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8387 	{
8388 	  return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8389 							     (__v4si) __W,
8390 							     (__mmask8) __U);
8391 	}
8392 	
8393 	extern __inline __m128i
8394 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8395 	_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
8396 	{
8397 	  return (__m128i) __builtin_ia32_vplzcntd_128_mask ((__v4si) __A,
8398 							     (__v4si)
8399 							     _mm_setzero_si128 (),
8400 							     (__mmask8) __U);
8401 	}
8402 	
8403 	extern __inline __m128i
8404 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8405 	_mm_lzcnt_epi64 (__m128i __A)
8406 	{
8407 	  return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8408 							     (__v2di)
8409 							     _mm_setzero_si128 (),
8410 							     (__mmask8) -1);
8411 	}
8412 	
8413 	extern __inline __m128i
8414 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8415 	_mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8416 	{
8417 	  return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8418 							     (__v2di) __W,
8419 							     (__mmask8) __U);
8420 	}
8421 	
8422 	extern __inline __m128i
8423 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8424 	_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
8425 	{
8426 	  return (__m128i) __builtin_ia32_vplzcntq_128_mask ((__v2di) __A,
8427 							     (__v2di)
8428 							     _mm_setzero_si128 (),
8429 							     (__mmask8) __U);
8430 	}
8431 	
8432 	extern __inline __m128i
8433 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8434 	_mm_conflict_epi64 (__m128i __A)
8435 	{
8436 	  return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8437 								 (__v2di)
8438 								 _mm_setzero_si128 (),
8439 								 (__mmask8) -1);
8440 	}
8441 	
8442 	extern __inline __m128i
8443 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8444 	_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
8445 	{
8446 	  return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8447 								 (__v2di) __W,
8448 								 (__mmask8)
8449 								 __U);
8450 	}
8451 	
8452 	extern __inline __m128i
8453 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8454 	_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
8455 	{
8456 	  return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
8457 								 (__v2di)
8458 								 _mm_setzero_si128 (),
8459 								 (__mmask8)
8460 								 __U);
8461 	}
8462 	
8463 	extern __inline __m128i
8464 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8465 	_mm_conflict_epi32 (__m128i __A)
8466 	{
8467 	  return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8468 								 (__v4si)
8469 								 _mm_setzero_si128 (),
8470 								 (__mmask8) -1);
8471 	}
8472 	
8473 	extern __inline __m128i
8474 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8475 	_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
8476 	{
8477 	  return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8478 								 (__v4si) __W,
8479 								 (__mmask8)
8480 								 __U);
8481 	}
8482 	
8483 	extern __inline __m128i
8484 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8485 	_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
8486 	{
8487 	  return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
8488 								 (__v4si)
8489 								 _mm_setzero_si128 (),
8490 								 (__mmask8)
8491 								 __U);
8492 	}
8493 	
/* Restore the previously saved target options, but only when
   __DISABLE_AVX512VLCD__ is defined.
   NOTE(review): the marker macro is not #undef'd here, so it stays
   defined for the rest of the translation unit -- confirm whether that
   is intended.  */
#ifdef __DISABLE_AVX512VLCD__
#pragma GCC pop_options
#endif
8497 	
8498 	extern __inline __m256d
8499 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8500 	_mm256_mask_unpacklo_pd (__m256d __W, __mmask8 __U, __m256d __A,
8501 				 __m256d __B)
8502 	{
8503 	  return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8504 							    (__v4df) __B,
8505 							    (__v4df) __W,
8506 							    (__mmask8) __U);
8507 	}
8508 	
8509 	extern __inline __m256d
8510 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8511 	_mm256_maskz_unpacklo_pd (__mmask8 __U, __m256d __A, __m256d __B)
8512 	{
8513 	  return (__m256d) __builtin_ia32_unpcklpd256_mask ((__v4df) __A,
8514 							    (__v4df) __B,
8515 							    (__v4df)
8516 							    _mm256_setzero_pd (),
8517 							    (__mmask8) __U);
8518 	}
8519 	
8520 	extern __inline __m128d
8521 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8522 	_mm_mask_unpacklo_pd (__m128d __W, __mmask8 __U, __m128d __A,
8523 			      __m128d __B)
8524 	{
8525 	  return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8526 							    (__v2df) __B,
8527 							    (__v2df) __W,
8528 							    (__mmask8) __U);
8529 	}
8530 	
8531 	extern __inline __m128d
8532 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8533 	_mm_maskz_unpacklo_pd (__mmask8 __U, __m128d __A, __m128d __B)
8534 	{
8535 	  return (__m128d) __builtin_ia32_unpcklpd128_mask ((__v2df) __A,
8536 							    (__v2df) __B,
8537 							    (__v2df)
8538 							    _mm_setzero_pd (),
8539 							    (__mmask8) __U);
8540 	}
8541 	
8542 	extern __inline __m256
8543 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8544 	_mm256_mask_unpacklo_ps (__m256 __W, __mmask8 __U, __m256 __A,
8545 				 __m256 __B)
8546 	{
8547 	  return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8548 							   (__v8sf) __B,
8549 							   (__v8sf) __W,
8550 							   (__mmask8) __U);
8551 	}
8552 	
8553 	extern __inline __m256d
8554 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8555 	_mm256_mask_unpackhi_pd (__m256d __W, __mmask8 __U, __m256d __A,
8556 				 __m256d __B)
8557 	{
8558 	  return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8559 							    (__v4df) __B,
8560 							    (__v4df) __W,
8561 							    (__mmask8) __U);
8562 	}
8563 	
8564 	extern __inline __m256d
8565 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8566 	_mm256_maskz_unpackhi_pd (__mmask8 __U, __m256d __A, __m256d __B)
8567 	{
8568 	  return (__m256d) __builtin_ia32_unpckhpd256_mask ((__v4df) __A,
8569 							    (__v4df) __B,
8570 							    (__v4df)
8571 							    _mm256_setzero_pd (),
8572 							    (__mmask8) __U);
8573 	}
8574 	
8575 	extern __inline __m128d
8576 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8577 	_mm_mask_unpackhi_pd (__m128d __W, __mmask8 __U, __m128d __A,
8578 			      __m128d __B)
8579 	{
8580 	  return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8581 							    (__v2df) __B,
8582 							    (__v2df) __W,
8583 							    (__mmask8) __U);
8584 	}
8585 	
8586 	extern __inline __m128d
8587 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8588 	_mm_maskz_unpackhi_pd (__mmask8 __U, __m128d __A, __m128d __B)
8589 	{
8590 	  return (__m128d) __builtin_ia32_unpckhpd128_mask ((__v2df) __A,
8591 							    (__v2df) __B,
8592 							    (__v2df)
8593 							    _mm_setzero_pd (),
8594 							    (__mmask8) __U);
8595 	}
8596 	
8597 	extern __inline __m256
8598 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8599 	_mm256_mask_unpackhi_ps (__m256 __W, __mmask8 __U, __m256 __A,
8600 				 __m256 __B)
8601 	{
8602 	  return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8603 							   (__v8sf) __B,
8604 							   (__v8sf) __W,
8605 							   (__mmask8) __U);
8606 	}
8607 	
8608 	extern __inline __m256
8609 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8610 	_mm256_maskz_unpackhi_ps (__mmask8 __U, __m256 __A, __m256 __B)
8611 	{
8612 	  return (__m256) __builtin_ia32_unpckhps256_mask ((__v8sf) __A,
8613 							   (__v8sf) __B,
8614 							   (__v8sf)
8615 							   _mm256_setzero_ps (),
8616 							   (__mmask8) __U);
8617 	}
8618 	
8619 	extern __inline __m128
8620 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8621 	_mm_mask_unpackhi_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8622 	{
8623 	  return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8624 							   (__v4sf) __B,
8625 							   (__v4sf) __W,
8626 							   (__mmask8) __U);
8627 	}
8628 	
8629 	extern __inline __m128
8630 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8631 	_mm_maskz_unpackhi_ps (__mmask8 __U, __m128 __A, __m128 __B)
8632 	{
8633 	  return (__m128) __builtin_ia32_unpckhps128_mask ((__v4sf) __A,
8634 							   (__v4sf) __B,
8635 							   (__v4sf)
8636 							   _mm_setzero_ps (),
8637 							   (__mmask8) __U);
8638 	}
8639 	
8640 	extern __inline __m128
8641 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8642 	_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8643 	{
8644 	  return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8645 							 (__v4sf) __W,
8646 							 (__mmask8) __U);
8647 	}
8648 	
8649 	extern __inline __m128
8650 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8651 	_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8652 	{
8653 	  return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8654 							 (__v4sf)
8655 							 _mm_setzero_ps (),
8656 							 (__mmask8) __U);
8657 	}
8658 	
8659 	extern __inline __m256
8660 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8661 	_mm256_maskz_unpacklo_ps (__mmask8 __U, __m256 __A, __m256 __B)
8662 	{
8663 	  return (__m256) __builtin_ia32_unpcklps256_mask ((__v8sf) __A,
8664 							   (__v8sf) __B,
8665 							   (__v8sf)
8666 							   _mm256_setzero_ps (),
8667 							   (__mmask8) __U);
8668 	}
8669 	
8670 	extern __inline __m256
8671 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8672 	_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8673 	{
8674 	  return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8675 							    (__v8sf) __W,
8676 							    (__mmask8) __U);
8677 	}
8678 	
8679 	extern __inline __m256
8680 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8681 	_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
8682 	{
8683 	  return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8684 							    (__v8sf)
8685 							    _mm256_setzero_ps (),
8686 							    (__mmask8) __U);
8687 	}
8688 	
8689 	extern __inline __m128
8690 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8691 	_mm_mask_unpacklo_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8692 	{
8693 	  return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8694 							   (__v4sf) __B,
8695 							   (__v4sf) __W,
8696 							   (__mmask8) __U);
8697 	}
8698 	
8699 	extern __inline __m128
8700 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8701 	_mm_maskz_unpacklo_ps (__mmask8 __U, __m128 __A, __m128 __B)
8702 	{
8703 	  return (__m128) __builtin_ia32_unpcklps128_mask ((__v4sf) __A,
8704 							   (__v4sf) __B,
8705 							   (__v4sf)
8706 							   _mm_setzero_ps (),
8707 							   (__mmask8) __U);
8708 	}
8709 	
8710 	extern __inline __m256i
8711 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8712 	_mm256_mask_sra_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8713 			       __m128i __B)
8714 	{
8715 	  return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8716 							 (__v4si) __B,
8717 							 (__v8si) __W,
8718 							 (__mmask8) __U);
8719 	}
8720 	
8721 	extern __inline __m256i
8722 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8723 	_mm256_maskz_sra_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8724 	{
8725 	  return (__m256i) __builtin_ia32_psrad256_mask ((__v8si) __A,
8726 							 (__v4si) __B,
8727 							 (__v8si)
8728 							 _mm256_setzero_si256 (),
8729 							 (__mmask8) __U);
8730 	}
8731 	
8732 	extern __inline __m128i
8733 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8734 	_mm_mask_sra_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8735 			    __m128i __B)
8736 	{
8737 	  return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8738 							 (__v4si) __B,
8739 							 (__v4si) __W,
8740 							 (__mmask8) __U);
8741 	}
8742 	
8743 	extern __inline __m128i
8744 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8745 	_mm_maskz_sra_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8746 	{
8747 	  return (__m128i) __builtin_ia32_psrad128_mask ((__v4si) __A,
8748 							 (__v4si) __B,
8749 							 (__v4si)
8750 							 _mm_setzero_si128 (),
8751 							 (__mmask8) __U);
8752 	}
8753 	
8754 	extern __inline __m256i
8755 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8756 	_mm256_sra_epi64 (__m256i __A, __m128i __B)
8757 	{
8758 	  return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8759 							 (__v2di) __B,
8760 							 (__v4di)
8761 							 _mm256_setzero_si256 (),
8762 							 (__mmask8) -1);
8763 	}
8764 	
8765 	extern __inline __m256i
8766 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8767 	_mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8768 			       __m128i __B)
8769 	{
8770 	  return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8771 							 (__v2di) __B,
8772 							 (__v4di) __W,
8773 							 (__mmask8) __U);
8774 	}
8775 	
8776 	extern __inline __m256i
8777 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8778 	_mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8779 	{
8780 	  return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A,
8781 							 (__v2di) __B,
8782 							 (__v4di)
8783 							 _mm256_setzero_si256 (),
8784 							 (__mmask8) __U);
8785 	}
8786 	
8787 	extern __inline __m128i
8788 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8789 	_mm_sra_epi64 (__m128i __A, __m128i __B)
8790 	{
8791 	  return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8792 							 (__v2di) __B,
8793 							 (__v2di)
8794 							 _mm_setzero_si128 (),
8795 							 (__mmask8) -1);
8796 	}
8797 	
8798 	extern __inline __m128i
8799 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8800 	_mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8801 			    __m128i __B)
8802 	{
8803 	  return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8804 							 (__v2di) __B,
8805 							 (__v2di) __W,
8806 							 (__mmask8) __U);
8807 	}
8808 	
8809 	extern __inline __m128i
8810 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8811 	_mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8812 	{
8813 	  return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A,
8814 							 (__v2di) __B,
8815 							 (__v2di)
8816 							 _mm_setzero_si128 (),
8817 							 (__mmask8) __U);
8818 	}
8819 	
8820 	extern __inline __m128i
8821 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8822 	_mm_mask_sll_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
8823 			    __m128i __B)
8824 	{
8825 	  return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8826 							 (__v4si) __B,
8827 							 (__v4si) __W,
8828 							 (__mmask8) __U);
8829 	}
8830 	
8831 	extern __inline __m128i
8832 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8833 	_mm_maskz_sll_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
8834 	{
8835 	  return (__m128i) __builtin_ia32_pslld128_mask ((__v4si) __A,
8836 							 (__v4si) __B,
8837 							 (__v4si)
8838 							 _mm_setzero_si128 (),
8839 							 (__mmask8) __U);
8840 	}
8841 	
8842 	extern __inline __m128i
8843 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8844 	_mm_mask_sll_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
8845 			    __m128i __B)
8846 	{
8847 	  return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8848 							 (__v2di) __B,
8849 							 (__v2di) __W,
8850 							 (__mmask8) __U);
8851 	}
8852 	
8853 	extern __inline __m128i
8854 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8855 	_mm_maskz_sll_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
8856 	{
8857 	  return (__m128i) __builtin_ia32_psllq128_mask ((__v2di) __A,
8858 							 (__v2di) __B,
8859 							 (__v2di)
8860 							 _mm_setzero_si128 (),
8861 							 (__mmask8) __U);
8862 	}
8863 	
8864 	extern __inline __m256i
8865 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8866 	_mm256_mask_sll_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
8867 			       __m128i __B)
8868 	{
8869 	  return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8870 							 (__v4si) __B,
8871 							 (__v8si) __W,
8872 							 (__mmask8) __U);
8873 	}
8874 	
8875 	extern __inline __m256i
8876 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8877 	_mm256_maskz_sll_epi32 (__mmask8 __U, __m256i __A, __m128i __B)
8878 	{
8879 	  return (__m256i) __builtin_ia32_pslld256_mask ((__v8si) __A,
8880 							 (__v4si) __B,
8881 							 (__v8si)
8882 							 _mm256_setzero_si256 (),
8883 							 (__mmask8) __U);
8884 	}
8885 	
8886 	extern __inline __m256i
8887 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8888 	_mm256_mask_sll_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
8889 			       __m128i __B)
8890 	{
8891 	  return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8892 							 (__v2di) __B,
8893 							 (__v4di) __W,
8894 							 (__mmask8) __U);
8895 	}
8896 	
8897 	extern __inline __m256i
8898 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8899 	_mm256_maskz_sll_epi64 (__mmask8 __U, __m256i __A, __m128i __B)
8900 	{
8901 	  return (__m256i) __builtin_ia32_psllq256_mask ((__v4di) __A,
8902 							 (__v2di) __B,
8903 							 (__v4di)
8904 							 _mm256_setzero_si256 (),
8905 							 (__mmask8) __U);
8906 	}
8907 	
8908 	extern __inline __m256
8909 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8910 	_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
8911 				    __m256 __Y)
8912 	{
8913 	  return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8914 							    (__v8si) __X,
8915 							    (__v8sf) __W,
8916 							    (__mmask8) __U);
8917 	}
8918 	
8919 	extern __inline __m256
8920 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8921 	_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
8922 	{
8923 	  return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
8924 							    (__v8si) __X,
8925 							    (__v8sf)
8926 							    _mm256_setzero_ps (),
8927 							    (__mmask8) __U);
8928 	}
8929 	
8930 	extern __inline __m256d
8931 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8932 	_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8933 	{
8934 	  return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8935 							     (__v4di) __X,
8936 							     (__v4df)
8937 							     _mm256_setzero_pd (),
8938 							     (__mmask8) -1);
8939 	}
8940 	
8941 	extern __inline __m256d
8942 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8943 	_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8944 				    __m256d __Y)
8945 	{
8946 	  return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8947 							     (__v4di) __X,
8948 							     (__v4df) __W,
8949 							     (__mmask8) __U);
8950 	}
8951 	
8952 	extern __inline __m256d
8953 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8954 	_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8955 	{
8956 	  return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y,
8957 							     (__v4di) __X,
8958 							     (__v4df)
8959 							     _mm256_setzero_pd (),
8960 							     (__mmask8) __U);
8961 	}
8962 	
8963 	extern __inline __m256d
8964 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8965 	_mm256_mask_permutevar_pd (__m256d __W, __mmask8 __U, __m256d __A,
8966 				   __m256i __C)
8967 	{
8968 	  return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8969 								(__v4di) __C,
8970 								(__v4df) __W,
8971 								(__mmask8)
8972 								__U);
8973 	}
8974 	
8975 	extern __inline __m256d
8976 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8977 	_mm256_maskz_permutevar_pd (__mmask8 __U, __m256d __A, __m256i __C)
8978 	{
8979 	  return (__m256d) __builtin_ia32_vpermilvarpd256_mask ((__v4df) __A,
8980 								(__v4di) __C,
8981 								(__v4df)
8982 								_mm256_setzero_pd (),
8983 								(__mmask8)
8984 								__U);
8985 	}
8986 	
8987 	extern __inline __m256
8988 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8989 	_mm256_mask_permutevar_ps (__m256 __W, __mmask8 __U, __m256 __A,
8990 				   __m256i __C)
8991 	{
8992 	  return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
8993 							       (__v8si) __C,
8994 							       (__v8sf) __W,
8995 							       (__mmask8) __U);
8996 	}
8997 	
8998 	extern __inline __m256
8999 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9000 	_mm256_maskz_permutevar_ps (__mmask8 __U, __m256 __A, __m256i __C)
9001 	{
9002 	  return (__m256) __builtin_ia32_vpermilvarps256_mask ((__v8sf) __A,
9003 							       (__v8si) __C,
9004 							       (__v8sf)
9005 							       _mm256_setzero_ps (),
9006 							       (__mmask8) __U);
9007 	}
9008 	
9009 	extern __inline __m128d
9010 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9011 	_mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
9012 				__m128i __C)
9013 	{
9014 	  return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
9015 							     (__v2di) __C,
9016 							     (__v2df) __W,
9017 							     (__mmask8) __U);
9018 	}
9019 	
9020 	extern __inline __m128d
9021 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9022 	_mm_maskz_permutevar_pd (__mmask8 __U, __m128d __A, __m128i __C)
9023 	{
9024 	  return (__m128d) __builtin_ia32_vpermilvarpd_mask ((__v2df) __A,
9025 							     (__v2di) __C,
9026 							     (__v2df)
9027 							     _mm_setzero_pd (),
9028 							     (__mmask8) __U);
9029 	}
9030 	
9031 	extern __inline __m128
9032 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9033 	_mm_mask_permutevar_ps (__m128 __W, __mmask8 __U, __m128 __A,
9034 				__m128i __C)
9035 	{
9036 	  return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
9037 							    (__v4si) __C,
9038 							    (__v4sf) __W,
9039 							    (__mmask8) __U);
9040 	}
9041 	
9042 	extern __inline __m128
9043 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9044 	_mm_maskz_permutevar_ps (__mmask8 __U, __m128 __A, __m128i __C)
9045 	{
9046 	  return (__m128) __builtin_ia32_vpermilvarps_mask ((__v4sf) __A,
9047 							    (__v4si) __C,
9048 							    (__v4sf)
9049 							    _mm_setzero_ps (),
9050 							    (__mmask8) __U);
9051 	}
9052 	
9053 	extern __inline __m256i
9054 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9055 	_mm256_maskz_mullo_epi32 (__mmask8 __M, __m256i __A, __m256i __B)
9056 	{
9057 	  return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9058 							  (__v8si) __B,
9059 							  (__v8si)
9060 							  _mm256_setzero_si256 (),
9061 							  __M);
9062 	}
9063 	
9064 	extern __inline __m256i
9065 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9066 	_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
9067 	{
9068 	  return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9069 							     (__v4di) __X,
9070 							     (__v4di)
9071 							     _mm256_setzero_si256 (),
9072 							     __M);
9073 	}
9074 	
9075 	extern __inline __m256i
9076 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9077 	_mm256_mask_mullo_epi32 (__m256i __W, __mmask8 __M, __m256i __A,
9078 				 __m256i __B)
9079 	{
9080 	  return (__m256i) __builtin_ia32_pmulld256_mask ((__v8si) __A,
9081 							  (__v8si) __B,
9082 							  (__v8si) __W, __M);
9083 	}
9084 	
9085 	extern __inline __m128i
9086 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9087 	_mm_maskz_mullo_epi32 (__mmask8 __M, __m128i __A, __m128i __B)
9088 	{
9089 	  return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9090 							  (__v4si) __B,
9091 							  (__v4si)
9092 							  _mm_setzero_si128 (),
9093 							  __M);
9094 	}
9095 	
9096 	extern __inline __m128i
9097 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9098 	_mm_mask_mullo_epi32 (__m128i __W, __mmask8 __M, __m128i __A,
9099 			      __m128i __B)
9100 	{
9101 	  return (__m128i) __builtin_ia32_pmulld128_mask ((__v4si) __A,
9102 							  (__v4si) __B,
9103 							  (__v4si) __W, __M);
9104 	}
9105 	
9106 	extern __inline __m256i
9107 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9108 	_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9109 			       __m256i __Y)
9110 	{
9111 	  return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9112 							  (__v8si) __Y,
9113 							  (__v4di) __W, __M);
9114 	}
9115 	
9116 	extern __inline __m256i
9117 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9118 	_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9119 	{
9120 	  return (__m256i) __builtin_ia32_pmuldq256_mask ((__v8si) __X,
9121 							  (__v8si) __Y,
9122 							  (__v4di)
9123 							  _mm256_setzero_si256 (),
9124 							  __M);
9125 	}
9126 	
9127 	extern __inline __m128i
9128 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9129 	_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
9130 			    __m128i __Y)
9131 	{
9132 	  return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9133 							  (__v4si) __Y,
9134 							  (__v2di) __W, __M);
9135 	}
9136 	
9137 	extern __inline __m128i
9138 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9139 	_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
9140 	{
9141 	  return (__m128i) __builtin_ia32_pmuldq128_mask ((__v4si) __X,
9142 							  (__v4si) __Y,
9143 							  (__v2di)
9144 							  _mm_setzero_si128 (),
9145 							  __M);
9146 	}
9147 	
9148 	extern __inline __m256i
9149 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9150 	_mm256_permutexvar_epi64 (__m256i __X, __m256i __Y)
9151 	{
9152 	  return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9153 							     (__v4di) __X,
9154 							     (__v4di)
9155 							     _mm256_setzero_si256 (),
9156 							     (__mmask8) -1);
9157 	}
9158 	
9159 	extern __inline __m256i
9160 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9161 	_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
9162 				       __m256i __Y)
9163 	{
9164 	  return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
9165 							     (__v4di) __X,
9166 							     (__v4di) __W,
9167 							     __M);
9168 	}
9169 	
9170 	extern __inline __m256i
9171 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9172 	_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
9173 			       __m256i __Y)
9174 	{
9175 	  return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9176 							   (__v8si) __Y,
9177 							   (__v4di) __W, __M);
9178 	}
9179 	
9180 	extern __inline __m256i
9181 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9182 	_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
9183 	{
9184 	  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9185 							     (__v8si) __X,
9186 							     (__v8si)
9187 							     _mm256_setzero_si256 (),
9188 							     __M);
9189 	}
9190 	
9191 	extern __inline __m256i
9192 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9193 	_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y)
9194 	{
9195 	  return (__m256i) __builtin_ia32_pmuludq256_mask ((__v8si) __X,
9196 							   (__v8si) __Y,
9197 							   (__v4di)
9198 							   _mm256_setzero_si256 (),
9199 							   __M);
9200 	}
9201 	
9202 	extern __inline __m128i
9203 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9204 	_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
9205 			    __m128i __Y)
9206 	{
9207 	  return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9208 							   (__v4si) __Y,
9209 							   (__v2di) __W, __M);
9210 	}
9211 	
9212 	extern __inline __m128i
9213 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9214 	_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
9215 	{
9216 	  return (__m128i) __builtin_ia32_pmuludq128_mask ((__v4si) __X,
9217 							   (__v4si) __Y,
9218 							   (__v2di)
9219 							   _mm_setzero_si128 (),
9220 							   __M);
9221 	}
9222 	
9223 	extern __inline __m256i
9224 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9225 	_mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
9226 	{
9227 	  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9228 							     (__v8si) __X,
9229 							     (__v8si)
9230 							     _mm256_setzero_si256 (),
9231 							     (__mmask8) -1);
9232 	}
9233 	
9234 	extern __inline __m256i
9235 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9236 	_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
9237 				       __m256i __Y)
9238 	{
9239 	  return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
9240 							     (__v8si) __X,
9241 							     (__v8si) __W,
9242 							     __M);
9243 	}
9244 	
9245 	extern __inline __mmask8
9246 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9247 	_mm256_mask_cmpneq_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9248 	{
9249 	  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9250 							  (__v8si) __Y, 4,
9251 							  (__mmask8) __M);
9252 	}
9253 	
9254 	extern __inline __mmask8
9255 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9256 	_mm256_cmpneq_epu32_mask (__m256i __X, __m256i __Y)
9257 	{
9258 	  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9259 							  (__v8si) __Y, 4,
9260 							  (__mmask8) -1);
9261 	}
9262 	
9263 	extern __inline __mmask8
9264 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9265 	_mm256_mask_cmplt_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9266 	{
9267 	  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9268 							  (__v8si) __Y, 1,
9269 							  (__mmask8) __M);
9270 	}
9271 	
9272 	extern __inline __mmask8
9273 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9274 	_mm256_cmplt_epu32_mask (__m256i __X, __m256i __Y)
9275 	{
9276 	  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9277 							  (__v8si) __Y, 1,
9278 							  (__mmask8) -1);
9279 	}
9280 	
9281 	extern __inline __mmask8
9282 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9283 	_mm256_mask_cmpge_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9284 	{
9285 	  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9286 							  (__v8si) __Y, 5,
9287 							  (__mmask8) __M);
9288 	}
9289 	
9290 	extern __inline __mmask8
9291 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9292 	_mm256_cmpge_epu32_mask (__m256i __X, __m256i __Y)
9293 	{
9294 	  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9295 							  (__v8si) __Y, 5,
9296 							  (__mmask8) -1);
9297 	}
9298 	
9299 	extern __inline __mmask8
9300 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9301 	_mm256_mask_cmple_epu32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9302 	{
9303 	  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9304 							  (__v8si) __Y, 2,
9305 							  (__mmask8) __M);
9306 	}
9307 	
9308 	extern __inline __mmask8
9309 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9310 	_mm256_cmple_epu32_mask (__m256i __X, __m256i __Y)
9311 	{
9312 	  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
9313 							  (__v8si) __Y, 2,
9314 							  (__mmask8) -1);
9315 	}
9316 	
9317 	extern __inline __mmask8
9318 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9319 	_mm256_mask_cmpneq_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9320 	{
9321 	  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9322 							  (__v4di) __Y, 4,
9323 							  (__mmask8) __M);
9324 	}
9325 	
9326 	extern __inline __mmask8
9327 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9328 	_mm256_cmpneq_epu64_mask (__m256i __X, __m256i __Y)
9329 	{
9330 	  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9331 							  (__v4di) __Y, 4,
9332 							  (__mmask8) -1);
9333 	}
9334 	
9335 	extern __inline __mmask8
9336 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9337 	_mm256_mask_cmplt_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9338 	{
9339 	  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9340 							  (__v4di) __Y, 1,
9341 							  (__mmask8) __M);
9342 	}
9343 	
9344 	extern __inline __mmask8
9345 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9346 	_mm256_cmplt_epu64_mask (__m256i __X, __m256i __Y)
9347 	{
9348 	  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9349 							  (__v4di) __Y, 1,
9350 							  (__mmask8) -1);
9351 	}
9352 	
9353 	extern __inline __mmask8
9354 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9355 	_mm256_mask_cmpge_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9356 	{
9357 	  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9358 							  (__v4di) __Y, 5,
9359 							  (__mmask8) __M);
9360 	}
9361 	
9362 	extern __inline __mmask8
9363 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9364 	_mm256_cmpge_epu64_mask (__m256i __X, __m256i __Y)
9365 	{
9366 	  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9367 							  (__v4di) __Y, 5,
9368 							  (__mmask8) -1);
9369 	}
9370 	
9371 	extern __inline __mmask8
9372 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9373 	_mm256_mask_cmple_epu64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9374 	{
9375 	  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9376 							  (__v4di) __Y, 2,
9377 							  (__mmask8) __M);
9378 	}
9379 	
9380 	extern __inline __mmask8
9381 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9382 	_mm256_cmple_epu64_mask (__m256i __X, __m256i __Y)
9383 	{
9384 	  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
9385 							  (__v4di) __Y, 2,
9386 							  (__mmask8) -1);
9387 	}
9388 	
9389 	extern __inline __mmask8
9390 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9391 	_mm256_mask_cmpneq_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9392 	{
9393 	  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9394 							 (__v8si) __Y, 4,
9395 							 (__mmask8) __M);
9396 	}
9397 	
9398 	extern __inline __mmask8
9399 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9400 	_mm256_cmpneq_epi32_mask (__m256i __X, __m256i __Y)
9401 	{
9402 	  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9403 							 (__v8si) __Y, 4,
9404 							 (__mmask8) -1);
9405 	}
9406 	
9407 	extern __inline __mmask8
9408 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9409 	_mm256_mask_cmplt_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9410 	{
9411 	  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9412 							 (__v8si) __Y, 1,
9413 							 (__mmask8) __M);
9414 	}
9415 	
9416 	extern __inline __mmask8
9417 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9418 	_mm256_cmplt_epi32_mask (__m256i __X, __m256i __Y)
9419 	{
9420 	  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9421 							 (__v8si) __Y, 1,
9422 							 (__mmask8) -1);
9423 	}
9424 	
9425 	extern __inline __mmask8
9426 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9427 	_mm256_mask_cmpge_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9428 	{
9429 	  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9430 							 (__v8si) __Y, 5,
9431 							 (__mmask8) __M);
9432 	}
9433 	
9434 	extern __inline __mmask8
9435 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9436 	_mm256_cmpge_epi32_mask (__m256i __X, __m256i __Y)
9437 	{
9438 	  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9439 							 (__v8si) __Y, 5,
9440 							 (__mmask8) -1);
9441 	}
9442 	
9443 	extern __inline __mmask8
9444 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9445 	_mm256_mask_cmple_epi32_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9446 	{
9447 	  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9448 							 (__v8si) __Y, 2,
9449 							 (__mmask8) __M);
9450 	}
9451 	
9452 	extern __inline __mmask8
9453 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9454 	_mm256_cmple_epi32_mask (__m256i __X, __m256i __Y)
9455 	{
9456 	  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
9457 							 (__v8si) __Y, 2,
9458 							 (__mmask8) -1);
9459 	}
9460 	
9461 	extern __inline __mmask8
9462 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9463 	_mm256_mask_cmpneq_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9464 	{
9465 	  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9466 							 (__v4di) __Y, 4,
9467 							 (__mmask8) __M);
9468 	}
9469 	
9470 	extern __inline __mmask8
9471 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9472 	_mm256_cmpneq_epi64_mask (__m256i __X, __m256i __Y)
9473 	{
9474 	  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9475 							 (__v4di) __Y, 4,
9476 							 (__mmask8) -1);
9477 	}
9478 	
9479 	extern __inline __mmask8
9480 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9481 	_mm256_mask_cmplt_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9482 	{
9483 	  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9484 							 (__v4di) __Y, 1,
9485 							 (__mmask8) __M);
9486 	}
9487 	
9488 	extern __inline __mmask8
9489 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9490 	_mm256_cmplt_epi64_mask (__m256i __X, __m256i __Y)
9491 	{
9492 	  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9493 							 (__v4di) __Y, 1,
9494 							 (__mmask8) -1);
9495 	}
9496 	
9497 	extern __inline __mmask8
9498 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9499 	_mm256_mask_cmpge_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9500 	{
9501 	  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9502 							 (__v4di) __Y, 5,
9503 							 (__mmask8) __M);
9504 	}
9505 	
9506 	extern __inline __mmask8
9507 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9508 	_mm256_cmpge_epi64_mask (__m256i __X, __m256i __Y)
9509 	{
9510 	  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9511 							 (__v4di) __Y, 5,
9512 							 (__mmask8) -1);
9513 	}
9514 	
9515 	extern __inline __mmask8
9516 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9517 	_mm256_mask_cmple_epi64_mask (__mmask8 __M, __m256i __X, __m256i __Y)
9518 	{
9519 	  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9520 							 (__v4di) __Y, 2,
9521 							 (__mmask8) __M);
9522 	}
9523 	
9524 	extern __inline __mmask8
9525 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9526 	_mm256_cmple_epi64_mask (__m256i __X, __m256i __Y)
9527 	{
9528 	  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
9529 							 (__v4di) __Y, 2,
9530 							 (__mmask8) -1);
9531 	}
9532 	
9533 	extern __inline __mmask8
9534 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9535 	_mm_mask_cmpneq_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9536 	{
9537 	  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9538 							  (__v4si) __Y, 4,
9539 							  (__mmask8) __M);
9540 	}
9541 	
9542 	extern __inline __mmask8
9543 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9544 	_mm_cmpneq_epu32_mask (__m128i __X, __m128i __Y)
9545 	{
9546 	  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9547 							  (__v4si) __Y, 4,
9548 							  (__mmask8) -1);
9549 	}
9550 	
9551 	extern __inline __mmask8
9552 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9553 	_mm_mask_cmplt_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9554 	{
9555 	  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9556 							  (__v4si) __Y, 1,
9557 							  (__mmask8) __M);
9558 	}
9559 	
9560 	extern __inline __mmask8
9561 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9562 	_mm_cmplt_epu32_mask (__m128i __X, __m128i __Y)
9563 	{
9564 	  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9565 							  (__v4si) __Y, 1,
9566 							  (__mmask8) -1);
9567 	}
9568 	
9569 	extern __inline __mmask8
9570 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9571 	_mm_mask_cmpge_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9572 	{
9573 	  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9574 							  (__v4si) __Y, 5,
9575 							  (__mmask8) __M);
9576 	}
9577 	
9578 	extern __inline __mmask8
9579 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9580 	_mm_cmpge_epu32_mask (__m128i __X, __m128i __Y)
9581 	{
9582 	  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9583 							  (__v4si) __Y, 5,
9584 							  (__mmask8) -1);
9585 	}
9586 	
9587 	extern __inline __mmask8
9588 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9589 	_mm_mask_cmple_epu32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9590 	{
9591 	  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9592 							  (__v4si) __Y, 2,
9593 							  (__mmask8) __M);
9594 	}
9595 	
9596 	extern __inline __mmask8
9597 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9598 	_mm_cmple_epu32_mask (__m128i __X, __m128i __Y)
9599 	{
9600 	  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
9601 							  (__v4si) __Y, 2,
9602 							  (__mmask8) -1);
9603 	}
9604 	
9605 	extern __inline __mmask8
9606 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9607 	_mm_mask_cmpneq_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9608 	{
9609 	  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9610 							  (__v2di) __Y, 4,
9611 							  (__mmask8) __M);
9612 	}
9613 	
9614 	extern __inline __mmask8
9615 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9616 	_mm_cmpneq_epu64_mask (__m128i __X, __m128i __Y)
9617 	{
9618 	  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9619 							  (__v2di) __Y, 4,
9620 							  (__mmask8) -1);
9621 	}
9622 	
9623 	extern __inline __mmask8
9624 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9625 	_mm_mask_cmplt_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9626 	{
9627 	  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9628 							  (__v2di) __Y, 1,
9629 							  (__mmask8) __M);
9630 	}
9631 	
9632 	extern __inline __mmask8
9633 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9634 	_mm_cmplt_epu64_mask (__m128i __X, __m128i __Y)
9635 	{
9636 	  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9637 							  (__v2di) __Y, 1,
9638 							  (__mmask8) -1);
9639 	}
9640 	
9641 	extern __inline __mmask8
9642 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9643 	_mm_mask_cmpge_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9644 	{
9645 	  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9646 							  (__v2di) __Y, 5,
9647 							  (__mmask8) __M);
9648 	}
9649 	
9650 	extern __inline __mmask8
9651 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9652 	_mm_cmpge_epu64_mask (__m128i __X, __m128i __Y)
9653 	{
9654 	  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9655 							  (__v2di) __Y, 5,
9656 							  (__mmask8) -1);
9657 	}
9658 	
9659 	extern __inline __mmask8
9660 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9661 	_mm_mask_cmple_epu64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9662 	{
9663 	  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9664 							  (__v2di) __Y, 2,
9665 							  (__mmask8) __M);
9666 	}
9667 	
9668 	extern __inline __mmask8
9669 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9670 	_mm_cmple_epu64_mask (__m128i __X, __m128i __Y)
9671 	{
9672 	  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
9673 							  (__v2di) __Y, 2,
9674 							  (__mmask8) -1);
9675 	}
9676 	
9677 	extern __inline __mmask8
9678 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9679 	_mm_mask_cmpneq_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9680 	{
9681 	  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9682 							 (__v4si) __Y, 4,
9683 							 (__mmask8) __M);
9684 	}
9685 	
9686 	extern __inline __mmask8
9687 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9688 	_mm_cmpneq_epi32_mask (__m128i __X, __m128i __Y)
9689 	{
9690 	  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9691 							 (__v4si) __Y, 4,
9692 							 (__mmask8) -1);
9693 	}
9694 	
9695 	extern __inline __mmask8
9696 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9697 	_mm_mask_cmplt_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9698 	{
9699 	  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9700 							 (__v4si) __Y, 1,
9701 							 (__mmask8) __M);
9702 	}
9703 	
9704 	extern __inline __mmask8
9705 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9706 	_mm_cmplt_epi32_mask (__m128i __X, __m128i __Y)
9707 	{
9708 	  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9709 							 (__v4si) __Y, 1,
9710 							 (__mmask8) -1);
9711 	}
9712 	
9713 	extern __inline __mmask8
9714 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9715 	_mm_mask_cmpge_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9716 	{
9717 	  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9718 							 (__v4si) __Y, 5,
9719 							 (__mmask8) __M);
9720 	}
9721 	
9722 	extern __inline __mmask8
9723 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9724 	_mm_cmpge_epi32_mask (__m128i __X, __m128i __Y)
9725 	{
9726 	  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9727 							 (__v4si) __Y, 5,
9728 							 (__mmask8) -1);
9729 	}
9730 	
9731 	extern __inline __mmask8
9732 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9733 	_mm_mask_cmple_epi32_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9734 	{
9735 	  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9736 							 (__v4si) __Y, 2,
9737 							 (__mmask8) __M);
9738 	}
9739 	
9740 	extern __inline __mmask8
9741 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9742 	_mm_cmple_epi32_mask (__m128i __X, __m128i __Y)
9743 	{
9744 	  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
9745 							 (__v4si) __Y, 2,
9746 							 (__mmask8) -1);
9747 	}
9748 	
9749 	extern __inline __mmask8
9750 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9751 	_mm_mask_cmpneq_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9752 	{
9753 	  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9754 							 (__v2di) __Y, 4,
9755 							 (__mmask8) __M);
9756 	}
9757 	
9758 	extern __inline __mmask8
9759 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9760 	_mm_cmpneq_epi64_mask (__m128i __X, __m128i __Y)
9761 	{
9762 	  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9763 							 (__v2di) __Y, 4,
9764 							 (__mmask8) -1);
9765 	}
9766 	
9767 	extern __inline __mmask8
9768 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9769 	_mm_mask_cmplt_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9770 	{
9771 	  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9772 							 (__v2di) __Y, 1,
9773 							 (__mmask8) __M);
9774 	}
9775 	
9776 	extern __inline __mmask8
9777 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9778 	_mm_cmplt_epi64_mask (__m128i __X, __m128i __Y)
9779 	{
9780 	  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9781 							 (__v2di) __Y, 1,
9782 							 (__mmask8) -1);
9783 	}
9784 	
9785 	extern __inline __mmask8
9786 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9787 	_mm_mask_cmpge_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9788 	{
9789 	  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9790 							 (__v2di) __Y, 5,
9791 							 (__mmask8) __M);
9792 	}
9793 	
9794 	extern __inline __mmask8
9795 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9796 	_mm_cmpge_epi64_mask (__m128i __X, __m128i __Y)
9797 	{
9798 	  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9799 							 (__v2di) __Y, 5,
9800 							 (__mmask8) -1);
9801 	}
9802 	
9803 	extern __inline __mmask8
9804 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9805 	_mm_mask_cmple_epi64_mask (__mmask8 __M, __m128i __X, __m128i __Y)
9806 	{
9807 	  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9808 							 (__v2di) __Y, 2,
9809 							 (__mmask8) __M);
9810 	}
9811 	
9812 	extern __inline __mmask8
9813 	  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9814 	_mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
9815 	{
9816 	  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
9817 							 (__v2di) __Y, 2,
9818 							 (__mmask8) -1);
9819 	}
9820 	
9821 	#ifdef __OPTIMIZE__
9822 	extern __inline __m256i
9823 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9824 	_mm256_permutex_epi64 (__m256i __X, const int __I)
9825 	{
9826 	  return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9827 						      __I,
9828 						      (__v4di)
9829 						      _mm256_setzero_si256(),
9830 						      (__mmask8) -1);
9831 	}
9832 	
9833 	extern __inline __m256i
9834 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9835 	_mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
9836 				    __m256i __X, const int __I)
9837 	{
9838 	  return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9839 							  __I,
9840 							  (__v4di) __W,
9841 							  (__mmask8) __M);
9842 	}
9843 	
9844 	extern __inline __m256i
9845 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9846 	_mm256_maskz_permutex_epi64 (__mmask8 __M, __m256i __X, const int __I)
9847 	{
9848 	  return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
9849 							  __I,
9850 							  (__v4di)
9851 							  _mm256_setzero_si256 (),
9852 							  (__mmask8) __M);
9853 	}
9854 	
9855 	extern __inline __m256d
9856 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9857 	_mm256_mask_shuffle_pd (__m256d __W, __mmask8 __U, __m256d __A,
9858 				__m256d __B, const int __imm)
9859 	{
9860 	  return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9861 							  (__v4df) __B, __imm,
9862 							  (__v4df) __W,
9863 							  (__mmask8) __U);
9864 	}
9865 	
9866 	extern __inline __m256d
9867 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9868 	_mm256_maskz_shuffle_pd (__mmask8 __U, __m256d __A, __m256d __B,
9869 				 const int __imm)
9870 	{
9871 	  return (__m256d) __builtin_ia32_shufpd256_mask ((__v4df) __A,
9872 							  (__v4df) __B, __imm,
9873 							  (__v4df)
9874 							  _mm256_setzero_pd (),
9875 							  (__mmask8) __U);
9876 	}
9877 	
9878 	extern __inline __m128d
9879 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9880 	_mm_mask_shuffle_pd (__m128d __W, __mmask8 __U, __m128d __A,
9881 			     __m128d __B, const int __imm)
9882 	{
9883 	  return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9884 							  (__v2df) __B, __imm,
9885 							  (__v2df) __W,
9886 							  (__mmask8) __U);
9887 	}
9888 	
9889 	extern __inline __m128d
9890 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9891 	_mm_maskz_shuffle_pd (__mmask8 __U, __m128d __A, __m128d __B,
9892 			      const int __imm)
9893 	{
9894 	  return (__m128d) __builtin_ia32_shufpd128_mask ((__v2df) __A,
9895 							  (__v2df) __B, __imm,
9896 							  (__v2df)
9897 							  _mm_setzero_pd (),
9898 							  (__mmask8) __U);
9899 	}
9900 	
9901 	extern __inline __m256
9902 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9903 	_mm256_mask_shuffle_ps (__m256 __W, __mmask8 __U, __m256 __A,
9904 				__m256 __B, const int __imm)
9905 	{
9906 	  return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9907 							 (__v8sf) __B, __imm,
9908 							 (__v8sf) __W,
9909 							 (__mmask8) __U);
9910 	}
9911 	
9912 	extern __inline __m256
9913 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9914 	_mm256_maskz_shuffle_ps (__mmask8 __U, __m256 __A, __m256 __B,
9915 				 const int __imm)
9916 	{
9917 	  return (__m256) __builtin_ia32_shufps256_mask ((__v8sf) __A,
9918 							 (__v8sf) __B, __imm,
9919 							 (__v8sf)
9920 							 _mm256_setzero_ps (),
9921 							 (__mmask8) __U);
9922 	}
9923 	
9924 	extern __inline __m128
9925 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9926 	_mm_mask_shuffle_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
9927 			     const int __imm)
9928 	{
9929 	  return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9930 							 (__v4sf) __B, __imm,
9931 							 (__v4sf) __W,
9932 							 (__mmask8) __U);
9933 	}
9934 	
9935 	extern __inline __m128
9936 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9937 	_mm_maskz_shuffle_ps (__mmask8 __U, __m128 __A, __m128 __B,
9938 			      const int __imm)
9939 	{
9940 	  return (__m128) __builtin_ia32_shufps128_mask ((__v4sf) __A,
9941 							 (__v4sf) __B, __imm,
9942 							 (__v4sf)
9943 							 _mm_setzero_ps (),
9944 							 (__mmask8) __U);
9945 	}
9946 	
9947 	extern __inline __m256i
9948 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9949 	_mm256_inserti32x4 (__m256i __A, __m128i __B, const int __imm)
9950 	{
9951 	  return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9952 								(__v4si) __B,
9953 								__imm,
9954 								(__v8si)
9955 								_mm256_setzero_si256 (),
9956 								(__mmask8) -1);
9957 	}
9958 	
9959 	extern __inline __m256i
9960 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9961 	_mm256_mask_inserti32x4 (__m256i __W, __mmask8 __U, __m256i __A,
9962 				 __m128i __B, const int __imm)
9963 	{
9964 	  return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9965 								(__v4si) __B,
9966 								__imm,
9967 								(__v8si) __W,
9968 								(__mmask8)
9969 								__U);
9970 	}
9971 	
9972 	extern __inline __m256i
9973 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9974 	_mm256_maskz_inserti32x4 (__mmask8 __U, __m256i __A, __m128i __B,
9975 				  const int __imm)
9976 	{
9977 	  return (__m256i) __builtin_ia32_inserti32x4_256_mask ((__v8si) __A,
9978 								(__v4si) __B,
9979 								__imm,
9980 								(__v8si)
9981 								_mm256_setzero_si256 (),
9982 								(__mmask8)
9983 								__U);
9984 	}
9985 	
9986 	extern __inline __m256
9987 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9988 	_mm256_insertf32x4 (__m256 __A, __m128 __B, const int __imm)
9989 	{
9990 	  return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
9991 							       (__v4sf) __B,
9992 							       __imm,
9993 							       (__v8sf)
9994 							       _mm256_setzero_ps (),
9995 							       (__mmask8) -1);
9996 	}
9997 	
9998 	extern __inline __m256
9999 	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10000	_mm256_mask_insertf32x4 (__m256 __W, __mmask8 __U, __m256 __A,
10001				 __m128 __B, const int __imm)
10002	{
10003	  return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
10004							       (__v4sf) __B,
10005							       __imm,
10006							       (__v8sf) __W,
10007							       (__mmask8) __U);
10008	}
10009	
10010	extern __inline __m256
10011	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10012	_mm256_maskz_insertf32x4 (__mmask8 __U, __m256 __A, __m128 __B,
10013				  const int __imm)
10014	{
10015	  return (__m256) __builtin_ia32_insertf32x4_256_mask ((__v8sf) __A,
10016							       (__v4sf) __B,
10017							       __imm,
10018							       (__v8sf)
10019							       _mm256_setzero_ps (),
10020							       (__mmask8) __U);
10021	}
10022	
10023	extern __inline __m128i
10024	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10025	_mm256_extracti32x4_epi32 (__m256i __A, const int __imm)
10026	{
10027	  return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10028								 __imm,
10029								 (__v4si)
10030								 _mm_setzero_si128 (),
10031								 (__mmask8) -1);
10032	}
10033	
10034	extern __inline __m128i
10035	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10036	_mm256_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m256i __A,
10037					const int __imm)
10038	{
10039	  return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10040								 __imm,
10041								 (__v4si) __W,
10042								 (__mmask8)
10043								 __U);
10044	}
10045	
10046	extern __inline __m128i
10047	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10048	_mm256_maskz_extracti32x4_epi32 (__mmask8 __U, __m256i __A,
10049					 const int __imm)
10050	{
10051	  return (__m128i) __builtin_ia32_extracti32x4_256_mask ((__v8si) __A,
10052								 __imm,
10053								 (__v4si)
10054								 _mm_setzero_si128 (),
10055								 (__mmask8)
10056								 __U);
10057	}
10058	
10059	extern __inline __m128
10060	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10061	_mm256_extractf32x4_ps (__m256 __A, const int __imm)
10062	{
10063	  return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10064								__imm,
10065								(__v4sf)
10066								_mm_setzero_ps (),
10067								(__mmask8) -1);
10068	}
10069	
10070	extern __inline __m128
10071	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10072	_mm256_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m256 __A,
10073				     const int __imm)
10074	{
10075	  return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10076								__imm,
10077								(__v4sf) __W,
10078								(__mmask8)
10079								__U);
10080	}
10081	
10082	extern __inline __m128
10083	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10084	_mm256_maskz_extractf32x4_ps (__mmask8 __U, __m256 __A,
10085				      const int __imm)
10086	{
10087	  return (__m128) __builtin_ia32_extractf32x4_256_mask ((__v8sf) __A,
10088								__imm,
10089								(__v4sf)
10090								_mm_setzero_ps (),
10091								(__mmask8)
10092								__U);
10093	}
10094	
10095	extern __inline __m256i
10096	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10097	_mm256_shuffle_i64x2 (__m256i __A, __m256i __B, const int __imm)
10098	{
10099	  return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10100							       (__v4di) __B,
10101							       __imm,
10102							       (__v4di)
10103							       _mm256_setzero_si256 (),
10104							       (__mmask8) -1);
10105	}
10106	
10107	extern __inline __m256i
10108	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10109	_mm256_mask_shuffle_i64x2 (__m256i __W, __mmask8 __U, __m256i __A,
10110				   __m256i __B, const int __imm)
10111	{
10112	  return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10113							       (__v4di) __B,
10114							       __imm,
10115							       (__v4di) __W,
10116							       (__mmask8) __U);
10117	}
10118	
10119	extern __inline __m256i
10120	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10121	_mm256_maskz_shuffle_i64x2 (__mmask8 __U, __m256i __A, __m256i __B,
10122				    const int __imm)
10123	{
10124	  return (__m256i) __builtin_ia32_shuf_i64x2_256_mask ((__v4di) __A,
10125							       (__v4di) __B,
10126							       __imm,
10127							       (__v4di)
10128							       _mm256_setzero_si256 (),
10129							       (__mmask8) __U);
10130	}
10131	
10132	extern __inline __m256i
10133	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10134	_mm256_shuffle_i32x4 (__m256i __A, __m256i __B, const int __imm)
10135	{
10136	  return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10137							       (__v8si) __B,
10138							       __imm,
10139							       (__v8si)
10140							       _mm256_setzero_si256 (),
10141							       (__mmask8) -1);
10142	}
10143	
10144	extern __inline __m256i
10145	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10146	_mm256_mask_shuffle_i32x4 (__m256i __W, __mmask8 __U, __m256i __A,
10147				   __m256i __B, const int __imm)
10148	{
10149	  return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10150							       (__v8si) __B,
10151							       __imm,
10152							       (__v8si) __W,
10153							       (__mmask8) __U);
10154	}
10155	
10156	extern __inline __m256i
10157	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10158	_mm256_maskz_shuffle_i32x4 (__mmask8 __U, __m256i __A, __m256i __B,
10159				    const int __imm)
10160	{
10161	  return (__m256i) __builtin_ia32_shuf_i32x4_256_mask ((__v8si) __A,
10162							       (__v8si) __B,
10163							       __imm,
10164							       (__v8si)
10165							       _mm256_setzero_si256 (),
10166							       (__mmask8) __U);
10167	}
10168	
10169	extern __inline __m256d
10170	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10171	_mm256_shuffle_f64x2 (__m256d __A, __m256d __B, const int __imm)
10172	{
10173	  return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10174							       (__v4df) __B,
10175							       __imm,
10176							       (__v4df)
10177							       _mm256_setzero_pd (),
10178							       (__mmask8) -1);
10179	}
10180	
10181	extern __inline __m256d
10182	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10183	_mm256_mask_shuffle_f64x2 (__m256d __W, __mmask8 __U, __m256d __A,
10184				   __m256d __B, const int __imm)
10185	{
10186	  return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10187							       (__v4df) __B,
10188							       __imm,
10189							       (__v4df) __W,
10190							       (__mmask8) __U);
10191	}
10192	
10193	extern __inline __m256d
10194	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10195	_mm256_maskz_shuffle_f64x2 (__mmask8 __U, __m256d __A, __m256d __B,
10196				    const int __imm)
10197	{
10198	  return (__m256d) __builtin_ia32_shuf_f64x2_256_mask ((__v4df) __A,
10199							       (__v4df) __B,
10200							       __imm,
10201							       (__v4df)
10202							       _mm256_setzero_pd (),
10203							       (__mmask8) __U);
10204	}
10205	
10206	extern __inline __m256
10207	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10208	_mm256_shuffle_f32x4 (__m256 __A, __m256 __B, const int __imm)
10209	{
10210	  return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10211							      (__v8sf) __B,
10212							      __imm,
10213							      (__v8sf)
10214							      _mm256_setzero_ps (),
10215							      (__mmask8) -1);
10216	}
10217	
10218	extern __inline __m256
10219	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10220	_mm256_mask_shuffle_f32x4 (__m256 __W, __mmask8 __U, __m256 __A,
10221				   __m256 __B, const int __imm)
10222	{
10223	  return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10224							      (__v8sf) __B,
10225							      __imm,
10226							      (__v8sf) __W,
10227							      (__mmask8) __U);
10228	}
10229	
10230	extern __inline __m256
10231	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10232	_mm256_maskz_shuffle_f32x4 (__mmask8 __U, __m256 __A, __m256 __B,
10233				    const int __imm)
10234	{
10235	  return (__m256) __builtin_ia32_shuf_f32x4_256_mask ((__v8sf) __A,
10236							      (__v8sf) __B,
10237							      __imm,
10238							      (__v8sf)
10239							      _mm256_setzero_ps (),
10240							      (__mmask8) __U);
10241	}
10242	
10243	extern __inline __m256d
10244	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10245	_mm256_fixupimm_pd (__m256d __A, __m256d __B, __m256i __C,
10246			    const int __imm)
10247	{
10248	  return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
10249							      (__v4df) __B,
10250							      (__v4di) __C,
10251							      __imm,
10252							      (__mmask8) -1);
10253	}
10254	
10255	extern __inline __m256d
10256	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10257	_mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
10258				 __m256i __C, const int __imm)
10259	{
10260	  return (__m256d) __builtin_ia32_fixupimmpd256_mask ((__v4df) __A,
10261							      (__v4df) __B,
10262							      (__v4di) __C,
10263							      __imm,
10264							      (__mmask8) __U);
10265	}
10266	
10267	extern __inline __m256d
10268	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10269	_mm256_maskz_fixupimm_pd (__mmask8 __U, __m256d __A, __m256d __B,
10270				  __m256i __C, const int __imm)
10271	{
10272	  return (__m256d) __builtin_ia32_fixupimmpd256_maskz ((__v4df) __A,
10273							       (__v4df) __B,
10274							       (__v4di) __C,
10275							       __imm,
10276							       (__mmask8) __U);
10277	}
10278	
10279	extern __inline __m256
10280	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10281	_mm256_fixupimm_ps (__m256 __A, __m256 __B, __m256i __C,
10282			    const int __imm)
10283	{
10284	  return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
10285							     (__v8sf) __B,
10286							     (__v8si) __C,
10287							     __imm,
10288							     (__mmask8) -1);
10289	}
10290	
10291	extern __inline __m256
10292	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10293	_mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
10294				 __m256i __C, const int __imm)
10295	{
10296	  return (__m256) __builtin_ia32_fixupimmps256_mask ((__v8sf) __A,
10297							     (__v8sf) __B,
10298							     (__v8si) __C,
10299							     __imm,
10300							     (__mmask8) __U);
10301	}
10302	
10303	extern __inline __m256
10304	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10305	_mm256_maskz_fixupimm_ps (__mmask8 __U, __m256 __A, __m256 __B,
10306				  __m256i __C, const int __imm)
10307	{
10308	  return (__m256) __builtin_ia32_fixupimmps256_maskz ((__v8sf) __A,
10309							      (__v8sf) __B,
10310							      (__v8si) __C,
10311							      __imm,
10312							      (__mmask8) __U);
10313	}
10314	
10315	extern __inline __m128d
10316	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10317	_mm_fixupimm_pd (__m128d __A, __m128d __B, __m128i __C,
10318			 const int __imm)
10319	{
10320	  return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
10321							      (__v2df) __B,
10322							      (__v2di) __C,
10323							      __imm,
10324							      (__mmask8) -1);
10325	}
10326	
10327	extern __inline __m128d
10328	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10329	_mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
10330			      __m128i __C, const int __imm)
10331	{
10332	  return (__m128d) __builtin_ia32_fixupimmpd128_mask ((__v2df) __A,
10333							      (__v2df) __B,
10334							      (__v2di) __C,
10335							      __imm,
10336							      (__mmask8) __U);
10337	}
10338	
10339	extern __inline __m128d
10340	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10341	_mm_maskz_fixupimm_pd (__mmask8 __U, __m128d __A, __m128d __B,
10342			       __m128i __C, const int __imm)
10343	{
10344	  return (__m128d) __builtin_ia32_fixupimmpd128_maskz ((__v2df) __A,
10345							       (__v2df) __B,
10346							       (__v2di) __C,
10347							       __imm,
10348							       (__mmask8) __U);
10349	}
10350	
10351	extern __inline __m128
10352	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10353	_mm_fixupimm_ps (__m128 __A, __m128 __B, __m128i __C, const int __imm)
10354	{
10355	  return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
10356							     (__v4sf) __B,
10357							     (__v4si) __C,
10358							     __imm,
10359							     (__mmask8) -1);
10360	}
10361	
10362	extern __inline __m128
10363	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10364	_mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
10365			      __m128i __C, const int __imm)
10366	{
10367	  return (__m128) __builtin_ia32_fixupimmps128_mask ((__v4sf) __A,
10368							     (__v4sf) __B,
10369							     (__v4si) __C,
10370							     __imm,
10371							     (__mmask8) __U);
10372	}
10373	
10374	extern __inline __m128
10375	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10376	_mm_maskz_fixupimm_ps (__mmask8 __U, __m128 __A, __m128 __B,
10377			       __m128i __C, const int __imm)
10378	{
10379	  return (__m128) __builtin_ia32_fixupimmps128_maskz ((__v4sf) __A,
10380							      (__v4sf) __B,
10381							      (__v4si) __C,
10382							      __imm,
10383							      (__mmask8) __U);
10384	}
10385	
10386	extern __inline __m256i
10387	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10388	_mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
10389				const int __imm)
10390	{
10391	  return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
10392							  (__v8si) __W,
10393							  (__mmask8) __U);
10394	}
10395	
10396	extern __inline __m256i
10397	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10398	_mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
10399	{
10400	  return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
10401							  (__v8si)
10402							  _mm256_setzero_si256 (),
10403							  (__mmask8) __U);
10404	}
10405	
10406	extern __inline __m128i
10407	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10408	_mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
10409			     const int __imm)
10410	{
10411	  return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
10412							  (__v4si) __W,
10413							  (__mmask8) __U);
10414	}
10415	
10416	extern __inline __m128i
10417	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10418	_mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
10419	{
10420	  return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
10421							  (__v4si)
10422							  _mm_setzero_si128 (),
10423							  (__mmask8) __U);
10424	}
10425	
10426	extern __inline __m256i
10427	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10428	_mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
10429				const int __imm)
10430	{
10431	  return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
10432							  (__v4di) __W,
10433							  (__mmask8) __U);
10434	}
10435	
10436	extern __inline __m256i
10437	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10438	_mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
10439	{
10440	  return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
10441							  (__v4di)
10442							  _mm256_setzero_si256 (),
10443							  (__mmask8) __U);
10444	}
10445	
10446	extern __inline __m128i
10447	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10448	_mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
10449			     const int __imm)
10450	{
10451	  return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
10452							  (__v2di) __W,
10453							  (__mmask8) __U);
10454	}
10455	
10456	extern __inline __m128i
10457	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10458	_mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
10459	{
10460	  return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
10461							  (__v2di)
10462							  _mm_setzero_si128 (),
10463							  (__mmask8) __U);
10464	}
10465	
10466	extern __inline __m256i
10467	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10468	_mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
10469				   const int __imm)
10470	{
10471	  return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
10472							     (__v4di) __B,
10473							     (__v4di) __C, __imm,
10474							     (__mmask8) -1);
10475	}
10476	
10477	extern __inline __m256i
10478	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10479	_mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
10480					__m256i __B, __m256i __C,
10481					const int __imm)
10482	{
10483	  return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
10484							     (__v4di) __B,
10485							     (__v4di) __C, __imm,
10486							     (__mmask8) __U);
10487	}
10488	
10489	extern __inline __m256i
10490	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10491	_mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
10492					 __m256i __B, __m256i __C,
10493					 const int __imm)
10494	{
10495	  return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
10496							      (__v4di) __B,
10497							      (__v4di) __C,
10498							      __imm,
10499							      (__mmask8) __U);
10500	}
10501	
10502	extern __inline __m256i
10503	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10504	_mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
10505				   const int __imm)
10506	{
10507	  return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
10508							     (__v8si) __B,
10509							     (__v8si) __C, __imm,
10510							     (__mmask8) -1);
10511	}
10512	
10513	extern __inline __m256i
10514	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10515	_mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
10516					__m256i __B, __m256i __C,
10517					const int __imm)
10518	{
10519	  return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
10520							     (__v8si) __B,
10521							     (__v8si) __C, __imm,
10522							     (__mmask8) __U);
10523	}
10524	
10525	extern __inline __m256i
10526	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10527	_mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
10528					 __m256i __B, __m256i __C,
10529					 const int __imm)
10530	{
10531	  return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
10532							      (__v8si) __B,
10533							      (__v8si) __C,
10534							      __imm,
10535							      (__mmask8) __U);
10536	}
10537	
10538	extern __inline __m128i
10539	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10540	_mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
10541				const int __imm)
10542	{
10543	  return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
10544							     (__v2di) __B,
10545							     (__v2di) __C, __imm,
10546							     (__mmask8) -1);
10547	}
10548	
10549	extern __inline __m128i
10550	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10551	_mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
10552				     __m128i __B, __m128i __C, const int __imm)
10553	{
10554	  return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
10555							     (__v2di) __B,
10556							     (__v2di) __C, __imm,
10557							     (__mmask8) __U);
10558	}
10559	
10560	extern __inline __m128i
10561	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10562	_mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
10563				      __m128i __B, __m128i __C, const int __imm)
10564	{
10565	  return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
10566							      (__v2di) __B,
10567							      (__v2di) __C,
10568							      __imm,
10569							      (__mmask8) __U);
10570	}
10571	
10572	extern __inline __m128i
10573	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10574	_mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
10575				const int __imm)
10576	{
10577	  return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
10578							     (__v4si) __B,
10579							     (__v4si) __C, __imm,
10580							     (__mmask8) -1);
10581	}
10582	
10583	extern __inline __m128i
10584	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10585	_mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
10586				     __m128i __B, __m128i __C, const int __imm)
10587	{
10588	  return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
10589							     (__v4si) __B,
10590							     (__v4si) __C, __imm,
10591							     (__mmask8) __U);
10592	}
10593	
10594	extern __inline __m128i
10595	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10596	_mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
10597				      __m128i __B, __m128i __C, const int __imm)
10598	{
10599	  return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
10600							      (__v4si) __B,
10601							      (__v4si) __C,
10602							      __imm,
10603							      (__mmask8) __U);
10604	}
10605	
10606	extern __inline __m256
10607	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10608	_mm256_roundscale_ps (__m256 __A, const int __imm)
10609	{
10610	  return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10611							      __imm,
10612							      (__v8sf)
10613							      _mm256_setzero_ps (),
10614							      (__mmask8) -1);
10615	}
10616	
10617	extern __inline __m256
10618	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10619	_mm256_mask_roundscale_ps (__m256 __W, __mmask8 __U, __m256 __A,
10620				   const int __imm)
10621	{
10622	  return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10623							      __imm,
10624							      (__v8sf) __W,
10625							      (__mmask8) __U);
10626	}
10627	
10628	extern __inline __m256
10629	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10630	_mm256_maskz_roundscale_ps (__mmask8 __U, __m256 __A, const int __imm)
10631	{
10632	  return (__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf) __A,
10633							      __imm,
10634							      (__v8sf)
10635							      _mm256_setzero_ps (),
10636							      (__mmask8) __U);
10637	}
10638	
10639	extern __inline __m256d
10640	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10641	_mm256_roundscale_pd (__m256d __A, const int __imm)
10642	{
10643	  return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10644							       __imm,
10645							       (__v4df)
10646							       _mm256_setzero_pd (),
10647							       (__mmask8) -1);
10648	}
10649	
10650	extern __inline __m256d
10651	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10652	_mm256_mask_roundscale_pd (__m256d __W, __mmask8 __U, __m256d __A,
10653				   const int __imm)
10654	{
10655	  return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10656							       __imm,
10657							       (__v4df) __W,
10658							       (__mmask8) __U);
10659	}
10660	
10661	extern __inline __m256d
10662	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10663	_mm256_maskz_roundscale_pd (__mmask8 __U, __m256d __A, const int __imm)
10664	{
10665	  return (__m256d) __builtin_ia32_rndscalepd_256_mask ((__v4df) __A,
10666							       __imm,
10667							       (__v4df)
10668							       _mm256_setzero_pd (),
10669							       (__mmask8) __U);
10670	}
10671	
10672	extern __inline __m128
10673	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10674	_mm_roundscale_ps (__m128 __A, const int __imm)
10675	{
10676	  return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10677							      __imm,
10678							      (__v4sf)
10679							      _mm_setzero_ps (),
10680							      (__mmask8) -1);
10681	}
10682	
10683	extern __inline __m128
10684	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10685	_mm_mask_roundscale_ps (__m128 __W, __mmask8 __U, __m128 __A,
10686				const int __imm)
10687	{
10688	  return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10689							      __imm,
10690							      (__v4sf) __W,
10691							      (__mmask8) __U);
10692	}
10693	
10694	extern __inline __m128
10695	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10696	_mm_maskz_roundscale_ps (__mmask8 __U, __m128 __A, const int __imm)
10697	{
10698	  return (__m128) __builtin_ia32_rndscaleps_128_mask ((__v4sf) __A,
10699							      __imm,
10700							      (__v4sf)
10701							      _mm_setzero_ps (),
10702							      (__mmask8) __U);
10703	}
10704	
10705	extern __inline __m128d
10706	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10707	_mm_roundscale_pd (__m128d __A, const int __imm)
10708	{
10709	  return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10710							       __imm,
10711							       (__v2df)
10712							       _mm_setzero_pd (),
10713							       (__mmask8) -1);
10714	}
10715	
10716	extern __inline __m128d
10717	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10718	_mm_mask_roundscale_pd (__m128d __W, __mmask8 __U, __m128d __A,
10719				const int __imm)
10720	{
10721	  return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10722							       __imm,
10723							       (__v2df) __W,
10724							       (__mmask8) __U);
10725	}
10726	
10727	extern __inline __m128d
10728	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10729	_mm_maskz_roundscale_pd (__mmask8 __U, __m128d __A, const int __imm)
10730	{
10731	  return (__m128d) __builtin_ia32_rndscalepd_128_mask ((__v2df) __A,
10732							       __imm,
10733							       (__v2df)
10734							       _mm_setzero_pd (),
10735							       (__mmask8) __U);
10736	}
10737	
10738	extern __inline __m256
10739	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10740	_mm256_getmant_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
10741			   _MM_MANTISSA_SIGN_ENUM __C)
10742	{
10743	  return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10744							    (__C << 2) | __B,
10745							    (__v8sf)
10746							    _mm256_setzero_ps (),
10747							    (__mmask8) -1);
10748	}
10749	
10750	extern __inline __m256
10751	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10752	_mm256_mask_getmant_ps (__m256 __W, __mmask8 __U, __m256 __A,
10753				_MM_MANTISSA_NORM_ENUM __B,
10754				_MM_MANTISSA_SIGN_ENUM __C)
10755	{
10756	  return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10757							    (__C << 2) | __B,
10758							    (__v8sf) __W,
10759							    (__mmask8) __U);
10760	}
10761	
10762	extern __inline __m256
10763	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10764	_mm256_maskz_getmant_ps (__mmask8 __U, __m256 __A,
10765				 _MM_MANTISSA_NORM_ENUM __B,
10766				 _MM_MANTISSA_SIGN_ENUM __C)
10767	{
10768	  return (__m256) __builtin_ia32_getmantps256_mask ((__v8sf) __A,
10769							    (__C << 2) | __B,
10770							    (__v8sf)
10771							    _mm256_setzero_ps (),
10772							    (__mmask8) __U);
10773	}
10774	
10775	extern __inline __m128
10776	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10777	_mm_getmant_ps (__m128 __A, _MM_MANTISSA_NORM_ENUM __B,
10778			_MM_MANTISSA_SIGN_ENUM __C)
10779	{
10780	  return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10781							    (__C << 2) | __B,
10782							    (__v4sf)
10783							    _mm_setzero_ps (),
10784							    (__mmask8) -1);
10785	}
10786	
10787	extern __inline __m128
10788	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10789	_mm_mask_getmant_ps (__m128 __W, __mmask8 __U, __m128 __A,
10790			     _MM_MANTISSA_NORM_ENUM __B,
10791			     _MM_MANTISSA_SIGN_ENUM __C)
10792	{
10793	  return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10794							    (__C << 2) | __B,
10795							    (__v4sf) __W,
10796							    (__mmask8) __U);
10797	}
10798	
10799	extern __inline __m128
10800	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10801	_mm_maskz_getmant_ps (__mmask8 __U, __m128 __A,
10802			      _MM_MANTISSA_NORM_ENUM __B,
10803			      _MM_MANTISSA_SIGN_ENUM __C)
10804	{
10805	  return (__m128) __builtin_ia32_getmantps128_mask ((__v4sf) __A,
10806							    (__C << 2) | __B,
10807							    (__v4sf)
10808							    _mm_setzero_ps (),
10809							    (__mmask8) __U);
10810	}
10811	
10812	extern __inline __m256d
10813	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10814	_mm256_getmant_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
10815			   _MM_MANTISSA_SIGN_ENUM __C)
10816	{
10817	  return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10818							     (__C << 2) | __B,
10819							     (__v4df)
10820							     _mm256_setzero_pd (),
10821							     (__mmask8) -1);
10822	}
10823	
10824	extern __inline __m256d
10825	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10826	_mm256_mask_getmant_pd (__m256d __W, __mmask8 __U, __m256d __A,
10827				_MM_MANTISSA_NORM_ENUM __B,
10828				_MM_MANTISSA_SIGN_ENUM __C)
10829	{
10830	  return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10831							     (__C << 2) | __B,
10832							     (__v4df) __W,
10833							     (__mmask8) __U);
10834	}
10835	
10836	extern __inline __m256d
10837	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10838	_mm256_maskz_getmant_pd (__mmask8 __U, __m256d __A,
10839				 _MM_MANTISSA_NORM_ENUM __B,
10840				 _MM_MANTISSA_SIGN_ENUM __C)
10841	{
10842	  return (__m256d) __builtin_ia32_getmantpd256_mask ((__v4df) __A,
10843							     (__C << 2) | __B,
10844							     (__v4df)
10845							     _mm256_setzero_pd (),
10846							     (__mmask8) __U);
10847	}
10848	
10849	extern __inline __m128d
10850	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10851	_mm_getmant_pd (__m128d __A, _MM_MANTISSA_NORM_ENUM __B,
10852			_MM_MANTISSA_SIGN_ENUM __C)
10853	{
10854	  return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10855							     (__C << 2) | __B,
10856							     (__v2df)
10857							     _mm_setzero_pd (),
10858							     (__mmask8) -1);
10859	}
10860	
10861	extern __inline __m128d
10862	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10863	_mm_mask_getmant_pd (__m128d __W, __mmask8 __U, __m128d __A,
10864			     _MM_MANTISSA_NORM_ENUM __B,
10865			     _MM_MANTISSA_SIGN_ENUM __C)
10866	{
10867	  return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10868							     (__C << 2) | __B,
10869							     (__v2df) __W,
10870							     (__mmask8) __U);
10871	}
10872	
10873	extern __inline __m128d
10874	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10875	_mm_maskz_getmant_pd (__mmask8 __U, __m128d __A,
10876			      _MM_MANTISSA_NORM_ENUM __B,
10877			      _MM_MANTISSA_SIGN_ENUM __C)
10878	{
10879	  return (__m128d) __builtin_ia32_getmantpd128_mask ((__v2df) __A,
10880							     (__C << 2) | __B,
10881							     (__v2df)
10882							     _mm_setzero_pd (),
10883							     (__mmask8) __U);
10884	}
10885	
10886	extern __inline __m256
10887	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10888	_mm256_mmask_i32gather_ps (__m256 __v1_old, __mmask8 __mask,
10889				   __m256i __index, void const *__addr,
10890				   int __scale)
10891	{
10892	  return (__m256) __builtin_ia32_gather3siv8sf ((__v8sf) __v1_old,
10893							__addr,
10894							(__v8si) __index,
10895							__mask, __scale);
10896	}
10897	
10898	extern __inline __m128
10899	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10900	_mm_mmask_i32gather_ps (__m128 __v1_old, __mmask8 __mask,
10901				__m128i __index, void const *__addr,
10902				int __scale)
10903	{
10904	  return (__m128) __builtin_ia32_gather3siv4sf ((__v4sf) __v1_old,
10905							__addr,
10906							(__v4si) __index,
10907							__mask, __scale);
10908	}
10909	
10910	extern __inline __m256d
10911	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10912	_mm256_mmask_i32gather_pd (__m256d __v1_old, __mmask8 __mask,
10913				   __m128i __index, void const *__addr,
10914				   int __scale)
10915	{
10916	  return (__m256d) __builtin_ia32_gather3siv4df ((__v4df) __v1_old,
10917							 __addr,
10918							 (__v4si) __index,
10919							 __mask, __scale);
10920	}
10921	
10922	extern __inline __m128d
10923	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10924	_mm_mmask_i32gather_pd (__m128d __v1_old, __mmask8 __mask,
10925				__m128i __index, void const *__addr,
10926				int __scale)
10927	{
10928	  return (__m128d) __builtin_ia32_gather3siv2df ((__v2df) __v1_old,
10929							 __addr,
10930							 (__v4si) __index,
10931							 __mask, __scale);
10932	}
10933	
10934	extern __inline __m128
10935	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10936	_mm256_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10937				   __m256i __index, void const *__addr,
10938				   int __scale)
10939	{
10940	  return (__m128) __builtin_ia32_gather3div8sf ((__v4sf) __v1_old,
10941							__addr,
10942							(__v4di) __index,
10943							__mask, __scale);
10944	}
10945	
10946	extern __inline __m128
10947	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10948	_mm_mmask_i64gather_ps (__m128 __v1_old, __mmask8 __mask,
10949				__m128i __index, void const *__addr,
10950				int __scale)
10951	{
10952	  return (__m128) __builtin_ia32_gather3div4sf ((__v4sf) __v1_old,
10953							__addr,
10954							(__v2di) __index,
10955							__mask, __scale);
10956	}
10957	
10958	extern __inline __m256d
10959	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10960	_mm256_mmask_i64gather_pd (__m256d __v1_old, __mmask8 __mask,
10961				   __m256i __index, void const *__addr,
10962				   int __scale)
10963	{
10964	  return (__m256d) __builtin_ia32_gather3div4df ((__v4df) __v1_old,
10965							 __addr,
10966							 (__v4di) __index,
10967							 __mask, __scale);
10968	}
10969	
10970	extern __inline __m128d
10971	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10972	_mm_mmask_i64gather_pd (__m128d __v1_old, __mmask8 __mask,
10973				__m128i __index, void const *__addr,
10974				int __scale)
10975	{
10976	  return (__m128d) __builtin_ia32_gather3div2df ((__v2df) __v1_old,
10977							 __addr,
10978							 (__v2di) __index,
10979							 __mask, __scale);
10980	}
10981	
10982	extern __inline __m256i
10983	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10984	_mm256_mmask_i32gather_epi32 (__m256i __v1_old, __mmask8 __mask,
10985				      __m256i __index, void const *__addr,
10986				      int __scale)
10987	{
10988	  return (__m256i) __builtin_ia32_gather3siv8si ((__v8si) __v1_old,
10989							 __addr,
10990							 (__v8si) __index,
10991							 __mask, __scale);
10992	}
10993	
10994	extern __inline __m128i
10995	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10996	_mm_mmask_i32gather_epi32 (__m128i __v1_old, __mmask8 __mask,
10997				   __m128i __index, void const *__addr,
10998				   int __scale)
10999	{
11000	  return (__m128i) __builtin_ia32_gather3siv4si ((__v4si) __v1_old,
11001							 __addr,
11002							 (__v4si) __index,
11003							 __mask, __scale);
11004	}
11005	
11006	extern __inline __m256i
11007	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11008	_mm256_mmask_i32gather_epi64 (__m256i __v1_old, __mmask8 __mask,
11009				      __m128i __index, void const *__addr,
11010				      int __scale)
11011	{
11012	  return (__m256i) __builtin_ia32_gather3siv4di ((__v4di) __v1_old,
11013							 __addr,
11014							 (__v4si) __index,
11015							 __mask, __scale);
11016	}
11017	
11018	extern __inline __m128i
11019	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11020	_mm_mmask_i32gather_epi64 (__m128i __v1_old, __mmask8 __mask,
11021				   __m128i __index, void const *__addr,
11022				   int __scale)
11023	{
11024	  return (__m128i) __builtin_ia32_gather3siv2di ((__v2di) __v1_old,
11025							 __addr,
11026							 (__v4si) __index,
11027							 __mask, __scale);
11028	}
11029	
11030	extern __inline __m128i
11031	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11032	_mm256_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
11033				      __m256i __index, void const *__addr,
11034				      int __scale)
11035	{
11036	  return (__m128i) __builtin_ia32_gather3div8si ((__v4si) __v1_old,
11037							 __addr,
11038							 (__v4di) __index,
11039							 __mask, __scale);
11040	}
11041	
11042	extern __inline __m128i
11043	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11044	_mm_mmask_i64gather_epi32 (__m128i __v1_old, __mmask8 __mask,
11045				   __m128i __index, void const *__addr,
11046				   int __scale)
11047	{
11048	  return (__m128i) __builtin_ia32_gather3div4si ((__v4si) __v1_old,
11049							 __addr,
11050							 (__v2di) __index,
11051							 __mask, __scale);
11052	}
11053	
11054	extern __inline __m256i
11055	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11056	_mm256_mmask_i64gather_epi64 (__m256i __v1_old, __mmask8 __mask,
11057				      __m256i __index, void const *__addr,
11058				      int __scale)
11059	{
11060	  return (__m256i) __builtin_ia32_gather3div4di ((__v4di) __v1_old,
11061							 __addr,
11062							 (__v4di) __index,
11063							 __mask, __scale);
11064	}
11065	
11066	extern __inline __m128i
11067	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11068	_mm_mmask_i64gather_epi64 (__m128i __v1_old, __mmask8 __mask,
11069				   __m128i __index, void const *__addr,
11070				   int __scale)
11071	{
11072	  return (__m128i) __builtin_ia32_gather3div2di ((__v2di) __v1_old,
11073							 __addr,
11074							 (__v2di) __index,
11075							 __mask, __scale);
11076	}
11077	
11078	extern __inline void
11079	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11080	_mm256_i32scatter_ps (void *__addr, __m256i __index,
11081			      __m256 __v1, const int __scale)
11082	{
11083	  __builtin_ia32_scattersiv8sf (__addr, (__mmask8) 0xFF,
11084					(__v8si) __index, (__v8sf) __v1,
11085					__scale);
11086	}
11087	
11088	extern __inline void
11089	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11090	_mm256_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
11091				   __m256i __index, __m256 __v1,
11092				   const int __scale)
11093	{
11094	  __builtin_ia32_scattersiv8sf (__addr, __mask, (__v8si) __index,
11095					(__v8sf) __v1, __scale);
11096	}
11097	
11098	extern __inline void
11099	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11100	_mm_i32scatter_ps (void *__addr, __m128i __index, __m128 __v1,
11101			   const int __scale)
11102	{
11103	  __builtin_ia32_scattersiv4sf (__addr, (__mmask8) 0xFF,
11104					(__v4si) __index, (__v4sf) __v1,
11105					__scale);
11106	}
11107	
11108	extern __inline void
11109	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11110	_mm_mask_i32scatter_ps (void *__addr, __mmask8 __mask,
11111				__m128i __index, __m128 __v1,
11112				const int __scale)
11113	{
11114	  __builtin_ia32_scattersiv4sf (__addr, __mask, (__v4si) __index,
11115					(__v4sf) __v1, __scale);
11116	}
11117	
11118	extern __inline void
11119	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11120	_mm256_i32scatter_pd (void *__addr, __m128i __index,
11121			      __m256d __v1, const int __scale)
11122	{
11123	  __builtin_ia32_scattersiv4df (__addr, (__mmask8) 0xFF,
11124					(__v4si) __index, (__v4df) __v1,
11125					__scale);
11126	}
11127	
11128	extern __inline void
11129	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11130	_mm256_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
11131				   __m128i __index, __m256d __v1,
11132				   const int __scale)
11133	{
11134	  __builtin_ia32_scattersiv4df (__addr, __mask, (__v4si) __index,
11135					(__v4df) __v1, __scale);
11136	}
11137	
11138	extern __inline void
11139	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11140	_mm_i32scatter_pd (void *__addr, __m128i __index,
11141			   __m128d __v1, const int __scale)
11142	{
11143	  __builtin_ia32_scattersiv2df (__addr, (__mmask8) 0xFF,
11144					(__v4si) __index, (__v2df) __v1,
11145					__scale);
11146	}
11147	
11148	extern __inline void
11149	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11150	_mm_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
11151				__m128i __index, __m128d __v1,
11152				const int __scale)
11153	{
11154	  __builtin_ia32_scattersiv2df (__addr, __mask, (__v4si) __index,
11155					(__v2df) __v1, __scale);
11156	}
11157	
11158	extern __inline void
11159	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11160	_mm256_i64scatter_ps (void *__addr, __m256i __index,
11161			      __m128 __v1, const int __scale)
11162	{
11163	  __builtin_ia32_scatterdiv8sf (__addr, (__mmask8) 0xFF,
11164					(__v4di) __index, (__v4sf) __v1,
11165					__scale);
11166	}
11167	
11168	extern __inline void
11169	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11170	_mm256_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
11171				   __m256i __index, __m128 __v1,
11172				   const int __scale)
11173	{
11174	  __builtin_ia32_scatterdiv8sf (__addr, __mask, (__v4di) __index,
11175					(__v4sf) __v1, __scale);
11176	}
11177	
11178	extern __inline void
11179	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11180	_mm_i64scatter_ps (void *__addr, __m128i __index, __m128 __v1,
11181			   const int __scale)
11182	{
11183	  __builtin_ia32_scatterdiv4sf (__addr, (__mmask8) 0xFF,
11184					(__v2di) __index, (__v4sf) __v1,
11185					__scale);
11186	}
11187	
11188	extern __inline void
11189	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11190	_mm_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
11191				__m128i __index, __m128 __v1,
11192				const int __scale)
11193	{
11194	  __builtin_ia32_scatterdiv4sf (__addr, __mask, (__v2di) __index,
11195					(__v4sf) __v1, __scale);
11196	}
11197	
11198	extern __inline void
11199	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11200	_mm256_i64scatter_pd (void *__addr, __m256i __index,
11201			      __m256d __v1, const int __scale)
11202	{
11203	  __builtin_ia32_scatterdiv4df (__addr, (__mmask8) 0xFF,
11204					(__v4di) __index, (__v4df) __v1,
11205					__scale);
11206	}
11207	
11208	extern __inline void
11209	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11210	_mm256_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
11211				   __m256i __index, __m256d __v1,
11212				   const int __scale)
11213	{
11214	  __builtin_ia32_scatterdiv4df (__addr, __mask, (__v4di) __index,
11215					(__v4df) __v1, __scale);
11216	}
11217	
11218	extern __inline void
11219	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11220	_mm_i64scatter_pd (void *__addr, __m128i __index,
11221			   __m128d __v1, const int __scale)
11222	{
11223	  __builtin_ia32_scatterdiv2df (__addr, (__mmask8) 0xFF,
11224					(__v2di) __index, (__v2df) __v1,
11225					__scale);
11226	}
11227	
11228	extern __inline void
11229	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11230	_mm_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
11231				__m128i __index, __m128d __v1,
11232				const int __scale)
11233	{
11234	  __builtin_ia32_scatterdiv2df (__addr, __mask, (__v2di) __index,
11235					(__v2df) __v1, __scale);
11236	}
11237	
11238	extern __inline void
11239	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11240	_mm256_i32scatter_epi32 (void *__addr, __m256i __index,
11241				 __m256i __v1, const int __scale)
11242	{
11243	  __builtin_ia32_scattersiv8si (__addr, (__mmask8) 0xFF,
11244					(__v8si) __index, (__v8si) __v1,
11245					__scale);
11246	}
11247	
11248	extern __inline void
11249	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11250	_mm256_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
11251				      __m256i __index, __m256i __v1,
11252				      const int __scale)
11253	{
11254	  __builtin_ia32_scattersiv8si (__addr, __mask, (__v8si) __index,
11255					(__v8si) __v1, __scale);
11256	}
11257	
11258	extern __inline void
11259	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11260	_mm_i32scatter_epi32 (void *__addr, __m128i __index,
11261			      __m128i __v1, const int __scale)
11262	{
11263	  __builtin_ia32_scattersiv4si (__addr, (__mmask8) 0xFF,
11264					(__v4si) __index, (__v4si) __v1,
11265					__scale);
11266	}
11267	
11268	extern __inline void
11269	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11270	_mm_mask_i32scatter_epi32 (void *__addr, __mmask8 __mask,
11271				   __m128i __index, __m128i __v1,
11272				   const int __scale)
11273	{
11274	  __builtin_ia32_scattersiv4si (__addr, __mask, (__v4si) __index,
11275					(__v4si) __v1, __scale);
11276	}
11277	
11278	extern __inline void
11279	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11280	_mm256_i32scatter_epi64 (void *__addr, __m128i __index,
11281				 __m256i __v1, const int __scale)
11282	{
11283	  __builtin_ia32_scattersiv4di (__addr, (__mmask8) 0xFF,
11284					(__v4si) __index, (__v4di) __v1,
11285					__scale);
11286	}
11287	
11288	extern __inline void
11289	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11290	_mm256_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
11291				      __m128i __index, __m256i __v1,
11292				      const int __scale)
11293	{
11294	  __builtin_ia32_scattersiv4di (__addr, __mask, (__v4si) __index,
11295					(__v4di) __v1, __scale);
11296	}
11297	
11298	extern __inline void
11299	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11300	_mm_i32scatter_epi64 (void *__addr, __m128i __index,
11301			      __m128i __v1, const int __scale)
11302	{
11303	  __builtin_ia32_scattersiv2di (__addr, (__mmask8) 0xFF,
11304					(__v4si) __index, (__v2di) __v1,
11305					__scale);
11306	}
11307	
11308	extern __inline void
11309	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11310	_mm_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
11311				   __m128i __index, __m128i __v1,
11312				   const int __scale)
11313	{
11314	  __builtin_ia32_scattersiv2di (__addr, __mask, (__v4si) __index,
11315					(__v2di) __v1, __scale);
11316	}
11317	
11318	extern __inline void
11319	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11320	_mm256_i64scatter_epi32 (void *__addr, __m256i __index,
11321				 __m128i __v1, const int __scale)
11322	{
11323	  __builtin_ia32_scatterdiv8si (__addr, (__mmask8) 0xFF,
11324					(__v4di) __index, (__v4si) __v1,
11325					__scale);
11326	}
11327	
11328	extern __inline void
11329	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11330	_mm256_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
11331				      __m256i __index, __m128i __v1,
11332				      const int __scale)
11333	{
11334	  __builtin_ia32_scatterdiv8si (__addr, __mask, (__v4di) __index,
11335					(__v4si) __v1, __scale);
11336	}
11337	
11338	extern __inline void
11339	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11340	_mm_i64scatter_epi32 (void *__addr, __m128i __index,
11341			      __m128i __v1, const int __scale)
11342	{
11343	  __builtin_ia32_scatterdiv4si (__addr, (__mmask8) 0xFF,
11344					(__v2di) __index, (__v4si) __v1,
11345					__scale);
11346	}
11347	
11348	extern __inline void
11349	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11350	_mm_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
11351				   __m128i __index, __m128i __v1,
11352				   const int __scale)
11353	{
11354	  __builtin_ia32_scatterdiv4si (__addr, __mask, (__v2di) __index,
11355					(__v4si) __v1, __scale);
11356	}
11357	
11358	extern __inline void
11359	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11360	_mm256_i64scatter_epi64 (void *__addr, __m256i __index,
11361				 __m256i __v1, const int __scale)
11362	{
11363	  __builtin_ia32_scatterdiv4di (__addr, (__mmask8) 0xFF,
11364					(__v4di) __index, (__v4di) __v1,
11365					__scale);
11366	}
11367	
11368	extern __inline void
11369	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11370	_mm256_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
11371				      __m256i __index, __m256i __v1,
11372				      const int __scale)
11373	{
11374	  __builtin_ia32_scatterdiv4di (__addr, __mask, (__v4di) __index,
11375					(__v4di) __v1, __scale);
11376	}
11377	
11378	extern __inline void
11379	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11380	_mm_i64scatter_epi64 (void *__addr, __m128i __index,
11381			      __m128i __v1, const int __scale)
11382	{
11383	  __builtin_ia32_scatterdiv2di (__addr, (__mmask8) 0xFF,
11384					(__v2di) __index, (__v2di) __v1,
11385					__scale);
11386	}
11387	
11388	extern __inline void
11389	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11390	_mm_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
11391				   __m128i __index, __m128i __v1,
11392				   const int __scale)
11393	{
11394	  __builtin_ia32_scatterdiv2di (__addr, __mask, (__v2di) __index,
11395					(__v2di) __v1, __scale);
11396	}
11397	
11398	extern __inline __m256i
11399	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11400	_mm256_mask_shuffle_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11401				   _MM_PERM_ENUM __mask)
11402	{
11403	  return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
11404							  (__v8si) __W,
11405							  (__mmask8) __U);
11406	}
11407	
11408	extern __inline __m256i
11409	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11410	_mm256_maskz_shuffle_epi32 (__mmask8 __U, __m256i __A,
11411				    _MM_PERM_ENUM __mask)
11412	{
11413	  return (__m256i) __builtin_ia32_pshufd256_mask ((__v8si) __A, __mask,
11414							  (__v8si)
11415							  _mm256_setzero_si256 (),
11416							  (__mmask8) __U);
11417	}
11418	
11419	extern __inline __m128i
11420	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11421	_mm_mask_shuffle_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11422				_MM_PERM_ENUM __mask)
11423	{
11424	  return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
11425							  (__v4si) __W,
11426							  (__mmask8) __U);
11427	}
11428	
11429	extern __inline __m128i
11430	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11431	_mm_maskz_shuffle_epi32 (__mmask8 __U, __m128i __A,
11432				 _MM_PERM_ENUM __mask)
11433	{
11434	  return (__m128i) __builtin_ia32_pshufd128_mask ((__v4si) __A, __mask,
11435							  (__v4si)
11436							  _mm_setzero_si128 (),
11437							  (__mmask8) __U);
11438	}
11439	
11440	extern __inline __m256i
11441	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11442	_mm256_rol_epi32 (__m256i __A, const int __B)
11443	{
11444	  return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11445							 (__v8si)
11446							 _mm256_setzero_si256 (),
11447							 (__mmask8) -1);
11448	}
11449	
11450	extern __inline __m256i
11451	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11452	_mm256_mask_rol_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11453			       const int __B)
11454	{
11455	  return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11456							 (__v8si) __W,
11457							 (__mmask8) __U);
11458	}
11459	
11460	extern __inline __m256i
11461	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11462	_mm256_maskz_rol_epi32 (__mmask8 __U, __m256i __A, const int __B)
11463	{
11464	  return (__m256i) __builtin_ia32_prold256_mask ((__v8si) __A, __B,
11465							 (__v8si)
11466							 _mm256_setzero_si256 (),
11467							 (__mmask8) __U);
11468	}
11469	
11470	extern __inline __m128i
11471	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11472	_mm_rol_epi32 (__m128i __A, const int __B)
11473	{
11474	  return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11475							 (__v4si)
11476							 _mm_setzero_si128 (),
11477							 (__mmask8) -1);
11478	}
11479	
11480	extern __inline __m128i
11481	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11482	_mm_mask_rol_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11483			    const int __B)
11484	{
11485	  return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11486							 (__v4si) __W,
11487							 (__mmask8) __U);
11488	}
11489	
11490	extern __inline __m128i
11491	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11492	_mm_maskz_rol_epi32 (__mmask8 __U, __m128i __A, const int __B)
11493	{
11494	  return (__m128i) __builtin_ia32_prold128_mask ((__v4si) __A, __B,
11495							 (__v4si)
11496							 _mm_setzero_si128 (),
11497							 (__mmask8) __U);
11498	}
11499	
11500	extern __inline __m256i
11501	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11502	_mm256_ror_epi32 (__m256i __A, const int __B)
11503	{
11504	  return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11505							 (__v8si)
11506							 _mm256_setzero_si256 (),
11507							 (__mmask8) -1);
11508	}
11509	
11510	extern __inline __m256i
11511	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11512	_mm256_mask_ror_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11513			       const int __B)
11514	{
11515	  return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11516							 (__v8si) __W,
11517							 (__mmask8) __U);
11518	}
11519	
11520	extern __inline __m256i
11521	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11522	_mm256_maskz_ror_epi32 (__mmask8 __U, __m256i __A, const int __B)
11523	{
11524	  return (__m256i) __builtin_ia32_prord256_mask ((__v8si) __A, __B,
11525							 (__v8si)
11526							 _mm256_setzero_si256 (),
11527							 (__mmask8) __U);
11528	}
11529	
11530	extern __inline __m128i
11531	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11532	_mm_ror_epi32 (__m128i __A, const int __B)
11533	{
11534	  return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11535							 (__v4si)
11536							 _mm_setzero_si128 (),
11537							 (__mmask8) -1);
11538	}
11539	
11540	extern __inline __m128i
11541	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11542	_mm_mask_ror_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11543			    const int __B)
11544	{
11545	  return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11546							 (__v4si) __W,
11547							 (__mmask8) __U);
11548	}
11549	
11550	extern __inline __m128i
11551	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11552	_mm_maskz_ror_epi32 (__mmask8 __U, __m128i __A, const int __B)
11553	{
11554	  return (__m128i) __builtin_ia32_prord128_mask ((__v4si) __A, __B,
11555							 (__v4si)
11556							 _mm_setzero_si128 (),
11557							 (__mmask8) __U);
11558	}
11559	
11560	extern __inline __m256i
11561	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11562	_mm256_rol_epi64 (__m256i __A, const int __B)
11563	{
11564	  return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11565							 (__v4di)
11566							 _mm256_setzero_si256 (),
11567							 (__mmask8) -1);
11568	}
11569	
11570	extern __inline __m256i
11571	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11572	_mm256_mask_rol_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11573			       const int __B)
11574	{
11575	  return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11576							 (__v4di) __W,
11577							 (__mmask8) __U);
11578	}
11579	
11580	extern __inline __m256i
11581	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11582	_mm256_maskz_rol_epi64 (__mmask8 __U, __m256i __A, const int __B)
11583	{
11584	  return (__m256i) __builtin_ia32_prolq256_mask ((__v4di) __A, __B,
11585							 (__v4di)
11586							 _mm256_setzero_si256 (),
11587							 (__mmask8) __U);
11588	}
11589	
11590	extern __inline __m128i
11591	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11592	_mm_rol_epi64 (__m128i __A, const int __B)
11593	{
11594	  return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11595							 (__v2di)
11596							 _mm_setzero_si128 (),
11597							 (__mmask8) -1);
11598	}
11599	
11600	extern __inline __m128i
11601	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11602	_mm_mask_rol_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11603			    const int __B)
11604	{
11605	  return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11606							 (__v2di) __W,
11607							 (__mmask8) __U);
11608	}
11609	
11610	extern __inline __m128i
11611	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11612	_mm_maskz_rol_epi64 (__mmask8 __U, __m128i __A, const int __B)
11613	{
11614	  return (__m128i) __builtin_ia32_prolq128_mask ((__v2di) __A, __B,
11615							 (__v2di)
11616							 _mm_setzero_si128 (),
11617							 (__mmask8) __U);
11618	}
11619	
11620	extern __inline __m256i
11621	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11622	_mm256_ror_epi64 (__m256i __A, const int __B)
11623	{
11624	  return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11625							 (__v4di)
11626							 _mm256_setzero_si256 (),
11627							 (__mmask8) -1);
11628	}
11629	
11630	extern __inline __m256i
11631	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11632	_mm256_mask_ror_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11633			       const int __B)
11634	{
11635	  return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11636							 (__v4di) __W,
11637							 (__mmask8) __U);
11638	}
11639	
11640	extern __inline __m256i
11641	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11642	_mm256_maskz_ror_epi64 (__mmask8 __U, __m256i __A, const int __B)
11643	{
11644	  return (__m256i) __builtin_ia32_prorq256_mask ((__v4di) __A, __B,
11645							 (__v4di)
11646							 _mm256_setzero_si256 (),
11647							 (__mmask8) __U);
11648	}
11649	
11650	extern __inline __m128i
11651	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11652	_mm_ror_epi64 (__m128i __A, const int __B)
11653	{
11654	  return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11655							 (__v2di)
11656							 _mm_setzero_si128 (),
11657							 (__mmask8) -1);
11658	}
11659	
11660	extern __inline __m128i
11661	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11662	_mm_mask_ror_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11663			    const int __B)
11664	{
11665	  return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11666							 (__v2di) __W,
11667							 (__mmask8) __U);
11668	}
11669	
11670	extern __inline __m128i
11671	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11672	_mm_maskz_ror_epi64 (__mmask8 __U, __m128i __A, const int __B)
11673	{
11674	  return (__m128i) __builtin_ia32_prorq128_mask ((__v2di) __A, __B,
11675							 (__v2di)
11676							 _mm_setzero_si128 (),
11677							 (__mmask8) __U);
11678	}
11679	
11680	extern __inline __m128i
11681	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11682	_mm_alignr_epi32 (__m128i __A, __m128i __B, const int __imm)
11683	{
11684	  return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11685							  (__v4si) __B, __imm,
11686							  (__v4si)
11687							  _mm_setzero_si128 (),
11688							  (__mmask8) -1);
11689	}
11690	
11691	extern __inline __m128i
11692	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11693	_mm_mask_alignr_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11694			       __m128i __B, const int __imm)
11695	{
11696	  return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11697							  (__v4si) __B, __imm,
11698							  (__v4si) __W,
11699							  (__mmask8) __U);
11700	}
11701	
11702	extern __inline __m128i
11703	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11704	_mm_maskz_alignr_epi32 (__mmask8 __U, __m128i __A, __m128i __B,
11705				const int __imm)
11706	{
11707	  return (__m128i) __builtin_ia32_alignd128_mask ((__v4si) __A,
11708							  (__v4si) __B, __imm,
11709							  (__v4si)
11710							  _mm_setzero_si128 (),
11711							  (__mmask8) __U);
11712	}
11713	
11714	extern __inline __m128i
11715	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11716	_mm_alignr_epi64 (__m128i __A, __m128i __B, const int __imm)
11717	{
11718	  return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11719							  (__v2di) __B, __imm,
11720							  (__v2di)
11721							  _mm_setzero_si128 (),
11722							  (__mmask8) -1);
11723	}
11724	
11725	extern __inline __m128i
11726	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11727	_mm_mask_alignr_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11728			       __m128i __B, const int __imm)
11729	{
11730	  return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11731							  (__v2di) __B, __imm,
11732							  (__v2di) __W,
11733							  (__mmask8) __U);
11734	}
11735	
11736	extern __inline __m128i
11737	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11738	_mm_maskz_alignr_epi64 (__mmask8 __U, __m128i __A, __m128i __B,
11739				const int __imm)
11740	{
11741	  return (__m128i) __builtin_ia32_alignq128_mask ((__v2di) __A,
11742							  (__v2di) __B, __imm,
11743							  (__v2di)
11744							  _mm_setzero_si128 (),
11745							  (__mmask8) __U);
11746	}
11747	
11748	extern __inline __m256i
11749	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11750	_mm256_alignr_epi32 (__m256i __A, __m256i __B, const int __imm)
11751	{
11752	  return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11753							  (__v8si) __B, __imm,
11754							  (__v8si)
11755							  _mm256_setzero_si256 (),
11756							  (__mmask8) -1);
11757	}
11758	
11759	extern __inline __m256i
11760	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11761	_mm256_mask_alignr_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11762				  __m256i __B, const int __imm)
11763	{
11764	  return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11765							  (__v8si) __B, __imm,
11766							  (__v8si) __W,
11767							  (__mmask8) __U);
11768	}
11769	
11770	extern __inline __m256i
11771	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11772	_mm256_maskz_alignr_epi32 (__mmask8 __U, __m256i __A, __m256i __B,
11773				   const int __imm)
11774	{
11775	  return (__m256i) __builtin_ia32_alignd256_mask ((__v8si) __A,
11776							  (__v8si) __B, __imm,
11777							  (__v8si)
11778							  _mm256_setzero_si256 (),
11779							  (__mmask8) __U);
11780	}
11781	
11782	extern __inline __m256i
11783	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11784	_mm256_alignr_epi64 (__m256i __A, __m256i __B, const int __imm)
11785	{
11786	  return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11787							  (__v4di) __B, __imm,
11788							  (__v4di)
11789							  _mm256_setzero_si256 (),
11790							  (__mmask8) -1);
11791	}
11792	
11793	extern __inline __m256i
11794	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11795	_mm256_mask_alignr_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11796				  __m256i __B, const int __imm)
11797	{
11798	  return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11799							  (__v4di) __B, __imm,
11800							  (__v4di) __W,
11801							  (__mmask8) __U);
11802	}
11803	
11804	extern __inline __m256i
11805	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11806	_mm256_maskz_alignr_epi64 (__mmask8 __U, __m256i __A, __m256i __B,
11807				   const int __imm)
11808	{
11809	  return (__m256i) __builtin_ia32_alignq256_mask ((__v4di) __A,
11810							  (__v4di) __B, __imm,
11811							  (__v4di)
11812							  _mm256_setzero_si256 (),
11813							  (__mmask8) __U);
11814	}
11815	
11816	extern __inline __m128i
11817	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11818	_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A,
11819			   const int __I)
11820	{
11821	  return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11822							  (__v8hi) __W,
11823							  (__mmask8) __U);
11824	}
11825	
11826	extern __inline __m128i
11827	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11828	_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A, const int __I)
11829	{
11830	  return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, __I,
11831							  (__v8hi)
11832							  _mm_setzero_si128 (),
11833							  (__mmask8) __U);
11834	}
11835	
11836	extern __inline __m128i
11837	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11838	_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A,
11839			      const int __I)
11840	{
11841	  return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11842							     (__v8hi) __W,
11843							     (__mmask8) __U);
11844	}
11845	
11846	extern __inline __m128i
11847	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11848	_mm256_maskz_cvtps_ph (__mmask8 __U, __m256 __A, const int __I)
11849	{
11850	  return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, __I,
11851							     (__v8hi)
11852							     _mm_setzero_si128 (),
11853							     (__mmask8) __U);
11854	}
11855	
11856	extern __inline __m256i
11857	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11858	_mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11859				const int __imm)
11860	{
11861	  return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11862							  (__v8si) __W,
11863							  (__mmask8) __U);
11864	}
11865	
11866	extern __inline __m256i
11867	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11868	_mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
11869	{
11870	  return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
11871							  (__v8si)
11872							  _mm256_setzero_si256 (),
11873							  (__mmask8) __U);
11874	}
11875	
11876	extern __inline __m128i
11877	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11878	_mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
11879			     const int __imm)
11880	{
11881	  return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11882							  (__v4si) __W,
11883							  (__mmask8) __U);
11884	}
11885	
11886	extern __inline __m128i
11887	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11888	_mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
11889	{
11890	  return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
11891							  (__v4si)
11892							  _mm_setzero_si128 (),
11893							  (__mmask8) __U);
11894	}
11895	
11896	extern __inline __m256i
11897	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11898	_mm256_srai_epi64 (__m256i __A, const int __imm)
11899	{
11900	  return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11901							  (__v4di)
11902							  _mm256_setzero_si256 (),
11903							  (__mmask8) -1);
11904	}
11905	
11906	extern __inline __m256i
11907	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11908	_mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
11909				const int __imm)
11910	{
11911	  return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11912							  (__v4di) __W,
11913							  (__mmask8) __U);
11914	}
11915	
11916	extern __inline __m256i
11917	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11918	_mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
11919	{
11920	  return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
11921							  (__v4di)
11922							  _mm256_setzero_si256 (),
11923							  (__mmask8) __U);
11924	}
11925	
11926	extern __inline __m128i
11927	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11928	_mm_srai_epi64 (__m128i __A, const int __imm)
11929	{
11930	  return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11931							  (__v2di)
11932							  _mm_setzero_si128 (),
11933							  (__mmask8) -1);
11934	}
11935	
11936	extern __inline __m128i
11937	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11938	_mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
11939			     const int __imm)
11940	{
11941	  return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11942							  (__v2di) __W,
11943							  (__mmask8) __U);
11944	}
11945	
11946	extern __inline __m128i
11947	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11948	_mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
11949	{
11950	  return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
11951							  (__v2di)
11952							  _mm_setzero_si128 (),
11953							  (__mmask8) __U);
11954	}
11955	
11956	extern __inline __m128i
11957	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11958	_mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
11959	{
11960	  return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11961							  (__v4si) __W,
11962							  (__mmask8) __U);
11963	}
11964	
11965	extern __inline __m128i
11966	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11967	_mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
11968	{
11969	  return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
11970							  (__v4si)
11971							  _mm_setzero_si128 (),
11972							  (__mmask8) __U);
11973	}
11974	
11975	extern __inline __m128i
11976	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11977	_mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
11978	{
11979	  return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11980							  (__v2di) __W,
11981							  (__mmask8) __U);
11982	}
11983	
11984	extern __inline __m128i
11985	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11986	_mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
11987	{
11988	  return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
11989							  (__v2di)
11990							  _mm_setzero_si128 (),
11991							  (__mmask8) __U);
11992	}
11993	
11994	extern __inline __m256i
11995	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11996	_mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
11997				int __B)
11998	{
11999	  return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
12000							  (__v8si) __W,
12001							  (__mmask8) __U);
12002	}
12003	
12004	extern __inline __m256i
12005	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12006	_mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
12007	{
12008	  return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
12009							  (__v8si)
12010							  _mm256_setzero_si256 (),
12011							  (__mmask8) __U);
12012	}
12013	
12014	extern __inline __m256i
12015	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12016	_mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
12017				int __B)
12018	{
12019	  return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
12020							  (__v4di) __W,
12021							  (__mmask8) __U);
12022	}
12023	
12024	extern __inline __m256i
12025	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12026	_mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
12027	{
12028	  return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
12029							  (__v4di)
12030							  _mm256_setzero_si256 (),
12031							  (__mmask8) __U);
12032	}
12033	
12034	extern __inline __m256d
12035	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12036	_mm256_mask_permutex_pd (__m256d __W, __mmask8 __U, __m256d __X,
12037				 const int __imm)
12038	{
12039	  return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
12040							  (__v4df) __W,
12041							  (__mmask8) __U);
12042	}
12043	
12044	extern __inline __m256d
12045	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12046	_mm256_maskz_permutex_pd (__mmask8 __U, __m256d __X, const int __imm)
12047	{
12048	  return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __imm,
12049							  (__v4df)
12050							  _mm256_setzero_pd (),
12051							  (__mmask8) __U);
12052	}
12053	
12054	extern __inline __m256d
12055	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12056	_mm256_mask_permute_pd (__m256d __W, __mmask8 __U, __m256d __X,
12057				const int __C)
12058	{
12059	  return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
12060							     (__v4df) __W,
12061							     (__mmask8) __U);
12062	}
12063	
12064	extern __inline __m256d
12065	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12066	_mm256_maskz_permute_pd (__mmask8 __U, __m256d __X, const int __C)
12067	{
12068	  return (__m256d) __builtin_ia32_vpermilpd256_mask ((__v4df) __X, __C,
12069							     (__v4df)
12070							     _mm256_setzero_pd (),
12071							     (__mmask8) __U);
12072	}
12073	
12074	extern __inline __m128d
12075	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12076	_mm_mask_permute_pd (__m128d __W, __mmask8 __U, __m128d __X,
12077			     const int __C)
12078	{
12079	  return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
12080							  (__v2df) __W,
12081							  (__mmask8) __U);
12082	}
12083	
12084	extern __inline __m128d
12085	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12086	_mm_maskz_permute_pd (__mmask8 __U, __m128d __X, const int __C)
12087	{
12088	  return (__m128d) __builtin_ia32_vpermilpd_mask ((__v2df) __X, __C,
12089							  (__v2df)
12090							  _mm_setzero_pd (),
12091							  (__mmask8) __U);
12092	}
12093	
12094	extern __inline __m256
12095	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12096	_mm256_mask_permute_ps (__m256 __W, __mmask8 __U, __m256 __X,
12097				const int __C)
12098	{
12099	  return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
12100							    (__v8sf) __W,
12101							    (__mmask8) __U);
12102	}
12103	
12104	extern __inline __m256
12105	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12106	_mm256_maskz_permute_ps (__mmask8 __U, __m256 __X, const int __C)
12107	{
12108	  return (__m256) __builtin_ia32_vpermilps256_mask ((__v8sf) __X, __C,
12109							    (__v8sf)
12110							    _mm256_setzero_ps (),
12111							    (__mmask8) __U);
12112	}
12113	
12114	extern __inline __m128
12115	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12116	_mm_mask_permute_ps (__m128 __W, __mmask8 __U, __m128 __X,
12117			     const int __C)
12118	{
12119	  return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
12120							 (__v4sf) __W,
12121							 (__mmask8) __U);
12122	}
12123	
12124	extern __inline __m128
12125	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12126	_mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C)
12127	{
12128	  return (__m128) __builtin_ia32_vpermilps_mask ((__v4sf) __X, __C,
12129							 (__v4sf)
12130							 _mm_setzero_ps (),
12131							 (__mmask8) __U);
12132	}
12133	
12134	extern __inline __m256d
12135	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12136	_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W)
12137	{
12138	  return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A,
12139							     (__v4df) __W,
12140							     (__mmask8) __U);
12141	}
12142	
12143	extern __inline __m256
12144	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12145	_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W)
12146	{
12147	  return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A,
12148							    (__v8sf) __W,
12149							    (__mmask8) __U);
12150	}
12151	
12152	extern __inline __m256i
12153	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12154	_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W)
12155	{
12156	  return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A,
12157							    (__v4di) __W,
12158							    (__mmask8) __U);
12159	}
12160	
12161	extern __inline __m256i
12162	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12163	_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W)
12164	{
12165	  return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A,
12166							    (__v8si) __W,
12167							    (__mmask8) __U);
12168	}
12169	
12170	extern __inline __m128d
12171	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12172	_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W)
12173	{
12174	  return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A,
12175							     (__v2df) __W,
12176							     (__mmask8) __U);
12177	}
12178	
12179	extern __inline __m128
12180	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12181	_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W)
12182	{
12183	  return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A,
12184							    (__v4sf) __W,
12185							    (__mmask8) __U);
12186	}
12187	
12188	extern __inline __m128i
12189	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12190	_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W)
12191	{
12192	  return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A,
12193							    (__v2di) __W,
12194							    (__mmask8) __U);
12195	}
12196	
12197	extern __inline __m128i
12198	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12199	_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W)
12200	{
12201	  return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A,
12202							    (__v4si) __W,
12203							    (__mmask8) __U);
12204	}
12205	
12206	extern __inline __mmask8
12207	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12208	_mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P)
12209	{
12210	  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12211							 (__v4di) __Y, __P,
12212							 (__mmask8) -1);
12213	}
12214	
12215	extern __inline __mmask8
12216	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12217	_mm256_cmp_epi32_mask (__m256i __X, __m256i __Y, const int __P)
12218	{
12219	  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
12220							 (__v8si) __Y, __P,
12221							 (__mmask8) -1);
12222	}
12223	
12224	extern __inline __mmask8
12225	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12226	_mm256_cmp_epu64_mask (__m256i __X, __m256i __Y, const int __P)
12227	{
12228	  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
12229							  (__v4di) __Y, __P,
12230							  (__mmask8) -1);
12231	}
12232	
12233	extern __inline __mmask8
12234	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12235	_mm256_cmp_epu32_mask (__m256i __X, __m256i __Y, const int __P)
12236	{
12237	  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
12238							  (__v8si) __Y, __P,
12239							  (__mmask8) -1);
12240	}
12241	
12242	extern __inline __mmask8
12243	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12244	_mm256_cmp_pd_mask (__m256d __X, __m256d __Y, const int __P)
12245	{
12246	  return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
12247							  (__v4df) __Y, __P,
12248							  (__mmask8) -1);
12249	}
12250	
12251	extern __inline __mmask8
12252	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12253	_mm256_cmp_ps_mask (__m256 __X, __m256 __Y, const int __P)
12254	{
12255	  return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
12256							  (__v8sf) __Y, __P,
12257							  (__mmask8) -1);
12258	}
12259	
12260	extern __inline __mmask8
12261	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12262	_mm256_mask_cmp_epi64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12263				    const int __P)
12264	{
12265	  return (__mmask8) __builtin_ia32_cmpq256_mask ((__v4di) __X,
12266							 (__v4di) __Y, __P,
12267							 (__mmask8) __U);
12268	}
12269	
12270	extern __inline __mmask8
12271	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12272	_mm256_mask_cmp_epi32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12273				    const int __P)
12274	{
12275	  return (__mmask8) __builtin_ia32_cmpd256_mask ((__v8si) __X,
12276							 (__v8si) __Y, __P,
12277							 (__mmask8) __U);
12278	}
12279	
12280	extern __inline __mmask8
12281	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12282	_mm256_mask_cmp_epu64_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12283				    const int __P)
12284	{
12285	  return (__mmask8) __builtin_ia32_ucmpq256_mask ((__v4di) __X,
12286							  (__v4di) __Y, __P,
12287							  (__mmask8) __U);
12288	}
12289	
12290	extern __inline __mmask8
12291	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12292	_mm256_mask_cmp_epu32_mask (__mmask8 __U, __m256i __X, __m256i __Y,
12293				    const int __P)
12294	{
12295	  return (__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si) __X,
12296							  (__v8si) __Y, __P,
12297							  (__mmask8) __U);
12298	}
12299	
12300	extern __inline __mmask8
12301	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12302	_mm256_mask_cmp_pd_mask (__mmask8 __U, __m256d __X, __m256d __Y,
12303				 const int __P)
12304	{
12305	  return (__mmask8) __builtin_ia32_cmppd256_mask ((__v4df) __X,
12306							  (__v4df) __Y, __P,
12307							  (__mmask8) __U);
12308	}
12309	
12310	extern __inline __mmask8
12311	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12312	_mm256_mask_cmp_ps_mask (__mmask8 __U, __m256 __X, __m256 __Y,
12313				 const int __P)
12314	{
12315	  return (__mmask8) __builtin_ia32_cmpps256_mask ((__v8sf) __X,
12316							  (__v8sf) __Y, __P,
12317							  (__mmask8) __U);
12318	}
12319	
12320	extern __inline __mmask8
12321	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12322	_mm_cmp_epi64_mask (__m128i __X, __m128i __Y, const int __P)
12323	{
12324	  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12325							 (__v2di) __Y, __P,
12326							 (__mmask8) -1);
12327	}
12328	
12329	extern __inline __mmask8
12330	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12331	_mm_cmp_epi32_mask (__m128i __X, __m128i __Y, const int __P)
12332	{
12333	  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12334							 (__v4si) __Y, __P,
12335							 (__mmask8) -1);
12336	}
12337	
12338	extern __inline __mmask8
12339	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12340	_mm_cmp_epu64_mask (__m128i __X, __m128i __Y, const int __P)
12341	{
12342	  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12343							  (__v2di) __Y, __P,
12344							  (__mmask8) -1);
12345	}
12346	
12347	extern __inline __mmask8
12348	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12349	_mm_cmp_epu32_mask (__m128i __X, __m128i __Y, const int __P)
12350	{
12351	  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12352							  (__v4si) __Y, __P,
12353							  (__mmask8) -1);
12354	}
12355	
12356	extern __inline __mmask8
12357	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12358	_mm_cmp_pd_mask (__m128d __X, __m128d __Y, const int __P)
12359	{
12360	  return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
12361							  (__v2df) __Y, __P,
12362							  (__mmask8) -1);
12363	}
12364	
12365	extern __inline __mmask8
12366	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12367	_mm_cmp_ps_mask (__m128 __X, __m128 __Y, const int __P)
12368	{
12369	  return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
12370							  (__v4sf) __Y, __P,
12371							  (__mmask8) -1);
12372	}
12373	
12374	extern __inline __mmask8
12375	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12376	_mm_mask_cmp_epi64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12377				 const int __P)
12378	{
12379	  return (__mmask8) __builtin_ia32_cmpq128_mask ((__v2di) __X,
12380							 (__v2di) __Y, __P,
12381							 (__mmask8) __U);
12382	}
12383	
12384	extern __inline __mmask8
12385	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12386	_mm_mask_cmp_epi32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12387				 const int __P)
12388	{
12389	  return (__mmask8) __builtin_ia32_cmpd128_mask ((__v4si) __X,
12390							 (__v4si) __Y, __P,
12391							 (__mmask8) __U);
12392	}
12393	
12394	extern __inline __mmask8
12395	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12396	_mm_mask_cmp_epu64_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12397				 const int __P)
12398	{
12399	  return (__mmask8) __builtin_ia32_ucmpq128_mask ((__v2di) __X,
12400							  (__v2di) __Y, __P,
12401							  (__mmask8) __U);
12402	}
12403	
12404	extern __inline __mmask8
12405	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12406	_mm_mask_cmp_epu32_mask (__mmask8 __U, __m128i __X, __m128i __Y,
12407				 const int __P)
12408	{
12409	  return (__mmask8) __builtin_ia32_ucmpd128_mask ((__v4si) __X,
12410							  (__v4si) __Y, __P,
12411							  (__mmask8) __U);
12412	}
12413	
12414	extern __inline __mmask8
12415	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12416	_mm_mask_cmp_pd_mask (__mmask8 __U, __m128d __X, __m128d __Y,
12417			      const int __P)
12418	{
12419	  return (__mmask8) __builtin_ia32_cmppd128_mask ((__v2df) __X,
12420							  (__v2df) __Y, __P,
12421							  (__mmask8) __U);
12422	}
12423	
12424	extern __inline __mmask8
12425	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12426	_mm_mask_cmp_ps_mask (__mmask8 __U, __m128 __X, __m128 __Y,
12427			      const int __P)
12428	{
12429	  return (__mmask8) __builtin_ia32_cmpps128_mask ((__v4sf) __X,
12430							  (__v4sf) __Y, __P,
12431							  (__mmask8) __U);
12432	}
12433	
12434	extern __inline __m256d
12435	__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12436	_mm256_permutex_pd (__m256d __X, const int __M)
12437	{
12438	  return (__m256d) __builtin_ia32_permdf256_mask ((__v4df) __X, __M,
12439							  (__v4df)
12440							  _mm256_undefined_pd (),
12441							  (__mmask8) -1);
12442	}
12443	
12444	#else
/* Macro form of _mm256_permutex_pd: expands to a call of
   __builtin_ia32_permdf256_mask on X and M with the result of
   _mm256_undefined_pd () and a mask of (__mmask8)-1.  */
#define _mm256_permutex_pd(X, M) \
  ((__m256d) \
   __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(X), (int)(M), \
                                  (__v4df)(__m256d) _mm256_undefined_pd (), \
                                  (__mmask8)-1))
12450	
/* Macro form of _mm256_permutex_epi64: expands to a call of
   __builtin_ia32_permdi256_mask on X and I with a zero vector and a mask
   of (__mmask8) -1.  */
#define _mm256_permutex_epi64(X, I) \
  ((__m256i) \
   __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), (int)(I), \
                                  (__v4di)(__m256i)(_mm256_setzero_si256 ()), \
                                  (__mmask8) -1))
12457	
/* Macro form of _mm256_maskz_permutex_epi64: expands to a call of
   __builtin_ia32_permdi256_mask on X and I with a zero vector and M as
   the mask.  */
#define _mm256_maskz_permutex_epi64(M, X, I) \
  ((__m256i) \
   __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), (int)(I), \
                                  (__v4di)(__m256i)(_mm256_setzero_si256 ()), \
                                  (__mmask8)(M)))
12464	
/* Macro form of _mm256_mask_permutex_epi64: expands to a call of
   __builtin_ia32_permdi256_mask on X and I with W as the trailing vector
   operand and M as the mask.  */
#define _mm256_mask_permutex_epi64(W, M, X, I) \
  ((__m256i) \
   __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), (int)(I), \
                                  (__v4di)(__m256i)(W), (__mmask8)(M)))
12470	
/* Macro form of _mm256_insertf32x4: expands to a call of
   __builtin_ia32_insertf32x4_256_mask on X, Y and C with a zero vector
   and a mask of (__mmask8)-1.  */
#define _mm256_insertf32x4(X, Y, C) \
  ((__m256) \
   __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
                                        (__v4sf)(__m128) (Y), \
                                        (int) (C), \
                                        (__v8sf)(__m256)_mm256_setzero_ps (), \
                                        (__mmask8)-1))
12476	
/* Macro form of _mm256_mask_insertf32x4: expands to a call of
   __builtin_ia32_insertf32x4_256_mask on X, Y and C with W as the
   trailing vector operand and U as the mask.  */
#define _mm256_mask_insertf32x4(W, U, X, Y, C) \
  ((__m256) \
   __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
                                        (__v4sf)(__m128) (Y), \
                                        (int) (C), \
                                        (__v8sf)(__m256)(W), \
                                        (__mmask8)(U)))
12482	
/* Macro form of _mm256_maskz_insertf32x4: expands to a call of
   __builtin_ia32_insertf32x4_256_mask on X, Y and C with a zero vector
   and U as the mask.  */
#define _mm256_maskz_insertf32x4(U, X, Y, C) \
  ((__m256) \
   __builtin_ia32_insertf32x4_256_mask ((__v8sf)(__m256) (X), \
                                        (__v4sf)(__m128) (Y), \
                                        (int) (C), \
                                        (__v8sf)(__m256)_mm256_setzero_ps (), \
                                        (__mmask8)(U)))
12488	
/* Macro form of _mm256_inserti32x4: expands to a call of
   __builtin_ia32_inserti32x4_256_mask on X, Y and C with a zero vector
   and a mask of (__mmask8)-1.  */
#define _mm256_inserti32x4(X, Y, C) \
  ((__m256i) \
   __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X), \
                                        (__v4si)(__m128i) (Y), \
                                        (int) (C), \
                                        (__v8si)(__m256i)_mm256_setzero_si256 (), \
                                        (__mmask8)-1))
12494	
/* Macro form of _mm256_mask_inserti32x4: expands to a call of
   __builtin_ia32_inserti32x4_256_mask on X, Y and C with W as the
   trailing vector operand and U as the mask.  */
#define _mm256_mask_inserti32x4(W, U, X, Y, C) \
  ((__m256i) \
   __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X), \
                                        (__v4si)(__m128i) (Y), \
                                        (int) (C), \
                                        (__v8si)(__m256i)(W), \
                                        (__mmask8)(U)))
12500	
/* Macro form of _mm256_maskz_inserti32x4: expands to a call of
   __builtin_ia32_inserti32x4_256_mask on X, Y and C with a zero vector
   and U as the mask.  */
#define _mm256_maskz_inserti32x4(U, X, Y, C) \
  ((__m256i) \
   __builtin_ia32_inserti32x4_256_mask ((__v8si)(__m256i) (X), \
                                        (__v4si)(__m128i) (Y), \
                                        (int) (C), \
                                        (__v8si)(__m256i)_mm256_setzero_si256 (), \
                                        (__mmask8)(U)))
12506	
/* Macro form of _mm256_extractf32x4_ps: expands to a call of
   __builtin_ia32_extractf32x4_256_mask on X and C with a zero vector and
   a mask of (__mmask8)-1.  */
#define _mm256_extractf32x4_ps(X, C) \
  ((__m128) \
   __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
                                         (int) (C), \
                                         (__v4sf)(__m128)_mm_setzero_ps (), \
                                         (__mmask8)-1))
12512	
/* Macro form of _mm256_mask_extractf32x4_ps: expands to a call of
   __builtin_ia32_extractf32x4_256_mask on X and C with W as the trailing
   vector operand and U as the mask.  */
#define _mm256_mask_extractf32x4_ps(W, U, X, C) \
  ((__m128) \
   __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
                                         (int) (C), \
                                         (__v4sf)(__m128)(W), \
                                         (__mmask8)(U)))
12518	
/* Macro form of _mm256_maskz_extractf32x4_ps: expands to a call of
   __builtin_ia32_extractf32x4_256_mask on X and C with a zero vector and
   U as the mask.  */
#define _mm256_maskz_extractf32x4_ps(U, X, C) \
  ((__m128) \
   __builtin_ia32_extractf32x4_256_mask ((__v8sf)(__m256) (X), \
                                         (int) (C), \
                                         (__v4sf)(__m128)_mm_setzero_ps (), \
                                         (__mmask8)(U)))
12524	
/* Macro form of _mm256_extracti32x4_epi32: expands to a call of
   __builtin_ia32_extracti32x4_256_mask on X and C with a zero vector and
   a mask of (__mmask8)-1.  */
#define _mm256_extracti32x4_epi32(X, C) \
  ((__m128i) \
   __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X), \
                                         (int) (C), \
                                         (__v4si)(__m128i)_mm_setzero_si128 (), \
                                         (__mmask8)-1))
12528	
/* Macro form of _mm256_mask_extracti32x4_epi32: expands to a call of
   __builtin_ia32_extracti32x4_256_mask on X and C with W as the trailing
   vector operand and U as the mask.  */
#define _mm256_mask_extracti32x4_epi32(W, U, X, C) \
  ((__m128i) \
   __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X), \
                                         (int) (C), \
                                         (__v4si)(__m128i)(W), \
                                         (__mmask8)(U)))
12532	
/* Macro form of _mm256_maskz_extracti32x4_epi32: expands to a call of
   __builtin_ia32_extracti32x4_256_mask on X and C with a zero vector and
   U as the mask.  */
#define _mm256_maskz_extracti32x4_epi32(U, X, C) \
  ((__m128i) \
   __builtin_ia32_extracti32x4_256_mask ((__v8si)(__m256i) (X), \
                                         (int) (C), \
                                         (__v4si)(__m128i)_mm_setzero_si128 (), \
                                         (__mmask8)(U)))
12536	
/* Macro form of _mm256_shuffle_i64x2: expands to a call of
   __builtin_ia32_shuf_i64x2_256_mask on X, Y and C with a zero vector
   and a mask of (__mmask8)-1.  */
#define _mm256_shuffle_i64x2(X, Y, C) \
  ((__m256i) \
   __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
                                       (__v4di)(__m256i)(Y), \
                                       (int)(C), \
                                       (__v4di)(__m256i)_mm256_setzero_si256 (), \
                                       (__mmask8)-1))
12542	
/* Macro form of _mm256_mask_shuffle_i64x2: expands to a call of
   __builtin_ia32_shuf_i64x2_256_mask on X, Y and C with W as the
   trailing vector operand and U as the mask.  */
#define _mm256_mask_shuffle_i64x2(W, U, X, Y, C) \
  ((__m256i) \
   __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
                                       (__v4di)(__m256i)(Y), \
                                       (int)(C), \
                                       (__v4di)(__m256i)(W), \
                                       (__mmask8)(U)))
12548	
/* Macro form of _mm256_maskz_shuffle_i64x2: expands to a call of
   __builtin_ia32_shuf_i64x2_256_mask on X, Y and C with a zero vector
   and U as the mask.  */
#define _mm256_maskz_shuffle_i64x2(U, X, Y, C) \
  ((__m256i) \
   __builtin_ia32_shuf_i64x2_256_mask ((__v4di)(__m256i)(X), \
                                       (__v4di)(__m256i)(Y), \
                                       (int)(C), \
                                       (__v4di)(__m256i)_mm256_setzero_si256 (), \
                                       (__mmask8)(U)))
12554	
/* Macro form of _mm256_shuffle_i32x4: expands to a call of
   __builtin_ia32_shuf_i32x4_256_mask on X, Y and C with a zero vector
   and a mask of (__mmask8)-1.  */
#define _mm256_shuffle_i32x4(X, Y, C) \
  ((__m256i) \
   __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
                                       (__v8si)(__m256i)(Y), \
                                       (int)(C), \
                                       (__v8si)(__m256i) _mm256_setzero_si256 (), \
                                       (__mmask8)-1))
12561	
/* Macro form of _mm256_mask_shuffle_i32x4: expands to a call of
   __builtin_ia32_shuf_i32x4_256_mask on X, Y and C with W as the
   trailing vector operand and U as the mask.  */
#define _mm256_mask_shuffle_i32x4(W, U, X, Y, C) \
  ((__m256i) \
   __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
                                       (__v8si)(__m256i)(Y), \
                                       (int)(C), \
                                       (__v8si)(__m256i)(W), \
                                       (__mmask8)(U)))
12567	
/* Macro form of _mm256_maskz_shuffle_i32x4: expands to a call of
   __builtin_ia32_shuf_i32x4_256_mask on X, Y and C with a zero vector
   and U as the mask.  */
#define _mm256_maskz_shuffle_i32x4(U, X, Y, C) \
  ((__m256i) \
   __builtin_ia32_shuf_i32x4_256_mask ((__v8si)(__m256i)(X), \
                                       (__v8si)(__m256i)(Y), \
                                       (int)(C), \
                                       (__v8si)(__m256i) _mm256_setzero_si256 (), \
                                       (__mmask8)(U)))
12574	
/* Macro form of _mm256_shuffle_f64x2: expands to a call of
   __builtin_ia32_shuf_f64x2_256_mask on X, Y and C with a zero vector
   and a mask of (__mmask8)-1.  */
#define _mm256_shuffle_f64x2(X, Y, C) \
  ((__m256d) \
   __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X), \
                                       (__v4df)(__m256d)(Y), \
                                       (int)(C), \
                                       (__v4df)(__m256d)_mm256_setzero_pd (), \
                                       (__mmask8)-1))
12580	
12581	#define _mm256_mask_shuffle_f64x2(W, U, X, Y, C)                                        \
12582	  ((__m256d)  __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X),                 \
12583	                                                  (__v4df)(__m256d)(Y), (int)(C),       \
12584	                                                  (__v4df)(__m256d)(W),                 \
12585	                                                  (__mmask8)(U)))
12586	
12587	#define _mm256_maskz_shuffle_f64x2(U, X, Y, C)                                          \
12588	  ((__m256d)  __builtin_ia32_shuf_f64x2_256_mask ((__v4df)(__m256d)(X),                 \
12589	                                                  (__v4df)(__m256d)(Y), (int)(C),       \
12590							  (__v4df)(__m256d)_mm256_setzero_pd( ),\
12591	                                                  (__mmask8)(U)))
12592	
/* Wrappers around __builtin_ia32_shuf_f32x4_256_mask.  X and Y are cast
   to __v8sf and C to int.  The plain form passes a zero vector and an
   all-ones mask, the _mask form passes W and U, and the _maskz form
   passes a zero vector and U.  */
#define _mm256_shuffle_f32x4(X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
     (__v8sf)(__m256)(X), (__v8sf)(__m256)(Y), (int)(C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)-1))

#define _mm256_mask_shuffle_f32x4(W, U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
     (__v8sf)(__m256)(X), (__v8sf)(__m256)(Y), (int)(C), \
     (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_shuffle_f32x4(U, X, Y, C) \
  ((__m256) __builtin_ia32_shuf_f32x4_256_mask ( \
     (__v8sf)(__m256)(X), (__v8sf)(__m256)(Y), (int)(C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)(U)))
12610	
/* Masked wrappers around __builtin_ia32_shufpd256_mask (256-bit) and
   __builtin_ia32_shufpd128_mask (128-bit).  A and B are the vector
   operands and C is passed as an int.  The _mask forms pass W and U as
   the last two operands; the _maskz forms pass a zero vector and U.  */
#define _mm256_mask_shuffle_pd(W, U, A, B, C) \
  ((__m256d) __builtin_ia32_shufpd256_mask ( \
     (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
     (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm256_maskz_shuffle_pd(U, A, B, C) \
  ((__m256d) __builtin_ia32_shufpd256_mask ( \
     (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \
     (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U)))

#define _mm_mask_shuffle_pd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_shufpd128_mask ( \
     (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
     (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_shuffle_pd(U, A, B, C) \
  ((__m128d) __builtin_ia32_shufpd128_mask ( \
     (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \
     (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)(U)))
12635	
/* Masked wrappers around __builtin_ia32_shufps256_mask (256-bit) and
   __builtin_ia32_shufps128_mask (128-bit).  A and B are the vector
   operands and C is passed as an int.  The _mask forms pass W and U as
   the last two operands; the _maskz forms pass a zero vector and U.  */
#define _mm256_mask_shuffle_ps(W, U, A, B, C) \
  ((__m256) __builtin_ia32_shufps256_mask ( \
     (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
     (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_shuffle_ps(U, A, B, C) \
  ((__m256) __builtin_ia32_shufps256_mask ( \
     (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \
     (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)(U)))

#define _mm_mask_shuffle_ps(W, U, A, B, C) \
  ((__m128) __builtin_ia32_shufps128_mask ( \
     (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \
     (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_shuffle_ps(U, A, B, C) \
  ((__m128) __builtin_ia32_shufps128_mask ( \
     (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \
     (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)(U)))
12659	
/* 256-bit double-precision fixupimm wrappers.  X and Y are cast to
   __v4df, Z to __v4di and C to int.  The plain and _mask forms call
   __builtin_ia32_fixupimmpd256_mask (with an all-ones mask and with U
   respectively); the _maskz form calls
   __builtin_ia32_fixupimmpd256_maskz with U.  */
#define _mm256_fixupimm_pd(X, Y, Z, C) \
  ((__m256d) __builtin_ia32_fixupimmpd256_mask ( \
     (__v4df)(__m256d)(X), (__v4df)(__m256d)(Y), \
     (__v4di)(__m256i)(Z), (int)(C), (__mmask8)(-1)))

#define _mm256_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m256d) __builtin_ia32_fixupimmpd256_mask ( \
     (__v4df)(__m256d)(X), (__v4df)(__m256d)(Y), \
     (__v4di)(__m256i)(Z), (int)(C), (__mmask8)(U)))

#define _mm256_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m256d) __builtin_ia32_fixupimmpd256_maskz ( \
     (__v4df)(__m256d)(X), (__v4df)(__m256d)(Y), \
     (__v4di)(__m256i)(Z), (int)(C), (__mmask8)(U)))
12677	
/* 256-bit single-precision fixupimm wrappers.  X and Y are cast to
   __v8sf, Z to __v8si and C to int.  The plain and _mask forms call
   __builtin_ia32_fixupimmps256_mask (with an all-ones mask and with U
   respectively); the _maskz form calls
   __builtin_ia32_fixupimmps256_maskz with U.  */
#define _mm256_fixupimm_ps(X, Y, Z, C) \
  ((__m256) __builtin_ia32_fixupimmps256_mask ( \
     (__v8sf)(__m256)(X), (__v8sf)(__m256)(Y), \
     (__v8si)(__m256i)(Z), (int)(C), (__mmask8)(-1)))

#define _mm256_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m256) __builtin_ia32_fixupimmps256_mask ( \
     (__v8sf)(__m256)(X), (__v8sf)(__m256)(Y), \
     (__v8si)(__m256i)(Z), (int)(C), (__mmask8)(U)))

#define _mm256_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m256) __builtin_ia32_fixupimmps256_maskz ( \
     (__v8sf)(__m256)(X), (__v8sf)(__m256)(Y), \
     (__v8si)(__m256i)(Z), (int)(C), (__mmask8)(U)))
12696	
/* 128-bit double-precision fixupimm wrappers.  X and Y are cast to
   __v2df, Z to __v2di and C to int.  The plain and _mask forms call
   __builtin_ia32_fixupimmpd128_mask (with an all-ones mask and with U
   respectively); the _maskz form calls
   __builtin_ia32_fixupimmpd128_maskz with U.  */
#define _mm_fixupimm_pd(X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_mask ( \
     (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), (int)(C), (__mmask8)(-1)))

#define _mm_mask_fixupimm_pd(X, U, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_mask ( \
     (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), (int)(C), (__mmask8)(U)))

#define _mm_maskz_fixupimm_pd(U, X, Y, Z, C) \
  ((__m128d) __builtin_ia32_fixupimmpd128_maskz ( \
     (__v2df)(__m128d)(X), (__v2df)(__m128d)(Y), \
     (__v2di)(__m128i)(Z), (int)(C), (__mmask8)(U)))
12715	
/* 128-bit single-precision fixupimm wrappers.  X and Y are cast to
   __v4sf, Z to __v4si and C to int.  The plain and _mask forms call
   __builtin_ia32_fixupimmps128_mask (with an all-ones mask and with U
   respectively); the _maskz form calls
   __builtin_ia32_fixupimmps128_maskz with U.  */
#define _mm_fixupimm_ps(X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmps128_mask ( \
     (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), (int)(C), (__mmask8)(-1)))

#define _mm_mask_fixupimm_ps(X, U, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmps128_mask ( \
     (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), (int)(C), (__mmask8)(U)))

#define _mm_maskz_fixupimm_ps(U, X, Y, Z, C) \
  ((__m128) __builtin_ia32_fixupimmps128_maskz ( \
     (__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
     (__v4si)(__m128i)(Z), (int)(C), (__mmask8)(U)))
12733	
/* Masked srli_epi32 wrappers around __builtin_ia32_psrldi256_mask
   (256-bit) and __builtin_ia32_psrldi128_mask (128-bit).  A is the
   vector operand and B is passed as an int.  The _mask forms pass W
   and U; the _maskz forms pass a zero vector and U.  */
#define _mm256_mask_srli_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ( \
     (__v8si)(__m256i)(A), (int)(B), \
     (__v8si)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_srli_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psrldi256_mask ( \
     (__v8si)(__m256i)(A), (int)(B), \
     (__v8si)_mm256_setzero_si256 (), (__mmask8)(U)))

#define _mm_mask_srli_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrldi128_mask ( \
     (__v4si)(__m128i)(A), (int)(B), \
     (__v4si)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_srli_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psrldi128_mask ( \
     (__v4si)(__m128i)(A), (int)(B), \
     (__v4si)_mm_setzero_si128 (), (__mmask8)(U)))
12749	
/* Masked srli_epi64 wrappers around __builtin_ia32_psrlqi256_mask
   (256-bit) and __builtin_ia32_psrlqi128_mask (128-bit).  A is the
   vector operand and B is passed as an int.  The _mask forms pass W
   and U; the _maskz forms pass a zero vector and U.  */
#define _mm256_mask_srli_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ( \
     (__v4di)(__m256i)(A), (int)(B), \
     (__v4di)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_srli_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psrlqi256_mask ( \
     (__v4di)(__m256i)(A), (int)(B), \
     (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))

#define _mm_mask_srli_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ( \
     (__v2di)(__m128i)(A), (int)(B), \
     (__v2di)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_srli_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psrlqi128_mask ( \
     (__v2di)(__m128i)(A), (int)(B), \
     (__v2di)_mm_setzero_si128 (), (__mmask8)(U)))
12765	
/* 256-bit masked slli wrappers: the epi32 forms call
   __builtin_ia32_pslldi256_mask and the epi64 forms call
   __builtin_ia32_psllqi256_mask.  X is the vector operand and C is
   passed as an int.  The _mask forms pass W and U; the _maskz forms
   pass a zero vector and U.  */
#define _mm256_mask_slli_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_pslldi256_mask ( \
     (__v8si)(__m256i)(X), (int)(C), \
     (__v8si)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_slli_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_pslldi256_mask ( \
     (__v8si)(__m256i)(X), (int)(C), \
     (__v8si)(__m256i)_mm256_setzero_si256 (), (__mmask8)(U)))

#define _mm256_mask_slli_epi64(W, U, X, C) \
  ((__m256i) __builtin_ia32_psllqi256_mask ( \
     (__v4di)(__m256i)(X), (int)(C), \
     (__v4di)(__m256i)(W), (__mmask8)(U)))

#define _mm256_maskz_slli_epi64(U, X, C) \
  ((__m256i) __builtin_ia32_psllqi256_mask ( \
     (__v4di)(__m256i)(X), (int)(C), \
     (__v4di)(__m256i)_mm256_setzero_si256 (), (__mmask8)(U)))
12785	
/* 128-bit masked slli wrappers: the epi32 forms call
   __builtin_ia32_pslldi128_mask and the epi64 forms call
   __builtin_ia32_psllqi128_mask.  X is the vector operand and C is
   passed as an int.  The _mask forms pass W and U; the _maskz forms
   pass a zero vector and U.  */
#define _mm_mask_slli_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_pslldi128_mask ( \
     (__v4si)(__m128i)(X), (int)(C), \
     (__v4si)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_slli_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_pslldi128_mask ( \
     (__v4si)(__m128i)(X), (int)(C), \
     (__v4si)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))

#define _mm_mask_slli_epi64(W, U, X, C) \
  ((__m128i) __builtin_ia32_psllqi128_mask ( \
     (__v2di)(__m128i)(X), (int)(C), \
     (__v2di)(__m128i)(W), (__mmask8)(U)))

#define _mm_maskz_slli_epi64(U, X, C) \
  ((__m128i) __builtin_ia32_psllqi128_mask ( \
     (__v2di)(__m128i)(X), (int)(C), \
     (__v2di)(__m128i)_mm_setzero_si128 (), (__mmask8)(U)))
12805	
/* 256-bit ternarylogic wrappers.  A, B and C are the three vector
   operands and I is passed as an int.  The epi64 forms use the
   __builtin_ia32_pternlogq256_* builtins with __v4di operands, the
   epi32 forms use __builtin_ia32_pternlogd256_* with __v8si operands.
   The plain form passes an all-ones mask to the _mask builtin, the
   _mask form passes U to it, and the _maskz form passes U to the
   _maskz builtin.  */
#define _mm256_ternarylogic_epi64(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ( \
     (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), \
     (__v4di)(__m256i)(C), (int)(I), (__mmask8)-1))

#define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_mask ( \
     (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), \
     (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))

#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogq256_maskz ( \
     (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), \
     (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U)))

#define _mm256_ternarylogic_epi32(A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ( \
     (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), \
     (__v8si)(__m256i)(C), (int)(I), (__mmask8)-1))

#define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_mask ( \
     (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), \
     (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))

#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m256i) __builtin_ia32_pternlogd256_maskz ( \
     (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), \
     (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U)))
12829	
/* 128-bit ternarylogic wrappers.  A, B and C are the three vector
   operands and I is passed as an int.  The epi64 forms use the
   __builtin_ia32_pternlogq128_* builtins with __v2di operands, the
   epi32 forms use __builtin_ia32_pternlogd128_* with __v4si operands.
   The plain form passes an all-ones mask to the _mask builtin, the
   _mask form passes U to it, and the _maskz form passes U to the
   _maskz builtin.  */
#define _mm_ternarylogic_epi64(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ( \
     (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), \
     (__v2di)(__m128i)(C), (int)(I), (__mmask8)-1))

#define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_mask ( \
     (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), \
     (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))

#define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogq128_maskz ( \
     (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), \
     (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U)))

#define _mm_ternarylogic_epi32(A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ( \
     (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), \
     (__v4si)(__m128i)(C), (int)(I), (__mmask8)-1))

#define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_mask ( \
     (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), \
     (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))

#define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m128i) __builtin_ia32_pternlogd128_maskz ( \
     (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), \
     (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U)))
12853	
/* 256-bit roundscale wrappers: the ps forms call
   __builtin_ia32_rndscaleps_256_mask and the pd forms call
   __builtin_ia32_rndscalepd_256_mask.  A is the vector operand and B
   is passed as an int.  The plain form passes a zero vector and an
   all-ones mask, the _mask form passes W and U, and the _maskz form
   passes a zero vector and U.  */
#define _mm256_roundscale_ps(A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
     (__v8sf)(__m256)(A), (int)(B), \
     (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)-1))

#define _mm256_mask_roundscale_ps(W, U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
     (__v8sf)(__m256)(A), (int)(B), \
     (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_roundscale_ps(U, A, B) \
  ((__m256) __builtin_ia32_rndscaleps_256_mask ( \
     (__v8sf)(__m256)(A), (int)(B), \
     (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)(U)))

#define _mm256_roundscale_pd(A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
     (__v4df)(__m256d)(A), (int)(B), \
     (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)-1))

#define _mm256_mask_roundscale_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
     (__v4df)(__m256d)(A), (int)(B), \
     (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm256_maskz_roundscale_pd(U, A, B) \
  ((__m256d) __builtin_ia32_rndscalepd_256_mask ( \
     (__v4df)(__m256d)(A), (int)(B), \
     (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U)))
12877	
/* 128-bit roundscale wrappers: the ps forms call
   __builtin_ia32_rndscaleps_128_mask and the pd forms call
   __builtin_ia32_rndscalepd_128_mask.  A is the vector operand and B
   is passed as an int.  The plain form passes a zero vector and an
   all-ones mask, the _mask form passes W and U, and the _maskz form
   passes a zero vector and U.  */
#define _mm_roundscale_ps(A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
     (__v4sf)(__m128)(A), (int)(B), \
     (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)-1))

#define _mm_mask_roundscale_ps(W, U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
     (__v4sf)(__m128)(A), (int)(B), \
     (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_roundscale_ps(U, A, B) \
  ((__m128) __builtin_ia32_rndscaleps_128_mask ( \
     (__v4sf)(__m128)(A), (int)(B), \
     (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)(U)))

#define _mm_roundscale_pd(A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
     (__v2df)(__m128d)(A), (int)(B), \
     (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)-1))

#define _mm_mask_roundscale_pd(W, U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
     (__v2df)(__m128d)(A), (int)(B), \
     (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_roundscale_pd(U, A, B) \
  ((__m128d) __builtin_ia32_rndscalepd_128_mask ( \
     (__v2df)(__m128d)(A), (int)(B), \
     (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)(U)))
12901	
/* Single-precision getmant wrappers around
   __builtin_ia32_getmantps256_mask (256-bit) and
   __builtin_ia32_getmantps128_mask (128-bit).  X is the vector
   operand; B and C are combined into one int operand as
   ((C) << 2) | (B).  The plain form passes a zero vector and an
   all-ones mask, the _mask form passes W and U, and the _maskz form
   passes a zero vector and U.  */
#define _mm256_getmant_ps(X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
     (__v8sf)(__m256) (X), (int)(((C)<<2) | (B)), \
     (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)-1))

#define _mm256_mask_getmant_ps(W, U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
     (__v8sf)(__m256) (X), (int)(((C)<<2) | (B)), \
     (__v8sf)(__m256)(W), (__mmask8)(U)))

#define _mm256_maskz_getmant_ps(U, X, B, C) \
  ((__m256) __builtin_ia32_getmantps256_mask ( \
     (__v8sf)(__m256) (X), (int)(((C)<<2) | (B)), \
     (__v8sf)(__m256)_mm256_setzero_ps (), (__mmask8)(U)))

#define _mm_getmant_ps(X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
     (__v4sf)(__m128) (X), (int)(((C)<<2) | (B)), \
     (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)-1))

#define _mm_mask_getmant_ps(W, U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
     (__v4sf)(__m128) (X), (int)(((C)<<2) | (B)), \
     (__v4sf)(__m128)(W), (__mmask8)(U)))

#define _mm_maskz_getmant_ps(U, X, B, C) \
  ((__m128) __builtin_ia32_getmantps128_mask ( \
     (__v4sf)(__m128) (X), (int)(((C)<<2) | (B)), \
     (__v4sf)(__m128)_mm_setzero_ps (), (__mmask8)(U)))
12937	
/* Double-precision getmant wrappers around
   __builtin_ia32_getmantpd256_mask (256-bit) and
   __builtin_ia32_getmantpd128_mask (128-bit).  X is the vector
   operand; B and C are combined into one int operand as
   ((C) << 2) | (B).  The plain form passes a zero vector and an
   all-ones mask, the _mask form passes W and U, and the _maskz form
   passes a zero vector and U.  */
#define _mm256_getmant_pd(X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
     (__v4df)(__m256d) (X), (int)(((C)<<2) | (B)), \
     (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)-1))

#define _mm256_mask_getmant_pd(W, U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
     (__v4df)(__m256d) (X), (int)(((C)<<2) | (B)), \
     (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm256_maskz_getmant_pd(U, X, B, C) \
  ((__m256d) __builtin_ia32_getmantpd256_mask ( \
     (__v4df)(__m256d) (X), (int)(((C)<<2) | (B)), \
     (__v4df)(__m256d)_mm256_setzero_pd (), (__mmask8)(U)))

#define _mm_getmant_pd(X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
     (__v2df)(__m128d) (X), (int)(((C)<<2) | (B)), \
     (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)-1))

#define _mm_mask_getmant_pd(W, U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
     (__v2df)(__m128d) (X), (int)(((C)<<2) | (B)), \
     (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_getmant_pd(U, X, B, C) \
  ((__m128d) __builtin_ia32_getmantpd128_mask ( \
     (__v2df)(__m128d) (X), (int)(((C)<<2) | (B)), \
     (__v2df)(__m128d)_mm_setzero_pd (), (__mmask8)(U)))
12973	
/* Masked gather wrappers for float/double elements.  Each macro
   forwards V1OLD, ADDR, INDEX, MASK and SCALE, in that order, to the
   matching __builtin_ia32_gather3* builtin after casting them to the
   operand types shown below (ADDR to void const *, MASK to __mmask8,
   SCALE to int).

   Every macro argument is parenthesized before it is cast, so an
   expression argument such as `base + 4' or `m >> 8' is converted as a
   whole instead of having the cast bind to its first operand only.
   The whole expansion is parenthesized as well, like the other macros
   in this file.  */
#define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256) __builtin_ia32_gather3siv8sf ((__v8sf)(__m256)(V1OLD), \
                                          (void const *)(ADDR), \
                                          (__v8si)(__m256i)(INDEX), \
                                          (__mmask8)(MASK), (int)(SCALE)))

#define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3siv4sf ((__v4sf)(__m128)(V1OLD), \
                                          (void const *)(ADDR), \
                                          (__v4si)(__m128i)(INDEX), \
                                          (__mmask8)(MASK), (int)(SCALE)))

#define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256d) __builtin_ia32_gather3siv4df ((__v4df)(__m256d)(V1OLD), \
                                           (void const *)(ADDR), \
                                           (__v4si)(__m128i)(INDEX), \
                                           (__mmask8)(MASK), (int)(SCALE)))

#define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128d) __builtin_ia32_gather3siv2df ((__v2df)(__m128d)(V1OLD), \
                                           (void const *)(ADDR), \
                                           (__v4si)(__m128i)(INDEX), \
                                           (__mmask8)(MASK), (int)(SCALE)))

/* The 256-bit i64 form takes and returns a 128-bit float vector.  */
#define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128)(V1OLD), \
                                          (void const *)(ADDR), \
                                          (__v4di)(__m256i)(INDEX), \
                                          (__mmask8)(MASK), (int)(SCALE)))

#define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128)(V1OLD), \
                                          (void const *)(ADDR), \
                                          (__v2di)(__m128i)(INDEX), \
                                          (__mmask8)(MASK), (int)(SCALE)))

#define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d)(V1OLD), \
                                           (void const *)(ADDR), \
                                           (__v4di)(__m256i)(INDEX), \
                                           (__mmask8)(MASK), (int)(SCALE)))

#define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d)(V1OLD), \
                                           (void const *)(ADDR), \
                                           (__v2di)(__m128i)(INDEX), \
                                           (__mmask8)(MASK), (int)(SCALE)))
13021	
/* Masked gather wrappers for 32-bit/64-bit integer elements.  Each
   macro forwards V1OLD, ADDR, INDEX, MASK and SCALE, in that order, to
   the matching __builtin_ia32_gather3* builtin after casting them to
   the operand types shown below (ADDR to void const *, MASK to
   __mmask8, SCALE to int).

   Every macro argument is parenthesized before it is cast, so an
   expression argument such as `base + 4' or `m >> 8' is converted as a
   whole instead of having the cast bind to its first operand only.
   The whole expansion is parenthesized as well, like the other macros
   in this file.  */
#define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i)(V1OLD), \
                                           (void const *)(ADDR), \
                                           (__v8si)(__m256i)(INDEX), \
                                           (__mmask8)(MASK), (int)(SCALE)))

#define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i)(V1OLD), \
                                           (void const *)(ADDR), \
                                           (__v4si)(__m128i)(INDEX), \
                                           (__mmask8)(MASK), (int)(SCALE)))

#define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i)(V1OLD), \
                                           (void const *)(ADDR), \
                                           (__v4si)(__m128i)(INDEX), \
                                           (__mmask8)(MASK), (int)(SCALE)))

#define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i)(V1OLD), \
                                           (void const *)(ADDR), \
                                           (__v4si)(__m128i)(INDEX), \
                                           (__mmask8)(MASK), (int)(SCALE)))

/* The 256-bit i64 form takes and returns a 128-bit integer vector.  */
#define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i)(V1OLD), \
                                           (void const *)(ADDR), \
                                           (__v4di)(__m256i)(INDEX), \
                                           (__mmask8)(MASK), (int)(SCALE)))

#define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i)(V1OLD), \
                                           (void const *)(ADDR), \
                                           (__v2di)(__m128i)(INDEX), \
                                           (__mmask8)(MASK), (int)(SCALE)))

#define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i)(V1OLD), \
                                           (void const *)(ADDR), \
                                           (__v4di)(__m256i)(INDEX), \
                                           (__mmask8)(MASK), (int)(SCALE)))

#define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
  ((__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i)(V1OLD), \
                                           (void const *)(ADDR), \
                                           (__v2di)(__m128i)(INDEX), \
                                           (__mmask8)(MASK), (int)(SCALE)))
13069	
/* Scatter wrappers for float/double elements with 32-bit indices.
   Each macro forwards ADDR (as void *), a mask, INDEX, V1 and SCALE
   (as int) to the matching __builtin_ia32_scattersiv* builtin.  The
   unmasked forms pass the constant mask 0xFF; the _mask forms pass
   MASK cast to __mmask8.

   Every macro argument is parenthesized before it is cast, so an
   expression argument such as `base + 4' or `m >> 8' is converted as a
   whole instead of having the cast bind to its first operand only.  */
#define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v8si)(__m256i)(INDEX), \
                                (__v8sf)(__m256)(V1), (int)(SCALE))

#define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8sf ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v8si)(__m256i)(INDEX), \
                                (__v8sf)(__m256)(V1), (int)(SCALE))

#define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4sf ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4sf ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4df ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4df)(__m256d)(V1), (int)(SCALE))

#define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4df ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4df)(__m256d)(V1), (int)(SCALE))

#define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v4si)(__m128i)(INDEX), \
                                (__v2df)(__m128d)(V1), (int)(SCALE))

#define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2df ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v4si)(__m128i)(INDEX), \
                                (__v2df)(__m128d)(V1), (int)(SCALE))
13109	
/* Scatter wrappers for float/double elements with 64-bit indices.
   Each macro forwards ADDR (as void *), a mask, INDEX, V1 and SCALE
   (as int) to the matching __builtin_ia32_scatterdiv* builtin.  The
   unmasked forms pass the constant mask 0xFF; the _mask forms pass
   MASK cast to __mmask8.  Note that the 256-bit ps forms take V1 as a
   128-bit float vector.

   Every macro argument is parenthesized before it is cast, so an
   expression argument such as `base + 4' or `m >> 8' is converted as a
   whole instead of having the cast bind to its first operand only.  */
#define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8sf ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v2di)(__m128i)(INDEX), \
                                (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4sf ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v2di)(__m128i)(INDEX), \
                                (__v4sf)(__m128)(V1), (int)(SCALE))

#define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4df)(__m256d)(V1), (int)(SCALE))

#define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4df ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v4di)(__m256i)(INDEX), \
                                (__v4df)(__m256d)(V1), (int)(SCALE))

#define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v2di)(__m128i)(INDEX), \
                                (__v2df)(__m128d)(V1), (int)(SCALE))

#define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2df ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v2di)(__m128i)(INDEX), \
                                (__v2df)(__m128d)(V1), (int)(SCALE))
13149	
/* Scatter wrappers for 32-bit/64-bit integer elements with 32-bit
   indices.  Each macro forwards ADDR (as void *), a mask, INDEX, V1
   and SCALE (as int) to the matching __builtin_ia32_scattersiv*
   builtin.  The unmasked forms pass the constant mask 0xFF; the _mask
   forms pass MASK cast to __mmask8.

   Every macro argument is parenthesized before it is cast, so an
   expression argument such as `base + 4' or `m >> 8' is converted as a
   whole instead of having the cast bind to its first operand only.  */
#define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8si ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v8si)(__m256i)(INDEX), \
                                (__v8si)(__m256i)(V1), (int)(SCALE))

#define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv8si ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v8si)(__m256i)(INDEX), \
                                (__v8si)(__m256i)(V1), (int)(SCALE))

#define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4si ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4si ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4si)(__m128i)(V1), (int)(SCALE))

#define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4di)(__m256i)(V1), (int)(SCALE))

#define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv4di ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v4si)(__m128i)(INDEX), \
                                (__v4di)(__m256i)(V1), (int)(SCALE))

#define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ((void *)(ADDR), (__mmask8)0xFF, \
                                (__v4si)(__m128i)(INDEX), \
                                (__v2di)(__m128i)(V1), (int)(SCALE))

#define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scattersiv2di ((void *)(ADDR), (__mmask8)(MASK), \
                                (__v4si)(__m128i)(INDEX), \
                                (__v2di)(__m128i)(V1), (int)(SCALE))
13189	
/* Macro forms of the 64-bit-index scatters of 32-bit integers.  Each
   forwards (address, mask, index vector, value vector, scale) to
   __builtin_ia32_scatterdiv8si or __builtin_ia32_scatterdiv4si; in both
   widths the value operand is cast to __v4si.  The unmasked forms pass the
   constant mask 0xFF.  Every macro argument is parenthesized before it is
   cast, so that a compound argument (for example ADDR given as p + 1) is
   converted as a whole rather than having the cast bind to its first
   operand only.  */
#define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4di) (__m256i) (INDEX), \
                                (__v4si) (__m128i) (V1), (int) (SCALE))

#define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv8si ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v4di) (__m256i) (INDEX), \
                                (__v4si) (__m128i) (V1), (int) (SCALE))

#define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v2di) (__m128i) (INDEX), \
                                (__v4si) (__m128i) (V1), (int) (SCALE))

#define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4si ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v2di) (__m128i) (INDEX), \
                                (__v4si) (__m128i) (V1), (int) (SCALE))
13209	
/* Macro forms of the 64-bit-index scatters of 64-bit integers.  Each
   forwards (address, mask, index vector, value vector, scale) to
   __builtin_ia32_scatterdiv4di or __builtin_ia32_scatterdiv2di; the
   unmasked forms pass the constant mask 0xFF.  Every macro argument is
   parenthesized before it is cast, so that a compound argument (for
   example ADDR given as p + 1) is converted as a whole rather than having
   the cast bind to its first operand only.  */
#define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v4di) (__m256i) (INDEX), \
                                (__v4di) (__m256i) (V1), (int) (SCALE))

#define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv4di ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v4di) (__m256i) (INDEX), \
                                (__v4di) (__m256i) (V1), (int) (SCALE))

#define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ((void *) (ADDR), (__mmask8) 0xFF, \
                                (__v2di) (__m128i) (INDEX), \
                                (__v2di) (__m128i) (V1), (int) (SCALE))

#define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
  __builtin_ia32_scatterdiv2di ((void *) (ADDR), (__mmask8) (MASK), \
                                (__v2di) (__m128i) (INDEX), \
                                (__v2di) (__m128i) (V1), (int) (SCALE))
13229	
/* Masked 32-bit shuffle macros.  Each forwards (source X, immediate C,
   merge vector, mask U) to __builtin_ia32_pshufd256_mask or
   __builtin_ia32_pshufd128_mask.  The _mask_ forms pass W as the merge
   vector; the _maskz_ forms pass an all-zero vector.  */
#define _mm256_mask_shuffle_epi32(W, U, X, C) \
  ((__m256i) __builtin_ia32_pshufd256_mask ( \
     (__v8si) (__m256i) (X), \
     (int) (C), \
     (__v8si) (__m256i) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_shuffle_epi32(U, X, C) \
  ((__m256i) __builtin_ia32_pshufd256_mask ( \
     (__v8si) (__m256i) (X), \
     (int) (C), \
     (__v8si) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) (U)))

#define _mm_mask_shuffle_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_pshufd128_mask ( \
     (__v4si) (__m128i) (X), \
     (int) (C), \
     (__v4si) (__m128i) (W), \
     (__mmask8) (U)))

#define _mm_maskz_shuffle_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_pshufd128_mask ( \
     (__v4si) (__m128i) (X), \
     (int) (C), \
     (__v4si) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (U)))
13250	
/* 256-bit macros built on __builtin_ia32_prolq256_mask, called as
   (source A, immediate B, merge vector, mask).  The plain form passes a
   zero vector and an all-ones mask, the _mask_ form passes W and U, and
   the _maskz_ form passes a zero vector and U.  */
#define _mm256_rol_epi64(A, B) \
  ((__m256i) __builtin_ia32_prolq256_mask ( \
     (__v4di) (__m256i) (A), \
     (int) (B), \
     (__v4di) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) -1))

#define _mm256_mask_rol_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_prolq256_mask ( \
     (__v4di) (__m256i) (A), \
     (int) (B), \
     (__v4di) (__m256i) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_rol_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_prolq256_mask ( \
     (__v4di) (__m256i) (A), \
     (int) (B), \
     (__v4di) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) (U)))
13265	
/* 128-bit macros built on __builtin_ia32_prolq128_mask, called as
   (source A, immediate B, merge vector, mask).  The plain form passes a
   zero vector and an all-ones mask, the _mask_ form passes W and U, and
   the _maskz_ form passes a zero vector and U.  */
#define _mm_rol_epi64(A, B) \
  ((__m128i) __builtin_ia32_prolq128_mask ( \
     (__v2di) (__m128i) (A), \
     (int) (B), \
     (__v2di) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) -1))

#define _mm_mask_rol_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_prolq128_mask ( \
     (__v2di) (__m128i) (A), \
     (int) (B), \
     (__v2di) (__m128i) (W), \
     (__mmask8) (U)))

#define _mm_maskz_rol_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_prolq128_mask ( \
     (__v2di) (__m128i) (A), \
     (int) (B), \
     (__v2di) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (U)))
13280	
/* 256-bit macros built on __builtin_ia32_prorq256_mask, called as
   (source A, immediate B, merge vector, mask).  The plain form passes a
   zero vector and an all-ones mask, the _mask_ form passes W and U, and
   the _maskz_ form passes a zero vector and U.  */
#define _mm256_ror_epi64(A, B) \
  ((__m256i) __builtin_ia32_prorq256_mask ( \
     (__v4di) (__m256i) (A), \
     (int) (B), \
     (__v4di) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) -1))

#define _mm256_mask_ror_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_prorq256_mask ( \
     (__v4di) (__m256i) (A), \
     (int) (B), \
     (__v4di) (__m256i) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_ror_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_prorq256_mask ( \
     (__v4di) (__m256i) (A), \
     (int) (B), \
     (__v4di) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) (U)))
13295	
/* 128-bit macros built on __builtin_ia32_prorq128_mask, called as
   (source A, immediate B, merge vector, mask).  The plain form passes a
   zero vector and an all-ones mask, the _mask_ form passes W and U, and
   the _maskz_ form passes a zero vector and U.  */
#define _mm_ror_epi64(A, B) \
  ((__m128i) __builtin_ia32_prorq128_mask ( \
     (__v2di) (__m128i) (A), \
     (int) (B), \
     (__v2di) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) -1))

#define _mm_mask_ror_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_prorq128_mask ( \
     (__v2di) (__m128i) (A), \
     (int) (B), \
     (__v2di) (__m128i) (W), \
     (__mmask8) (U)))

#define _mm_maskz_ror_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_prorq128_mask ( \
     (__v2di) (__m128i) (A), \
     (int) (B), \
     (__v2di) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (U)))
13310	
/* 256-bit macros built on __builtin_ia32_prold256_mask, called as
   (source A, immediate B, merge vector, mask).  The plain form passes a
   zero vector and an all-ones mask, the _mask_ form passes W and U, and
   the _maskz_ form passes a zero vector and U.  */
#define _mm256_rol_epi32(A, B) \
  ((__m256i) __builtin_ia32_prold256_mask ( \
     (__v8si) (__m256i) (A), \
     (int) (B), \
     (__v8si) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) -1))

#define _mm256_mask_rol_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_prold256_mask ( \
     (__v8si) (__m256i) (A), \
     (int) (B), \
     (__v8si) (__m256i) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_rol_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_prold256_mask ( \
     (__v8si) (__m256i) (A), \
     (int) (B), \
     (__v8si) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) (U)))
13325	
/* 128-bit macros built on __builtin_ia32_prold128_mask, called as
   (source A, immediate B, merge vector, mask).  The plain form passes a
   zero vector and an all-ones mask, the _mask_ form passes W and U, and
   the _maskz_ form passes a zero vector and U.  */
#define _mm_rol_epi32(A, B) \
  ((__m128i) __builtin_ia32_prold128_mask ( \
     (__v4si) (__m128i) (A), \
     (int) (B), \
     (__v4si) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) -1))

#define _mm_mask_rol_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_prold128_mask ( \
     (__v4si) (__m128i) (A), \
     (int) (B), \
     (__v4si) (__m128i) (W), \
     (__mmask8) (U)))

#define _mm_maskz_rol_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_prold128_mask ( \
     (__v4si) (__m128i) (A), \
     (int) (B), \
     (__v4si) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (U)))
13340	
/* 256-bit macros built on __builtin_ia32_prord256_mask, called as
   (source A, immediate B, merge vector, mask).  The plain form passes a
   zero vector and an all-ones mask, the _mask_ form passes W and U, and
   the _maskz_ form passes a zero vector and U.  */
#define _mm256_ror_epi32(A, B) \
  ((__m256i) __builtin_ia32_prord256_mask ( \
     (__v8si) (__m256i) (A), \
     (int) (B), \
     (__v8si) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) -1))

#define _mm256_mask_ror_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_prord256_mask ( \
     (__v8si) (__m256i) (A), \
     (int) (B), \
     (__v8si) (__m256i) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_ror_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_prord256_mask ( \
     (__v8si) (__m256i) (A), \
     (int) (B), \
     (__v8si) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) (U)))
13356	
/* 128-bit macros built on __builtin_ia32_prord128_mask, called as
   (source A, immediate B, merge vector, mask).  The plain form passes a
   zero vector and an all-ones mask, the _mask_ form passes W and U, and
   the _maskz_ form passes a zero vector and U.  */
#define _mm_ror_epi32(A, B) \
  ((__m128i) __builtin_ia32_prord128_mask ( \
     (__v4si) (__m128i) (A), \
     (int) (B), \
     (__v4si) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) -1))

#define _mm_mask_ror_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_prord128_mask ( \
     (__v4si) (__m128i) (A), \
     (int) (B), \
     (__v4si) (__m128i) (W), \
     (__mmask8) (U)))

#define _mm_maskz_ror_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_prord128_mask ( \
     (__v4si) (__m128i) (A), \
     (int) (B), \
     (__v4si) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (U)))
13371	
/* Unmasked 256-bit alignr on 32-bit elements, forwarded to
   __builtin_ia32_alignd256_mask as (X, Y, immediate C, merge vector,
   mask) with an all-ones mask.  A zero vector is supplied as the merge
   operand instead of a second copy of X, so that X is expanded, and
   therefore evaluated, exactly once even when the argument has side
   effects.  With every mask bit set the merge operand is not expected to
   contribute to the result.  */
#define _mm256_alignr_epi32(X, Y, C) \
  ((__m256i) __builtin_ia32_alignd256_mask ( \
     (__v8si) (__m256i) (X), \
     (__v8si) (__m256i) (Y), \
     (int) (C), \
     (__v8si) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) -1))
13375	
/* Masked 256-bit alignr on 32-bit elements.  Both macros forward
   (X, Y, immediate C, merge vector, mask U) to
   __builtin_ia32_alignd256_mask; the _mask_ form merges from W and the
   _maskz_ form from an all-zero vector.  */
#define _mm256_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_alignd256_mask ( \
     (__v8si) (__m256i) (X), \
     (__v8si) (__m256i) (Y), \
     (int) (C), \
     (__v8si) (__m256i) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_alignr_epi32(U, X, Y, C) \
  ((__m256i) __builtin_ia32_alignd256_mask ( \
     (__v8si) (__m256i) (X), \
     (__v8si) (__m256i) (Y), \
     (int) (C), \
     (__v8si) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) (U)))
13384	
/* Unmasked 256-bit alignr on 64-bit elements, forwarded to
   __builtin_ia32_alignq256_mask as (X, Y, immediate C, merge vector,
   mask) with an all-ones mask.  A zero vector is supplied as the merge
   operand instead of a second copy of X, so that X is expanded, and
   therefore evaluated, exactly once even when the argument has side
   effects.  With every mask bit set the merge operand is not expected to
   contribute to the result.  */
#define _mm256_alignr_epi64(X, Y, C) \
  ((__m256i) __builtin_ia32_alignq256_mask ( \
     (__v4di) (__m256i) (X), \
     (__v4di) (__m256i) (Y), \
     (int) (C), \
     (__v4di) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) -1))
13388	
/* Masked 256-bit alignr on 64-bit elements.  Both macros forward
   (X, Y, immediate C, merge vector, mask U) to
   __builtin_ia32_alignq256_mask; the _mask_ form merges from W and the
   _maskz_ form from an all-zero vector.  */
#define _mm256_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m256i) __builtin_ia32_alignq256_mask ( \
     (__v4di) (__m256i) (X), \
     (__v4di) (__m256i) (Y), \
     (int) (C), \
     (__v4di) (__m256i) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_alignr_epi64(U, X, Y, C) \
  ((__m256i) __builtin_ia32_alignq256_mask ( \
     (__v4di) (__m256i) (X), \
     (__v4di) (__m256i) (Y), \
     (int) (C), \
     (__v4di) (__m256i) _mm256_setzero_si256 (), \
     (__mmask8) (U)))
13397	
/* Unmasked 128-bit alignr on 32-bit elements, forwarded to
   __builtin_ia32_alignd128_mask as (X, Y, immediate C, merge vector,
   mask) with an all-ones mask.  A zero vector is supplied as the merge
   operand instead of a second copy of X, so that X is expanded, and
   therefore evaluated, exactly once even when the argument has side
   effects.  With every mask bit set the merge operand is not expected to
   contribute to the result.  */
#define _mm_alignr_epi32(X, Y, C) \
  ((__m128i) __builtin_ia32_alignd128_mask ( \
     (__v4si) (__m128i) (X), \
     (__v4si) (__m128i) (Y), \
     (int) (C), \
     (__v4si) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) -1))
13401	
/* Masked 128-bit alignr on 32-bit elements.  Both macros forward
   (X, Y, immediate C, merge vector, mask U) to
   __builtin_ia32_alignd128_mask; the _mask_ form merges from W and the
   _maskz_ form from an all-zero vector.  */
#define _mm_mask_alignr_epi32(W, U, X, Y, C) \
  ((__m128i) __builtin_ia32_alignd128_mask ( \
     (__v4si) (__m128i) (X), \
     (__v4si) (__m128i) (Y), \
     (int) (C), \
     (__v4si) (__m128i) (W), \
     (__mmask8) (U)))

#define _mm_maskz_alignr_epi32(U, X, Y, C) \
  ((__m128i) __builtin_ia32_alignd128_mask ( \
     (__v4si) (__m128i) (X), \
     (__v4si) (__m128i) (Y), \
     (int) (C), \
     (__v4si) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (U)))
13410	
/* Unmasked 128-bit alignr on 64-bit elements, forwarded to
   __builtin_ia32_alignq128_mask as (X, Y, immediate C, merge vector,
   mask) with an all-ones mask.  A zero vector is supplied as the merge
   operand instead of a second copy of X, so that X is expanded, and
   therefore evaluated, exactly once even when the argument has side
   effects.  With every mask bit set the merge operand is not expected to
   contribute to the result.  */
#define _mm_alignr_epi64(X, Y, C) \
  ((__m128i) __builtin_ia32_alignq128_mask ( \
     (__v2di) (__m128i) (X), \
     (__v2di) (__m128i) (Y), \
     (int) (C), \
     (__v2di) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) -1))
13414	
/* Merge-masked 128-bit alignr on 64-bit elements.  W is handed to
   __builtin_ia32_alignq128_mask as the merge vector and U as the mask,
   in the same operand positions the other _mask_alignr_ macros use.  The
   expansion must not substitute X and an all-ones mask there: doing so
   silently ignores both W and U and makes the masked form behave like
   the unmasked one.  */
#define _mm_mask_alignr_epi64(W, U, X, Y, C) \
  ((__m128i) __builtin_ia32_alignq128_mask ( \
     (__v2di) (__m128i) (X), \
     (__v2di) (__m128i) (Y), \
     (int) (C), \
     (__v2di) (__m128i) (W), \
     (__mmask8) (U)))
13418	
/* Zero-masked 128-bit alignr on 64-bit elements: forwards
   (X, Y, immediate C, all-zero merge vector, mask U) to
   __builtin_ia32_alignq128_mask.  */
#define _mm_maskz_alignr_epi64(U, X, Y, C) \
  ((__m128i) __builtin_ia32_alignq128_mask ( \
     (__v2di) (__m128i) (X), \
     (__v2di) (__m128i) (Y), \
     (int) (C), \
     (__v2di) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (U)))
13423	
/* Masked 128-bit cvtps_ph macros.  Both forward (source A, immediate I,
   merge vector, mask U) to __builtin_ia32_vcvtps2ph_mask; the _mask_ form
   merges from W and the _maskz_ form from an all-zero vector.  A is
   parenthesized before it is cast, so that a compound argument is
   converted as a whole rather than having the cast bind to its first
   operand only.  */
#define _mm_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ( \
     (__v4sf) (__m128) (A), \
     (int) (I), \
     (__v8hi) (__m128i) (W), \
     (__mmask8) (U)))

#define _mm_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph_mask ( \
     (__v4sf) (__m128) (A), \
     (int) (I), \
     (__v8hi) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (U)))
13431	
/* Masked 256-bit cvtps_ph macros.  Both forward (source A, immediate I,
   merge vector, mask U) to __builtin_ia32_vcvtps2ph256_mask and yield a
   __m128i; the _mask_ form merges from W and the _maskz_ form from an
   all-zero vector.  A is parenthesized before it is cast, so that a
   compound argument is converted as a whole rather than having the cast
   bind to its first operand only.  */
#define _mm256_mask_cvtps_ph(W, U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ( \
     (__v8sf) (__m256) (A), \
     (int) (I), \
     (__v8hi) (__m128i) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_cvtps_ph(U, A, I) \
  ((__m128i) __builtin_ia32_vcvtps2ph256_mask ( \
     (__v8sf) (__m256) (A), \
     (int) (I), \
     (__v8hi) (__m128i) _mm_setzero_si128 (), \
     (__mmask8) (U)))
13439	
/* Masked 256-bit srai_epi32 macros.  Both forward (source A, immediate B,
   merge vector, mask U) to __builtin_ia32_psradi256_mask; the _mask_ form
   merges from W and the _maskz_ form from an all-zero vector.  */
#define _mm256_mask_srai_epi32(W, U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ( \
     (__v8si) (__m256i) (A), \
     (int) (B), \
     (__v8si) (__m256i) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_srai_epi32(U, A, B) \
  ((__m256i) __builtin_ia32_psradi256_mask ( \
     (__v8si) (__m256i) (A), \
     (int) (B), \
     (__v8si) _mm256_setzero_si256 (), \
     (__mmask8) (U)))
13447	
/* Masked 128-bit srai_epi32 macros.  Both forward (source A, immediate B,
   merge vector, mask U) to __builtin_ia32_psradi128_mask; the _mask_ form
   merges from W and the _maskz_ form from an all-zero vector.  */
#define _mm_mask_srai_epi32(W, U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ( \
     (__v4si) (__m128i) (A), \
     (int) (B), \
     (__v4si) (__m128i) (W), \
     (__mmask8) (U)))

#define _mm_maskz_srai_epi32(U, A, B) \
  ((__m128i) __builtin_ia32_psradi128_mask ( \
     (__v4si) (__m128i) (A), \
     (int) (B), \
     (__v4si) _mm_setzero_si128 (), \
     (__mmask8) (U)))
13455	
/* 256-bit srai_epi64 macros built on __builtin_ia32_psraqi256_mask,
   called as (source A, immediate B, merge vector, mask).  The plain form
   passes a zero vector and an all-ones mask, the _mask_ form passes W and
   U, and the _maskz_ form passes a zero vector and U.  */
#define _mm256_srai_epi64(A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
     (__v4di) (__m256i) (A), \
     (int) (B), \
     (__v4di) _mm256_setzero_si256 (), \
     (__mmask8) -1))

#define _mm256_mask_srai_epi64(W, U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
     (__v4di) (__m256i) (A), \
     (int) (B), \
     (__v4di) (__m256i) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_srai_epi64(U, A, B) \
  ((__m256i) __builtin_ia32_psraqi256_mask ( \
     (__v4di) (__m256i) (A), \
     (int) (B), \
     (__v4di) _mm256_setzero_si256 (), \
     (__mmask8) (U)))
13467	
/* 128-bit srai_epi64 macros built on __builtin_ia32_psraqi128_mask,
   called as (source A, immediate B, merge vector, mask).  The plain form
   passes a zero vector and an all-ones mask, the _mask_ form passes W and
   U, and the _maskz_ form passes a zero vector and U.  */
#define _mm_srai_epi64(A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
     (__v2di) (__m128i) (A), \
     (int) (B), \
     (__v2di) _mm_setzero_si128 (), \
     (__mmask8) -1))

#define _mm_mask_srai_epi64(W, U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
     (__v2di) (__m128i) (A), \
     (int) (B), \
     (__v2di) (__m128i) (W), \
     (__mmask8) (U)))

#define _mm_maskz_srai_epi64(U, A, B) \
  ((__m128i) __builtin_ia32_psraqi128_mask ( \
     (__v2di) (__m128i) (A), \
     (int) (B), \
     (__v2di) _mm_setzero_si128 (), \
     (__mmask8) (U)))
13479	
/* Masked 256-bit permutex_pd macros.  Both forward (source A, immediate
   B, merge vector, mask U) to __builtin_ia32_permdf256_mask; the _mask_
   form merges from W and the _maskz_ form from an all-zero vector.  */
#define _mm256_mask_permutex_pd(W, U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
     (__v4df) (__m256d) (A), \
     (int) (B), \
     (__v4df) (__m256d) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_permutex_pd(U, A, B) \
  ((__m256d) __builtin_ia32_permdf256_mask ( \
     (__v4df) (__m256d) (A), \
     (int) (B), \
     (__v4df) (__m256d) _mm256_setzero_pd (), \
     (__mmask8) (U)))
13487	
/* Masked 256-bit permute_pd macros.  Both forward (source X, immediate C,
   merge vector, mask U) to __builtin_ia32_vpermilpd256_mask; the _mask_
   form merges from W and the _maskz_ form from an all-zero vector.  */
#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
     (__v4df) (__m256d) (X), \
     (int) (C), \
     (__v4df) (__m256d) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d) __builtin_ia32_vpermilpd256_mask ( \
     (__v4df) (__m256d) (X), \
     (int) (C), \
     (__v4df) (__m256d) _mm256_setzero_pd (), \
     (__mmask8) (U)))
13497	
/* Masked 256-bit permute_ps macros.  Both forward (source X, immediate C,
   merge vector, mask U) to __builtin_ia32_vpermilps256_mask; the _mask_
   form merges from W and the _maskz_ form from an all-zero vector.  */
#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ( \
     (__v8sf) (__m256) (X), \
     (int) (C), \
     (__v8sf) (__m256) (W), \
     (__mmask8) (U)))

#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256) __builtin_ia32_vpermilps256_mask ( \
     (__v8sf) (__m256) (X), \
     (int) (C), \
     (__v8sf) (__m256) _mm256_setzero_ps (), \
     (__mmask8) (U)))
13506	
/* Masked 128-bit permute_pd macros.  Both forward (source X, immediate C,
   merge vector, mask U) to __builtin_ia32_vpermilpd_mask; the _mask_ form
   merges from W and the _maskz_ form from an all-zero vector.  */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ( \
     (__v2df) (__m128d) (X), \
     (int) (C), \
     (__v2df) (__m128d) (W), \
     (__mmask8) (U)))

#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d) __builtin_ia32_vpermilpd_mask ( \
     (__v2df) (__m128d) (X), \
     (int) (C), \
     (__v2df) (__m128d) _mm_setzero_pd (), \
     (__mmask8) (U)))
13515	
/* Masked 128-bit permute_ps macros.  Both forward (source X, immediate C,
   merge vector, mask U) to __builtin_ia32_vpermilps_mask; the _mask_ form
   merges from W and the _maskz_ form from an all-zero vector.  */
#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ( \
     (__v4sf) (__m128) (X), \
     (int) (C), \
     (__v4sf) (__m128) (W), \
     (__mmask8) (U)))

#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128) __builtin_ia32_vpermilps_mask ( \
     (__v4sf) (__m128) (X), \
     (int) (C), \
     (__v4sf) (__m128) _mm_setzero_ps (), \
     (__mmask8) (U)))
13524	
/* 256-bit mask_blend macros.  Each forwards (__A, __W, mask __U), in that
   order, to the __builtin_ia32_blendm*_256_mask builtin for its element
   type and casts the result back to the public vector type.  */
#define _mm256_mask_blend_pd(__U, __A, __W) \
  ((__m256d) __builtin_ia32_blendmpd_256_mask ( \
     (__v4df) (__A), \
     (__v4df) (__W), \
     (__mmask8) (__U)))

#define _mm256_mask_blend_ps(__U, __A, __W) \
  ((__m256) __builtin_ia32_blendmps_256_mask ( \
     (__v8sf) (__A), \
     (__v8sf) (__W), \
     (__mmask8) (__U)))

#define _mm256_mask_blend_epi64(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmq_256_mask ( \
     (__v4di) (__A), \
     (__v4di) (__W), \
     (__mmask8) (__U)))

#define _mm256_mask_blend_epi32(__U, __A, __W) \
  ((__m256i) __builtin_ia32_blendmd_256_mask ( \
     (__v8si) (__A), \
     (__v8si) (__W), \
     (__mmask8) (__U)))
13544	
/* 128-bit mask_blend macros.  Each forwards (__A, __W, mask __U), in that
   order, to the __builtin_ia32_blendm*_128_mask builtin for its element
   type and casts the result back to the public vector type.  */
#define _mm_mask_blend_pd(__U, __A, __W) \
  ((__m128d) __builtin_ia32_blendmpd_128_mask ( \
     (__v2df) (__A), \
     (__v2df) (__W), \
     (__mmask8) (__U)))

#define _mm_mask_blend_ps(__U, __A, __W) \
  ((__m128) __builtin_ia32_blendmps_128_mask ( \
     (__v4sf) (__A), \
     (__v4sf) (__W), \
     (__mmask8) (__U)))

#define _mm_mask_blend_epi64(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmq_128_mask ( \
     (__v2di) (__A), \
     (__v2di) (__W), \
     (__mmask8) (__U)))

#define _mm_mask_blend_epi32(__U, __A, __W) \
  ((__m128i) __builtin_ia32_blendmd_128_mask ( \
     (__v4si) (__A), \
     (__v4si) (__W), \
     (__mmask8) (__U)))
13564	
/* Unmasked 256-bit integer compare-to-mask macros.  Each forwards
   (X, Y, predicate P, all-ones mask) to the cmp or ucmp builtin for its
   element width (the _epu forms use the ucmp builtins) and yields an
   __mmask8.  */
#define _mm256_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ( \
     (__v8si) (__m256i) (X), \
     (__v8si) (__m256i) (Y), \
     (int) (P), \
     (__mmask8) -1))

#define _mm256_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ( \
     (__v4di) (__m256i) (X), \
     (__v4di) (__m256i) (Y), \
     (int) (P), \
     (__mmask8) -1))

#define _mm256_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ( \
     (__v8si) (__m256i) (X), \
     (__v8si) (__m256i) (Y), \
     (int) (P), \
     (__mmask8) -1))

#define _mm256_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ( \
     (__v4di) (__m256i) (X), \
     (__v4di) (__m256i) (Y), \
     (int) (P), \
     (__mmask8) -1))
13584	
/* Unmasked 256-bit floating-point compare-to-mask macros.  Each forwards
   (X, Y, predicate P, all-ones mask) to __builtin_ia32_cmppd256_mask or
   __builtin_ia32_cmpps256_mask and yields an __mmask8.  */
#define _mm256_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ( \
     (__v4df) (__m256d) (X), \
     (__v4df) (__m256d) (Y), \
     (int) (P), \
     (__mmask8) -1))

#define _mm256_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ( \
     (__v8sf) (__m256) (X), \
     (__v8sf) (__m256) (Y), \
     (int) (P), \
     (__mmask8) -1))
13594	
/* Masked 256-bit integer compare-to-mask macros.  Each forwards
   (X, Y, predicate P, mask M) to the cmp or ucmp builtin for its element
   width (the _epu forms use the ucmp builtins) and yields an __mmask8.  */
#define _mm256_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq256_mask ( \
     (__v4di) (__m256i) (X), \
     (__v4di) (__m256i) (Y), \
     (int) (P), \
     (__mmask8) (M)))

#define _mm256_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd256_mask ( \
     (__v8si) (__m256i) (X), \
     (__v8si) (__m256i) (Y), \
     (int) (P), \
     (__mmask8) (M)))

#define _mm256_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq256_mask ( \
     (__v4di) (__m256i) (X), \
     (__v4di) (__m256i) (Y), \
     (int) (P), \
     (__mmask8) (M)))

#define _mm256_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd256_mask ( \
     (__v8si) (__m256i) (X), \
     (__v8si) (__m256i) (Y), \
     (int) (P), \
     (__mmask8) (M)))
13614	
/* Masked 256-bit floating-point compare-to-mask macros.  Each forwards
   (X, Y, predicate P, mask M) to __builtin_ia32_cmppd256_mask or
   __builtin_ia32_cmpps256_mask and yields an __mmask8.  */
#define _mm256_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd256_mask ( \
     (__v4df) (__m256d) (X), \
     (__v4df) (__m256d) (Y), \
     (int) (P), \
     (__mmask8) (M)))

#define _mm256_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps256_mask ( \
     (__v8sf) (__m256) (X), \
     (__v8sf) (__m256) (Y), \
     (int) (P), \
     (__mmask8) (M)))
13624	
/* Unmasked 128-bit integer compare-to-mask macros.  Each forwards
   (X, Y, predicate P, all-ones mask) to the cmp or ucmp builtin for its
   element width (the _epu forms use the ucmp builtins) and yields an
   __mmask8.  */
#define _mm_cmp_epi64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ( \
     (__v2di) (__m128i) (X), \
     (__v2di) (__m128i) (Y), \
     (int) (P), \
     (__mmask8) -1))

#define _mm_cmp_epi32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ( \
     (__v4si) (__m128i) (X), \
     (__v4si) (__m128i) (Y), \
     (int) (P), \
     (__mmask8) -1))

#define _mm_cmp_epu64_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ( \
     (__v2di) (__m128i) (X), \
     (__v2di) (__m128i) (Y), \
     (int) (P), \
     (__mmask8) -1))

#define _mm_cmp_epu32_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ( \
     (__v4si) (__m128i) (X), \
     (__v4si) (__m128i) (Y), \
     (int) (P), \
     (__mmask8) -1))
13644	
/* Unmasked 128-bit floating-point compare-to-mask macros.  Each forwards
   (X, Y, predicate P, all-ones mask) to __builtin_ia32_cmppd128_mask or
   __builtin_ia32_cmpps128_mask and yields an __mmask8.  */
#define _mm_cmp_pd_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (int) (P), \
     (__mmask8) -1))

#define _mm_cmp_ps_mask(X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (int) (P), \
     (__mmask8) -1))
13654	
/* Masked 128-bit integer compare-to-mask macros.  Each forwards
   (X, Y, predicate P, mask M) to the cmp or ucmp builtin for its element
   width (the _epu forms use the ucmp builtins) and yields an __mmask8.  */
#define _mm_mask_cmp_epi64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpq128_mask ( \
     (__v2di) (__m128i) (X), \
     (__v2di) (__m128i) (Y), \
     (int) (P), \
     (__mmask8) (M)))

#define _mm_mask_cmp_epi32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpd128_mask ( \
     (__v4si) (__m128i) (X), \
     (__v4si) (__m128i) (Y), \
     (int) (P), \
     (__mmask8) (M)))

#define _mm_mask_cmp_epu64_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpq128_mask ( \
     (__v2di) (__m128i) (X), \
     (__v2di) (__m128i) (Y), \
     (int) (P), \
     (__mmask8) (M)))

#define _mm_mask_cmp_epu32_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_ucmpd128_mask ( \
     (__v4si) (__m128i) (X), \
     (__v4si) (__m128i) (Y), \
     (int) (P), \
     (__mmask8) (M)))
13674	
/* Masked 128-bit floating-point compare-to-mask macros.  Each forwards
   (X, Y, predicate P, mask M) to __builtin_ia32_cmppd128_mask or
   __builtin_ia32_cmpps128_mask and yields an __mmask8.  */
#define _mm_mask_cmp_pd_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmppd128_mask ( \
     (__v2df) (__m128d) (X), \
     (__v2df) (__m128d) (Y), \
     (int) (P), \
     (__mmask8) (M)))

#define _mm_mask_cmp_ps_mask(M, X, Y, P) \
  ((__mmask8) __builtin_ia32_cmpps128_mask ( \
     (__v4sf) (__m128) (X), \
     (__v4sf) (__m128) (Y), \
     (int) (P), \
     (__mmask8) (M)))
13684	
13685	#endif
13686	
/* Alias for _mm256_permutevar8x32_ps with the two operands swapped: the
   macro's second argument B is passed first and its first argument A
   second.  */
#define _mm256_permutexvar_ps(A, B) \
  (_mm256_permutevar8x32_ps ((B), (A)))
13688	
13689	#ifdef __DISABLE_AVX512VL__
13690	#undef __DISABLE_AVX512VL__
13691	#pragma GCC pop_options
13692	#endif /* __DISABLE_AVX512VL__ */
13693	
13694	#endif /* _AVX512VLINTRIN_H_INCLUDED */
13695