瀏覽代碼

initial pub.xmmintrin module

Dave Camp 9 年之前
父節點
當前提交
2f2367364e

+ 0 - 150
intrinsics.mod/intrinsics.bmx

@@ -1,150 +0,0 @@
-'  Copyright (C) 2016 Bruce A Henderson
-'
-'  This software is provided 'as-is', without any express or implied
-'  warranty.  In no event will the authors be held liable for any damages
-'  arising from the use of this software.
-'
-'  Permission is granted to anyone to use this software for any purpose,
-'  including commercial applications, and to alter it and redistribute it
-'  freely, subject to the following restrictions:
-'
-'  1. The origin of this software must not be misrepresented; you must not
-'     claim that you wrote the original software. If you use this software
-'     in a product, an acknowledgment in the product documentation would be
-'     appreciated but is not required.
-'  2. Altered source versions must be plainly marked as such, and must not be
-'     misrepresented as being the original software.
-'  3. This notice may not be removed or altered from any source distribution.
-'
-SuperStrict
-
-Rem
-bbdoc: x64 Intrinsics
-End Rem
-Module Pub.Intrinsics
-
-ModuleInfo "Version: 1.00"
-ModuleInfo "Author: Bruce A Henderson"
-ModuleInfo "License: zlib/libpng"
-ModuleInfo "Copyright: Bruce A Henderson"
-
-?x64
-
-Extern
-
-
-	Function _mm_set_epi32:Int128(e3:Int, e2:Int, e1:Int, e0:Int) = "_mm_set_epi32"
-
-
-
-
-
-	' SSE2
-	Function _mm_add_pd:Double128(a:Double128, b:Double128) = "_mm_add_pd"
-	Function _mm_add_sd:Double128(a:Double128, b:Double128) = "_mm_add_sd"
-	Function _mm_and_pd:Double128(a:Double128, b:Double128) = "_mm_and_pd"
-	Function _mm_andnot_pd:Double128(a:Double128, b:Double128) = "_mm_andnot_pd"
-	Function _mm_castpd_ps:Float128(a:Double128) = "_mm_castpd_ps"
-	Function _mm_castpd_si128:Int128(a:Double128) = "_mm_castpd_si128"
-	Function _mm_castps_pd:Double128(a:Float128) = "_mm_castps_pd"
-	Function _mm_castsi128_pd:Double128(a:Int128) = "_mm_castsi128_pd"
-	Function _mm_cmpeq_pd:Double128(a:Double128, b:Double128) = "_mm_cmpeq_pd"
-	Function _mm_cmpeq_sd:Double128(a:Double128, b:Double128) = "_mm_cmpeq_sd"
-	Function _mm_cmpge_pd:Double128(a:Double128, b:Double128) = "_mm_cmpge_pd"
-	Function _mm_cmpge_sd:Double128(a:Double128, b:Double128) = "_mm_cmpge_sd"
-	Function _mm_cmpgt_pd:Double128(a:Double128, b:Double128) = "_mm_cmpgt_pd"
-	Function _mm_cmpgt_sd:Double128(a:Double128, b:Double128) = "_mm_cmpgt_sd"
-	Function _mm_cmple_pd:Double128(a:Double128, b:Double128) = "_mm_cmple_pd"
-	Function _mm_cmple_sd:Double128(a:Double128, b:Double128) = "_mm_cmple_sd"
-	Function _mm_cmplt_pd:Double128(a:Double128, b:Double128) = "_mm_cmplt_pd"
-	Function _mm_cmplt_sd:Double128(a:Double128, b:Double128) = "_mm_cmplt_sd"
-	Function _mm_cmpneq_pd:Double128(a:Double128, b:Double128) = "_mm_cmpneq_pd"
-	Function _mm_cmpneq_sd:Double128(a:Double128, b:Double128) = "_mm_cmpneq_sd"
-	Function _mm_cmpnge_pd:Double128(a:Double128, b:Double128) = "_mm_cmpnge_pd"
-	Function _mm_cmpnge_sd:Double128(a:Double128, b:Double128) = "_mm_cmpnge_sd"
-	Function _mm_cmpngt_pd:Double128(a:Double128, b:Double128) = "_mm_cmpngt_pd"
-	Function _mm_cmpngt_sd:Double128(a:Double128, b:Double128) = "_mm_cmpngt_sd"
-	Function _mm_cmpnle_pd:Double128(a:Double128, b:Double128) = "_mm_cmpnle_pd"
-	Function _mm_cmpnle_sd:Double128(a:Double128, b:Double128) = "_mm_cmpnle_sd"
-	Function _mm_cmpnlt_pd:Double128(a:Double128, b:Double128) = "_mm_cmpnlt_pd"
-	Function _mm_cmpnlt_sd:Double128(a:Double128, b:Double128) = "_mm_cmpnlt_sd"
-	Function _mm_cmpord_pd:Double128(a:Double128, b:Double128) = "_mm_cmpord_pd"
-	Function _mm_cmpord_sd:Double128(a:Double128, b:Double128) = "_mm_cmpord_sd"
-	Function _mm_cmpunord_pd:Double128(a:Double128, b:Double128) = "_mm_cmpunord_pd"
-	Function _mm_cmpunord_sd:Double128(a:Double128, b:Double128) = "_mm_cmpunord_sd"
-	Function _mm_comieq_sd:Int(a:Double128, b:Double128) = "_mm_comieq_sd"
-	Function _mm_comige_sd:Int(a:Double128, b:Double128) = "_mm_comige_sd"
-	Function _mm_comigt_sd:Int(a:Double128, b:Double128) = "_mm_comigt_sd"
-	Function _mm_comile_sd:Int(a:Double128, b:Double128) = "_mm_comile_sd"
-	Function _mm_comilt_sd:Int(a:Double128, b:Double128) = "_mm_comilt_sd"
-	Function _mm_comineq_sd:Int(a:Double128, b:Double128) = "_mm_comineq_sd"
-	Function _mm_cvtepi32_pd:Double128(a:Int128) = "_mm_cvtepi32_pd"
-	Function _mm_cvtpd_epi32:Int128(a:Double128) = "_mm_cvtpd_epi32"
-	Function _mm_cvtpd_ps:Float128(a:Double128) = "_mm_cvtpd_ps"
-	Function _mm_cvtps_pd:Double128(a:Float128) = "_mm_cvtps_pd"
-	Function _mm_cvtsd_f64:Double(a:Double128) = "_mm_cvtsd_f64"
-	Function _mm_cvtsd_si32:Int(a:Double128) = "_mm_cvtsd_si32"
-	Function _mm_cvtsd_si64:Long(a:Double128) = "_mm_cvtsd_si64"
-	Function _mm_cvtsd_si64x:Long(a:Double128) = "_mm_cvtsd_si64x"
-	Function _mm_cvtsd_ss:Float128(a:Float128, b:Double128) = "_mm_cvtsd_ss"
-	Function _mm_cvtsi32_sd:Double128(a:Double128, b:Int) = "_mm_cvtsi32_sd"
-	Function _mm_cvtsi64_sd:Double128(a:Double128, b:Long) = "_mm_cvtsi64_sd"
-	Function _mm_cvtsi64x_sd:Double128(a:Double128, b:Long) = "_mm_cvtsi64x_sd"
-	Function _mm_cvtss_sd:Double128(a:Double128, b:Float128) = "_mm_cvtss_sd"
-	Function _mm_cvttpd_epi32:Int128(a:Double128) = "_mm_cvttpd_epi32"
-	Function _mm_cvttsd_si32:Int(a:Double128) = "_mm_cvttsd_si32"
-	Function _mm_cvttsd_si64:Long(a:Double128) = "_mm_cvttsd_si64"
-	Function _mm_cvttsd_si64x:Long(a:Double128) = "_mm_cvttsd_si64x"
-	Function _mm_div_pd:Double128(a:Double128, b:Double128) = "_mm_div_pd"
-	Function _mm_div_sd:Double128(a:Double128, b:Double128) = "_mm_div_sd"
-	Function _mm_load_pd:Double128(mem_addr:Double Ptr) = "_mm_load_pd"
-	Function _mm_load_pd1:Double128(mem_addr:Double Ptr) = "_mm_load_pd1"
-	Function _mm_load_sd:Double128(mem_addr:Double Ptr) = "_mm_load_sd"
-	Function _mm_load1_pd:Double128(mem_addr:Double Ptr) = "_mm_load1_pd"
-	Function _mm_loadh_pd:Double128(a:Double128, mem_addr:Double Ptr) = "_mm_loadh_pd"
-	Function _mm_loadl_pd:Double128(a:Double128, mem_addr:Double Ptr) = "_mm_loadl_pd"
-	Function _mm_loadr_pd:Double128(mem_addr:Double Ptr) = "_mm_loadr_pd"
-	Function _mm_loadu_pd:Double128(mem_addr:Double Ptr) = "_mm_loadu_pd"
-	Function _mm_max_pd:Double128(a:Double128, b:Double128) = "_mm_max_pd"
-	Function _mm_max_sd:Double128(a:Double128, b:Double128) = "_mm_max_sd"
-	Function _mm_min_pd:Double128(a:Double128, b:Double128) = "_mm_min_pd"
-	Function _mm_min_sd:Double128(a:Double128, b:Double128) = "_mm_min_sd"
-	Function _mm_move_sd:Double128(a:Double128, b:Double128) = "_mm_move_sd"
-	Function _mm_movemask_pd:Int(a:Double128) = "_mm_movemask_pd"
-	Function _mm_mul_pd:Double128(a:Double128, b:Double128) = "_mm_mul_pd"
-	Function _mm_mul_sd:Double128(a:Double128, b:Double128) = "_mm_mul_sd"
-	Function _mm_or_pd:Double128(a:Double128, b:Double128) = "_mm_or_pd"
-	Function _mm_set_pd:Double128(e1:Double, e0:Double) = "_mm_set_pd"
-	Function _mm_set_pd1:Double128(a:Double) = "_mm_set_pd1"
-	Function _mm_set_sd:Double128(a:Double) = "_mm_set_sd"
-	Function _mm_set1_pd:Double128(a:Double) = "_mm_set1_pd"
-	Function _mm_setr_pd:Double128(e1:Double, e0:Double) = "_mm_setr_pd"
-	Function _mm_setzero_pd:Double128() = "_mm_setzero_pd"
-	Function _mm_shuffle_pd:Double128(a:Double128, b:Double128, imm8:Int) = "_mm_shuffle_pd"
-	Function _mm_sqrt_pd:Double128(a:Double128) = "_mm_sqrt_pd"
-	Function _mm_sqrt_sd:Double128(a:Double128, b:Double128) = "_mm_sqrt_sd"
-	Function _mm_store_pd(mem_addr:Double Ptr, a:Double128) = "_mm_store_pd"
-	Function _mm_store_pd1(mem_addr:Double Ptr, a:Double128) = "_mm_store_pd1"
-	Function _mm_store_sd(mem_addr:Double Ptr, a:Double128) = "_mm_store_sd"
-	Function _mm_store1_pd(mem_addr:Double Ptr, a:Double128) = "_mm_store1_pd"
-	Function _mm_storeh_pd(mem_addr:Double Ptr, a:Double128) = "_mm_storeh_pd"
-	Function _mm_storel_pd(mem_addr:Double Ptr, a:Double128) = "_mm_storel_pd"
-	Function _mm_storer_pd(mem_addr:Double Ptr, a:Double128) = "_mm_storer_pd"
-	Function _mm_storeu_pd(mem_addr:Double Ptr, a:Double128) = "_mm_storeu_pd"
-	Function _mm_stream_pd(mem_addr:Double Ptr, a:Double128) = "_mm_stream_pd"
-	Function _mm_sub_pd:Double128(a:Double128, b:Double128) = "_mm_sub_pd"
-	Function _mm_sub_sd:Double128(a:Double128, b:Double128) = "_mm_sub_sd"
-	Function _mm_ucomieq_sd:Int(a:Double128, b:Double128) = "_mm_ucomieq_sd"
-	Function _mm_ucomige_sd:Int(a:Double128, b:Double128) = "_mm_ucomige_sd"
-	Function _mm_ucomigt_sd:Int(a:Double128, b:Double128) = "_mm_ucomigt_sd"
-	Function _mm_ucomile_sd:Int(a:Double128, b:Double128) = "_mm_ucomile_sd"
-	Function _mm_ucomilt_sd:Int(a:Double128, b:Double128) = "_mm_ucomilt_sd"
-	Function _mm_ucomineq_sd:Int(a:Double128, b:Double128) = "_mm_ucomineq_sd"
-	Function _mm_unpackhi_pd:Double128(a:Double128, b:Double128) = "_mm_unpackhi_pd"
-	Function _mm_unpacklo_pd:Double128(a:Double128, b:Double128) = "_mm_unpacklo_pd"
-	Function _mm_xor_pd:Double128(a:Double128, b:Double128) = "_mm_xor_pd"
-
-
-End Extern
-
-?

+ 0 - 9
intrinsics.mod/intrinsics.x

@@ -1,9 +0,0 @@
-void _mm_load_pd(double *)!
-void _mm_load1_pd(double *)!
-void _mm_load_sd(double *)!
-void _mm_load_pd1(double *)!
-void _mm_loadh_pd(__m128d, double *)!
-void _mm_loadl_pd(__m128d , double *)!
-void _mm_loadr_pd(double *)!
-void _mm_loadu_pd(double *)!
-BBDOUBLE128 _mm_shuffle_pd(BBDOUBLE128 ,BBDOUBLE128 ,BBINT )!

+ 111 - 0
xmmintrin.mod/sse.bmx

@@ -0,0 +1,111 @@
+' Extern declarations for the SSE intrinsics that work on Float128 values.
+' Each BlitzMax function is bound to the external symbol of the same name,
+' so callers use the intrinsic's own identifier.
+SuperStrict
+Extern
+	Function _mm_add_ps:Float128(a:Float128, b:Float128) = "_mm_add_ps"
+	Function _mm_add_ss:Float128(a:Float128, b:Float128) = "_mm_add_ss"
+	Function _mm_and_ps:Float128(a:Float128, b:Float128) = "_mm_and_ps"
+	Function _mm_andnot_ps:Float128(a:Float128, b:Float128) = "_mm_andnot_ps"
+	Function _mm_cmpeq_ps:Float128(a:Float128, b:Float128) = "_mm_cmpeq_ps"
+	Function _mm_cmpeq_ss:Float128(a:Float128, b:Float128) = "_mm_cmpeq_ss"
+	Function _mm_cmpge_ps:Float128(a:Float128, b:Float128) = "_mm_cmpge_ps"
+	Function _mm_cmpge_ss:Float128(a:Float128, b:Float128) = "_mm_cmpge_ss"
+	Function _mm_cmpgt_ps:Float128(a:Float128, b:Float128) = "_mm_cmpgt_ps"
+	Function _mm_cmpgt_ss:Float128(a:Float128, b:Float128) = "_mm_cmpgt_ss"
+	Function _mm_cmple_ps:Float128(a:Float128, b:Float128) = "_mm_cmple_ps"
+	Function _mm_cmple_ss:Float128(a:Float128, b:Float128) = "_mm_cmple_ss"
+	Function _mm_cmplt_ps:Float128(a:Float128, b:Float128) = "_mm_cmplt_ps"
+	Function _mm_cmplt_ss:Float128(a:Float128, b:Float128) = "_mm_cmplt_ss"
+	Function _mm_cmpneq_ps:Float128(a:Float128, b:Float128) = "_mm_cmpneq_ps"
+	Function _mm_cmpneq_ss:Float128(a:Float128, b:Float128) = "_mm_cmpneq_ss"
+	Function _mm_cmpnge_ps:Float128(a:Float128, b:Float128) = "_mm_cmpnge_ps"
+	Function _mm_cmpnge_ss:Float128(a:Float128, b:Float128) = "_mm_cmpnge_ss"
+	Function _mm_cmpngt_ps:Float128(a:Float128, b:Float128) = "_mm_cmpngt_ps"
+	Function _mm_cmpngt_ss:Float128(a:Float128, b:Float128) = "_mm_cmpngt_ss"
+	Function _mm_cmpnle_ps:Float128(a:Float128, b:Float128) = "_mm_cmpnle_ps"
+	Function _mm_cmpnle_ss:Float128(a:Float128, b:Float128) = "_mm_cmpnle_ss"
+	Function _mm_cmpnlt_ps:Float128(a:Float128, b:Float128) = "_mm_cmpnlt_ps"
+	Function _mm_cmpnlt_ss:Float128(a:Float128, b:Float128) = "_mm_cmpnlt_ss"
+	Function _mm_cmpord_ps:Float128(a:Float128, b:Float128) = "_mm_cmpord_ps"
+	Function _mm_cmpord_ss:Float128(a:Float128, b:Float128) = "_mm_cmpord_ss"
+	Function _mm_cmpunord_ps:Float128(a:Float128, b:Float128) = "_mm_cmpunord_ps"
+	Function _mm_cmpunord_ss:Float128(a:Float128, b:Float128) = "_mm_cmpunord_ss"
+	Function _mm_comieq_ss:Int(a:Float128, b:Float128) = "_mm_comieq_ss"
+	Function _mm_comige_ss:Int(a:Float128, b:Float128) = "_mm_comige_ss"
+	Function _mm_comigt_ss:Int(a:Float128, b:Float128) = "_mm_comigt_ss"
+	Function _mm_comile_ss:Int(a:Float128, b:Float128) = "_mm_comile_ss"
+	Function _mm_comilt_ss:Int(a:Float128, b:Float128) = "_mm_comilt_ss"
+	Function _mm_comineq_ss:Int(a:Float128, b:Float128) = "_mm_comineq_ss"
+	Function _mm_cvt_si2ss:Float128(a:Float128, b:Int) = "_mm_cvt_si2ss"
+	Function _mm_cvt_ss2si:Int(a:Float128) = "_mm_cvt_ss2si"
+	Function _mm_cvtsi32_ss:Float128(a:Float128, b:Int) = "_mm_cvtsi32_ss"
+	Function _mm_cvtsi64_ss:Float128(a:Float128, b:Long) = "_mm_cvtsi64_ss"
+	Function _mm_cvtss_f32:Float(a:Float128) = "_mm_cvtss_f32"
+	Function _mm_cvtss_si32:Int(a:Float128) = "_mm_cvtss_si32"
+	Function _mm_cvtss_si64:Long(a:Float128) = "_mm_cvtss_si64"
+	Function _mm_cvtt_ss2si:Int(a:Float128) = "_mm_cvtt_ss2si"
+	Function _mm_cvttss_si32:Int(a:Float128) = "_mm_cvttss_si32"
+	Function _mm_cvttss_si64:Long(a:Float128) = "_mm_cvttss_si64"
+	Function _mm_div_ps:Float128(a:Float128, b:Float128) = "_mm_div_ps"
+	Function _mm_div_ss:Float128(a:Float128, b:Float128) = "_mm_div_ss"
+	Function _MM_GET_EXCEPTION_MASK:UInt() = "_MM_GET_EXCEPTION_MASK"
+	Function _MM_GET_EXCEPTION_STATE:UInt() = "_MM_GET_EXCEPTION_STATE"
+	Function _MM_GET_FLUSH_ZERO_MODE:UInt() = "_MM_GET_FLUSH_ZERO_MODE"
+	Function _MM_GET_ROUNDING_MODE:UInt() = "_MM_GET_ROUNDING_MODE"
+	Function _mm_getcsr:UInt() = "_mm_getcsr"
+	Function _mm_load_ps:Float128(mem_addr:Float Ptr) = "_mm_load_ps"
+	Function _mm_load_ps1:Float128(mem_addr:Float Ptr) = "_mm_load_ps1"
+	Function _mm_load_ss:Float128(mem_addr:Float Ptr) = "_mm_load_ss"
+	Function _mm_load1_ps:Float128(mem_addr:Float Ptr) = "_mm_load1_ps"
+	Function _mm_loadr_ps:Float128(mem_addr:Float Ptr) = "_mm_loadr_ps"
+	Function _mm_loadu_ps:Float128(mem_addr:Float Ptr) = "_mm_loadu_ps"
+	Function _mm_max_ps:Float128(a:Float128, b:Float128) = "_mm_max_ps"
+	Function _mm_max_ss:Float128(a:Float128, b:Float128) = "_mm_max_ss"
+	Function _mm_min_ps:Float128(a:Float128, b:Float128) = "_mm_min_ps"
+	Function _mm_min_ss:Float128(a:Float128, b:Float128) = "_mm_min_ss"
+	Function _mm_move_ss:Float128(a:Float128, b:Float128) = "_mm_move_ss"
+	Function _mm_movehl_ps:Float128(a:Float128, b:Float128) = "_mm_movehl_ps"
+	Function _mm_movelh_ps:Float128(a:Float128, b:Float128) = "_mm_movelh_ps"
+	Function _mm_movemask_ps:Int(a:Float128) = "_mm_movemask_ps"
+	Function _mm_mul_ps:Float128(a:Float128, b:Float128) = "_mm_mul_ps"
+	Function _mm_mul_ss:Float128(a:Float128, b:Float128) = "_mm_mul_ss"
+	Function _mm_or_ps:Float128(a:Float128, b:Float128) = "_mm_or_ps"
+	Function _mm_prefetch(p:Byte Ptr, i:Int) = "_mm_prefetch"
+	Function _mm_rcp_ps:Float128(a:Float128) = "_mm_rcp_ps"
+	Function _mm_rcp_ss:Float128(a:Float128) = "_mm_rcp_ss"
+	Function _mm_rsqrt_ps:Float128(a:Float128) = "_mm_rsqrt_ps"
+	Function _mm_rsqrt_ss:Float128(a:Float128) = "_mm_rsqrt_ss"
+	Function _MM_SET_EXCEPTION_MASK(a:UInt) = "_MM_SET_EXCEPTION_MASK"
+	Function _MM_SET_EXCEPTION_STATE(a:UInt) = "_MM_SET_EXCEPTION_STATE"
+	Function _MM_SET_FLUSH_ZERO_MODE(a:UInt) = "_MM_SET_FLUSH_ZERO_MODE"
+	Function _mm_set_ps:Float128(e3:Float, e2:Float, e1:Float, e0:Float) = "_mm_set_ps"
+	Function _mm_set_ps1:Float128(a:Float) = "_mm_set_ps1"
+	Function _MM_SET_ROUNDING_MODE(a:UInt) = "_MM_SET_ROUNDING_MODE"
+	Function _mm_set_ss:Float128(a:Float) = "_mm_set_ss"
+	Function _mm_set1_ps:Float128(a:Float) = "_mm_set1_ps"
+	Function _mm_setcsr(a:UInt) = "_mm_setcsr"
+	Function _mm_setr_ps:Float128(e3:Float, e2:Float, e1:Float, e0:Float) = "_mm_setr_ps"
+	Function _mm_setzero_ps:Float128() = "_mm_setzero_ps"
+	Function _mm_sfence() = "_mm_sfence"
+	Function _mm_sqrt_ps:Float128(a:Float128) = "_mm_sqrt_ps"
+	Function _mm_sqrt_ss:Float128(a:Float128) = "_mm_sqrt_ss"
+	Function _mm_store_ps(mem_addr:Float Ptr, a:Float128) = "_mm_store_ps"
+	Function _mm_store_ps1(mem_addr:Float Ptr, a:Float128) = "_mm_store_ps1"
+	Function _mm_store_ss(mem_addr:Float Ptr, a:Float128) = "_mm_store_ss"
+	Function _mm_store1_ps(mem_addr:Float Ptr, a:Float128) = "_mm_store1_ps"
+	Function _mm_storer_ps(mem_addr:Float Ptr, a:Float128) = "_mm_storer_ps"
+	Function _mm_storeu_ps(mem_addr:Float Ptr, a:Float128) = "_mm_storeu_ps"
+	Function _mm_stream_ps(mem_addr:Float Ptr, a:Float128) = "_mm_stream_ps"
+	Function _mm_sub_ps:Float128(a:Float128, b:Float128) = "_mm_sub_ps"
+	Function _mm_sub_ss:Float128(a:Float128, b:Float128) = "_mm_sub_ss"
+	Function _mm_ucomieq_ss:Int(a:Float128, b:Float128) = "_mm_ucomieq_ss"
+	Function _mm_ucomige_ss:Int(a:Float128, b:Float128) = "_mm_ucomige_ss"
+	Function _mm_ucomigt_ss:Int(a:Float128, b:Float128) = "_mm_ucomigt_ss"
+	Function _mm_ucomile_ss:Int(a:Float128, b:Float128) = "_mm_ucomile_ss"
+	Function _mm_ucomilt_ss:Int(a:Float128, b:Float128) = "_mm_ucomilt_ss"
+	Function _mm_ucomineq_ss:Int(a:Float128, b:Float128) = "_mm_ucomineq_ss"
+	Function _mm_unpackhi_ps:Float128(a:Float128, b:Float128) = "_mm_unpackhi_ps"
+	Function _mm_unpacklo_ps:Float128(a:Float128, b:Float128) = "_mm_unpacklo_ps"
+	Function _mm_xor_ps:Float128(a:Float128, b:Float128) = "_mm_xor_ps"
+End Extern

+ 21 - 0
xmmintrin.mod/sse.x

@@ -0,0 +1,21 @@
+__m128 _mm_load_ps(float* )!
+__m128 _mm_load_ps1(float* )!
+__m128 _mm_load_ss(float* )!
+__m128 _mm_load1_ps(float* )!
+__m128 _mm_loadh_pi(__m128 ,__m64* )!
+__m128 _mm_loadl_pi(__m128 ,__m64* )!
+__m128 _mm_loadr_ps(float* )!
+__m128 _mm_loadu_ps(float* )!
+void _mm_maskmove_si64(__m64 ,__m64 ,char* )!
+void _m_maskmovq(__m64 ,__m64 ,char* )!
+void _mm_prefetch(char* ,int )!
+void _mm_store_ps(float* ,__m128 )!
+void _mm_store_ps1(float* ,__m128 )!
+void _mm_store_ss(float* ,__m128 )!
+void _mm_store1_ps(float* ,__m128 )!
+void _mm_storeh_pi(__m64* ,__m128 )!
+void _mm_storel_pi(__m64* ,__m128 )!
+void _mm_storer_ps(float* ,__m128 )!
+void _mm_storeu_ps(float* ,__m128 )!
+void _mm_stream_pi(__m64* ,__m64 )!
+void _mm_stream_ps(float* ,__m128 )!

+ 116 - 0
xmmintrin.mod/sse2.bmx

@@ -0,0 +1,116 @@
+' Extern declarations for the SSE2 intrinsics over Int128 (plus the Float128 <-> Int128
+' casts and conversions); each is bound to the external symbol of the same name.
+' NOTE(review): the Double128 _pd/_sd declarations of the removed Pub.Intrinsics module are not declared here - confirm.
+SuperStrict
+Extern
+	Function _mm_add_epi16:Int128(a:Int128, b:Int128) = "_mm_add_epi16"
+	Function _mm_add_epi32:Int128(a:Int128, b:Int128) = "_mm_add_epi32"
+	Function _mm_add_epi64:Int128(a:Int128, b:Int128) = "_mm_add_epi64"
+	Function _mm_add_epi8:Int128(a:Int128, b:Int128) = "_mm_add_epi8"
+	Function _mm_adds_epi16:Int128(a:Int128, b:Int128) = "_mm_adds_epi16"
+	Function _mm_adds_epi8:Int128(a:Int128, b:Int128) = "_mm_adds_epi8"
+	Function _mm_adds_epu16:Int128(a:Int128, b:Int128) = "_mm_adds_epu16"
+	Function _mm_adds_epu8:Int128(a:Int128, b:Int128) = "_mm_adds_epu8"
+	Function _mm_and_si128:Int128(a:Int128, b:Int128) = "_mm_and_si128"
+	Function _mm_andnot_si128:Int128(a:Int128, b:Int128) = "_mm_andnot_si128"
+	Function _mm_avg_epu16:Int128(a:Int128, b:Int128) = "_mm_avg_epu16"
+	Function _mm_avg_epu8:Int128(a:Int128, b:Int128) = "_mm_avg_epu8"
+	Function _mm_bslli_si128:Int128(a:Int128, imm8:Int) = "_mm_bslli_si128"
+	Function _mm_bsrli_si128:Int128(a:Int128, imm8:Int) = "_mm_bsrli_si128"
+	Function _mm_castps_si128:Int128(a:Float128) = "_mm_castps_si128"
+	Function _mm_castsi128_ps:Float128(a:Int128) = "_mm_castsi128_ps"
+	Function _mm_clflush(p:Byte Ptr) = "_mm_clflush"
+	Function _mm_cmpeq_epi16:Int128(a:Int128, b:Int128) = "_mm_cmpeq_epi16"
+	Function _mm_cmpeq_epi32:Int128(a:Int128, b:Int128) = "_mm_cmpeq_epi32"
+	Function _mm_cmpeq_epi8:Int128(a:Int128, b:Int128) = "_mm_cmpeq_epi8"
+	Function _mm_cmpgt_epi16:Int128(a:Int128, b:Int128) = "_mm_cmpgt_epi16"
+	Function _mm_cmpgt_epi32:Int128(a:Int128, b:Int128) = "_mm_cmpgt_epi32"
+	Function _mm_cmpgt_epi8:Int128(a:Int128, b:Int128) = "_mm_cmpgt_epi8"
+	Function _mm_cmplt_epi16:Int128(a:Int128, b:Int128) = "_mm_cmplt_epi16"
+	Function _mm_cmplt_epi32:Int128(a:Int128, b:Int128) = "_mm_cmplt_epi32"
+	Function _mm_cmplt_epi8:Int128(a:Int128, b:Int128) = "_mm_cmplt_epi8"
+	Function _mm_cvtepi32_ps:Float128(a:Int128) = "_mm_cvtepi32_ps"
+	Function _mm_cvtps_epi32:Int128(a:Float128) = "_mm_cvtps_epi32"
+	Function _mm_cvtsi128_si32:Int(a:Int128) = "_mm_cvtsi128_si32"
+	Function _mm_cvtsi128_si64:Long(a:Int128) = "_mm_cvtsi128_si64"
+	Function _mm_cvtsi128_si64x:Long(a:Int128) = "_mm_cvtsi128_si64x"
+	Function _mm_cvtsi32_si128:Int128(a:Int) = "_mm_cvtsi32_si128"
+	Function _mm_cvtsi64_si128:Int128(a:Long) = "_mm_cvtsi64_si128"
+	Function _mm_cvtsi64x_si128:Int128(a:Long) = "_mm_cvtsi64x_si128"
+	Function _mm_cvttps_epi32:Int128(a:Float128) = "_mm_cvttps_epi32"
+	Function _mm_extract_epi16:Int(a:Int128, imm8:Int) = "_mm_extract_epi16"
+	Function _mm_insert_epi16:Int128(a:Int128, i:Int, imm8:Int) = "_mm_insert_epi16"
+	Function _mm_lfence() = "_mm_lfence"
+	Function _mm_load_si128:Int128(mem_addr:Int128 Ptr) = "_mm_load_si128"
+	Function _mm_loadl_epi64:Int128(mem_addr:Int128 Ptr) = "_mm_loadl_epi64"
+	Function _mm_loadu_si128:Int128(mem_addr:Int128 Ptr) = "_mm_loadu_si128"
+	Function _mm_madd_epi16:Int128(a:Int128, b:Int128) = "_mm_madd_epi16"
+	Function _mm_maskmoveu_si128(a:Int128, mask:Int128, mem_addr:Byte Ptr) = "_mm_maskmoveu_si128"
+	Function _mm_max_epi16:Int128(a:Int128, b:Int128) = "_mm_max_epi16"
+	Function _mm_max_epu8:Int128(a:Int128, b:Int128) = "_mm_max_epu8"
+	Function _mm_mfence() = "_mm_mfence"
+	Function _mm_min_epi16:Int128(a:Int128, b:Int128) = "_mm_min_epi16"
+	Function _mm_min_epu8:Int128(a:Int128, b:Int128) = "_mm_min_epu8"
+	Function _mm_move_epi64:Int128(a:Int128) = "_mm_move_epi64"
+	Function _mm_movemask_epi8:Int(a:Int128) = "_mm_movemask_epi8"
+	Function _mm_mul_epu32:Int128(a:Int128, b:Int128) = "_mm_mul_epu32"
+	Function _mm_mulhi_epi16:Int128(a:Int128, b:Int128) = "_mm_mulhi_epi16"
+	Function _mm_mulhi_epu16:Int128(a:Int128, b:Int128) = "_mm_mulhi_epu16"
+	Function _mm_mullo_epi16:Int128(a:Int128, b:Int128) = "_mm_mullo_epi16"
+	Function _mm_or_si128:Int128(a:Int128, b:Int128) = "_mm_or_si128"
+	Function _mm_packs_epi16:Int128(a:Int128, b:Int128) = "_mm_packs_epi16"
+	Function _mm_packs_epi32:Int128(a:Int128, b:Int128) = "_mm_packs_epi32"
+	Function _mm_packus_epi16:Int128(a:Int128, b:Int128) = "_mm_packus_epi16"
+	Function _mm_pause() = "_mm_pause"
+	Function _mm_sad_epu8:Int128(a:Int128, b:Int128) = "_mm_sad_epu8"
+	Function _mm_set_epi32:Int128(e3:Int, e2:Int, e1:Int, e0:Int) = "_mm_set_epi32"
+	Function _mm_set_epi64x:Int128(e1:Long, e0:Long) = "_mm_set_epi64x"
+	Function _mm_set1_epi32:Int128(a:Int) = "_mm_set1_epi32"
+	Function _mm_set1_epi64x:Int128(a:Long) = "_mm_set1_epi64x"
+	Function _mm_setr_epi32:Int128(e3:Int, e2:Int, e1:Int, e0:Int) = "_mm_setr_epi32"
+	Function _mm_setzero_si128:Int128() = "_mm_setzero_si128"
+	Function _mm_shuffle_epi32:Int128(a:Int128, imm8:Int) = "_mm_shuffle_epi32"
+	Function _mm_shufflehi_epi16:Int128(a:Int128, imm8:Int) = "_mm_shufflehi_epi16"
+	Function _mm_shufflelo_epi16:Int128(a:Int128, imm8:Int) = "_mm_shufflelo_epi16"
+	Function _mm_sll_epi16:Int128(a:Int128, count:Int128) = "_mm_sll_epi16"
+	Function _mm_sll_epi32:Int128(a:Int128, count:Int128) = "_mm_sll_epi32"
+	Function _mm_sll_epi64:Int128(a:Int128, count:Int128) = "_mm_sll_epi64"
+	Function _mm_slli_epi16:Int128(a:Int128, imm8:Int) = "_mm_slli_epi16"
+	Function _mm_slli_epi32:Int128(a:Int128, imm8:Int) = "_mm_slli_epi32"
+	Function _mm_slli_epi64:Int128(a:Int128, imm8:Int) = "_mm_slli_epi64"
+	Function _mm_slli_si128:Int128(a:Int128, imm8:Int) = "_mm_slli_si128"
+	Function _mm_sra_epi16:Int128(a:Int128, count:Int128) = "_mm_sra_epi16"
+	Function _mm_sra_epi32:Int128(a:Int128, count:Int128) = "_mm_sra_epi32"
+	Function _mm_srai_epi16:Int128(a:Int128, imm8:Int) = "_mm_srai_epi16"
+	Function _mm_srai_epi32:Int128(a:Int128, imm8:Int) = "_mm_srai_epi32"
+	Function _mm_srl_epi16:Int128(a:Int128, count:Int128) = "_mm_srl_epi16"
+	Function _mm_srl_epi32:Int128(a:Int128, count:Int128) = "_mm_srl_epi32"
+	Function _mm_srl_epi64:Int128(a:Int128, count:Int128) = "_mm_srl_epi64"
+	Function _mm_srli_epi16:Int128(a:Int128, imm8:Int) = "_mm_srli_epi16"
+	Function _mm_srli_epi32:Int128(a:Int128, imm8:Int) = "_mm_srli_epi32"
+	Function _mm_srli_epi64:Int128(a:Int128, imm8:Int) = "_mm_srli_epi64"
+	Function _mm_srli_si128:Int128(a:Int128, imm8:Int) = "_mm_srli_si128"
+	Function _mm_store_si128(mem_addr:Int128 Ptr, a:Int128) = "_mm_store_si128"
+	Function _mm_storel_epi64(mem_addr:Int128 Ptr, a:Int128) = "_mm_storel_epi64"
+	Function _mm_storeu_si128(mem_addr:Int128 Ptr, a:Int128) = "_mm_storeu_si128"
+	Function _mm_stream_si128(mem_addr:Int128 Ptr, a:Int128) = "_mm_stream_si128"
+	Function _mm_stream_si32(mem_addr:Int Ptr, a:Int) = "_mm_stream_si32"
+	Function _mm_stream_si64(mem_addr:Long Ptr, a:Long) = "_mm_stream_si64"
+	Function _mm_sub_epi16:Int128(a:Int128, b:Int128) = "_mm_sub_epi16"
+	Function _mm_sub_epi32:Int128(a:Int128, b:Int128) = "_mm_sub_epi32"
+	Function _mm_sub_epi64:Int128(a:Int128, b:Int128) = "_mm_sub_epi64"
+	Function _mm_sub_epi8:Int128(a:Int128, b:Int128) = "_mm_sub_epi8"
+	Function _mm_subs_epi16:Int128(a:Int128, b:Int128) = "_mm_subs_epi16"
+	Function _mm_subs_epi8:Int128(a:Int128, b:Int128) = "_mm_subs_epi8"
+	Function _mm_subs_epu16:Int128(a:Int128, b:Int128) = "_mm_subs_epu16"
+	Function _mm_subs_epu8:Int128(a:Int128, b:Int128) = "_mm_subs_epu8"
+	Function _mm_unpackhi_epi16:Int128(a:Int128, b:Int128) = "_mm_unpackhi_epi16"
+	Function _mm_unpackhi_epi32:Int128(a:Int128, b:Int128) = "_mm_unpackhi_epi32"
+	Function _mm_unpackhi_epi64:Int128(a:Int128, b:Int128) = "_mm_unpackhi_epi64"
+	Function _mm_unpackhi_epi8:Int128(a:Int128, b:Int128) = "_mm_unpackhi_epi8"
+	Function _mm_unpacklo_epi16:Int128(a:Int128, b:Int128) = "_mm_unpacklo_epi16"
+	Function _mm_unpacklo_epi32:Int128(a:Int128, b:Int128) = "_mm_unpacklo_epi32"
+	Function _mm_unpacklo_epi64:Int128(a:Int128, b:Int128) = "_mm_unpacklo_epi64"
+	Function _mm_unpacklo_epi8:Int128(a:Int128, b:Int128) = "_mm_unpacklo_epi8"
+	Function _mm_xor_si128:Int128(a:Int128, b:Int128) = "_mm_xor_si128"
+End Extern

+ 28 - 0
xmmintrin.mod/sse2.x

@@ -0,0 +1,28 @@
+void _mm_clflush(void* )!
+__m128d _mm_load_pd(double* )!
+__m128d _mm_load_pd1(double* )!
+__m128d _mm_load_sd(double* )!
+__m128i _mm_load_si128(__m128i* )!
+__m128d _mm_load1_pd(double* )!
+__m128d _mm_loadh_pd(__m128d ,double* )!
+__m128i _mm_loadl_epi64(__m128i* )!
+__m128d _mm_loadl_pd(__m128d ,double* )!
+__m128d _mm_loadr_pd(double* )!
+__m128d _mm_loadu_pd(double* )!
+__m128i _mm_loadu_si128(__m128i* )!
+void _mm_maskmoveu_si128(__m128i ,__m128i ,char* )!
+void _mm_store_pd(double* ,__m128d )!
+void _mm_store_pd1(double* ,__m128d )!
+void _mm_store_sd(double* ,__m128d )!
+void _mm_store_si128(__m128i* ,__m128i )!
+void _mm_store1_pd(double* ,__m128d )!
+void _mm_storeh_pd(double* ,__m128d )!
+void _mm_storel_epi64(__m128i* ,__m128i )!
+void _mm_storel_pd(double* ,__m128d )!
+void _mm_storer_pd(double* ,__m128d )!
+void _mm_storeu_pd(double* ,__m128d )!
+void _mm_storeu_si128(__m128i* ,__m128i )!
+void _mm_stream_pd(double* ,__m128d )!
+void _mm_stream_si128(__m128i* ,__m128i )!
+void _mm_stream_si32(int* ,int )!
+void _mm_stream_si64(__int64* ,__int64 )!

+ 11 - 0
xmmintrin.mod/sse3.bmx

@@ -0,0 +1,11 @@
+' Extern declarations for the SSE3 intrinsics taking Float128 values or an Int128 pointer;
+' each is bound to the external symbol of the same name.
+SuperStrict
+Extern
+	Function _mm_addsub_ps:Float128(a:Float128, b:Float128) = "_mm_addsub_ps"
+	Function _mm_hadd_ps:Float128(a:Float128, b:Float128) = "_mm_hadd_ps"
+	Function _mm_hsub_ps:Float128(a:Float128, b:Float128) = "_mm_hsub_ps"
+	Function _mm_lddqu_si128:Int128(mem_addr:Int128 Ptr) = "_mm_lddqu_si128"
+	Function _mm_movehdup_ps:Float128(a:Float128) = "_mm_movehdup_ps"
+	Function _mm_moveldup_ps:Float128(a:Float128) = "_mm_moveldup_ps"
+End Extern

+ 2 - 0
xmmintrin.mod/sse3.x

@@ -0,0 +1,2 @@
+__m128i _mm_lddqu_si128(__m128i* )!
+__m128d _mm_loaddup_pd(double* )!

+ 50 - 0
xmmintrin.mod/sse41.bmx

@@ -0,0 +1,50 @@
+' Extern declarations for the SSE4.1 intrinsics over Int128 and Float128 values;
+' each is bound to the external symbol of the same name.
+SuperStrict
+Extern
+	Function _mm_blend_epi16:Int128(a:Int128, b:Int128, imm8:Int) = "_mm_blend_epi16"
+	Function _mm_blend_ps:Float128(a:Float128, b:Float128, imm8:Int) = "_mm_blend_ps"
+	Function _mm_blendv_epi8:Int128(a:Int128, b:Int128, mask:Int128) = "_mm_blendv_epi8"
+	Function _mm_blendv_ps:Float128(a:Float128, b:Float128, mask:Float128) = "_mm_blendv_ps"
+	Function _mm_cmpeq_epi64:Int128(a:Int128, b:Int128) = "_mm_cmpeq_epi64"
+	Function _mm_cvtepi16_epi32:Int128(a:Int128) = "_mm_cvtepi16_epi32"
+	Function _mm_cvtepi16_epi64:Int128(a:Int128) = "_mm_cvtepi16_epi64"
+	Function _mm_cvtepi32_epi64:Int128(a:Int128) = "_mm_cvtepi32_epi64"
+	Function _mm_cvtepi8_epi16:Int128(a:Int128) = "_mm_cvtepi8_epi16"
+	Function _mm_cvtepi8_epi32:Int128(a:Int128) = "_mm_cvtepi8_epi32"
+	Function _mm_cvtepi8_epi64:Int128(a:Int128) = "_mm_cvtepi8_epi64"
+	Function _mm_cvtepu16_epi32:Int128(a:Int128) = "_mm_cvtepu16_epi32"
+	Function _mm_cvtepu16_epi64:Int128(a:Int128) = "_mm_cvtepu16_epi64"
+	Function _mm_cvtepu32_epi64:Int128(a:Int128) = "_mm_cvtepu32_epi64"
+	Function _mm_cvtepu8_epi16:Int128(a:Int128) = "_mm_cvtepu8_epi16"
+	Function _mm_cvtepu8_epi32:Int128(a:Int128) = "_mm_cvtepu8_epi32"
+	Function _mm_cvtepu8_epi64:Int128(a:Int128) = "_mm_cvtepu8_epi64"
+	Function _mm_dp_ps:Float128(a:Float128, b:Float128, imm8:Int) = "_mm_dp_ps"
+	Function _mm_extract_epi32:Int(a:Int128, imm8:Int) = "_mm_extract_epi32"
+	Function _mm_extract_epi64:Long(a:Int128, imm8:Int) = "_mm_extract_epi64"
+	Function _mm_extract_epi8:Int(a:Int128, imm8:Int) = "_mm_extract_epi8"
+	Function _mm_extract_ps:Int(a:Float128, imm8:Int) = "_mm_extract_ps"
+	Function _mm_insert_epi32:Int128(a:Int128, i:Int, imm8:Int) = "_mm_insert_epi32"
+	Function _mm_insert_epi64:Int128(a:Int128, i:Long, imm8:Int) = "_mm_insert_epi64"
+	Function _mm_insert_epi8:Int128(a:Int128, i:Int, imm8:Int) = "_mm_insert_epi8"
+	Function _mm_insert_ps:Float128(a:Float128, b:Float128, imm8:Int) = "_mm_insert_ps"
+	Function _mm_max_epi32:Int128(a:Int128, b:Int128) = "_mm_max_epi32"
+	Function _mm_max_epi8:Int128(a:Int128, b:Int128) = "_mm_max_epi8"
+	Function _mm_max_epu16:Int128(a:Int128, b:Int128) = "_mm_max_epu16"
+	Function _mm_max_epu32:Int128(a:Int128, b:Int128) = "_mm_max_epu32"
+	Function _mm_min_epi32:Int128(a:Int128, b:Int128) = "_mm_min_epi32"
+	Function _mm_min_epi8:Int128(a:Int128, b:Int128) = "_mm_min_epi8"
+	Function _mm_min_epu16:Int128(a:Int128, b:Int128) = "_mm_min_epu16"
+	Function _mm_min_epu32:Int128(a:Int128, b:Int128) = "_mm_min_epu32"
+	Function _mm_minpos_epu16:Int128(a:Int128) = "_mm_minpos_epu16"
+	Function _mm_mpsadbw_epu8:Int128(a:Int128, b:Int128, imm8:Int) = "_mm_mpsadbw_epu8"
+	Function _mm_mul_epi32:Int128(a:Int128, b:Int128) = "_mm_mul_epi32"
+	Function _mm_mullo_epi32:Int128(a:Int128, b:Int128) = "_mm_mullo_epi32"
+	Function _mm_packus_epi32:Int128(a:Int128, b:Int128) = "_mm_packus_epi32"
+	Function _mm_round_ps:Float128(a:Float128, rounding:Int) = "_mm_round_ps"
+	Function _mm_round_ss:Float128(a:Float128, b:Float128, rounding:Int) = "_mm_round_ss"
+	Function _mm_stream_load_si128:Int128(mem_addr:Int128 Ptr) = "_mm_stream_load_si128"
+	Function _mm_testc_si128:Int(a:Int128, b:Int128) = "_mm_testc_si128"
+	Function _mm_testnzc_si128:Int(a:Int128, b:Int128) = "_mm_testnzc_si128"
+	Function _mm_testz_si128:Int(a:Int128, b:Int128) = "_mm_testz_si128"
+End Extern

+ 1 - 0
xmmintrin.mod/sse41.x

@@ -0,0 +1 @@
+__m128i _mm_stream_load_si128(__m128i* )!

+ 22 - 0
xmmintrin.mod/sse42.bmx

@@ -0,0 +1,22 @@
+' Extern declarations for the SSE4.2 intrinsics (_mm_cmpestr*, _mm_cmpistr*, _mm_cmpgt_epi64
+' and _mm_crc32_*); each is bound to the external symbol of the same name.
+SuperStrict
+Extern
+	Function _mm_cmpestra:Int(a:Int128, la:Int, b:Int128, lb:Int, imm8:Int) = "_mm_cmpestra"
+	Function _mm_cmpestrc:Int(a:Int128, la:Int, b:Int128, lb:Int, imm8:Int) = "_mm_cmpestrc"
+	Function _mm_cmpestri:Int(a:Int128, la:Int, b:Int128, lb:Int, imm8:Int) = "_mm_cmpestri"
+	Function _mm_cmpestrm:Int128(a:Int128, la:Int, b:Int128, lb:Int, imm8:Int) = "_mm_cmpestrm"
+	Function _mm_cmpestro:Int(a:Int128, la:Int, b:Int128, lb:Int, imm8:Int) = "_mm_cmpestro"
+	Function _mm_cmpestrs:Int(a:Int128, la:Int, b:Int128, lb:Int, imm8:Int) = "_mm_cmpestrs"
+	Function _mm_cmpestrz:Int(a:Int128, la:Int, b:Int128, lb:Int, imm8:Int) = "_mm_cmpestrz"
+	Function _mm_cmpgt_epi64:Int128(a:Int128, b:Int128) = "_mm_cmpgt_epi64"
+	Function _mm_cmpistra:Int(a:Int128, b:Int128, imm8:Int) = "_mm_cmpistra"
+	Function _mm_cmpistrc:Int(a:Int128, b:Int128, imm8:Int) = "_mm_cmpistrc"
+	Function _mm_cmpistri:Int(a:Int128, b:Int128, imm8:Int) = "_mm_cmpistri"
+	Function _mm_cmpistrm:Int128(a:Int128, b:Int128, imm8:Int) = "_mm_cmpistrm"
+	Function _mm_cmpistro:Int(a:Int128, b:Int128, imm8:Int) = "_mm_cmpistro"
+	Function _mm_cmpistrs:Int(a:Int128, b:Int128, imm8:Int) = "_mm_cmpistrs"
+	Function _mm_cmpistrz:Int(a:Int128, b:Int128, imm8:Int) = "_mm_cmpistrz"
+	Function _mm_crc32_u32:UInt(crc:UInt, v:UInt) = "_mm_crc32_u32"
+	Function _mm_crc32_u64:ULong(crc:ULong, v:ULong) = "_mm_crc32_u64"
+End Extern

+ 0 - 0
xmmintrin.mod/sse42.x


+ 21 - 0
xmmintrin.mod/ssse3.bmx

@@ -0,0 +1,21 @@
+' Extern declarations for the SSSE3 intrinsics over Int128 values;
+' each is bound to the external symbol of the same name.
+SuperStrict
+Extern
+	Function _mm_abs_epi16:Int128(a:Int128) = "_mm_abs_epi16"
+	Function _mm_abs_epi32:Int128(a:Int128) = "_mm_abs_epi32"
+	Function _mm_abs_epi8:Int128(a:Int128) = "_mm_abs_epi8"
+	Function _mm_alignr_epi8:Int128(a:Int128, b:Int128, count:Int) = "_mm_alignr_epi8"
+	Function _mm_hadd_epi16:Int128(a:Int128, b:Int128) = "_mm_hadd_epi16"
+	Function _mm_hadd_epi32:Int128(a:Int128, b:Int128) = "_mm_hadd_epi32"
+	Function _mm_hadds_epi16:Int128(a:Int128, b:Int128) = "_mm_hadds_epi16"
+	Function _mm_hsub_epi16:Int128(a:Int128, b:Int128) = "_mm_hsub_epi16"
+	Function _mm_hsub_epi32:Int128(a:Int128, b:Int128) = "_mm_hsub_epi32"
+	Function _mm_hsubs_epi16:Int128(a:Int128, b:Int128) = "_mm_hsubs_epi16"
+	Function _mm_maddubs_epi16:Int128(a:Int128, b:Int128) = "_mm_maddubs_epi16"
+	Function _mm_mulhrs_epi16:Int128(a:Int128, b:Int128) = "_mm_mulhrs_epi16"
+	Function _mm_shuffle_epi8:Int128(a:Int128, b:Int128) = "_mm_shuffle_epi8"
+	Function _mm_sign_epi16:Int128(a:Int128, b:Int128) = "_mm_sign_epi16"
+	Function _mm_sign_epi32:Int128(a:Int128, b:Int128) = "_mm_sign_epi32"
+	Function _mm_sign_epi8:Int128(a:Int128, b:Int128) = "_mm_sign_epi8"
+End Extern

+ 0 - 0
xmmintrin.mod/ssse3.x


+ 41 - 0
xmmintrin.mod/xmmintrin.bmx

@@ -0,0 +1,41 @@
+'  Copyright (C) 2016 David JJ Camp
+'
+'  This software is provided 'as-is', without any express or implied
+'  warranty.  In no event will the authors be held liable for any damages
+'  arising from the use of this software.
+'
+'  Permission is granted to anyone to use this software for any purpose,
+'  including commercial applications, and to alter it and redistribute it
+'  freely, subject to the following restrictions:
+'
+'  1. The origin of this software must not be misrepresented; you must not
+'     claim that you wrote the original software. If you use this software
+'     in a product, an acknowledgment in the product documentation would be
+'     appreciated but is not required.
+'  2. Altered source versions must be plainly marked as such, and must not be
+'     misrepresented as being the original software.
+'  3. This notice may not be removed or altered from any source distribution.
+
+SuperStrict
+
+Rem
+bbdoc: x64 SSE intrinsics
+End Rem
+Module pub.xmmintrin
+
+ModuleInfo "Version: 1.00"
+ModuleInfo "Author: David JJ Camp"
+ModuleInfo "License: zlib/libpng"
+ModuleInfo "Copyright: David JJ Camp"
+
+' Every imported file declares functions over the Int128/Float128 SIMD types.
+' The Pub.Intrinsics module this one replaces kept its declarations inside a
+' ?x64 block; the same guard is applied here so other targets skip the imports.
+?x64
+Import "sse.bmx"
+Import "sse2.bmx"
+Import "sse3.bmx"
+Import "ssse3.bmx"
+Import "sse41.bmx"
+Import "sse42.bmx"
+?