
Updated to include Double128, Float64.
Made module x64 only.
Added module code generator.

woollybah, 9 years ago
commit 71934fc0c1

xmmintrin.mod/sse.bmx  (+171 -108)

@@ -1,108 +1,171 @@
-SuperStrict
-Extern
-	Function _mm_add_ps:Float128(a:Float128,b:Float128)="_mm_add_ps"
-	Function _mm_add_ss:Float128(a:Float128,b:Float128)="_mm_add_ss"
-	Function _mm_and_ps:Float128(a:Float128,b:Float128)="_mm_and_ps"
-	Function _mm_andnot_ps:Float128(a:Float128,b:Float128)="_mm_andnot_ps"
-	Function _mm_cmpeq_ps:Float128(a:Float128,b:Float128)="_mm_cmpeq_ps"
-	Function _mm_cmpeq_ss:Float128(a:Float128,b:Float128)="_mm_cmpeq_ss"
-	Function _mm_cmpge_ps:Float128(a:Float128,b:Float128)="_mm_cmpge_ps"
-	Function _mm_cmpge_ss:Float128(a:Float128,b:Float128)="_mm_cmpge_ss"
-	Function _mm_cmpgt_ps:Float128(a:Float128,b:Float128)="_mm_cmpgt_ps"
-	Function _mm_cmpgt_ss:Float128(a:Float128,b:Float128)="_mm_cmpgt_ss"
-	Function _mm_cmple_ps:Float128(a:Float128,b:Float128)="_mm_cmple_ps"
-	Function _mm_cmple_ss:Float128(a:Float128,b:Float128)="_mm_cmple_ss"
-	Function _mm_cmplt_ps:Float128(a:Float128,b:Float128)="_mm_cmplt_ps"
-	Function _mm_cmplt_ss:Float128(a:Float128,b:Float128)="_mm_cmplt_ss"
-	Function _mm_cmpneq_ps:Float128(a:Float128,b:Float128)="_mm_cmpneq_ps"
-	Function _mm_cmpneq_ss:Float128(a:Float128,b:Float128)="_mm_cmpneq_ss"
-	Function _mm_cmpnge_ps:Float128(a:Float128,b:Float128)="_mm_cmpnge_ps"
-	Function _mm_cmpnge_ss:Float128(a:Float128,b:Float128)="_mm_cmpnge_ss"
-	Function _mm_cmpngt_ps:Float128(a:Float128,b:Float128)="_mm_cmpngt_ps"
-	Function _mm_cmpngt_ss:Float128(a:Float128,b:Float128)="_mm_cmpngt_ss"
-	Function _mm_cmpnle_ps:Float128(a:Float128,b:Float128)="_mm_cmpnle_ps"
-	Function _mm_cmpnle_ss:Float128(a:Float128,b:Float128)="_mm_cmpnle_ss"
-	Function _mm_cmpnlt_ps:Float128(a:Float128,b:Float128)="_mm_cmpnlt_ps"
-	Function _mm_cmpnlt_ss:Float128(a:Float128,b:Float128)="_mm_cmpnlt_ss"
-	Function _mm_cmpord_ps:Float128(a:Float128,b:Float128)="_mm_cmpord_ps"
-	Function _mm_cmpord_ss:Float128(a:Float128,b:Float128)="_mm_cmpord_ss"
-	Function _mm_cmpunord_ps:Float128(a:Float128,b:Float128)="_mm_cmpunord_ps"
-	Function _mm_cmpunord_ss:Float128(a:Float128,b:Float128)="_mm_cmpunord_ss"
-	Function _mm_comieq_ss:Int(a:Float128,b:Float128)="_mm_comieq_ss"
-	Function _mm_comige_ss:Int(a:Float128,b:Float128)="_mm_comige_ss"
-	Function _mm_comigt_ss:Int(a:Float128,b:Float128)="_mm_comigt_ss"
-	Function _mm_comile_ss:Int(a:Float128,b:Float128)="_mm_comile_ss"
-	Function _mm_comilt_ss:Int(a:Float128,b:Float128)="_mm_comilt_ss"
-	Function _mm_comineq_ss:Int(a:Float128,b:Float128)="_mm_comineq_ss"
-	Function _mm_cvt_si2ss:Float128(a:Float128,b:Int)="_mm_cvt_si2ss"
-	Function _mm_cvt_ss2si:Int(a:Float128)="_mm_cvt_ss2si"
-	Function _mm_cvtsi32_ss:Float128(a:Float128,b:Int)="_mm_cvtsi32_ss"
-	Function _mm_cvtsi64_ss:Float128(a:Float128,b:Long)="_mm_cvtsi64_ss"
-	Function _mm_cvtss_f32:Float(a:Float128)="_mm_cvtss_f32"
-	Function _mm_cvtss_si32:Int(a:Float128)="_mm_cvtss_si32"
-	Function _mm_cvtss_si64:Long(a:Float128)="_mm_cvtss_si64"
-	Function _mm_cvtt_ss2si:Int(a:Float128)="_mm_cvtt_ss2si"
-	Function _mm_cvttss_si32:Int(a:Float128)="_mm_cvttss_si32"
-	Function _mm_cvttss_si64:Long(a:Float128)="_mm_cvttss_si64"
-	Function _mm_div_ps:Float128(a:Float128,b:Float128)="_mm_div_ps"
-	Function _mm_div_ss:Float128(a:Float128,b:Float128)="_mm_div_ss"
-	Function _MM_GET_EXCEPTION_MASK:UInt()="_MM_GET_EXCEPTION_MASK"
-	Function _MM_GET_EXCEPTION_STATE:UInt()="_MM_GET_EXCEPTION_STATE"
-	Function _MM_GET_FLUSH_ZERO_MODE:UInt()="_MM_GET_FLUSH_ZERO_MODE"
-	Function _MM_GET_ROUNDING_MODE:UInt()="_MM_GET_ROUNDING_MODE"
-	Function _mm_getcsr:UInt()="_mm_getcsr"
-	Function _mm_load_ps:Float128(mem_addr:Float Ptr)="_mm_load_ps"
-	Function _mm_load_ps1:Float128(mem_addr:Float Ptr)="_mm_load_ps1"
-	Function _mm_load_ss:Float128(mem_addr:Float Ptr)="_mm_load_ss"
-	Function _mm_load1_ps:Float128(mem_addr:Float Ptr)="_mm_load1_ps"
-	Function _mm_loadr_ps:Float128(mem_addr:Float Ptr)="_mm_loadr_ps"
-	Function _mm_loadu_ps:Float128(mem_addr:Float Ptr)="_mm_loadu_ps"
-	Function _mm_max_ps:Float128(a:Float128,b:Float128)="_mm_max_ps"
-	Function _mm_max_ss:Float128(a:Float128,b:Float128)="_mm_max_ss"
-	Function _mm_min_ps:Float128(a:Float128,b:Float128)="_mm_min_ps"
-	Function _mm_min_ss:Float128(a:Float128,b:Float128)="_mm_min_ss"
-	Function _mm_move_ss:Float128(a:Float128,b:Float128)="_mm_move_ss"
-	Function _mm_movehl_ps:Float128(a:Float128,b:Float128)="_mm_movehl_ps"
-	Function _mm_movelh_ps:Float128(a:Float128,b:Float128)="_mm_movelh_ps"
-	Function _mm_movemask_ps:Int(a:Float128)="_mm_movemask_ps"
-	Function _mm_mul_ps:Float128(a:Float128,b:Float128)="_mm_mul_ps"
-	Function _mm_mul_ss:Float128(a:Float128,b:Float128)="_mm_mul_ss"
-	Function _mm_or_ps:Float128(a:Float128,b:Float128)="_mm_or_ps"
-	Function _mm_prefetch(p:Byte Ptr,i:Int)="_mm_prefetch"
-	Function _mm_rcp_ps:Float128(a:Float128)="_mm_rcp_ps"
-	Function _mm_rcp_ss:Float128(a:Float128)="_mm_rcp_ss"
-	Function _mm_rsqrt_ps:Float128(a:Float128)="_mm_rsqrt_ps"
-	Function _mm_rsqrt_ss:Float128(a:Float128)="_mm_rsqrt_ss"
-	Function _MM_SET_EXCEPTION_MASK(a:UInt)="_MM_SET_EXCEPTION_MASK"
-	Function _MM_SET_EXCEPTION_STATE(a:UInt)="_MM_SET_EXCEPTION_STATE"
-	Function _MM_SET_FLUSH_ZERO_MODE(a:UInt)="_MM_SET_FLUSH_ZERO_MODE"
-	Function _mm_set_ps:Float128(e3:Float,e2:Float,e1:Float,e0:Float)="_mm_set_ps"
-	Function _mm_set_ps1:Float128(a:Float)="_mm_set_ps1"
-	Function _MM_SET_ROUNDING_MODE(a:UInt)="_MM_SET_ROUNDING_MODE"
-	Function _mm_set_ss:Float128(a:Float)="_mm_set_ss"
-	Function _mm_set1_ps:Float128(a:Float)="_mm_set1_ps"
-	Function _mm_setcsr(a:UInt)="_mm_setcsr"
-	Function _mm_setr_ps:Float128(e3:Float,e2:Float,e1:Float,e0:Float)="_mm_setr_ps"
-	Function _mm_setzero_ps:Float128()="_mm_setzero_ps"
-	Function _mm_sfence()="_mm_sfence"
-	Function _mm_sqrt_ps:Float128(a:Float128)="_mm_sqrt_ps"
-	Function _mm_sqrt_ss:Float128(a:Float128)="_mm_sqrt_ss"
-	Function _mm_store_ps(mem_addr:Float Ptr,a:Float128)="_mm_store_ps"
-	Function _mm_store_ps1(mem_addr:Float Ptr,a:Float128)="_mm_store_ps1"
-	Function _mm_store_ss(mem_addr:Float Ptr,a:Float128)="_mm_store_ss"
-	Function _mm_store1_ps(mem_addr:Float Ptr,a:Float128)="_mm_store1_ps"
-	Function _mm_storer_ps(mem_addr:Float Ptr,a:Float128)="_mm_storer_ps"
-	Function _mm_storeu_ps(mem_addr:Float Ptr,a:Float128)="_mm_storeu_ps"
-	Function _mm_stream_ps(mem_addr:Float Ptr,a:Float128)="_mm_stream_ps"
-	Function _mm_sub_ps:Float128(a:Float128,b:Float128)="_mm_sub_ps"
-	Function _mm_sub_ss:Float128(a:Float128,b:Float128)="_mm_sub_ss"
-	Function _mm_ucomieq_ss:Int(a:Float128,b:Float128)="_mm_ucomieq_ss"
-	Function _mm_ucomige_ss:Int(a:Float128,b:Float128)="_mm_ucomige_ss"
-	Function _mm_ucomigt_ss:Int(a:Float128,b:Float128)="_mm_ucomigt_ss"
-	Function _mm_ucomile_ss:Int(a:Float128,b:Float128)="_mm_ucomile_ss"
-	Function _mm_ucomilt_ss:Int(a:Float128,b:Float128)="_mm_ucomilt_ss"
-	Function _mm_ucomineq_ss:Int(a:Float128,b:Float128)="_mm_ucomineq_ss"
-	Function _mm_unpackhi_ps:Float128(a:Float128,b:Float128)="_mm_unpackhi_ps"
-	Function _mm_unpacklo_ps:Float128(a:Float128,b:Float128)="_mm_unpacklo_ps"
-	Function _mm_xor_ps:Float128(a:Float128,b:Float128)="_mm_xor_ps"
-EndExtern
+' Copyright (c) 2016 David JJ Camp
+' 
+' This software is provided 'as-is', without any express or implied
+' warranty. In no event will the authors be held liable for any damages
+' arising from the use of this software.
+'
+' Permission is granted to anyone to use this software for any purpose,
+' including commercial applications, and to alter it and redistribute it
+' freely, subject to the following restrictions:
+'
+' 1. The origin of this software must not be misrepresented; you must not
+'    claim that you wrote the original software. If you use this software
+'    in a product, an acknowledgement in the product documentation would be
+'    appreciated but is not required.
+' 2. Altered source versions must be plainly marked as such, and must not be
+'    misrepresented as being the original software.
+' 3. This notice may not be removed or altered from any source distribution.
+'
+SuperStrict
+Extern
+	Function _mm_add_ps:Float128(a:Float128,b:Float128)="_mm_add_ps"
+	Function _mm_add_ss:Float128(a:Float128,b:Float128)="_mm_add_ss"
+	Function _mm_and_ps:Float128(a:Float128,b:Float128)="_mm_and_ps"
+	Function _mm_andnot_ps:Float128(a:Float128,b:Float128)="_mm_andnot_ps"
+	Function _mm_avg_pu16:Float64(a:Float64,b:Float64)="_mm_avg_pu16"
+	Function _mm_avg_pu8:Float64(a:Float64,b:Float64)="_mm_avg_pu8"
+	Function _mm_cmpeq_ps:Float128(a:Float128,b:Float128)="_mm_cmpeq_ps"
+	Function _mm_cmpeq_ss:Float128(a:Float128,b:Float128)="_mm_cmpeq_ss"
+	Function _mm_cmpge_ps:Float128(a:Float128,b:Float128)="_mm_cmpge_ps"
+	Function _mm_cmpge_ss:Float128(a:Float128,b:Float128)="_mm_cmpge_ss"
+	Function _mm_cmpgt_ps:Float128(a:Float128,b:Float128)="_mm_cmpgt_ps"
+	Function _mm_cmpgt_ss:Float128(a:Float128,b:Float128)="_mm_cmpgt_ss"
+	Function _mm_cmple_ps:Float128(a:Float128,b:Float128)="_mm_cmple_ps"
+	Function _mm_cmple_ss:Float128(a:Float128,b:Float128)="_mm_cmple_ss"
+	Function _mm_cmplt_ps:Float128(a:Float128,b:Float128)="_mm_cmplt_ps"
+	Function _mm_cmplt_ss:Float128(a:Float128,b:Float128)="_mm_cmplt_ss"
+	Function _mm_cmpneq_ps:Float128(a:Float128,b:Float128)="_mm_cmpneq_ps"
+	Function _mm_cmpneq_ss:Float128(a:Float128,b:Float128)="_mm_cmpneq_ss"
+	Function _mm_cmpnge_ps:Float128(a:Float128,b:Float128)="_mm_cmpnge_ps"
+	Function _mm_cmpnge_ss:Float128(a:Float128,b:Float128)="_mm_cmpnge_ss"
+	Function _mm_cmpngt_ps:Float128(a:Float128,b:Float128)="_mm_cmpngt_ps"
+	Function _mm_cmpngt_ss:Float128(a:Float128,b:Float128)="_mm_cmpngt_ss"
+	Function _mm_cmpnle_ps:Float128(a:Float128,b:Float128)="_mm_cmpnle_ps"
+	Function _mm_cmpnle_ss:Float128(a:Float128,b:Float128)="_mm_cmpnle_ss"
+	Function _mm_cmpnlt_ps:Float128(a:Float128,b:Float128)="_mm_cmpnlt_ps"
+	Function _mm_cmpnlt_ss:Float128(a:Float128,b:Float128)="_mm_cmpnlt_ss"
+	Function _mm_cmpord_ps:Float128(a:Float128,b:Float128)="_mm_cmpord_ps"
+	Function _mm_cmpord_ss:Float128(a:Float128,b:Float128)="_mm_cmpord_ss"
+	Function _mm_cmpunord_ps:Float128(a:Float128,b:Float128)="_mm_cmpunord_ps"
+	Function _mm_cmpunord_ss:Float128(a:Float128,b:Float128)="_mm_cmpunord_ss"
+	Function _mm_comieq_ss:Int(a:Float128,b:Float128)="_mm_comieq_ss"
+	Function _mm_comige_ss:Int(a:Float128,b:Float128)="_mm_comige_ss"
+	Function _mm_comigt_ss:Int(a:Float128,b:Float128)="_mm_comigt_ss"
+	Function _mm_comile_ss:Int(a:Float128,b:Float128)="_mm_comile_ss"
+	Function _mm_comilt_ss:Int(a:Float128,b:Float128)="_mm_comilt_ss"
+	Function _mm_comineq_ss:Int(a:Float128,b:Float128)="_mm_comineq_ss"
+	Function _mm_cvt_pi2ps:Float128(a:Float128,b:Float64)="_mm_cvt_pi2ps"
+	Function _mm_cvt_ps2pi:Float64(a:Float128)="_mm_cvt_ps2pi"
+	Function _mm_cvt_si2ss:Float128(a:Float128,b:Int)="_mm_cvt_si2ss"
+	Function _mm_cvt_ss2si:Int(a:Float128)="_mm_cvt_ss2si"
+	Function _mm_cvtpi16_ps:Float128(a:Float64)="_mm_cvtpi16_ps"
+	Function _mm_cvtpi32_ps:Float128(a:Float128,b:Float64)="_mm_cvtpi32_ps"
+	Function _mm_cvtpi32x2_ps:Float128(a:Float64,b:Float64)="_mm_cvtpi32x2_ps"
+	Function _mm_cvtpi8_ps:Float128(a:Float64)="_mm_cvtpi8_ps"
+	Function _mm_cvtps_pi16:Float64(a:Float128)="_mm_cvtps_pi16"
+	Function _mm_cvtps_pi32:Float64(a:Float128)="_mm_cvtps_pi32"
+	Function _mm_cvtps_pi8:Float64(a:Float128)="_mm_cvtps_pi8"
+	Function _mm_cvtpu16_ps:Float128(a:Float64)="_mm_cvtpu16_ps"
+	Function _mm_cvtpu8_ps:Float128(a:Float64)="_mm_cvtpu8_ps"
+	Function _mm_cvtsi32_ss:Float128(a:Float128,b:Int)="_mm_cvtsi32_ss"
+	Function _mm_cvtsi64_ss:Float128(a:Float128,b:Long)="_mm_cvtsi64_ss"
+	Function _mm_cvtss_f32:Float(a:Float128)="_mm_cvtss_f32"
+	Function _mm_cvtss_si32:Int(a:Float128)="_mm_cvtss_si32"
+	Function _mm_cvtss_si64:Long(a:Float128)="_mm_cvtss_si64"
+	Function _mm_cvtt_ps2pi:Float64(a:Float128)="_mm_cvtt_ps2pi"
+	Function _mm_cvtt_ss2si:Int(a:Float128)="_mm_cvtt_ss2si"
+	Function _mm_cvttps_pi32:Float64(a:Float128)="_mm_cvttps_pi32"
+	Function _mm_cvttss_si32:Int(a:Float128)="_mm_cvttss_si32"
+	Function _mm_cvttss_si64:Long(a:Float128)="_mm_cvttss_si64"
+	Function _mm_div_ps:Float128(a:Float128,b:Float128)="_mm_div_ps"
+	Function _mm_div_ss:Float128(a:Float128,b:Float128)="_mm_div_ss"
+	Function _mm_extract_pi16:Int(a:Float64,imm8:Int)="_mm_extract_pi16"
+	Function _MM_GET_EXCEPTION_MASK:UInt()="_MM_GET_EXCEPTION_MASK"
+	Function _MM_GET_EXCEPTION_STATE:UInt()="_MM_GET_EXCEPTION_STATE"
+	Function _MM_GET_FLUSH_ZERO_MODE:UInt()="_MM_GET_FLUSH_ZERO_MODE"
+	Function _MM_GET_ROUNDING_MODE:UInt()="_MM_GET_ROUNDING_MODE"
+	Function _mm_getcsr:UInt()="_mm_getcsr"
+	Function _mm_insert_pi16:Float64(a:Float64,i:Int,imm8:Int)="_mm_insert_pi16"
+	Function _mm_load_ps:Float128(mem_addr:Float Ptr)="_mm_load_ps"
+	Function _mm_load_ps1:Float128(mem_addr:Float Ptr)="_mm_load_ps1"
+	Function _mm_load_ss:Float128(mem_addr:Float Ptr)="_mm_load_ss"
+	Function _mm_load1_ps:Float128(mem_addr:Float Ptr)="_mm_load1_ps"
+	Function _mm_loadh_pi:Float128(a:Float128,mem_addr:Float64 Ptr)="_mm_loadh_pi"
+	Function _mm_loadl_pi:Float128(a:Float128,mem_addr:Float64 Ptr)="_mm_loadl_pi"
+	Function _mm_loadr_ps:Float128(mem_addr:Float Ptr)="_mm_loadr_ps"
+	Function _mm_loadu_ps:Float128(mem_addr:Float Ptr)="_mm_loadu_ps"
+	Function _mm_maskmove_si64(a:Float64,mask:Float64,mem_addr:Byte Ptr)="_mm_maskmove_si64"
+	Function _m_maskmovq(a:Float64,mask:Float64,mem_addr:Byte Ptr)="_m_maskmovq"
+	Function _mm_max_pi16:Float64(a:Float64,b:Float64)="_mm_max_pi16"
+	Function _mm_max_ps:Float128(a:Float128,b:Float128)="_mm_max_ps"
+	Function _mm_max_pu8:Float64(a:Float64,b:Float64)="_mm_max_pu8"
+	Function _mm_max_ss:Float128(a:Float128,b:Float128)="_mm_max_ss"
+	Function _mm_min_pi16:Float64(a:Float64,b:Float64)="_mm_min_pi16"
+	Function _mm_min_ps:Float128(a:Float128,b:Float128)="_mm_min_ps"
+	Function _mm_min_pu8:Float64(a:Float64,b:Float64)="_mm_min_pu8"
+	Function _mm_min_ss:Float128(a:Float128,b:Float128)="_mm_min_ss"
+	Function _mm_move_ss:Float128(a:Float128,b:Float128)="_mm_move_ss"
+	Function _mm_movehl_ps:Float128(a:Float128,b:Float128)="_mm_movehl_ps"
+	Function _mm_movelh_ps:Float128(a:Float128,b:Float128)="_mm_movelh_ps"
+	Function _mm_movemask_pi8:Int(a:Float64)="_mm_movemask_pi8"
+	Function _mm_movemask_ps:Int(a:Float128)="_mm_movemask_ps"
+	Function _mm_mul_ps:Float128(a:Float128,b:Float128)="_mm_mul_ps"
+	Function _mm_mul_ss:Float128(a:Float128,b:Float128)="_mm_mul_ss"
+	Function _mm_mulhi_pu16:Float64(a:Float64,b:Float64)="_mm_mulhi_pu16"
+	Function _mm_or_ps:Float128(a:Float128,b:Float128)="_mm_or_ps"
+	Function _m_pavgb:Float64(a:Float64,b:Float64)="_m_pavgb"
+	Function _m_pavgw:Float64(a:Float64,b:Float64)="_m_pavgw"
+	Function _m_pextrw:Int(a:Float64,imm8:Int)="_m_pextrw"
+	Function _m_pinsrw:Float64(a:Float64,i:Int,imm8:Int)="_m_pinsrw"
+	Function _m_pmaxsw:Float64(a:Float64,b:Float64)="_m_pmaxsw"
+	Function _m_pmaxub:Float64(a:Float64,b:Float64)="_m_pmaxub"
+	Function _m_pminsw:Float64(a:Float64,b:Float64)="_m_pminsw"
+	Function _m_pminub:Float64(a:Float64,b:Float64)="_m_pminub"
+	Function _m_pmovmskb:Int(a:Float64)="_m_pmovmskb"
+	Function _m_pmulhuw:Float64(a:Float64,b:Float64)="_m_pmulhuw"
+	Function _mm_prefetch(p:Byte Ptr,i:Int)="_mm_prefetch"
+	Function _m_psadbw:Float64(a:Float64,b:Float64)="_m_psadbw"
+	Function _m_pshufw:Float64(a:Float64,imm8:Int)="_m_pshufw"
+	Function _mm_rcp_ps:Float128(a:Float128)="_mm_rcp_ps"
+	Function _mm_rcp_ss:Float128(a:Float128)="_mm_rcp_ss"
+	Function _mm_rsqrt_ps:Float128(a:Float128)="_mm_rsqrt_ps"
+	Function _mm_rsqrt_ss:Float128(a:Float128)="_mm_rsqrt_ss"
+	Function _mm_sad_pu8:Float64(a:Float64,b:Float64)="_mm_sad_pu8"
+	Function _MM_SET_EXCEPTION_MASK(a:UInt)="_MM_SET_EXCEPTION_MASK"
+	Function _MM_SET_EXCEPTION_STATE(a:UInt)="_MM_SET_EXCEPTION_STATE"
+	Function _MM_SET_FLUSH_ZERO_MODE(a:UInt)="_MM_SET_FLUSH_ZERO_MODE"
+	Function _mm_set_ps:Float128(e3:Float,e2:Float,e1:Float,e0:Float)="_mm_set_ps"
+	Function _mm_set_ps1:Float128(a:Float)="_mm_set_ps1"
+	Function _MM_SET_ROUNDING_MODE(a:UInt)="_MM_SET_ROUNDING_MODE"
+	Function _mm_set_ss:Float128(a:Float)="_mm_set_ss"
+	Function _mm_set1_ps:Float128(a:Float)="_mm_set1_ps"
+	Function _mm_setcsr(a:UInt)="_mm_setcsr"
+	Function _mm_setr_ps:Float128(e3:Float,e2:Float,e1:Float,e0:Float)="_mm_setr_ps"
+	Function _mm_setzero_ps:Float128()="_mm_setzero_ps"
+	Function _mm_sfence()="_mm_sfence"
+	Function _mm_shuffle_pi16:Float64(a:Float64,imm8:Int)="_mm_shuffle_pi16"
+	Function _mm_shuffle_ps:Float128(a:Float128,b:Float128,imm8:UInt)="_mm_shuffle_ps"
+	Function _mm_sqrt_ps:Float128(a:Float128)="_mm_sqrt_ps"
+	Function _mm_sqrt_ss:Float128(a:Float128)="_mm_sqrt_ss"
+	Function _mm_store_ps(mem_addr:Float Ptr,a:Float128)="_mm_store_ps"
+	Function _mm_store_ps1(mem_addr:Float Ptr,a:Float128)="_mm_store_ps1"
+	Function _mm_store_ss(mem_addr:Float Ptr,a:Float128)="_mm_store_ss"
+	Function _mm_store1_ps(mem_addr:Float Ptr,a:Float128)="_mm_store1_ps"
+	Function _mm_storeh_pi(mem_addr:Float64 Ptr,a:Float128)="_mm_storeh_pi"
+	Function _mm_storel_pi(mem_addr:Float64 Ptr,a:Float128)="_mm_storel_pi"
+	Function _mm_storer_ps(mem_addr:Float Ptr,a:Float128)="_mm_storer_ps"
+	Function _mm_storeu_ps(mem_addr:Float Ptr,a:Float128)="_mm_storeu_ps"
+	Function _mm_stream_pi(mem_addr:Float64 Ptr,a:Float64)="_mm_stream_pi"
+	Function _mm_stream_ps(mem_addr:Float Ptr,a:Float128)="_mm_stream_ps"
+	Function _mm_sub_ps:Float128(a:Float128,b:Float128)="_mm_sub_ps"
+	Function _mm_sub_ss:Float128(a:Float128,b:Float128)="_mm_sub_ss"
+	Function _mm_ucomieq_ss:Int(a:Float128,b:Float128)="_mm_ucomieq_ss"
+	Function _mm_ucomige_ss:Int(a:Float128,b:Float128)="_mm_ucomige_ss"
+	Function _mm_ucomigt_ss:Int(a:Float128,b:Float128)="_mm_ucomigt_ss"
+	Function _mm_ucomile_ss:Int(a:Float128,b:Float128)="_mm_ucomile_ss"
+	Function _mm_ucomilt_ss:Int(a:Float128,b:Float128)="_mm_ucomilt_ss"
+	Function _mm_ucomineq_ss:Int(a:Float128,b:Float128)="_mm_ucomineq_ss"
+	Function _mm_unpackhi_ps:Float128(a:Float128,b:Float128)="_mm_unpackhi_ps"
+	Function _mm_unpacklo_ps:Float128(a:Float128,b:Float128)="_mm_unpacklo_ps"
+	Function _mm_xor_ps:Float128(a:Float128,b:Float128)="_mm_xor_ps"
+EndExtern
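
A minimal usage sketch for the declarations above, assuming the module builds and that a plain file import works (the real module/import path may differ):

SuperStrict
Framework BRL.StandardIO
Import "sse.bmx"	' assumed import; adjust to the actual module path

Local a:Float[4]
Local b:Float[4]
Local r:Float[4]
For Local i:Int = 0 Until 4
	a[i] = i + 1		' 1, 2, 3, 4
	b[i] = 10 * (i + 1)	' 10, 20, 30, 40
Next

' Unaligned loads, packed single-precision add, unaligned store.
Local va:Float128 = _mm_loadu_ps(Varptr a[0])
Local vb:Float128 = _mm_loadu_ps(Varptr b[0])
_mm_storeu_ps(Varptr r[0], _mm_add_ps(va, vb))

For Local i:Int = 0 Until 4
	Print r[i]	' expected 11, 22, 33, 44
Next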

xmmintrin.mod/sse.x  (+150 -21)

@@ -1,21 +1,150 @@
-__m128 _mm_load_ps(float* )!
-__m128 _mm_load_ps1(float* )!
-__m128 _mm_load_ss(float* )!
-__m128 _mm_load1_ps(float* )!
-__m128 _mm_loadh_pi(__m128 ,__m64* )!
-__m128 _mm_loadl_pi(__m128 ,__m64* )!
-__m128 _mm_loadr_ps(float* )!
-__m128 _mm_loadu_ps(float* )!
-void _mm_maskmove_si64(__m64 ,__m64 ,char* )!
-void _m_maskmovq(__m64 ,__m64 ,char* )!
-void _mm_prefetch(char* ,int )!
-void _mm_store_ps(float* ,__m128 )!
-void _mm_store_ps1(float* ,__m128 )!
-void _mm_store_ss(float* ,__m128 )!
-void _mm_store1_ps(float* ,__m128 )!
-void _mm_storeh_pi(__m64* ,__m128 )!
-void _mm_storel_pi(__m64* ,__m128 )!
-void _mm_storer_ps(float* ,__m128 )!
-void _mm_storeu_ps(float* ,__m128 )!
-void _mm_stream_pi(__m64* ,__m64 )!
-void _mm_stream_ps(float* ,__m128 )!
+__m128 _mm_add_ps(__m128 ,__m128 )!
+__m128 _mm_add_ss(__m128 ,__m128 )!
+__m128 _mm_and_ps(__m128 ,__m128 )!
+__m128 _mm_andnot_ps(__m128 ,__m128 )!
+__m64 _mm_avg_pu16(__m64 ,__m64 )!
+__m64 _mm_avg_pu8(__m64 ,__m64 )!
+__m128 _mm_cmpeq_ps(__m128 ,__m128 )!
+__m128 _mm_cmpeq_ss(__m128 ,__m128 )!
+__m128 _mm_cmpge_ps(__m128 ,__m128 )!
+__m128 _mm_cmpge_ss(__m128 ,__m128 )!
+__m128 _mm_cmpgt_ps(__m128 ,__m128 )!
+__m128 _mm_cmpgt_ss(__m128 ,__m128 )!
+__m128 _mm_cmple_ps(__m128 ,__m128 )!
+__m128 _mm_cmple_ss(__m128 ,__m128 )!
+__m128 _mm_cmplt_ps(__m128 ,__m128 )!
+__m128 _mm_cmplt_ss(__m128 ,__m128 )!
+__m128 _mm_cmpneq_ps(__m128 ,__m128 )!
+__m128 _mm_cmpneq_ss(__m128 ,__m128 )!
+__m128 _mm_cmpnge_ps(__m128 ,__m128 )!
+__m128 _mm_cmpnge_ss(__m128 ,__m128 )!
+__m128 _mm_cmpngt_ps(__m128 ,__m128 )!
+__m128 _mm_cmpngt_ss(__m128 ,__m128 )!
+__m128 _mm_cmpnle_ps(__m128 ,__m128 )!
+__m128 _mm_cmpnle_ss(__m128 ,__m128 )!
+__m128 _mm_cmpnlt_ps(__m128 ,__m128 )!
+__m128 _mm_cmpnlt_ss(__m128 ,__m128 )!
+__m128 _mm_cmpord_ps(__m128 ,__m128 )!
+__m128 _mm_cmpord_ss(__m128 ,__m128 )!
+__m128 _mm_cmpunord_ps(__m128 ,__m128 )!
+__m128 _mm_cmpunord_ss(__m128 ,__m128 )!
+int _mm_comieq_ss(__m128 ,__m128 )!
+int _mm_comige_ss(__m128 ,__m128 )!
+int _mm_comigt_ss(__m128 ,__m128 )!
+int _mm_comile_ss(__m128 ,__m128 )!
+int _mm_comilt_ss(__m128 ,__m128 )!
+int _mm_comineq_ss(__m128 ,__m128 )!
+__m128 _mm_cvt_pi2ps(__m128 ,__m64 )!
+__m64 _mm_cvt_ps2pi(__m128 )!
+__m128 _mm_cvt_si2ss(__m128 ,int )!
+int _mm_cvt_ss2si(__m128 )!
+__m128 _mm_cvtpi16_ps(__m64 )!
+__m128 _mm_cvtpi32_ps(__m128 ,__m64 )!
+__m128 _mm_cvtpi32x2_ps(__m64 ,__m64 )!
+__m128 _mm_cvtpi8_ps(__m64 )!
+__m64 _mm_cvtps_pi16(__m128 )!
+__m64 _mm_cvtps_pi32(__m128 )!
+__m64 _mm_cvtps_pi8(__m128 )!
+__m128 _mm_cvtpu16_ps(__m64 )!
+__m128 _mm_cvtpu8_ps(__m64 )!
+__m128 _mm_cvtsi32_ss(__m128 ,int )!
+__m128 _mm_cvtsi64_ss(__m128 ,__int64 )!
+float _mm_cvtss_f32(__m128 )!
+int _mm_cvtss_si32(__m128 )!
+__int64 _mm_cvtss_si64(__m128 )!
+__m64 _mm_cvtt_ps2pi(__m128 )!
+int _mm_cvtt_ss2si(__m128 )!
+__m64 _mm_cvttps_pi32(__m128 )!
+int _mm_cvttss_si32(__m128 )!
+__int64 _mm_cvttss_si64(__m128 )!
+__m128 _mm_div_ps(__m128 ,__m128 )!
+__m128 _mm_div_ss(__m128 ,__m128 )!
+int _mm_extract_pi16(__m64 ,int )!
+unsigned int _MM_GET_EXCEPTION_MASK()!
+unsigned int _MM_GET_EXCEPTION_STATE()!
+unsigned int _MM_GET_FLUSH_ZERO_MODE()!
+unsigned int _MM_GET_ROUNDING_MODE()!
+unsigned int _mm_getcsr(void )!
+__m64 _mm_insert_pi16(__m64 ,int ,int )!
+__m128 _mm_load_ps(float* )!
+__m128 _mm_load_ps1(float* )!
+__m128 _mm_load_ss(float* )!
+__m128 _mm_load1_ps(float* )!
+__m128 _mm_loadh_pi(__m128 ,__m64* )!
+__m128 _mm_loadl_pi(__m128 ,__m64* )!
+__m128 _mm_loadr_ps(float* )!
+__m128 _mm_loadu_ps(float* )!
+void _mm_maskmove_si64(__m64 ,__m64 ,char* )!
+void _m_maskmovq(__m64 ,__m64 ,char* )!
+__m64 _mm_max_pi16(__m64 ,__m64 )!
+__m128 _mm_max_ps(__m128 ,__m128 )!
+__m64 _mm_max_pu8(__m64 ,__m64 )!
+__m128 _mm_max_ss(__m128 ,__m128 )!
+__m64 _mm_min_pi16(__m64 ,__m64 )!
+__m128 _mm_min_ps(__m128 ,__m128 )!
+__m64 _mm_min_pu8(__m64 ,__m64 )!
+__m128 _mm_min_ss(__m128 ,__m128 )!
+__m128 _mm_move_ss(__m128 ,__m128 )!
+__m128 _mm_movehl_ps(__m128 ,__m128 )!
+__m128 _mm_movelh_ps(__m128 ,__m128 )!
+int _mm_movemask_pi8(__m64 )!
+int _mm_movemask_ps(__m128 )!
+__m128 _mm_mul_ps(__m128 ,__m128 )!
+__m128 _mm_mul_ss(__m128 ,__m128 )!
+__m64 _mm_mulhi_pu16(__m64 ,__m64 )!
+__m128 _mm_or_ps(__m128 ,__m128 )!
+__m64 _m_pavgb(__m64 ,__m64 )!
+__m64 _m_pavgw(__m64 ,__m64 )!
+int _m_pextrw(__m64 ,int )!
+__m64 _m_pinsrw(__m64 ,int ,int )!
+__m64 _m_pmaxsw(__m64 ,__m64 )!
+__m64 _m_pmaxub(__m64 ,__m64 )!
+__m64 _m_pminsw(__m64 ,__m64 )!
+__m64 _m_pminub(__m64 ,__m64 )!
+int _m_pmovmskb(__m64 )!
+__m64 _m_pmulhuw(__m64 ,__m64 )!
+void _mm_prefetch(char* ,int )!
+__m64 _m_psadbw(__m64 ,__m64 )!
+__m64 _m_pshufw(__m64 ,int )!
+__m128 _mm_rcp_ps(__m128 )!
+__m128 _mm_rcp_ss(__m128 )!
+__m128 _mm_rsqrt_ps(__m128 )!
+__m128 _mm_rsqrt_ss(__m128 )!
+__m64 _mm_sad_pu8(__m64 ,__m64 )!
+void _MM_SET_EXCEPTION_MASK(unsigned int )!
+void _MM_SET_EXCEPTION_STATE(unsigned int )!
+void _MM_SET_FLUSH_ZERO_MODE(unsigned int )!
+__m128 _mm_set_ps(float ,float ,float ,float )!
+__m128 _mm_set_ps1(float )!
+void _MM_SET_ROUNDING_MODE(unsigned int )!
+__m128 _mm_set_ss(float )!
+__m128 _mm_set1_ps(float )!
+void _mm_setcsr(unsigned int )!
+__m128 _mm_setr_ps(float ,float ,float ,float )!
+__m128 _mm_setzero_ps(void )!
+void _mm_sfence(void )!
+__m64 _mm_shuffle_pi16(__m64 ,int )!
+__m128 _mm_shuffle_ps(__m128 ,__m128 ,unsigned int )!
+__m128 _mm_sqrt_ps(__m128 )!
+__m128 _mm_sqrt_ss(__m128 )!
+void _mm_store_ps(float* ,__m128 )!
+void _mm_store_ps1(float* ,__m128 )!
+void _mm_store_ss(float* ,__m128 )!
+void _mm_store1_ps(float* ,__m128 )!
+void _mm_storeh_pi(__m64* ,__m128 )!
+void _mm_storel_pi(__m64* ,__m128 )!
+void _mm_storer_ps(float* ,__m128 )!
+void _mm_storeu_ps(float* ,__m128 )!
+void _mm_stream_pi(__m64* ,__m64 )!
+void _mm_stream_ps(float* ,__m128 )!
+__m128 _mm_sub_ps(__m128 ,__m128 )!
+__m128 _mm_sub_ss(__m128 ,__m128 )!
+int _mm_ucomieq_ss(__m128 ,__m128 )!
+int _mm_ucomige_ss(__m128 ,__m128 )!
+int _mm_ucomigt_ss(__m128 ,__m128 )!
+int _mm_ucomile_ss(__m128 ,__m128 )!
+int _mm_ucomilt_ss(__m128 ,__m128 )!
+int _mm_ucomineq_ss(__m128 ,__m128 )!
+__m128 _mm_unpackhi_ps(__m128 ,__m128 )!
+__m128 _mm_unpacklo_ps(__m128 ,__m128 )!
+__m128 _mm_xor_ps(__m128 ,__m128 )!
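
Presumably the .x file above is what the new module code generator consumes: each C prototype becomes one BlitzMax extern in the matching .bmx file, with __m128, __m128d, __m128i and __m64 mapped to Float128, Double128, Int128 and Float64, and pointer parameters mapped to the corresponding Ptr types. For example, pairing a line from this listing with its counterpart in sse.bmx:

' sse.x prototype:   __m128 _mm_add_ps(__m128 ,__m128 )!
' generated extern:  Function _mm_add_ps:Float128(a:Float128,b:Float128)="_mm_add_ps"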

xmmintrin.mod/sse2.bmx  (+251 -113)

@@ -1,113 +1,251 @@
-SuperStrict
-Extern
-	Function _mm_add_epi16:Int128(a:Int128,b:Int128)="_mm_add_epi16"
-	Function _mm_add_epi32:Int128(a:Int128,b:Int128)="_mm_add_epi32"
-	Function _mm_add_epi64:Int128(a:Int128,b:Int128)="_mm_add_epi64"
-	Function _mm_add_epi8:Int128(a:Int128,b:Int128)="_mm_add_epi8"
-	Function _mm_adds_epi16:Int128(a:Int128,b:Int128)="_mm_adds_epi16"
-	Function _mm_adds_epi8:Int128(a:Int128,b:Int128)="_mm_adds_epi8"
-	Function _mm_adds_epu16:Int128(a:Int128,b:Int128)="_mm_adds_epu16"
-	Function _mm_adds_epu8:Int128(a:Int128,b:Int128)="_mm_adds_epu8"
-	Function _mm_and_si128:Int128(a:Int128,b:Int128)="_mm_and_si128"
-	Function _mm_andnot_si128:Int128(a:Int128,b:Int128)="_mm_andnot_si128"
-	Function _mm_avg_epu16:Int128(a:Int128,b:Int128)="_mm_avg_epu16"
-	Function _mm_avg_epu8:Int128(a:Int128,b:Int128)="_mm_avg_epu8"
-	Function _mm_bslli_si128:Int128(a:Int128,imm8:Int)="_mm_bslli_si128"
-	Function _mm_bsrli_si128:Int128(a:Int128,imm8:Int)="_mm_bsrli_si128"
-	Function _mm_castps_si128:Int128(a:Float128)="_mm_castps_si128"
-	Function _mm_castsi128_ps:Float128(a:Int128)="_mm_castsi128_ps"
-	Function _mm_clflush(p:Byte Ptr)="_mm_clflush"
-	Function _mm_cmpeq_epi16:Int128(a:Int128,b:Int128)="_mm_cmpeq_epi16"
-	Function _mm_cmpeq_epi32:Int128(a:Int128,b:Int128)="_mm_cmpeq_epi32"
-	Function _mm_cmpeq_epi8:Int128(a:Int128,b:Int128)="_mm_cmpeq_epi8"
-	Function _mm_cmpgt_epi16:Int128(a:Int128,b:Int128)="_mm_cmpgt_epi16"
-	Function _mm_cmpgt_epi32:Int128(a:Int128,b:Int128)="_mm_cmpgt_epi32"
-	Function _mm_cmpgt_epi8:Int128(a:Int128,b:Int128)="_mm_cmpgt_epi8"
-	Function _mm_cmplt_epi16:Int128(a:Int128,b:Int128)="_mm_cmplt_epi16"
-	Function _mm_cmplt_epi32:Int128(a:Int128,b:Int128)="_mm_cmplt_epi32"
-	Function _mm_cmplt_epi8:Int128(a:Int128,b:Int128)="_mm_cmplt_epi8"
-	Function _mm_cvtepi32_ps:Float128(a:Int128)="_mm_cvtepi32_ps"
-	Function _mm_cvtps_epi32:Int128(a:Float128)="_mm_cvtps_epi32"
-	Function _mm_cvtsi128_si32:Int(a:Int128)="_mm_cvtsi128_si32"
-	Function _mm_cvtsi128_si64:Long(a:Int128)="_mm_cvtsi128_si64"
-	Function _mm_cvtsi128_si64x:Long(a:Int128)="_mm_cvtsi128_si64x"
-	Function _mm_cvtsi32_si128:Int128(a:Int)="_mm_cvtsi32_si128"
-	Function _mm_cvtsi64_si128:Int128(a:Long)="_mm_cvtsi64_si128"
-	Function _mm_cvtsi64x_si128:Int128(a:Long)="_mm_cvtsi64x_si128"
-	Function _mm_cvttps_epi32:Int128(a:Float128)="_mm_cvttps_epi32"
-	Function _mm_extract_epi16:Int(a:Int128,imm8:Int)="_mm_extract_epi16"
-	Function _mm_insert_epi16:Int128(a:Int128,i:Int,imm8:Int)="_mm_insert_epi16"
-	Function _mm_lfence()="_mm_lfence"
-	Function _mm_load_si128:Int128(mem_addr:Int128 Ptr)="_mm_load_si128"
-	Function _mm_loadl_epi64:Int128(mem_addr:Int128 Ptr)="_mm_loadl_epi64"
-	Function _mm_loadu_si128:Int128(mem_addr:Int128 Ptr)="_mm_loadu_si128"
-	Function _mm_madd_epi16:Int128(a:Int128,b:Int128)="_mm_madd_epi16"
-	Function _mm_maskmoveu_si128(a:Int128,mask:Int128,mem_addr:Byte Ptr)="_mm_maskmoveu_si128"
-	Function _mm_max_epi16:Int128(a:Int128,b:Int128)="_mm_max_epi16"
-	Function _mm_max_epu8:Int128(a:Int128,b:Int128)="_mm_max_epu8"
-	Function _mm_mfence()="_mm_mfence"
-	Function _mm_min_epi16:Int128(a:Int128,b:Int128)="_mm_min_epi16"
-	Function _mm_min_epu8:Int128(a:Int128,b:Int128)="_mm_min_epu8"
-	Function _mm_move_epi64:Int128(a:Int128)="_mm_move_epi64"
-	Function _mm_movemask_epi8:Int(a:Int128)="_mm_movemask_epi8"
-	Function _mm_mul_epu32:Int128(a:Int128,b:Int128)="_mm_mul_epu32"
-	Function _mm_mulhi_epi16:Int128(a:Int128,b:Int128)="_mm_mulhi_epi16"
-	Function _mm_mulhi_epu16:Int128(a:Int128,b:Int128)="_mm_mulhi_epu16"
-	Function _mm_mullo_epi16:Int128(a:Int128,b:Int128)="_mm_mullo_epi16"
-	Function _mm_or_si128:Int128(a:Int128,b:Int128)="_mm_or_si128"
-	Function _mm_packs_epi16:Int128(a:Int128,b:Int128)="_mm_packs_epi16"
-	Function _mm_packs_epi32:Int128(a:Int128,b:Int128)="_mm_packs_epi32"
-	Function _mm_packus_epi16:Int128(a:Int128,b:Int128)="_mm_packus_epi16"
-	Function _mm_pause()="_mm_pause"
-	Function _mm_sad_epu8:Int128(a:Int128,b:Int128)="_mm_sad_epu8"
-	Function _mm_set_epi32:Int128(e3:Int,e2:Int,e1:Int,e0:Int)="_mm_set_epi32"
-	Function _mm_set_epi64x:Int128(e1:Long,e0:Long)="_mm_set_epi64x"
-	Function _mm_set1_epi32:Int128(a:Int)="_mm_set1_epi32"
-	Function _mm_set1_epi64x:Int128(a:Long)="_mm_set1_epi64x"
-	Function _mm_setr_epi32:Int128(e3:Int,e2:Int,e1:Int,e0:Int)="_mm_setr_epi32"
-	Function _mm_setzero_si128:Int128()="_mm_setzero_si128"
-	Function _mm_shuffle_epi32:Int128(a:Int128,imm8:Int)="_mm_shuffle_epi32"
-	Function _mm_shufflehi_epi16:Int128(a:Int128,imm8:Int)="_mm_shufflehi_epi16"
-	Function _mm_shufflelo_epi16:Int128(a:Int128,imm8:Int)="_mm_shufflelo_epi16"
-	Function _mm_sll_epi16:Int128(a:Int128,count:Int128)="_mm_sll_epi16"
-	Function _mm_sll_epi32:Int128(a:Int128,count:Int128)="_mm_sll_epi32"
-	Function _mm_sll_epi64:Int128(a:Int128,count:Int128)="_mm_sll_epi64"
-	Function _mm_slli_epi16:Int128(a:Int128,imm8:Int)="_mm_slli_epi16"
-	Function _mm_slli_epi32:Int128(a:Int128,imm8:Int)="_mm_slli_epi32"
-	Function _mm_slli_epi64:Int128(a:Int128,imm8:Int)="_mm_slli_epi64"
-	Function _mm_slli_si128:Int128(a:Int128,imm8:Int)="_mm_slli_si128"
-	Function _mm_sra_epi16:Int128(a:Int128,count:Int128)="_mm_sra_epi16"
-	Function _mm_sra_epi32:Int128(a:Int128,count:Int128)="_mm_sra_epi32"
-	Function _mm_srai_epi16:Int128(a:Int128,imm8:Int)="_mm_srai_epi16"
-	Function _mm_srai_epi32:Int128(a:Int128,imm8:Int)="_mm_srai_epi32"
-	Function _mm_srl_epi16:Int128(a:Int128,count:Int128)="_mm_srl_epi16"
-	Function _mm_srl_epi32:Int128(a:Int128,count:Int128)="_mm_srl_epi32"
-	Function _mm_srl_epi64:Int128(a:Int128,count:Int128)="_mm_srl_epi64"
-	Function _mm_srli_epi16:Int128(a:Int128,imm8:Int)="_mm_srli_epi16"
-	Function _mm_srli_epi32:Int128(a:Int128,imm8:Int)="_mm_srli_epi32"
-	Function _mm_srli_epi64:Int128(a:Int128,imm8:Int)="_mm_srli_epi64"
-	Function _mm_srli_si128:Int128(a:Int128,imm8:Int)="_mm_srli_si128"
-	Function _mm_store_si128(mem_addr:Int128 Ptr,a:Int128)="_mm_store_si128"
-	Function _mm_storel_epi64(mem_addr:Int128 Ptr,a:Int128)="_mm_storel_epi64"
-	Function _mm_storeu_si128(mem_addr:Int128 Ptr,a:Int128)="_mm_storeu_si128"
-	Function _mm_stream_si128(mem_addr:Int128 Ptr,a:Int128)="_mm_stream_si128"
-	Function _mm_stream_si32(mem_addr:Int Ptr,a:Int)="_mm_stream_si32"
-	Function _mm_stream_si64(mem_addr:Long Ptr,a:Long)="_mm_stream_si64"
-	Function _mm_sub_epi16:Int128(a:Int128,b:Int128)="_mm_sub_epi16"
-	Function _mm_sub_epi32:Int128(a:Int128,b:Int128)="_mm_sub_epi32"
-	Function _mm_sub_epi64:Int128(a:Int128,b:Int128)="_mm_sub_epi64"
-	Function _mm_sub_epi8:Int128(a:Int128,b:Int128)="_mm_sub_epi8"
-	Function _mm_subs_epi16:Int128(a:Int128,b:Int128)="_mm_subs_epi16"
-	Function _mm_subs_epi8:Int128(a:Int128,b:Int128)="_mm_subs_epi8"
-	Function _mm_subs_epu16:Int128(a:Int128,b:Int128)="_mm_subs_epu16"
-	Function _mm_subs_epu8:Int128(a:Int128,b:Int128)="_mm_subs_epu8"
-	Function _mm_unpackhi_epi16:Int128(a:Int128,b:Int128)="_mm_unpackhi_epi16"
-	Function _mm_unpackhi_epi32:Int128(a:Int128,b:Int128)="_mm_unpackhi_epi32"
-	Function _mm_unpackhi_epi64:Int128(a:Int128,b:Int128)="_mm_unpackhi_epi64"
-	Function _mm_unpackhi_epi8:Int128(a:Int128,b:Int128)="_mm_unpackhi_epi8"
-	Function _mm_unpacklo_epi16:Int128(a:Int128,b:Int128)="_mm_unpacklo_epi16"
-	Function _mm_unpacklo_epi32:Int128(a:Int128,b:Int128)="_mm_unpacklo_epi32"
-	Function _mm_unpacklo_epi64:Int128(a:Int128,b:Int128)="_mm_unpacklo_epi64"
-	Function _mm_unpacklo_epi8:Int128(a:Int128,b:Int128)="_mm_unpacklo_epi8"
-	Function _mm_xor_si128:Int128(a:Int128,b:Int128)="_mm_xor_si128"
-EndExtern
+' Copyright (c) 2016 David JJ Camp
+' 
+' This software is provided 'as-is', without any express or implied
+' warranty. In no event will the authors be held liable for any damages
+' arising from the use of this software.
+'
+' Permission is granted to anyone to use this software for any purpose,
+' including commercial applications, and to alter it and redistribute it
+' freely, subject to the following restrictions:
+'
+' 1. The origin of this software must not be misrepresented; you must not
+'    claim that you wrote the original software. If you use this software
+'    in a product, an acknowledgement in the product documentation would be
+'    appreciated but is not required.
+' 2. Altered source versions must be plainly marked as such, and must not be
+'    misrepresented as being the original software.
+' 3. This notice may not be removed or altered from any source distribution.
+'
+SuperStrict
+Extern
+	Function _mm_add_epi16:Int128(a:Int128,b:Int128)="_mm_add_epi16"
+	Function _mm_add_epi32:Int128(a:Int128,b:Int128)="_mm_add_epi32"
+	Function _mm_add_epi64:Int128(a:Int128,b:Int128)="_mm_add_epi64"
+	Function _mm_add_epi8:Int128(a:Int128,b:Int128)="_mm_add_epi8"
+	Function _mm_add_pd:Double128(a:Double128,b:Double128)="_mm_add_pd"
+	Function _mm_add_sd:Double128(a:Double128,b:Double128)="_mm_add_sd"
+	Function _mm_add_si64:Float64(a:Float64,b:Float64)="_mm_add_si64"
+	Function _mm_adds_epi16:Int128(a:Int128,b:Int128)="_mm_adds_epi16"
+	Function _mm_adds_epi8:Int128(a:Int128,b:Int128)="_mm_adds_epi8"
+	Function _mm_adds_epu16:Int128(a:Int128,b:Int128)="_mm_adds_epu16"
+	Function _mm_adds_epu8:Int128(a:Int128,b:Int128)="_mm_adds_epu8"
+	Function _mm_and_pd:Double128(a:Double128,b:Double128)="_mm_and_pd"
+	Function _mm_and_si128:Int128(a:Int128,b:Int128)="_mm_and_si128"
+	Function _mm_andnot_pd:Double128(a:Double128,b:Double128)="_mm_andnot_pd"
+	Function _mm_andnot_si128:Int128(a:Int128,b:Int128)="_mm_andnot_si128"
+	Function _mm_avg_epu16:Int128(a:Int128,b:Int128)="_mm_avg_epu16"
+	Function _mm_avg_epu8:Int128(a:Int128,b:Int128)="_mm_avg_epu8"
+	Function _mm_bslli_si128:Int128(a:Int128,imm8:Int)="_mm_bslli_si128"
+	Function _mm_bsrli_si128:Int128(a:Int128,imm8:Int)="_mm_bsrli_si128"
+	Function _mm_castpd_ps:Float128(a:Double128)="_mm_castpd_ps"
+	Function _mm_castpd_si128:Int128(a:Double128)="_mm_castpd_si128"
+	Function _mm_castps_pd:Double128(a:Float128)="_mm_castps_pd"
+	Function _mm_castps_si128:Int128(a:Float128)="_mm_castps_si128"
+	Function _mm_castsi128_pd:Double128(a:Int128)="_mm_castsi128_pd"
+	Function _mm_castsi128_ps:Float128(a:Int128)="_mm_castsi128_ps"
+	Function _mm_clflush(p:Byte Ptr)="_mm_clflush"
+	Function _mm_cmpeq_epi16:Int128(a:Int128,b:Int128)="_mm_cmpeq_epi16"
+	Function _mm_cmpeq_epi32:Int128(a:Int128,b:Int128)="_mm_cmpeq_epi32"
+	Function _mm_cmpeq_epi8:Int128(a:Int128,b:Int128)="_mm_cmpeq_epi8"
+	Function _mm_cmpeq_pd:Double128(a:Double128,b:Double128)="_mm_cmpeq_pd"
+	Function _mm_cmpeq_sd:Double128(a:Double128,b:Double128)="_mm_cmpeq_sd"
+	Function _mm_cmpge_pd:Double128(a:Double128,b:Double128)="_mm_cmpge_pd"
+	Function _mm_cmpge_sd:Double128(a:Double128,b:Double128)="_mm_cmpge_sd"
+	Function _mm_cmpgt_epi16:Int128(a:Int128,b:Int128)="_mm_cmpgt_epi16"
+	Function _mm_cmpgt_epi32:Int128(a:Int128,b:Int128)="_mm_cmpgt_epi32"
+	Function _mm_cmpgt_epi8:Int128(a:Int128,b:Int128)="_mm_cmpgt_epi8"
+	Function _mm_cmpgt_pd:Double128(a:Double128,b:Double128)="_mm_cmpgt_pd"
+	Function _mm_cmpgt_sd:Double128(a:Double128,b:Double128)="_mm_cmpgt_sd"
+	Function _mm_cmple_pd:Double128(a:Double128,b:Double128)="_mm_cmple_pd"
+	Function _mm_cmple_sd:Double128(a:Double128,b:Double128)="_mm_cmple_sd"
+	Function _mm_cmplt_epi16:Int128(a:Int128,b:Int128)="_mm_cmplt_epi16"
+	Function _mm_cmplt_epi32:Int128(a:Int128,b:Int128)="_mm_cmplt_epi32"
+	Function _mm_cmplt_epi8:Int128(a:Int128,b:Int128)="_mm_cmplt_epi8"
+	Function _mm_cmplt_pd:Double128(a:Double128,b:Double128)="_mm_cmplt_pd"
+	Function _mm_cmplt_sd:Double128(a:Double128,b:Double128)="_mm_cmplt_sd"
+	Function _mm_cmpneq_pd:Double128(a:Double128,b:Double128)="_mm_cmpneq_pd"
+	Function _mm_cmpneq_sd:Double128(a:Double128,b:Double128)="_mm_cmpneq_sd"
+	Function _mm_cmpnge_pd:Double128(a:Double128,b:Double128)="_mm_cmpnge_pd"
+	Function _mm_cmpnge_sd:Double128(a:Double128,b:Double128)="_mm_cmpnge_sd"
+	Function _mm_cmpngt_pd:Double128(a:Double128,b:Double128)="_mm_cmpngt_pd"
+	Function _mm_cmpngt_sd:Double128(a:Double128,b:Double128)="_mm_cmpngt_sd"
+	Function _mm_cmpnle_pd:Double128(a:Double128,b:Double128)="_mm_cmpnle_pd"
+	Function _mm_cmpnle_sd:Double128(a:Double128,b:Double128)="_mm_cmpnle_sd"
+	Function _mm_cmpnlt_pd:Double128(a:Double128,b:Double128)="_mm_cmpnlt_pd"
+	Function _mm_cmpnlt_sd:Double128(a:Double128,b:Double128)="_mm_cmpnlt_sd"
+	Function _mm_cmpord_pd:Double128(a:Double128,b:Double128)="_mm_cmpord_pd"
+	Function _mm_cmpord_sd:Double128(a:Double128,b:Double128)="_mm_cmpord_sd"
+	Function _mm_cmpunord_pd:Double128(a:Double128,b:Double128)="_mm_cmpunord_pd"
+	Function _mm_cmpunord_sd:Double128(a:Double128,b:Double128)="_mm_cmpunord_sd"
+	Function _mm_comieq_sd:Int(a:Double128,b:Double128)="_mm_comieq_sd"
+	Function _mm_comige_sd:Int(a:Double128,b:Double128)="_mm_comige_sd"
+	Function _mm_comigt_sd:Int(a:Double128,b:Double128)="_mm_comigt_sd"
+	Function _mm_comile_sd:Int(a:Double128,b:Double128)="_mm_comile_sd"
+	Function _mm_comilt_sd:Int(a:Double128,b:Double128)="_mm_comilt_sd"
+	Function _mm_comineq_sd:Int(a:Double128,b:Double128)="_mm_comineq_sd"
+	Function _mm_cvtepi32_pd:Double128(a:Int128)="_mm_cvtepi32_pd"
+	Function _mm_cvtepi32_ps:Float128(a:Int128)="_mm_cvtepi32_ps"
+	Function _mm_cvtpd_epi32:Int128(a:Double128)="_mm_cvtpd_epi32"
+	Function _mm_cvtpd_pi32:Float64(a:Double128)="_mm_cvtpd_pi32"
+	Function _mm_cvtpd_ps:Float128(a:Double128)="_mm_cvtpd_ps"
+	Function _mm_cvtpi32_pd:Double128(a:Float64)="_mm_cvtpi32_pd"
+	Function _mm_cvtps_epi32:Int128(a:Float128)="_mm_cvtps_epi32"
+	Function _mm_cvtps_pd:Double128(a:Float128)="_mm_cvtps_pd"
+	Function _mm_cvtsd_f64:Double(a:Double128)="_mm_cvtsd_f64"
+	Function _mm_cvtsd_si32:Int(a:Double128)="_mm_cvtsd_si32"
+	Function _mm_cvtsd_si64:Long(a:Double128)="_mm_cvtsd_si64"
+	Function _mm_cvtsd_si64x:Long(a:Double128)="_mm_cvtsd_si64x"
+	Function _mm_cvtsd_ss:Float128(a:Float128,b:Double128)="_mm_cvtsd_ss"
+	Function _mm_cvtsi128_si32:Int(a:Int128)="_mm_cvtsi128_si32"
+	Function _mm_cvtsi128_si64:Long(a:Int128)="_mm_cvtsi128_si64"
+	Function _mm_cvtsi128_si64x:Long(a:Int128)="_mm_cvtsi128_si64x"
+	Function _mm_cvtsi32_sd:Double128(a:Double128,b:Int)="_mm_cvtsi32_sd"
+	Function _mm_cvtsi32_si128:Int128(a:Int)="_mm_cvtsi32_si128"
+	Function _mm_cvtsi64_sd:Double128(a:Double128,b:Long)="_mm_cvtsi64_sd"
+	Function _mm_cvtsi64_si128:Int128(a:Long)="_mm_cvtsi64_si128"
+	Function _mm_cvtsi64x_sd:Double128(a:Double128,b:Long)="_mm_cvtsi64x_sd"
+	Function _mm_cvtsi64x_si128:Int128(a:Long)="_mm_cvtsi64x_si128"
+	Function _mm_cvtss_sd:Double128(a:Double128,b:Float128)="_mm_cvtss_sd"
+	Function _mm_cvttpd_epi32:Int128(a:Double128)="_mm_cvttpd_epi32"
+	Function _mm_cvttpd_pi32:Float64(a:Double128)="_mm_cvttpd_pi32"
+	Function _mm_cvttps_epi32:Int128(a:Float128)="_mm_cvttps_epi32"
+	Function _mm_cvttsd_si32:Int(a:Double128)="_mm_cvttsd_si32"
+	Function _mm_cvttsd_si64:Long(a:Double128)="_mm_cvttsd_si64"
+	Function _mm_cvttsd_si64x:Long(a:Double128)="_mm_cvttsd_si64x"
+	Function _mm_div_pd:Double128(a:Double128,b:Double128)="_mm_div_pd"
+	Function _mm_div_sd:Double128(a:Double128,b:Double128)="_mm_div_sd"
+	Function _mm_extract_epi16:Int(a:Int128,imm8:Int)="_mm_extract_epi16"
+	Function _mm_insert_epi16:Int128(a:Int128,i:Int,imm8:Int)="_mm_insert_epi16"
+	Function _mm_lfence()="_mm_lfence"
+	Function _mm_load_pd:Double128(mem_addr:Double Ptr)="_mm_load_pd"
+	Function _mm_load_pd1:Double128(mem_addr:Double Ptr)="_mm_load_pd1"
+	Function _mm_load_sd:Double128(mem_addr:Double Ptr)="_mm_load_sd"
+	Function _mm_load_si128:Int128(mem_addr:Int128 Ptr)="_mm_load_si128"
+	Function _mm_load1_pd:Double128(mem_addr:Double Ptr)="_mm_load1_pd"
+	Function _mm_loadh_pd:Double128(a:Double128,mem_addr:Double Ptr)="_mm_loadh_pd"
+	Function _mm_loadl_epi64:Int128(mem_addr:Int128 Ptr)="_mm_loadl_epi64"
+	Function _mm_loadl_pd:Double128(a:Double128,mem_addr:Double Ptr)="_mm_loadl_pd"
+	Function _mm_loadr_pd:Double128(mem_addr:Double Ptr)="_mm_loadr_pd"
+	Function _mm_loadu_pd:Double128(mem_addr:Double Ptr)="_mm_loadu_pd"
+	Function _mm_loadu_si128:Int128(mem_addr:Int128 Ptr)="_mm_loadu_si128"
+	Function _mm_madd_epi16:Int128(a:Int128,b:Int128)="_mm_madd_epi16"
+	Function _mm_maskmoveu_si128(a:Int128,mask:Int128,mem_addr:Byte Ptr)="_mm_maskmoveu_si128"
+	Function _mm_max_epi16:Int128(a:Int128,b:Int128)="_mm_max_epi16"
+	Function _mm_max_epu8:Int128(a:Int128,b:Int128)="_mm_max_epu8"
+	Function _mm_max_pd:Double128(a:Double128,b:Double128)="_mm_max_pd"
+	Function _mm_max_sd:Double128(a:Double128,b:Double128)="_mm_max_sd"
+	Function _mm_mfence()="_mm_mfence"
+	Function _mm_min_epi16:Int128(a:Int128,b:Int128)="_mm_min_epi16"
+	Function _mm_min_epu8:Int128(a:Int128,b:Int128)="_mm_min_epu8"
+	Function _mm_min_pd:Double128(a:Double128,b:Double128)="_mm_min_pd"
+	Function _mm_min_sd:Double128(a:Double128,b:Double128)="_mm_min_sd"
+	Function _mm_move_epi64:Int128(a:Int128)="_mm_move_epi64"
+	Function _mm_move_sd:Double128(a:Double128,b:Double128)="_mm_move_sd"
+	Function _mm_movemask_epi8:Int(a:Int128)="_mm_movemask_epi8"
+	Function _mm_movemask_pd:Int(a:Double128)="_mm_movemask_pd"
+	Function _mm_movepi64_pi64:Float64(a:Int128)="_mm_movepi64_pi64"
+	Function _mm_movpi64_epi64:Int128(a:Float64)="_mm_movpi64_epi64"
+	Function _mm_mul_epu32:Int128(a:Int128,b:Int128)="_mm_mul_epu32"
+	Function _mm_mul_pd:Double128(a:Double128,b:Double128)="_mm_mul_pd"
+	Function _mm_mul_sd:Double128(a:Double128,b:Double128)="_mm_mul_sd"
+	Function _mm_mul_su32:Float64(a:Float64,b:Float64)="_mm_mul_su32"
+	Function _mm_mulhi_epi16:Int128(a:Int128,b:Int128)="_mm_mulhi_epi16"
+	Function _mm_mulhi_epu16:Int128(a:Int128,b:Int128)="_mm_mulhi_epu16"
+	Function _mm_mullo_epi16:Int128(a:Int128,b:Int128)="_mm_mullo_epi16"
+	Function _mm_or_pd:Double128(a:Double128,b:Double128)="_mm_or_pd"
+	Function _mm_or_si128:Int128(a:Int128,b:Int128)="_mm_or_si128"
+	Function _mm_packs_epi16:Int128(a:Int128,b:Int128)="_mm_packs_epi16"
+	Function _mm_packs_epi32:Int128(a:Int128,b:Int128)="_mm_packs_epi32"
+	Function _mm_packus_epi16:Int128(a:Int128,b:Int128)="_mm_packus_epi16"
+	Function _mm_pause()="_mm_pause"
+	Function _mm_sad_epu8:Int128(a:Int128,b:Int128)="_mm_sad_epu8"
+	Function _mm_set_epi16:Int128(e7:Short,e6:Short,e5:Short,e4:Short,e3:Short,e2:Short,e1:Short,e0:Short)="_mm_set_epi16"
+	Function _mm_set_epi32:Int128(e3:Int,e2:Int,e1:Int,e0:Int)="_mm_set_epi32"
+	Function _mm_set_epi64:Int128(e1:Float64,e0:Float64)="_mm_set_epi64"
+	Function _mm_set_epi64x:Int128(e1:Long,e0:Long)="_mm_set_epi64x"
+	Function _mm_set_epi8:Int128(e15:Byte,e14:Byte,e13:Byte,e12:Byte,e11:Byte,e10:Byte,e9:Byte,e8:Byte,e7:Byte,e6:Byte,e5:Byte,e4:Byte,e3:Byte,e2:Byte,e1:Byte,e0:Byte)="_mm_set_epi8"
+	Function _mm_set_pd:Double128(e1:Double,e0:Double)="_mm_set_pd"
+	Function _mm_set_pd1:Double128(a:Double)="_mm_set_pd1"
+	Function _mm_set_sd:Double128(a:Double)="_mm_set_sd"
+	Function _mm_set1_epi16:Int128(a:Short)="_mm_set1_epi16"
+	Function _mm_set1_epi32:Int128(a:Int)="_mm_set1_epi32"
+	Function _mm_set1_epi64:Int128(a:Float64)="_mm_set1_epi64"
+	Function _mm_set1_epi64x:Int128(a:Long)="_mm_set1_epi64x"
+	Function _mm_set1_epi8:Int128(a:Byte)="_mm_set1_epi8"
+	Function _mm_set1_pd:Double128(a:Double)="_mm_set1_pd"
+	Function _mm_setr_epi16:Int128(e7:Short,e6:Short,e5:Short,e4:Short,e3:Short,e2:Short,e1:Short,e0:Short)="_mm_setr_epi16"
+	Function _mm_setr_epi32:Int128(e3:Int,e2:Int,e1:Int,e0:Int)="_mm_setr_epi32"
+	Function _mm_setr_epi64:Int128(e1:Float64,e0:Float64)="_mm_setr_epi64"
+	Function _mm_setr_epi8:Int128(e15:Byte,e14:Byte,e13:Byte,e12:Byte,e11:Byte,e10:Byte,e9:Byte,e8:Byte,e7:Byte,e6:Byte,e5:Byte,e4:Byte,e3:Byte,e2:Byte,e1:Byte,e0:Byte)="_mm_setr_epi8"
+	Function _mm_setr_pd:Double128(e1:Double,e0:Double)="_mm_setr_pd"
+	Function _mm_setzero_pd:Double128()="_mm_setzero_pd"
+	Function _mm_setzero_si128:Int128()="_mm_setzero_si128"
+	Function _mm_shuffle_epi32:Int128(a:Int128,imm8:Int)="_mm_shuffle_epi32"
+	Function _mm_shuffle_pd:Double128(a:Double128,b:Double128,imm8:Int)="_mm_shuffle_pd"
+	Function _mm_shufflehi_epi16:Int128(a:Int128,imm8:Int)="_mm_shufflehi_epi16"
+	Function _mm_shufflelo_epi16:Int128(a:Int128,imm8:Int)="_mm_shufflelo_epi16"
+	Function _mm_sll_epi16:Int128(a:Int128,count:Int128)="_mm_sll_epi16"
+	Function _mm_sll_epi32:Int128(a:Int128,count:Int128)="_mm_sll_epi32"
+	Function _mm_sll_epi64:Int128(a:Int128,count:Int128)="_mm_sll_epi64"
+	Function _mm_slli_epi16:Int128(a:Int128,imm8:Int)="_mm_slli_epi16"
+	Function _mm_slli_epi32:Int128(a:Int128,imm8:Int)="_mm_slli_epi32"
+	Function _mm_slli_epi64:Int128(a:Int128,imm8:Int)="_mm_slli_epi64"
+	Function _mm_slli_si128:Int128(a:Int128,imm8:Int)="_mm_slli_si128"
+	Function _mm_sqrt_pd:Double128(a:Double128)="_mm_sqrt_pd"
+	Function _mm_sqrt_sd:Double128(a:Double128,b:Double128)="_mm_sqrt_sd"
+	Function _mm_sra_epi16:Int128(a:Int128,count:Int128)="_mm_sra_epi16"
+	Function _mm_sra_epi32:Int128(a:Int128,count:Int128)="_mm_sra_epi32"
+	Function _mm_srai_epi16:Int128(a:Int128,imm8:Int)="_mm_srai_epi16"
+	Function _mm_srai_epi32:Int128(a:Int128,imm8:Int)="_mm_srai_epi32"
+	Function _mm_srl_epi16:Int128(a:Int128,count:Int128)="_mm_srl_epi16"
+	Function _mm_srl_epi32:Int128(a:Int128,count:Int128)="_mm_srl_epi32"
+	Function _mm_srl_epi64:Int128(a:Int128,count:Int128)="_mm_srl_epi64"
+	Function _mm_srli_epi16:Int128(a:Int128,imm8:Int)="_mm_srli_epi16"
+	Function _mm_srli_epi32:Int128(a:Int128,imm8:Int)="_mm_srli_epi32"
+	Function _mm_srli_epi64:Int128(a:Int128,imm8:Int)="_mm_srli_epi64"
+	Function _mm_srli_si128:Int128(a:Int128,imm8:Int)="_mm_srli_si128"
+	Function _mm_store_pd(mem_addr:Double Ptr,a:Double128)="_mm_store_pd"
+	Function _mm_store_pd1(mem_addr:Double Ptr,a:Double128)="_mm_store_pd1"
+	Function _mm_store_sd(mem_addr:Double Ptr,a:Double128)="_mm_store_sd"
+	Function _mm_store_si128(mem_addr:Int128 Ptr,a:Int128)="_mm_store_si128"
+	Function _mm_store1_pd(mem_addr:Double Ptr,a:Double128)="_mm_store1_pd"
+	Function _mm_storeh_pd(mem_addr:Double Ptr,a:Double128)="_mm_storeh_pd"
+	Function _mm_storel_epi64(mem_addr:Int128 Ptr,a:Int128)="_mm_storel_epi64"
+	Function _mm_storel_pd(mem_addr:Double Ptr,a:Double128)="_mm_storel_pd"
+	Function _mm_storer_pd(mem_addr:Double Ptr,a:Double128)="_mm_storer_pd"
+	Function _mm_storeu_pd(mem_addr:Double Ptr,a:Double128)="_mm_storeu_pd"
+	Function _mm_storeu_si128(mem_addr:Int128 Ptr,a:Int128)="_mm_storeu_si128"
+	Function _mm_stream_pd(mem_addr:Double Ptr,a:Double128)="_mm_stream_pd"
+	Function _mm_stream_si128(mem_addr:Int128 Ptr,a:Int128)="_mm_stream_si128"
+	Function _mm_stream_si32(mem_addr:Int Ptr,a:Int)="_mm_stream_si32"
+	Function _mm_stream_si64(mem_addr:Long Ptr,a:Long)="_mm_stream_si64"
+	Function _mm_sub_epi16:Int128(a:Int128,b:Int128)="_mm_sub_epi16"
+	Function _mm_sub_epi32:Int128(a:Int128,b:Int128)="_mm_sub_epi32"
+	Function _mm_sub_epi64:Int128(a:Int128,b:Int128)="_mm_sub_epi64"
+	Function _mm_sub_epi8:Int128(a:Int128,b:Int128)="_mm_sub_epi8"
+	Function _mm_sub_pd:Double128(a:Double128,b:Double128)="_mm_sub_pd"
+	Function _mm_sub_sd:Double128(a:Double128,b:Double128)="_mm_sub_sd"
+	Function _mm_sub_si64:Float64(a:Float64,b:Float64)="_mm_sub_si64"
+	Function _mm_subs_epi16:Int128(a:Int128,b:Int128)="_mm_subs_epi16"
+	Function _mm_subs_epi8:Int128(a:Int128,b:Int128)="_mm_subs_epi8"
+	Function _mm_subs_epu16:Int128(a:Int128,b:Int128)="_mm_subs_epu16"
+	Function _mm_subs_epu8:Int128(a:Int128,b:Int128)="_mm_subs_epu8"
+	Function _mm_ucomieq_sd:Int(a:Double128,b:Double128)="_mm_ucomieq_sd"
+	Function _mm_ucomige_sd:Int(a:Double128,b:Double128)="_mm_ucomige_sd"
+	Function _mm_ucomigt_sd:Int(a:Double128,b:Double128)="_mm_ucomigt_sd"
+	Function _mm_ucomile_sd:Int(a:Double128,b:Double128)="_mm_ucomile_sd"
+	Function _mm_ucomilt_sd:Int(a:Double128,b:Double128)="_mm_ucomilt_sd"
+	Function _mm_ucomineq_sd:Int(a:Double128,b:Double128)="_mm_ucomineq_sd"
+	Function _mm_unpackhi_epi16:Int128(a:Int128,b:Int128)="_mm_unpackhi_epi16"
+	Function _mm_unpackhi_epi32:Int128(a:Int128,b:Int128)="_mm_unpackhi_epi32"
+	Function _mm_unpackhi_epi64:Int128(a:Int128,b:Int128)="_mm_unpackhi_epi64"
+	Function _mm_unpackhi_epi8:Int128(a:Int128,b:Int128)="_mm_unpackhi_epi8"
+	Function _mm_unpackhi_pd:Double128(a:Double128,b:Double128)="_mm_unpackhi_pd"
+	Function _mm_unpacklo_epi16:Int128(a:Int128,b:Int128)="_mm_unpacklo_epi16"
+	Function _mm_unpacklo_epi32:Int128(a:Int128,b:Int128)="_mm_unpacklo_epi32"
+	Function _mm_unpacklo_epi64:Int128(a:Int128,b:Int128)="_mm_unpacklo_epi64"
+	Function _mm_unpacklo_epi8:Int128(a:Int128,b:Int128)="_mm_unpacklo_epi8"
+	Function _mm_unpacklo_pd:Double128(a:Double128,b:Double128)="_mm_unpacklo_pd"
+	Function _mm_xor_pd:Double128(a:Double128,b:Double128)="_mm_xor_pd"
+	Function _mm_xor_si128:Int128(a:Int128,b:Int128)="_mm_xor_si128"
+EndExtern
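
The Double128 additions can be exercised the same way; a short sketch under the same import assumption as above:

SuperStrict
Framework BRL.StandardIO
Import "sse2.bmx"	' assumed import; adjust to the actual module path

Local d:Double[2]

' Build a vector from two immediates, square it, and store it back.
' _mm_set_pd(e1,e0) places e0 in the low lane, so d[0] holds 4*4 afterwards.
Local vd:Double128 = _mm_set_pd(3.0, 4.0)
_mm_storeu_pd(Varptr d[0], _mm_mul_pd(vd, vd))

Print d[0]	' expected 16
Print d[1]	' expected 9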

xmmintrin.mod/sse2.x  (+230 -28)

@@ -1,28 +1,230 @@
-void _mm_clflush(void* )!
-__m128d _mm_load_pd(double* )!
-__m128d _mm_load_pd1(double* )!
-__m128d _mm_load_sd(double* )!
-__m128i _mm_load_si128(__m128i* )!
-__m128d _mm_load1_pd(double* )!
-__m128d _mm_loadh_pd(__m128d ,double* )!
-__m128i _mm_loadl_epi64(__m128i* )!
-__m128d _mm_loadl_pd(__m128d ,double* )!
-__m128d _mm_loadr_pd(double* )!
-__m128d _mm_loadu_pd(double* )!
-__m128i _mm_loadu_si128(__m128i* )!
-void _mm_maskmoveu_si128(__m128i ,__m128i ,char* )!
-void _mm_store_pd(double* ,__m128d )!
-void _mm_store_pd1(double* ,__m128d )!
-void _mm_store_sd(double* ,__m128d )!
-void _mm_store_si128(__m128i* ,__m128i )!
-void _mm_store1_pd(double* ,__m128d )!
-void _mm_storeh_pd(double* ,__m128d )!
-void _mm_storel_epi64(__m128i* ,__m128i )!
-void _mm_storel_pd(double* ,__m128d )!
-void _mm_storer_pd(double* ,__m128d )!
-void _mm_storeu_pd(double* ,__m128d )!
-void _mm_storeu_si128(__m128i* ,__m128i )!
-void _mm_stream_pd(double* ,__m128d )!
-void _mm_stream_si128(__m128i* ,__m128i )!
-void _mm_stream_si32(int* ,int )!
-void _mm_stream_si64(__int64* ,__int64 )!
+__m128i _mm_add_epi16(__m128i ,__m128i )!
+__m128i _mm_add_epi32(__m128i ,__m128i )!
+__m128i _mm_add_epi64(__m128i ,__m128i )!
+__m128i _mm_add_epi8(__m128i ,__m128i )!
+__m128d _mm_add_pd(__m128d ,__m128d )!
+__m128d _mm_add_sd(__m128d ,__m128d )!
+__m64 _mm_add_si64(__m64 ,__m64 )!
+__m128i _mm_adds_epi16(__m128i ,__m128i )!
+__m128i _mm_adds_epi8(__m128i ,__m128i )!
+__m128i _mm_adds_epu16(__m128i ,__m128i )!
+__m128i _mm_adds_epu8(__m128i ,__m128i )!
+__m128d _mm_and_pd(__m128d ,__m128d )!
+__m128i _mm_and_si128(__m128i ,__m128i )!
+__m128d _mm_andnot_pd(__m128d ,__m128d )!
+__m128i _mm_andnot_si128(__m128i ,__m128i )!
+__m128i _mm_avg_epu16(__m128i ,__m128i )!
+__m128i _mm_avg_epu8(__m128i ,__m128i )!
+__m128i _mm_bslli_si128(__m128i ,int )!
+__m128i _mm_bsrli_si128(__m128i ,int )!
+__m128 _mm_castpd_ps(__m128d )!
+__m128i _mm_castpd_si128(__m128d )!
+__m128d _mm_castps_pd(__m128 )!
+__m128i _mm_castps_si128(__m128 )!
+__m128d _mm_castsi128_pd(__m128i )!
+__m128 _mm_castsi128_ps(__m128i )!
+void _mm_clflush(void* )!
+__m128i _mm_cmpeq_epi16(__m128i ,__m128i )!
+__m128i _mm_cmpeq_epi32(__m128i ,__m128i )!
+__m128i _mm_cmpeq_epi8(__m128i ,__m128i )!
+__m128d _mm_cmpeq_pd(__m128d ,__m128d )!
+__m128d _mm_cmpeq_sd(__m128d ,__m128d )!
+__m128d _mm_cmpge_pd(__m128d ,__m128d )!
+__m128d _mm_cmpge_sd(__m128d ,__m128d )!
+__m128i _mm_cmpgt_epi16(__m128i ,__m128i )!
+__m128i _mm_cmpgt_epi32(__m128i ,__m128i )!
+__m128i _mm_cmpgt_epi8(__m128i ,__m128i )!
+__m128d _mm_cmpgt_pd(__m128d ,__m128d )!
+__m128d _mm_cmpgt_sd(__m128d ,__m128d )!
+__m128d _mm_cmple_pd(__m128d ,__m128d )!
+__m128d _mm_cmple_sd(__m128d ,__m128d )!
+__m128i _mm_cmplt_epi16(__m128i ,__m128i )!
+__m128i _mm_cmplt_epi32(__m128i ,__m128i )!
+__m128i _mm_cmplt_epi8(__m128i ,__m128i )!
+__m128d _mm_cmplt_pd(__m128d ,__m128d )!
+__m128d _mm_cmplt_sd(__m128d ,__m128d )!
+__m128d _mm_cmpneq_pd(__m128d ,__m128d )!
+__m128d _mm_cmpneq_sd(__m128d ,__m128d )!
+__m128d _mm_cmpnge_pd(__m128d ,__m128d )!
+__m128d _mm_cmpnge_sd(__m128d ,__m128d )!
+__m128d _mm_cmpngt_pd(__m128d ,__m128d )!
+__m128d _mm_cmpngt_sd(__m128d ,__m128d )!
+__m128d _mm_cmpnle_pd(__m128d ,__m128d )!
+__m128d _mm_cmpnle_sd(__m128d ,__m128d )!
+__m128d _mm_cmpnlt_pd(__m128d ,__m128d )!
+__m128d _mm_cmpnlt_sd(__m128d ,__m128d )!
+__m128d _mm_cmpord_pd(__m128d ,__m128d )!
+__m128d _mm_cmpord_sd(__m128d ,__m128d )!
+__m128d _mm_cmpunord_pd(__m128d ,__m128d )!
+__m128d _mm_cmpunord_sd(__m128d ,__m128d )!
+int _mm_comieq_sd(__m128d ,__m128d )!
+int _mm_comige_sd(__m128d ,__m128d )!
+int _mm_comigt_sd(__m128d ,__m128d )!
+int _mm_comile_sd(__m128d ,__m128d )!
+int _mm_comilt_sd(__m128d ,__m128d )!
+int _mm_comineq_sd(__m128d ,__m128d )!
+__m128d _mm_cvtepi32_pd(__m128i )!
+__m128 _mm_cvtepi32_ps(__m128i )!
+__m128i _mm_cvtpd_epi32(__m128d )!
+__m64 _mm_cvtpd_pi32(__m128d )!
+__m128 _mm_cvtpd_ps(__m128d )!
+__m128d _mm_cvtpi32_pd(__m64 )!
+__m128i _mm_cvtps_epi32(__m128 )!
+__m128d _mm_cvtps_pd(__m128 )!
+double _mm_cvtsd_f64(__m128d )!
+int _mm_cvtsd_si32(__m128d )!
+__int64 _mm_cvtsd_si64(__m128d )!
+__int64 _mm_cvtsd_si64x(__m128d )!
+__m128 _mm_cvtsd_ss(__m128 ,__m128d )!
+int _mm_cvtsi128_si32(__m128i )!
+__int64 _mm_cvtsi128_si64(__m128i )!
+__int64 _mm_cvtsi128_si64x(__m128i )!
+__m128d _mm_cvtsi32_sd(__m128d ,int )!
+__m128i _mm_cvtsi32_si128(int )!
+__m128d _mm_cvtsi64_sd(__m128d ,__int64 )!
+__m128i _mm_cvtsi64_si128(__int64 )!
+__m128d _mm_cvtsi64x_sd(__m128d ,__int64 )!
+__m128i _mm_cvtsi64x_si128(__int64 )!
+__m128d _mm_cvtss_sd(__m128d ,__m128 )!
+__m128i _mm_cvttpd_epi32(__m128d )!
+__m64 _mm_cvttpd_pi32(__m128d )!
+__m128i _mm_cvttps_epi32(__m128 )!
+int _mm_cvttsd_si32(__m128d )!
+__int64 _mm_cvttsd_si64(__m128d )!
+__int64 _mm_cvttsd_si64x(__m128d )!
+__m128d _mm_div_pd(__m128d ,__m128d )!
+__m128d _mm_div_sd(__m128d ,__m128d )!
+int _mm_extract_epi16(__m128i ,int )!
+__m128i _mm_insert_epi16(__m128i ,int ,int )!
+void _mm_lfence(void )!
+__m128d _mm_load_pd(double* )!
+__m128d _mm_load_pd1(double* )!
+__m128d _mm_load_sd(double* )!
+__m128i _mm_load_si128(__m128i* )!
+__m128d _mm_load1_pd(double* )!
+__m128d _mm_loadh_pd(__m128d ,double* )!
+__m128i _mm_loadl_epi64(__m128i* )!
+__m128d _mm_loadl_pd(__m128d ,double* )!
+__m128d _mm_loadr_pd(double* )!
+__m128d _mm_loadu_pd(double* )!
+__m128i _mm_loadu_si128(__m128i* )!
+__m128i _mm_madd_epi16(__m128i ,__m128i )!
+void _mm_maskmoveu_si128(__m128i ,__m128i ,char* )!
+__m128i _mm_max_epi16(__m128i ,__m128i )!
+__m128i _mm_max_epu8(__m128i ,__m128i )!
+__m128d _mm_max_pd(__m128d ,__m128d )!
+__m128d _mm_max_sd(__m128d ,__m128d )!
+void _mm_mfence(void )!
+__m128i _mm_min_epi16(__m128i ,__m128i )!
+__m128i _mm_min_epu8(__m128i ,__m128i )!
+__m128d _mm_min_pd(__m128d ,__m128d )!
+__m128d _mm_min_sd(__m128d ,__m128d )!
+__m128i _mm_move_epi64(__m128i )!
+__m128d _mm_move_sd(__m128d ,__m128d )!
+int _mm_movemask_epi8(__m128i )!
+int _mm_movemask_pd(__m128d )!
+__m64 _mm_movepi64_pi64(__m128i )!
+__m128i _mm_movpi64_epi64(__m64 )!
+__m128i _mm_mul_epu32(__m128i ,__m128i )!
+__m128d _mm_mul_pd(__m128d ,__m128d )!
+__m128d _mm_mul_sd(__m128d ,__m128d )!
+__m64 _mm_mul_su32(__m64 ,__m64 )!
+__m128i _mm_mulhi_epi16(__m128i ,__m128i )!
+__m128i _mm_mulhi_epu16(__m128i ,__m128i )!
+__m128i _mm_mullo_epi16(__m128i ,__m128i )!
+__m128d _mm_or_pd(__m128d ,__m128d )!
+__m128i _mm_or_si128(__m128i ,__m128i )!
+__m128i _mm_packs_epi16(__m128i ,__m128i )!
+__m128i _mm_packs_epi32(__m128i ,__m128i )!
+__m128i _mm_packus_epi16(__m128i ,__m128i )!
+void _mm_pause(void )!
+__m128i _mm_sad_epu8(__m128i ,__m128i )!
+__m128i _mm_set_epi16(short ,short ,short ,short ,short ,short ,short ,short )!
+__m128i _mm_set_epi32(int ,int ,int ,int )!
+__m128i _mm_set_epi64(__m64 ,__m64 )!
+__m128i _mm_set_epi64x(__int64 ,__int64 )!
+__m128i _mm_set_epi8(char ,char ,char ,char ,char ,char ,char ,char ,char ,char ,char ,char ,char ,char ,char ,char )!
+__m128d _mm_set_pd(double ,double )!
+__m128d _mm_set_pd1(double )!
+__m128d _mm_set_sd(double )!
+__m128i _mm_set1_epi16(short )!
+__m128i _mm_set1_epi32(int )!
+__m128i _mm_set1_epi64(__m64 )!
+__m128i _mm_set1_epi64x(__int64 )!
+__m128i _mm_set1_epi8(char )!
+__m128d _mm_set1_pd(double )!
+__m128i _mm_setr_epi16(short ,short ,short ,short ,short ,short ,short ,short )!
+__m128i _mm_setr_epi32(int ,int ,int ,int )!
+__m128i _mm_setr_epi64(__m64 ,__m64 )!
+__m128i _mm_setr_epi8(char ,char ,char ,char ,char ,char ,char ,char ,char ,char ,char ,char ,char ,char ,char ,char )!
+__m128d _mm_setr_pd(double ,double )!
+__m128d _mm_setzero_pd(void )!
+__m128i _mm_setzero_si128()!
+__m128i _mm_shuffle_epi32(__m128i ,int )!
+__m128d _mm_shuffle_pd(__m128d ,__m128d ,int )!
+__m128i _mm_shufflehi_epi16(__m128i ,int )!
+__m128i _mm_shufflelo_epi16(__m128i ,int )!
+__m128i _mm_sll_epi16(__m128i ,__m128i )!
+__m128i _mm_sll_epi32(__m128i ,__m128i )!
+__m128i _mm_sll_epi64(__m128i ,__m128i )!
+__m128i _mm_slli_epi16(__m128i ,int )!
+__m128i _mm_slli_epi32(__m128i ,int )!
+__m128i _mm_slli_epi64(__m128i ,int )!
+__m128i _mm_slli_si128(__m128i ,int )!
+__m128d _mm_sqrt_pd(__m128d )!
+__m128d _mm_sqrt_sd(__m128d ,__m128d )!
+__m128i _mm_sra_epi16(__m128i ,__m128i )!
+__m128i _mm_sra_epi32(__m128i ,__m128i )!
+__m128i _mm_srai_epi16(__m128i ,int )!
+__m128i _mm_srai_epi32(__m128i ,int )!
+__m128i _mm_srl_epi16(__m128i ,__m128i )!
+__m128i _mm_srl_epi32(__m128i ,__m128i )!
+__m128i _mm_srl_epi64(__m128i ,__m128i )!
+__m128i _mm_srli_epi16(__m128i ,int )!
+__m128i _mm_srli_epi32(__m128i ,int )!
+__m128i _mm_srli_epi64(__m128i ,int )!
+__m128i _mm_srli_si128(__m128i ,int )!
+void _mm_store_pd(double* ,__m128d )!
+void _mm_store_pd1(double* ,__m128d )!
+void _mm_store_sd(double* ,__m128d )!
+void _mm_store_si128(__m128i* ,__m128i )!
+void _mm_store1_pd(double* ,__m128d )!
+void _mm_storeh_pd(double* ,__m128d )!
+void _mm_storel_epi64(__m128i* ,__m128i )!
+void _mm_storel_pd(double* ,__m128d )!
+void _mm_storer_pd(double* ,__m128d )!
+void _mm_storeu_pd(double* ,__m128d )!
+void _mm_storeu_si128(__m128i* ,__m128i )!
+void _mm_stream_pd(double* ,__m128d )!
+void _mm_stream_si128(__m128i* ,__m128i )!
+void _mm_stream_si32(int* ,int )!
+void _mm_stream_si64(__int64* ,__int64 )!
+__m128i _mm_sub_epi16(__m128i ,__m128i )!
+__m128i _mm_sub_epi32(__m128i ,__m128i )!
+__m128i _mm_sub_epi64(__m128i ,__m128i )!
+__m128i _mm_sub_epi8(__m128i ,__m128i )!
+__m128d _mm_sub_pd(__m128d ,__m128d )!
+__m128d _mm_sub_sd(__m128d ,__m128d )!
+__m64 _mm_sub_si64(__m64 ,__m64 )!
+__m128i _mm_subs_epi16(__m128i ,__m128i )!
+__m128i _mm_subs_epi8(__m128i ,__m128i )!
+__m128i _mm_subs_epu16(__m128i ,__m128i )!
+__m128i _mm_subs_epu8(__m128i ,__m128i )!
+int _mm_ucomieq_sd(__m128d ,__m128d )!
+int _mm_ucomige_sd(__m128d ,__m128d )!
+int _mm_ucomigt_sd(__m128d ,__m128d )!
+int _mm_ucomile_sd(__m128d ,__m128d )!
+int _mm_ucomilt_sd(__m128d ,__m128d )!
+int _mm_ucomineq_sd(__m128d ,__m128d )!
+__m128i _mm_unpackhi_epi16(__m128i ,__m128i )!
+__m128i _mm_unpackhi_epi32(__m128i ,__m128i )!
+__m128i _mm_unpackhi_epi64(__m128i ,__m128i )!
+__m128i _mm_unpackhi_epi8(__m128i ,__m128i )!
+__m128d _mm_unpackhi_pd(__m128d ,__m128d )!
+__m128i _mm_unpacklo_epi16(__m128i ,__m128i )!
+__m128i _mm_unpacklo_epi32(__m128i ,__m128i )!
+__m128i _mm_unpacklo_epi64(__m128i ,__m128i )!
+__m128i _mm_unpacklo_epi8(__m128i ,__m128i )!
+__m128d _mm_unpacklo_pd(__m128d ,__m128d )!
+__m128d _mm_xor_pd(__m128d ,__m128d )!
+__m128i _mm_xor_si128(__m128i ,__m128i )!

+ 32 - 9
xmmintrin.mod/sse3.bmx

@@ -1,9 +1,32 @@
-SuperStrict
-Extern
-	Function _mm_addsub_ps:Float128(a:Float128,b:Float128)="_mm_addsub_ps"
-	Function _mm_hadd_ps:Float128(a:Float128,b:Float128)="_mm_hadd_ps"
-	Function _mm_hsub_ps:Float128(a:Float128,b:Float128)="_mm_hsub_ps"
-	Function _mm_lddqu_si128:Int128(mem_addr:Int128 Ptr)="_mm_lddqu_si128"
-	Function _mm_movehdup_ps:Float128(a:Float128)="_mm_movehdup_ps"
-	Function _mm_moveldup_ps:Float128(a:Float128)="_mm_moveldup_ps"
-EndExtern
+' Copyright (c) 2016 David JJ Camp
+' 
+' This software is provided 'as-is', without any express or implied
+' warranty. In no event will the authors be held liable for any damages
+' arising from the use of this software.
+'
+' Permission is granted to anyone to use this software for any purpose,
+' including commercial applications, and to alter it and redistribute it
+' freely, subject to the following restrictions:
+'
+' 1. The origin of this software must not be misrepresented; you must not
+'    claim that you wrote the original software. If you use this software
+'    in a product, an acknowledgement in the product documentation would be
+'    appreciated but is not required.
+' 2. Altered source versions must be plainly marked as such, and must not be
+'    misrepresented as being the original software.
+' 3. This notice may not be removed or altered from any source distribution.
+'
+SuperStrict
+Extern
+	Function _mm_addsub_pd:Double128(a:Double128,b:Double128)="_mm_addsub_pd"
+	Function _mm_addsub_ps:Float128(a:Float128,b:Float128)="_mm_addsub_ps"
+	Function _mm_hadd_pd:Double128(a:Double128,b:Double128)="_mm_hadd_pd"
+	Function _mm_hadd_ps:Float128(a:Float128,b:Float128)="_mm_hadd_ps"
+	Function _mm_hsub_pd:Double128(a:Double128,b:Double128)="_mm_hsub_pd"
+	Function _mm_hsub_ps:Float128(a:Float128,b:Float128)="_mm_hsub_ps"
+	Function _mm_lddqu_si128:Int128(mem_addr:Int128 Ptr)="_mm_lddqu_si128"
+	Function _mm_loaddup_pd:Double128(mem_addr:Double Ptr)="_mm_loaddup_pd"
+	Function _mm_movedup_pd:Double128(a:Double128)="_mm_movedup_pd"
+	Function _mm_movehdup_ps:Float128(a:Float128)="_mm_movehdup_ps"
+	Function _mm_moveldup_ps:Float128(a:Float128)="_mm_moveldup_ps"
+EndExtern

+ 11 - 2
xmmintrin.mod/sse3.x

@@ -1,2 +1,11 @@
-__m128i _mm_lddqu_si128(__m128i* )!
-__m128d _mm_loaddup_pd(double* )!
+__m128d _mm_addsub_pd(__m128d ,__m128d )!
+__m128 _mm_addsub_ps(__m128 ,__m128 )!
+__m128d _mm_hadd_pd(__m128d ,__m128d )!
+__m128 _mm_hadd_ps(__m128 ,__m128 )!
+__m128d _mm_hsub_pd(__m128d ,__m128d )!
+__m128 _mm_hsub_ps(__m128 ,__m128 )!
+__m128i _mm_lddqu_si128(__m128i* )!
+__m128d _mm_loaddup_pd(double* )!
+__m128d _mm_movedup_pd(__m128d )!
+__m128 _mm_movehdup_ps(__m128 )!
+__m128 _mm_moveldup_ps(__m128 )!

+ 82 - 48
xmmintrin.mod/sse41.bmx

@@ -1,48 +1,82 @@
-SuperStrict
-Extern
-	Function _mm_blend_epi16:Int128(a:Int128,b:Int128,imm8:Int)="_mm_blend_epi16"
-	Function _mm_blend_ps:Float128(a:Float128,b:Float128,imm8:Int)="_mm_blend_ps"
-	Function _mm_blendv_epi8:Int128(a:Int128,b:Int128,mask:Int128)="_mm_blendv_epi8"
-	Function _mm_blendv_ps:Float128(a:Float128,b:Float128,mask:Float128)="_mm_blendv_ps"
-	Function _mm_cmpeq_epi64:Int128(a:Int128,b:Int128)="_mm_cmpeq_epi64"
-	Function _mm_cvtepi16_epi32:Int128(a:Int128)="_mm_cvtepi16_epi32"
-	Function _mm_cvtepi16_epi64:Int128(a:Int128)="_mm_cvtepi16_epi64"
-	Function _mm_cvtepi32_epi64:Int128(a:Int128)="_mm_cvtepi32_epi64"
-	Function _mm_cvtepi8_epi16:Int128(a:Int128)="_mm_cvtepi8_epi16"
-	Function _mm_cvtepi8_epi32:Int128(a:Int128)="_mm_cvtepi8_epi32"
-	Function _mm_cvtepi8_epi64:Int128(a:Int128)="_mm_cvtepi8_epi64"
-	Function _mm_cvtepu16_epi32:Int128(a:Int128)="_mm_cvtepu16_epi32"
-	Function _mm_cvtepu16_epi64:Int128(a:Int128)="_mm_cvtepu16_epi64"
-	Function _mm_cvtepu32_epi64:Int128(a:Int128)="_mm_cvtepu32_epi64"
-	Function _mm_cvtepu8_epi16:Int128(a:Int128)="_mm_cvtepu8_epi16"
-	Function _mm_cvtepu8_epi32:Int128(a:Int128)="_mm_cvtepu8_epi32"
-	Function _mm_cvtepu8_epi64:Int128(a:Int128)="_mm_cvtepu8_epi64"
-	Function _mm_dp_ps:Float128(a:Float128,b:Float128,imm8:Int)="_mm_dp_ps"
-	Function _mm_extract_epi32:Int(a:Int128,imm8:Int)="_mm_extract_epi32"
-	Function _mm_extract_epi64:Long(a:Int128,imm8:Int)="_mm_extract_epi64"
-	Function _mm_extract_epi8:Int(a:Int128,imm8:Int)="_mm_extract_epi8"
-	Function _mm_extract_ps:Int(a:Float128,imm8:Int)="_mm_extract_ps"
-	Function _mm_insert_epi32:Int128(a:Int128,i:Int,imm8:Int)="_mm_insert_epi32"
-	Function _mm_insert_epi64:Int128(a:Int128,i:Long,imm8:Int)="_mm_insert_epi64"
-	Function _mm_insert_epi8:Int128(a:Int128,i:Int,imm8:Int)="_mm_insert_epi8"
-	Function _mm_insert_ps:Float128(a:Float128,b:Float128,imm8:Int)="_mm_insert_ps"
-	Function _mm_max_epi32:Int128(a:Int128,b:Int128)="_mm_max_epi32"
-	Function _mm_max_epi8:Int128(a:Int128,b:Int128)="_mm_max_epi8"
-	Function _mm_max_epu16:Int128(a:Int128,b:Int128)="_mm_max_epu16"
-	Function _mm_max_epu32:Int128(a:Int128,b:Int128)="_mm_max_epu32"
-	Function _mm_min_epi32:Int128(a:Int128,b:Int128)="_mm_min_epi32"
-	Function _mm_min_epi8:Int128(a:Int128,b:Int128)="_mm_min_epi8"
-	Function _mm_min_epu16:Int128(a:Int128,b:Int128)="_mm_min_epu16"
-	Function _mm_min_epu32:Int128(a:Int128,b:Int128)="_mm_min_epu32"
-	Function _mm_minpos_epu16:Int128(a:Int128)="_mm_minpos_epu16"
-	Function _mm_mpsadbw_epu8:Int128(a:Int128,b:Int128,imm8:Int)="_mm_mpsadbw_epu8"
-	Function _mm_mul_epi32:Int128(a:Int128,b:Int128)="_mm_mul_epi32"
-	Function _mm_mullo_epi32:Int128(a:Int128,b:Int128)="_mm_mullo_epi32"
-	Function _mm_packus_epi32:Int128(a:Int128,b:Int128)="_mm_packus_epi32"
-	Function _mm_round_ps:Float128(a:Float128,rounding:Int)="_mm_round_ps"
-	Function _mm_round_ss:Float128(a:Float128,b:Float128,rounding:Int)="_mm_round_ss"
-	Function _mm_stream_load_si128:Int128(mem_addr:Int128 Ptr)="_mm_stream_load_si128"
-	Function _mm_testc_si128:Int(a:Int128,b:Int128)="_mm_testc_si128"
-	Function _mm_testnzc_si128:Int(a:Int128,b:Int128)="_mm_testnzc_si128"
-	Function _mm_testz_si128:Int(a:Int128,b:Int128)="_mm_testz_si128"
-EndExtern
+' Copyright (c) 2016 David JJ Camp
+' 
+' This software is provided 'as-is', without any express or implied
+' warranty. In no event will the authors be held liable for any damages
+' arising from the use of this software.
+'
+' Permission is granted to anyone to use this software for any purpose,
+' including commercial applications, and to alter it and redistribute it
+' freely, subject to the following restrictions:
+'
+' 1. The origin of this software must not be misrepresented; you must not
+'    claim that you wrote the original software. If you use this software
+'    in a product, an acknowledgement in the product documentation would be
+'    appreciated but is not required.
+' 2. Altered source versions must be plainly marked as such, and must not be
+'    misrepresented as being the original software.
+' 3. This notice may not be removed or altered from any source distribution.
+'
+SuperStrict
+Extern
+	Function _mm_blend_epi16:Int128(a:Int128,b:Int128,imm8:Int)="_mm_blend_epi16"
+	Function _mm_blend_pd:Double128(a:Double128,b:Double128,imm8:Int)="_mm_blend_pd"
+	Function _mm_blend_ps:Float128(a:Float128,b:Float128,imm8:Int)="_mm_blend_ps"
+	Function _mm_blendv_epi8:Int128(a:Int128,b:Int128,mask:Int128)="_mm_blendv_epi8"
+	Function _mm_blendv_pd:Double128(a:Double128,b:Double128,mask:Double128)="_mm_blendv_pd"
+	Function _mm_blendv_ps:Float128(a:Float128,b:Float128,mask:Float128)="_mm_blendv_ps"
+	Function _mm_ceil_pd:Double128(a:Double128)="_mm_ceil_pd"
+	Function _mm_ceil_ps:Float128(a:Float128)="_mm_ceil_ps"
+	Function _mm_ceil_sd:Double128(a:Double128,b:Double128)="_mm_ceil_sd"
+	Function _mm_ceil_ss:Float128(a:Float128,b:Float128)="_mm_ceil_ss"
+	Function _mm_cmpeq_epi64:Int128(a:Int128,b:Int128)="_mm_cmpeq_epi64"
+	Function _mm_cvtepi16_epi32:Int128(a:Int128)="_mm_cvtepi16_epi32"
+	Function _mm_cvtepi16_epi64:Int128(a:Int128)="_mm_cvtepi16_epi64"
+	Function _mm_cvtepi32_epi64:Int128(a:Int128)="_mm_cvtepi32_epi64"
+	Function _mm_cvtepi8_epi16:Int128(a:Int128)="_mm_cvtepi8_epi16"
+	Function _mm_cvtepi8_epi32:Int128(a:Int128)="_mm_cvtepi8_epi32"
+	Function _mm_cvtepi8_epi64:Int128(a:Int128)="_mm_cvtepi8_epi64"
+	Function _mm_cvtepu16_epi32:Int128(a:Int128)="_mm_cvtepu16_epi32"
+	Function _mm_cvtepu16_epi64:Int128(a:Int128)="_mm_cvtepu16_epi64"
+	Function _mm_cvtepu32_epi64:Int128(a:Int128)="_mm_cvtepu32_epi64"
+	Function _mm_cvtepu8_epi16:Int128(a:Int128)="_mm_cvtepu8_epi16"
+	Function _mm_cvtepu8_epi32:Int128(a:Int128)="_mm_cvtepu8_epi32"
+	Function _mm_cvtepu8_epi64:Int128(a:Int128)="_mm_cvtepu8_epi64"
+	Function _mm_dp_pd:Double128(a:Double128,b:Double128,imm8:Int)="_mm_dp_pd"
+	Function _mm_dp_ps:Float128(a:Float128,b:Float128,imm8:Int)="_mm_dp_ps"
+	Function _mm_extract_epi32:Int(a:Int128,imm8:Int)="_mm_extract_epi32"
+	Function _mm_extract_epi64:Long(a:Int128,imm8:Int)="_mm_extract_epi64"
+	Function _mm_extract_epi8:Int(a:Int128,imm8:Int)="_mm_extract_epi8"
+	Function _mm_extract_ps:Int(a:Float128,imm8:Int)="_mm_extract_ps"
+	Function _mm_floor_pd:Double128(a:Double128)="_mm_floor_pd"
+	Function _mm_floor_ps:Float128(a:Float128)="_mm_floor_ps"
+	Function _mm_floor_sd:Double128(a:Double128,b:Double128)="_mm_floor_sd"
+	Function _mm_floor_ss:Float128(a:Float128,b:Float128)="_mm_floor_ss"
+	Function _mm_insert_epi32:Int128(a:Int128,i:Int,imm8:Int)="_mm_insert_epi32"
+	Function _mm_insert_epi64:Int128(a:Int128,i:Long,imm8:Int)="_mm_insert_epi64"
+	Function _mm_insert_epi8:Int128(a:Int128,i:Int,imm8:Int)="_mm_insert_epi8"
+	Function _mm_insert_ps:Float128(a:Float128,b:Float128,imm8:Int)="_mm_insert_ps"
+	Function _mm_max_epi32:Int128(a:Int128,b:Int128)="_mm_max_epi32"
+	Function _mm_max_epi8:Int128(a:Int128,b:Int128)="_mm_max_epi8"
+	Function _mm_max_epu16:Int128(a:Int128,b:Int128)="_mm_max_epu16"
+	Function _mm_max_epu32:Int128(a:Int128,b:Int128)="_mm_max_epu32"
+	Function _mm_min_epi32:Int128(a:Int128,b:Int128)="_mm_min_epi32"
+	Function _mm_min_epi8:Int128(a:Int128,b:Int128)="_mm_min_epi8"
+	Function _mm_min_epu16:Int128(a:Int128,b:Int128)="_mm_min_epu16"
+	Function _mm_min_epu32:Int128(a:Int128,b:Int128)="_mm_min_epu32"
+	Function _mm_minpos_epu16:Int128(a:Int128)="_mm_minpos_epu16"
+	Function _mm_mpsadbw_epu8:Int128(a:Int128,b:Int128,imm8:Int)="_mm_mpsadbw_epu8"
+	Function _mm_mul_epi32:Int128(a:Int128,b:Int128)="_mm_mul_epi32"
+	Function _mm_mullo_epi32:Int128(a:Int128,b:Int128)="_mm_mullo_epi32"
+	Function _mm_packus_epi32:Int128(a:Int128,b:Int128)="_mm_packus_epi32"
+	Function _mm_round_pd:Double128(a:Double128,rounding:Int)="_mm_round_pd"
+	Function _mm_round_ps:Float128(a:Float128,rounding:Int)="_mm_round_ps"
+	Function _mm_round_sd:Double128(a:Double128,b:Double128,rounding:Int)="_mm_round_sd"
+	Function _mm_round_ss:Float128(a:Float128,b:Float128,rounding:Int)="_mm_round_ss"
+	Function _mm_stream_load_si128:Int128(mem_addr:Int128 Ptr)="_mm_stream_load_si128"
+	Function _mm_test_all_ones:Int(a:Int128)="_mm_test_all_ones"
+	Function _mm_test_all_zeros:Int(a:Int128,mask:Int128)="_mm_test_all_zeros"
+	Function _mm_test_mix_ones_zeros:Int(a:Int128,mask:Int128)="_mm_test_mix_ones_zeros"
+	Function _mm_testc_si128:Int(a:Int128,b:Int128)="_mm_testc_si128"
+	Function _mm_testnzc_si128:Int(a:Int128,b:Int128)="_mm_testnzc_si128"
+	Function _mm_testz_si128:Int(a:Int128,b:Int128)="_mm_testz_si128"
+EndExtern

+ 61 - 1
xmmintrin.mod/sse41.x

@@ -1 +1,61 @@
-__m128i _mm_stream_load_si128(__m128i* )!
+__m128i _mm_blend_epi16(__m128i ,__m128i ,int )!
+__m128d _mm_blend_pd(__m128d ,__m128d ,int )!
+__m128 _mm_blend_ps(__m128 ,__m128 ,int )!
+__m128i _mm_blendv_epi8(__m128i ,__m128i ,__m128i )!
+__m128d _mm_blendv_pd(__m128d ,__m128d ,__m128d )!
+__m128 _mm_blendv_ps(__m128 ,__m128 ,__m128 )!
+__m128d _mm_ceil_pd(__m128d )!
+__m128 _mm_ceil_ps(__m128 )!
+__m128d _mm_ceil_sd(__m128d ,__m128d )!
+__m128 _mm_ceil_ss(__m128 ,__m128 )!
+__m128i _mm_cmpeq_epi64(__m128i ,__m128i )!
+__m128i _mm_cvtepi16_epi32(__m128i )!
+__m128i _mm_cvtepi16_epi64(__m128i )!
+__m128i _mm_cvtepi32_epi64(__m128i )!
+__m128i _mm_cvtepi8_epi16(__m128i )!
+__m128i _mm_cvtepi8_epi32(__m128i )!
+__m128i _mm_cvtepi8_epi64(__m128i )!
+__m128i _mm_cvtepu16_epi32(__m128i )!
+__m128i _mm_cvtepu16_epi64(__m128i )!
+__m128i _mm_cvtepu32_epi64(__m128i )!
+__m128i _mm_cvtepu8_epi16(__m128i )!
+__m128i _mm_cvtepu8_epi32(__m128i )!
+__m128i _mm_cvtepu8_epi64(__m128i )!
+__m128d _mm_dp_pd(__m128d ,__m128d ,int )!
+__m128 _mm_dp_ps(__m128 ,__m128 ,int )!
+int _mm_extract_epi32(__m128i ,int )!
+__int64 _mm_extract_epi64(__m128i ,int )!
+int _mm_extract_epi8(__m128i ,int )!
+int _mm_extract_ps(__m128 ,int )!
+__m128d _mm_floor_pd(__m128d )!
+__m128 _mm_floor_ps(__m128 )!
+__m128d _mm_floor_sd(__m128d ,__m128d )!
+__m128 _mm_floor_ss(__m128 ,__m128 )!
+__m128i _mm_insert_epi32(__m128i ,int ,int )!
+__m128i _mm_insert_epi64(__m128i ,__int64 ,int )!
+__m128i _mm_insert_epi8(__m128i ,int ,int )!
+__m128 _mm_insert_ps(__m128 ,__m128 ,int )!
+__m128i _mm_max_epi32(__m128i ,__m128i )!
+__m128i _mm_max_epi8(__m128i ,__m128i )!
+__m128i _mm_max_epu16(__m128i ,__m128i )!
+__m128i _mm_max_epu32(__m128i ,__m128i )!
+__m128i _mm_min_epi32(__m128i ,__m128i )!
+__m128i _mm_min_epi8(__m128i ,__m128i )!
+__m128i _mm_min_epu16(__m128i ,__m128i )!
+__m128i _mm_min_epu32(__m128i ,__m128i )!
+__m128i _mm_minpos_epu16(__m128i )!
+__m128i _mm_mpsadbw_epu8(__m128i ,__m128i ,int )!
+__m128i _mm_mul_epi32(__m128i ,__m128i )!
+__m128i _mm_mullo_epi32(__m128i ,__m128i )!
+__m128i _mm_packus_epi32(__m128i ,__m128i )!
+__m128d _mm_round_pd(__m128d ,int )!
+__m128 _mm_round_ps(__m128 ,int )!
+__m128d _mm_round_sd(__m128d ,__m128d ,int )!
+__m128 _mm_round_ss(__m128 ,__m128 ,int )!
+__m128i _mm_stream_load_si128(__m128i* )!
+int _mm_test_all_ones(__m128i )!
+int _mm_test_all_zeros(__m128i ,__m128i )!
+int _mm_test_mix_ones_zeros(__m128i ,__m128i )!
+int _mm_testc_si128(__m128i ,__m128i )!
+int _mm_testnzc_si128(__m128i ,__m128i )!
+int _mm_testz_si128(__m128i ,__m128i )!

+ 38 - 20
xmmintrin.mod/sse42.bmx

@@ -1,20 +1,38 @@
-SuperStrict
-Extern
-	Function _mm_cmpestra:Int(a:Int128,la:Int,b:Int128,lb:Int,imm8:Int)="_mm_cmpestra"
-	Function _mm_cmpestrc:Int(a:Int128,la:Int,b:Int128,lb:Int,imm8:Int)="_mm_cmpestrc"
-	Function _mm_cmpestri:Int(a:Int128,la:Int,b:Int128,lb:Int,imm8:Int)="_mm_cmpestri"
-	Function _mm_cmpestrm:Int128(a:Int128,la:Int,b:Int128,lb:Int,imm8:Int)="_mm_cmpestrm"
-	Function _mm_cmpestro:Int(a:Int128,la:Int,b:Int128,lb:Int,imm8:Int)="_mm_cmpestro"
-	Function _mm_cmpestrs:Int(a:Int128,la:Int,b:Int128,lb:Int,imm8:Int)="_mm_cmpestrs"
-	Function _mm_cmpestrz:Int(a:Int128,la:Int,b:Int128,lb:Int,imm8:Int)="_mm_cmpestrz"
-	Function _mm_cmpgt_epi64:Int128(a:Int128,b:Int128)="_mm_cmpgt_epi64"
-	Function _mm_cmpistra:Int(a:Int128,b:Int128,imm8:Int)="_mm_cmpistra"
-	Function _mm_cmpistrc:Int(a:Int128,b:Int128,imm8:Int)="_mm_cmpistrc"
-	Function _mm_cmpistri:Int(a:Int128,b:Int128,imm8:Int)="_mm_cmpistri"
-	Function _mm_cmpistrm:Int128(a:Int128,b:Int128,imm8:Int)="_mm_cmpistrm"
-	Function _mm_cmpistro:Int(a:Int128,b:Int128,imm8:Int)="_mm_cmpistro"
-	Function _mm_cmpistrs:Int(a:Int128,b:Int128,imm8:Int)="_mm_cmpistrs"
-	Function _mm_cmpistrz:Int(a:Int128,b:Int128,imm8:Int)="_mm_cmpistrz"
-	Function _mm_crc32_u32:UInt(crc:UInt,v:UInt)="_mm_crc32_u32"
-	Function _mm_crc32_u64:ULong(crc:ULong,v:ULong)="_mm_crc32_u64"
-EndExtern
+' Copyright (c) 2016 David JJ Camp
+' 
+' This software is provided 'as-is', without any express or implied
+' warranty. In no event will the authors be held liable for any damages
+' arising from the use of this software.
+'
+' Permission is granted to anyone to use this software for any purpose,
+' including commercial applications, and to alter it and redistribute it
+' freely, subject to the following restrictions:
+'
+' 1. The origin of this software must not be misrepresented; you must not
+'    claim that you wrote the original software. If you use this software
+'    in a product, an acknowledgement in the product documentation would be
+'    appreciated but is not required.
+' 2. Altered source versions must be plainly marked as such, and must not be
+'    misrepresented as being the original software.
+' 3. This notice may not be removed or altered from any source distribution.
+'
+SuperStrict
+Extern
+	Function _mm_cmpestra:Int(a:Int128,la:Int,b:Int128,lb:Int,imm8:Int)="_mm_cmpestra"
+	Function _mm_cmpestrc:Int(a:Int128,la:Int,b:Int128,lb:Int,imm8:Int)="_mm_cmpestrc"
+	Function _mm_cmpestri:Int(a:Int128,la:Int,b:Int128,lb:Int,imm8:Int)="_mm_cmpestri"
+	Function _mm_cmpestrm:Int128(a:Int128,la:Int,b:Int128,lb:Int,imm8:Int)="_mm_cmpestrm"
+	Function _mm_cmpestro:Int(a:Int128,la:Int,b:Int128,lb:Int,imm8:Int)="_mm_cmpestro"
+	Function _mm_cmpestrs:Int(a:Int128,la:Int,b:Int128,lb:Int,imm8:Int)="_mm_cmpestrs"
+	Function _mm_cmpestrz:Int(a:Int128,la:Int,b:Int128,lb:Int,imm8:Int)="_mm_cmpestrz"
+	Function _mm_cmpgt_epi64:Int128(a:Int128,b:Int128)="_mm_cmpgt_epi64"
+	Function _mm_cmpistra:Int(a:Int128,b:Int128,imm8:Int)="_mm_cmpistra"
+	Function _mm_cmpistrc:Int(a:Int128,b:Int128,imm8:Int)="_mm_cmpistrc"
+	Function _mm_cmpistri:Int(a:Int128,b:Int128,imm8:Int)="_mm_cmpistri"
+	Function _mm_cmpistrm:Int128(a:Int128,b:Int128,imm8:Int)="_mm_cmpistrm"
+	Function _mm_cmpistro:Int(a:Int128,b:Int128,imm8:Int)="_mm_cmpistro"
+	Function _mm_cmpistrs:Int(a:Int128,b:Int128,imm8:Int)="_mm_cmpistrs"
+	Function _mm_cmpistrz:Int(a:Int128,b:Int128,imm8:Int)="_mm_cmpistrz"
+	Function _mm_crc32_u32:UInt(crc:UInt,v:UInt)="_mm_crc32_u32"
+	Function _mm_crc32_u64:ULong(crc:ULong,v:ULong)="_mm_crc32_u64"
+EndExtern

+ 19 - 0
xmmintrin.mod/sse42.x

@@ -0,0 +1,19 @@
+int _mm_cmpestra(__m128i ,int ,__m128i ,int ,int )!
+int _mm_cmpestrc(__m128i ,int ,__m128i ,int ,int )!
+int _mm_cmpestri(__m128i ,int ,__m128i ,int ,int )!
+__m128i _mm_cmpestrm(__m128i ,int ,__m128i ,int ,int )!
+int _mm_cmpestro(__m128i ,int ,__m128i ,int ,int )!
+int _mm_cmpestrs(__m128i ,int ,__m128i ,int ,int )!
+int _mm_cmpestrz(__m128i ,int ,__m128i ,int ,int )!
+__m128i _mm_cmpgt_epi64(__m128i ,__m128i )!
+int _mm_cmpistra(__m128i ,__m128i ,int )!
+int _mm_cmpistrc(__m128i ,__m128i ,int )!
+int _mm_cmpistri(__m128i ,__m128i ,int )!
+__m128i _mm_cmpistrm(__m128i ,__m128i ,int )!
+int _mm_cmpistro(__m128i ,__m128i ,int )!
+int _mm_cmpistrs(__m128i ,__m128i ,int )!
+int _mm_cmpistrz(__m128i ,__m128i ,int )!
+unsigned int _mm_crc32_u16(unsigned int ,unsigned short )!
+unsigned int _mm_crc32_u32(unsigned int ,unsigned int )!
+unsigned __int64 _mm_crc32_u64(unsigned __int64 ,unsigned __int64 )!
+unsigned int _mm_crc32_u8(unsigned int ,unsigned char )!

+ 53 - 19
xmmintrin.mod/ssse3.bmx

@@ -1,19 +1,53 @@
-SuperStrict
-Extern
-	Function _mm_abs_epi16:Int128(a:Int128)="_mm_abs_epi16"
-	Function _mm_abs_epi32:Int128(a:Int128)="_mm_abs_epi32"
-	Function _mm_abs_epi8:Int128(a:Int128)="_mm_abs_epi8"
-	Function _mm_alignr_epi8:Int128(a:Int128,b:Int128,count:Int)="_mm_alignr_epi8"
-	Function _mm_hadd_epi16:Int128(a:Int128,b:Int128)="_mm_hadd_epi16"
-	Function _mm_hadd_epi32:Int128(a:Int128,b:Int128)="_mm_hadd_epi32"
-	Function _mm_hadds_epi16:Int128(a:Int128,b:Int128)="_mm_hadds_epi16"
-	Function _mm_hsub_epi16:Int128(a:Int128,b:Int128)="_mm_hsub_epi16"
-	Function _mm_hsub_epi32:Int128(a:Int128,b:Int128)="_mm_hsub_epi32"
-	Function _mm_hsubs_epi16:Int128(a:Int128,b:Int128)="_mm_hsubs_epi16"
-	Function _mm_maddubs_epi16:Int128(a:Int128,b:Int128)="_mm_maddubs_epi16"
-	Function _mm_mulhrs_epi16:Int128(a:Int128,b:Int128)="_mm_mulhrs_epi16"
-	Function _mm_shuffle_epi8:Int128(a:Int128,b:Int128)="_mm_shuffle_epi8"
-	Function _mm_sign_epi16:Int128(a:Int128,b:Int128)="_mm_sign_epi16"
-	Function _mm_sign_epi32:Int128(a:Int128,b:Int128)="_mm_sign_epi32"
-	Function _mm_sign_epi8:Int128(a:Int128,b:Int128)="_mm_sign_epi8"
-EndExtern
+' Copyright (c) 2016 David JJ Camp
+' 
+' This software is provided 'as-is', without any express or implied
+' warranty. In no event will the authors be held liable for any damages
+' arising from the use of this software.
+'
+' Permission is granted to anyone to use this software for any purpose,
+' including commercial applications, and to alter it and redistribute it
+' freely, subject to the following restrictions:
+'
+' 1. The origin of this software must not be misrepresented; you must not
+'    claim that you wrote the original software. If you use this software
+'    in a product, an acknowledgement in the product documentation would be
+'    appreciated but is not required.
+' 2. Altered source versions must be plainly marked as such, and must not be
+'    misrepresented as being the original software.
+' 3. This notice may not be removed or altered from any source distribution.
+'
+SuperStrict
+Extern
+	Function _mm_abs_epi16:Int128(a:Int128)="_mm_abs_epi16"
+	Function _mm_abs_epi32:Int128(a:Int128)="_mm_abs_epi32"
+	Function _mm_abs_epi8:Int128(a:Int128)="_mm_abs_epi8"
+	Function _mm_abs_pi16:Float64(a:Float64)="_mm_abs_pi16"
+	Function _mm_abs_pi32:Float64(a:Float64)="_mm_abs_pi32"
+	Function _mm_abs_pi8:Float64(a:Float64)="_mm_abs_pi8"
+	Function _mm_alignr_epi8:Int128(a:Int128,b:Int128,count:Int)="_mm_alignr_epi8"
+	Function _mm_alignr_pi8:Float64(a:Float64,b:Float64,count:Int)="_mm_alignr_pi8"
+	Function _mm_hadd_epi16:Int128(a:Int128,b:Int128)="_mm_hadd_epi16"
+	Function _mm_hadd_epi32:Int128(a:Int128,b:Int128)="_mm_hadd_epi32"
+	Function _mm_hadd_pi16:Float64(a:Float64,b:Float64)="_mm_hadd_pi16"
+	Function _mm_hadd_pi32:Float64(a:Float64,b:Float64)="_mm_hadd_pi32"
+	Function _mm_hadds_epi16:Int128(a:Int128,b:Int128)="_mm_hadds_epi16"
+	Function _mm_hadds_pi16:Float64(a:Float64,b:Float64)="_mm_hadds_pi16"
+	Function _mm_hsub_epi16:Int128(a:Int128,b:Int128)="_mm_hsub_epi16"
+	Function _mm_hsub_epi32:Int128(a:Int128,b:Int128)="_mm_hsub_epi32"
+	Function _mm_hsub_pi16:Float64(a:Float64,b:Float64)="_mm_hsub_pi16"
+	Function _mm_hsub_pi32:Float64(a:Float64,b:Float64)="_mm_hsub_pi32"
+	Function _mm_hsubs_epi16:Int128(a:Int128,b:Int128)="_mm_hsubs_epi16"
+	Function _mm_hsubs_pi16:Float64(a:Float64,b:Float64)="_mm_hsubs_pi16"
+	Function _mm_maddubs_epi16:Int128(a:Int128,b:Int128)="_mm_maddubs_epi16"
+	Function _mm_maddubs_pi16:Float64(a:Float64,b:Float64)="_mm_maddubs_pi16"
+	Function _mm_mulhrs_epi16:Int128(a:Int128,b:Int128)="_mm_mulhrs_epi16"
+	Function _mm_mulhrs_pi16:Float64(a:Float64,b:Float64)="_mm_mulhrs_pi16"
+	Function _mm_shuffle_epi8:Int128(a:Int128,b:Int128)="_mm_shuffle_epi8"
+	Function _mm_shuffle_pi8:Float64(a:Float64,b:Float64)="_mm_shuffle_pi8"
+	Function _mm_sign_epi16:Int128(a:Int128,b:Int128)="_mm_sign_epi16"
+	Function _mm_sign_epi32:Int128(a:Int128,b:Int128)="_mm_sign_epi32"
+	Function _mm_sign_epi8:Int128(a:Int128,b:Int128)="_mm_sign_epi8"
+	Function _mm_sign_pi16:Float64(a:Float64,b:Float64)="_mm_sign_pi16"
+	Function _mm_sign_pi32:Float64(a:Float64,b:Float64)="_mm_sign_pi32"
+	Function _mm_sign_pi8:Float64(a:Float64,b:Float64)="_mm_sign_pi8"
+EndExtern

+ 32 - 0
xmmintrin.mod/ssse3.x

@@ -0,0 +1,32 @@
+__m128i _mm_abs_epi16(__m128i )!
+__m128i _mm_abs_epi32(__m128i )!
+__m128i _mm_abs_epi8(__m128i )!
+__m64 _mm_abs_pi16(__m64 )!
+__m64 _mm_abs_pi32(__m64 )!
+__m64 _mm_abs_pi8(__m64 )!
+__m128i _mm_alignr_epi8(__m128i ,__m128i ,int )!
+__m64 _mm_alignr_pi8(__m64 ,__m64 ,int )!
+__m128i _mm_hadd_epi16(__m128i ,__m128i )!
+__m128i _mm_hadd_epi32(__m128i ,__m128i )!
+__m64 _mm_hadd_pi16(__m64 ,__m64 )!
+__m64 _mm_hadd_pi32(__m64 ,__m64 )!
+__m128i _mm_hadds_epi16(__m128i ,__m128i )!
+__m64 _mm_hadds_pi16(__m64 ,__m64 )!
+__m128i _mm_hsub_epi16(__m128i ,__m128i )!
+__m128i _mm_hsub_epi32(__m128i ,__m128i )!
+__m64 _mm_hsub_pi16(__m64 ,__m64 )!
+__m64 _mm_hsub_pi32(__m64 ,__m64 )!
+__m128i _mm_hsubs_epi16(__m128i ,__m128i )!
+__m64 _mm_hsubs_pi16(__m64 ,__m64 )!
+__m128i _mm_maddubs_epi16(__m128i ,__m128i )!
+__m64 _mm_maddubs_pi16(__m64 ,__m64 )!
+__m128i _mm_mulhrs_epi16(__m128i ,__m128i )!
+__m64 _mm_mulhrs_pi16(__m64 ,__m64 )!
+__m128i _mm_shuffle_epi8(__m128i ,__m128i )!
+__m64 _mm_shuffle_pi8(__m64 ,__m64 )!
+__m128i _mm_sign_epi16(__m128i ,__m128i )!
+__m128i _mm_sign_epi32(__m128i ,__m128i )!
+__m128i _mm_sign_epi8(__m128i ,__m128i )!
+__m64 _mm_sign_pi16(__m64 ,__m64 )!
+__m64 _mm_sign_pi32(__m64 ,__m64 )!
+__m64 _mm_sign_pi8(__m64 ,__m64 )!

+ 375 - 0
xmmintrin.mod/tools/generate.bmx

@@ -0,0 +1,375 @@
+' Copyright (c) 2016 David JJ Camp
+' 
+' This software is provided 'as-is', without any express or implied
+' warranty. In no event will the authors be held liable for any damages
+' arising from the use of this software.
+'
+' Permission is granted to anyone to use this software for any purpose,
+' including commercial applications, and to alter it and redistribute it
+' freely, subject to the following restrictions:
+'
+' 1. The origin of this software must not be misrepresented; you must not
+'    claim that you wrote the original software. If you use this software
+'    in a product, an acknowledgement in the product documentation would be
+'    appreciated but is not required.
+' 2. Altered source versions must be plainly marked as such, and must not be
+'    misrepresented as being the original software.
+' 3. This notice may not be removed or altered from any source distribution.
+'
+
+' This little utility takes the sse*.txt files and creates NG compatible versions
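+'
+' For example, a prototype line such as
+'   __m128 _mm_addsub_ps (__m128 a, __m128 b)
+' from sse3.txt is turned into the NG extern declaration
+'   Function _mm_addsub_ps:Float128(a:Float128,b:Float128)="_mm_addsub_ps"
+' in sse3.bmx, plus the companion prototype
+'   __m128 _mm_addsub_ps(__m128 ,__m128 )!
+' in sse3.x.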
+
+SuperStrict
+
+Framework brl.standardio
+Import brl.filesystem
+
+Local SSE_VERSIONS:String[] = [ "sse","sse2","sse3","ssse3","sse41","sse42" ]
+Local modulefile:String = "../xmmintrin.bmx"
+
+
+
+BackupFile(modulefile)
+
+Local headerdata:String  = "' Copyright (c) 2016 David JJ Camp~n"
+headerdata :+ "' ~n"
+headerdata :+ "' This software is provided 'as-is', without any express or implied~n"
+headerdata :+ "' warranty. In no event will the authors be held liable for any damages~n"
+headerdata :+ "' arising from the use of this software.~n"
+headerdata :+ "'~n"
+headerdata :+ "' Permission is granted to anyone to use this software for any purpose,~n"
+headerdata :+ "' including commercial applications, and to alter it and redistribute it~n"
+headerdata :+ "' freely, subject to the following restrictions:~n"
+headerdata :+ "'~n"
+headerdata :+ "' 1. The origin of this software must not be misrepresented; you must not~n"
+headerdata :+ "'    claim that you wrote the original software. If you use this software~n"
+headerdata :+ "'    in a product, an acknowledgement in the product documentation would be~n"
+headerdata :+ "'    appreciated but is not required.~n"
+headerdata :+ "' 2. Altered source versions must be plainly marked as such, and must not be~n"
+headerdata :+ "'    misrepresented as being the original software.~n"
+headerdata :+ "' 3. This notice may not be removed or altered from any source distribution.~n"
+headerdata :+ "'"
+
+
+Local moduledata$ = headerdata + "~n"
+moduledata :+ "SuperStrict~n~n"
+moduledata :+ "Rem~n"
+moduledata :+ "bbdoc: SIMD intrinsics for x64.~n"
+moduledata :+ "End Rem~n"
+moduledata :+ "Module pub.xmmintrin~n~n"
+moduledata :+ "ModuleInfo ~qVersion: 1.00~q~n"
+moduledata :+ "ModuleInfo ~qAuthor: David JJ Camp~q~n"
+moduledata :+ "ModuleInfo ~qLicense: zlib/libpng~q~n"
+moduledata :+ "ModuleInfo ~qCopyright: David JJ Camp~q~n~n"
+moduledata :+ "?x64~n"
+
+For Local version$ = EachIn SSE_VERSIONS
+	Local file:String = version + ".bmx"
+	Local path:String = "../" + file
+	Local xfile:String = "../" + version + ".x"
+
+	BackupFile(path)
+	
+	If FileType(version+".txt")
+		Local parser:tparser = New tparser(LoadText(version+".txt"))
+		
+		Local outstream:TStream = WriteFile(path)
+		Local xstream:TStream = WriteFile(xfile)
+		
+		WriteLine(outstream, headerdata)
+		WriteLine(outstream,"SuperStrict")
+		WriteLine(outstream,"Extern") 
+		parser.parse(outstream,xstream)
+		WriteLine(outstream,"EndExtern")
+
+		CloseFile outstream
+		CloseFile xstream
+		
+		If FileType(path)
+			moduledata :+ "Import ~q" + file + "~q"
+?win32
+			moduledata :+ "~r~n"
+?Not win32
+			moduledata :+ "~n"
+?
+		EndIf
+
+	EndIf
+Next
+
+moduledata :+ "?~n"
+
+SaveText moduledata,modulefile
+
+Type tparser
+	Field _data$
+	Field _length:Int
+	Field _posa:Int,_posb:Int
+	Field _token$
+	
+	Field _xfunc$
+	Field _xpos:Int
+
+	Field _out$
+	
+	Method New(data$)
+		_data = data
+		_length = data.length
+		_out = "~t"
+
+		NextToken()
+	EndMethod
+	
+	Method Parse(outfile:TStream,xfile:TStream)
+		While _posb < _length
+			If _token = "'"
+				SkipComment()
+				NextToken
+				Continue
+			
+			ElseIf IsEol()
+				SkipEol()
+				NextToken
+				Continue
+
+			Else
+				' take a chance on all macros beginning with '_MM_
+				If _token[..4] = "_MM_"
+					ParseMacro()
+				Else
+					ParseFunction()
+					FilterOutputToXFile(xfile)
+					FilterOutputToFile(outfile)
+					_out = "~t"
+					_xfunc = ""
+				EndIf
+
+			EndIf
+		Wend
+	EndMethod
+	
+	Method FilterOutputToXFile(xfile:TStream)
+		WriteLine xfile,_xfunc
+	EndMethod
+	
+	Method FilterOutputToFile(outfile:TStream)
+		If _out.contains("UShort")
+		ElseIf _out.contains("UByte")
+		Else
+			WriteLine outfile,_out
+		EndIf
+	EndMethod
+	
+	Method ParseMacro()
+		' what to do here? skip for the time being
+		While Not IsEol() And _posb < _length
+			_posb :+ 1
+		Wend
+		_posa = _posb
+	EndMethod
+	
+	Method ParseFunction()
+		Local rettype$
+		Local funcname$
+		
+		rettype = ParseType()
+		
+		Assert(_token.length)
+		funcname = _token
+		_xfunc :+ funcname
+
+		Emit("Function ")
+		Emit(_token)
+
+		If rettype
+			Emit(":")
+			Emit(rettype)
+		EndIf
+	
+		NextToken
+		Assert(_token = "(")
+		Emit("(")
+		_xfunc :+ "("
+		
+		nexttoken
+
+		Local param$
+		If _token <> ")"
+			param = ParseParameter()
+			Emit(param)
+
+			While _token <> ")"
+				Assert(_token = ",")
+				Emit(",")
+				_xfunc :+ ","
+				NextToken
+
+				Local param$ = ParseParameter()
+				Emit(param)
+			Wend
+		EndIf
+	
+		Emit(")")
+		_xfunc :+ ")!"
+		Emit("=~q" + funcname + "~q")
+
+		NextToken
+	EndMethod
+	
+	Method ParseType$()
+		_xpos = _posa
+		Local t$
+		Select _token
+		Case "__m64"
+			t = "Float64"
+				
+		Case "__m128"
+			t = "Float128"
+			
+		Case "__m128i"
+			t = "Int128"
+			
+		Case "__m128d"
+			t = "Double128"
+				
+		Case "__int64"
+			t = "Long"
+
+		Case "void"
+			t = "void"
+			
+		Case "char"
+			t = "Byte"
+			
+		Case "short"
+			t = "Short"
+
+		Case "int"
+			t = "Int"
+
+		Case "float"
+			t = "Float"
+
+		Case "double"
+			t = "Double"
+		
+		Case "unsigned"
+			_xfunc :+ "unsigned "
+			nexttoken
+			t = "U" + ParseType()
+			Return t
+			
+		Case "const"
+			nexttoken
+			t :+ ParseType()
+			Return t
+			
+		Default
+			DebugStop
+		EndSelect
+
+		_xfunc :+ _data[_xpos.._posb]
+		_xpos = _posb
+		
+		nexttoken
+		If _token = "const" nexttoken			
+		If _token = "*"
+			_xfunc :+ _token
+
+			If t = "void" t = "Byte"
+			t :+ " Ptr"
+			nexttoken
+		EndIf
+		If t = "void" t = ""
+
+		_xfunc :+ " "
+		
+		Return t
+	EndMethod
+	
+	Method ParseParameter$()
+		Local t$ = ParseType()		
+		If t = "" And ( _token = "," Or _token = ")" ) Return ""
+		
+		Assert(_token.length)
+		Local name$ = _token
+		nexttoken
+		
+		Return name + ":" + t
+	EndMethod
+	
+	Method Emit(s$)
+		_out :+ s
+	EndMethod
+
+	Method SkipWhite()
+		While IsWhite() And _posb < _length
+			_posb :+ 1
+		Wend
+		_posa = _posb
+	EndMethod
+
+	Method SkipComment()
+		While Not IsEol() And _posb < _length
+			_posb :+ 1
+		Wend
+		'_token = _data[_posa.._posb]
+	EndMethod
+	
+	Method SkipEol()
+		While IsEol() And _posb < _length
+			_posb :+ 1
+		Wend
+	EndMethod
+
+	' lexer
+	Method NextToken()
+		_token = ""
+		_posa = _posb
+
+		SkipWhite()
+		
+		If IsAlpha()
+			MakeWord()
+		Else
+			_token = _data[_posa.._posb+1]
+			_posb :+ 1
+		EndIf
+	EndMethod
+	
+	Method MakeWord()
+		_posb :+ 1
+		While IsAlpha() Or IsNumeric()
+			_posb :+ 1
+		Wend
+		_token = _data[_posa.._posb]
+	EndMethod
+	
+	Method IsWhite:Int()
+		Return _data[_posb] <= 32
+	EndMethod
+
+	Method IsEol:Int()
+		Return _data[_posb] = 10 Or _data[_posb] = 13
+	EndMethod
+	
+	Method IsComment:Int()
+		Return _data[_posb] = 39
+	EndMethod
+	
+	Method IsNumeric:Int()
+		Return _data[_posb] >= 48 And _data[_posb] <= 57
+	EndMethod
+	
+	Method IsAlpha:Int()
+		Return ( _data[_posb] >= 65 And _data[_posb] <= 90 ) Or ( _data[_posb] >= 97 And _data[_posb] <= 122 ) Or _data[_posb] = 95
+	EndMethod
+EndType
+
+Function BackupFile(filename$)
+	If FileType(filename)
+		Local data$ = LoadText(filename)
+		Local backup$ = filename+".bak"
+		SaveText data,backup
+	EndIf
+EndFunction

+ 152 - 0
xmmintrin.mod/tools/sse.txt

@@ -0,0 +1,152 @@
+' https://software.intel.com/sites/landingpage/IntrinsicsGuide/#techs=SSE
+__m128 _mm_add_ps (__m128 a, __m128 b)
+__m128 _mm_add_ss (__m128 a, __m128 b)
+__m128 _mm_and_ps (__m128 a, __m128 b)
+__m128 _mm_andnot_ps (__m128 a, __m128 b)
+__m64 _mm_avg_pu16 (__m64 a, __m64 b)
+__m64 _mm_avg_pu8 (__m64 a, __m64 b)
+__m128 _mm_cmpeq_ps (__m128 a, __m128 b)
+__m128 _mm_cmpeq_ss (__m128 a, __m128 b)
+__m128 _mm_cmpge_ps (__m128 a, __m128 b)
+__m128 _mm_cmpge_ss (__m128 a, __m128 b)
+__m128 _mm_cmpgt_ps (__m128 a, __m128 b)
+__m128 _mm_cmpgt_ss (__m128 a, __m128 b)
+__m128 _mm_cmple_ps (__m128 a, __m128 b)
+__m128 _mm_cmple_ss (__m128 a, __m128 b)
+__m128 _mm_cmplt_ps (__m128 a, __m128 b)
+__m128 _mm_cmplt_ss (__m128 a, __m128 b)
+__m128 _mm_cmpneq_ps (__m128 a, __m128 b)
+__m128 _mm_cmpneq_ss (__m128 a, __m128 b)
+__m128 _mm_cmpnge_ps (__m128 a, __m128 b)
+__m128 _mm_cmpnge_ss (__m128 a, __m128 b)
+__m128 _mm_cmpngt_ps (__m128 a, __m128 b)
+__m128 _mm_cmpngt_ss (__m128 a, __m128 b)
+__m128 _mm_cmpnle_ps (__m128 a, __m128 b)
+__m128 _mm_cmpnle_ss (__m128 a, __m128 b)
+__m128 _mm_cmpnlt_ps (__m128 a, __m128 b)
+__m128 _mm_cmpnlt_ss (__m128 a, __m128 b)
+__m128 _mm_cmpord_ps (__m128 a, __m128 b)
+__m128 _mm_cmpord_ss (__m128 a, __m128 b)
+__m128 _mm_cmpunord_ps (__m128 a, __m128 b)
+__m128 _mm_cmpunord_ss (__m128 a, __m128 b)
+int _mm_comieq_ss (__m128 a, __m128 b)
+int _mm_comige_ss (__m128 a, __m128 b)
+int _mm_comigt_ss (__m128 a, __m128 b)
+int _mm_comile_ss (__m128 a, __m128 b)
+int _mm_comilt_ss (__m128 a, __m128 b)
+int _mm_comineq_ss (__m128 a, __m128 b)
+__m128 _mm_cvt_pi2ps (__m128 a, __m64 b)
+__m64 _mm_cvt_ps2pi (__m128 a)
+__m128 _mm_cvt_si2ss (__m128 a, int b)
+int _mm_cvt_ss2si (__m128 a)
+__m128 _mm_cvtpi16_ps (__m64 a)
+__m128 _mm_cvtpi32_ps (__m128 a, __m64 b)
+__m128 _mm_cvtpi32x2_ps (__m64 a, __m64 b)
+__m128 _mm_cvtpi8_ps (__m64 a)
+__m64 _mm_cvtps_pi16 (__m128 a)
+__m64 _mm_cvtps_pi32 (__m128 a)
+__m64 _mm_cvtps_pi8 (__m128 a)
+__m128 _mm_cvtpu16_ps (__m64 a)
+__m128 _mm_cvtpu8_ps (__m64 a)
+__m128 _mm_cvtsi32_ss (__m128 a, int b)
+__m128 _mm_cvtsi64_ss (__m128 a, __int64 b)
+float _mm_cvtss_f32 (__m128 a)
+int _mm_cvtss_si32 (__m128 a)
+__int64 _mm_cvtss_si64 (__m128 a)
+__m64 _mm_cvtt_ps2pi (__m128 a)
+int _mm_cvtt_ss2si (__m128 a)
+__m64 _mm_cvttps_pi32 (__m128 a)
+int _mm_cvttss_si32 (__m128 a)
+__int64 _mm_cvttss_si64 (__m128 a)
+__m128 _mm_div_ps (__m128 a, __m128 b)
+__m128 _mm_div_ss (__m128 a, __m128 b)
+int _mm_extract_pi16 (__m64 a, int imm8)
+unsigned int _MM_GET_EXCEPTION_MASK ()
+unsigned int _MM_GET_EXCEPTION_STATE ()
+unsigned int _MM_GET_FLUSH_ZERO_MODE ()
+unsigned int _MM_GET_ROUNDING_MODE ()
+unsigned int _mm_getcsr (void)
+__m64 _mm_insert_pi16 (__m64 a, int i, int imm8)
+__m128 _mm_load_ps (float const* mem_addr)
+__m128 _mm_load_ps1 (float const* mem_addr)
+__m128 _mm_load_ss (float const* mem_addr)
+__m128 _mm_load1_ps (float const* mem_addr)
+__m128 _mm_loadh_pi (__m128 a, __m64 const* mem_addr)
+__m128 _mm_loadl_pi (__m128 a, __m64 const* mem_addr)
+__m128 _mm_loadr_ps (float const* mem_addr)
+__m128 _mm_loadu_ps (float const* mem_addr)
+void _mm_maskmove_si64 (__m64 a, __m64 mask, char* mem_addr)
+void _m_maskmovq (__m64 a, __m64 mask, char* mem_addr)
+__m64 _mm_max_pi16 (__m64 a, __m64 b)
+__m128 _mm_max_ps (__m128 a, __m128 b)
+__m64 _mm_max_pu8 (__m64 a, __m64 b)
+__m128 _mm_max_ss (__m128 a, __m128 b)
+__m64 _mm_min_pi16 (__m64 a, __m64 b)
+__m128 _mm_min_ps (__m128 a, __m128 b)
+__m64 _mm_min_pu8 (__m64 a, __m64 b)
+__m128 _mm_min_ss (__m128 a, __m128 b)
+__m128 _mm_move_ss (__m128 a, __m128 b)
+__m128 _mm_movehl_ps (__m128 a, __m128 b)
+__m128 _mm_movelh_ps (__m128 a, __m128 b)
+int _mm_movemask_pi8 (__m64 a)
+int _mm_movemask_ps (__m128 a)
+__m128 _mm_mul_ps (__m128 a, __m128 b)
+__m128 _mm_mul_ss (__m128 a, __m128 b)
+__m64 _mm_mulhi_pu16 (__m64 a, __m64 b)
+__m128 _mm_or_ps (__m128 a, __m128 b)
+__m64 _m_pavgb (__m64 a, __m64 b)
+__m64 _m_pavgw (__m64 a, __m64 b)
+int _m_pextrw (__m64 a, int imm8)
+__m64 _m_pinsrw (__m64 a, int i, int imm8)
+__m64 _m_pmaxsw (__m64 a, __m64 b)
+__m64 _m_pmaxub (__m64 a, __m64 b)
+__m64 _m_pminsw (__m64 a, __m64 b)
+__m64 _m_pminub (__m64 a, __m64 b)
+int _m_pmovmskb (__m64 a)
+__m64 _m_pmulhuw (__m64 a, __m64 b)
+void _mm_prefetch (char const* p, int i)
+__m64 _m_psadbw (__m64 a, __m64 b)
+__m64 _m_pshufw (__m64 a, int imm8)
+__m128 _mm_rcp_ps (__m128 a)
+__m128 _mm_rcp_ss (__m128 a)
+__m128 _mm_rsqrt_ps (__m128 a)
+__m128 _mm_rsqrt_ss (__m128 a)
+__m64 _mm_sad_pu8 (__m64 a, __m64 b)
+void _MM_SET_EXCEPTION_MASK (unsigned int a)
+void _MM_SET_EXCEPTION_STATE (unsigned int a)
+void _MM_SET_FLUSH_ZERO_MODE (unsigned int a)
+__m128 _mm_set_ps (float e3, float e2, float e1, float e0)
+__m128 _mm_set_ps1 (float a)
+void _MM_SET_ROUNDING_MODE (unsigned int a)
+__m128 _mm_set_ss (float a)
+__m128 _mm_set1_ps (float a)
+void _mm_setcsr (unsigned int a)
+__m128 _mm_setr_ps (float e3, float e2, float e1, float e0)
+__m128 _mm_setzero_ps (void)
+void _mm_sfence (void)
+__m64 _mm_shuffle_pi16 (__m64 a, int imm8)
+__m128 _mm_shuffle_ps (__m128 a, __m128 b, unsigned int imm8)
+__m128 _mm_sqrt_ps (__m128 a)
+__m128 _mm_sqrt_ss (__m128 a)
+void _mm_store_ps (float* mem_addr, __m128 a)
+void _mm_store_ps1 (float* mem_addr, __m128 a)
+void _mm_store_ss (float* mem_addr, __m128 a)
+void _mm_store1_ps (float* mem_addr, __m128 a)
+void _mm_storeh_pi (__m64* mem_addr, __m128 a)
+void _mm_storel_pi (__m64* mem_addr, __m128 a)
+void _mm_storer_ps (float* mem_addr, __m128 a)
+void _mm_storeu_ps (float* mem_addr, __m128 a)
+void _mm_stream_pi (__m64* mem_addr, __m64 a)
+void _mm_stream_ps (float* mem_addr, __m128 a)
+__m128 _mm_sub_ps (__m128 a, __m128 b)
+__m128 _mm_sub_ss (__m128 a, __m128 b)
+_MM_TRANSPOSE4_PS (__m128 row0, __m128 row1, __m128 row2, __m128 row3)
+int _mm_ucomieq_ss (__m128 a, __m128 b)
+int _mm_ucomige_ss (__m128 a, __m128 b)
+int _mm_ucomigt_ss (__m128 a, __m128 b)
+int _mm_ucomile_ss (__m128 a, __m128 b)
+int _mm_ucomilt_ss (__m128 a, __m128 b)
+int _mm_ucomineq_ss (__m128 a, __m128 b)
+__m128 _mm_unpackhi_ps (__m128 a, __m128 b)
+__m128 _mm_unpacklo_ps (__m128 a, __m128 b)
+__m128 _mm_xor_ps (__m128 a, __m128 b)

+ 231 - 0
xmmintrin.mod/tools/sse2.txt

@@ -0,0 +1,231 @@
+' https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1356,107&techs=SSE2
+__m128i _mm_add_epi16 (__m128i a, __m128i b)
+__m128i _mm_add_epi32 (__m128i a, __m128i b)
+__m128i _mm_add_epi64 (__m128i a, __m128i b)
+__m128i _mm_add_epi8 (__m128i a, __m128i b)
+__m128d _mm_add_pd (__m128d a, __m128d b)
+__m128d _mm_add_sd (__m128d a, __m128d b)
+__m64 _mm_add_si64 (__m64 a, __m64 b)
+__m128i _mm_adds_epi16 (__m128i a, __m128i b)
+__m128i _mm_adds_epi8 (__m128i a, __m128i b)
+__m128i _mm_adds_epu16 (__m128i a, __m128i b)
+__m128i _mm_adds_epu8 (__m128i a, __m128i b)
+__m128d _mm_and_pd (__m128d a, __m128d b)
+__m128i _mm_and_si128 (__m128i a, __m128i b)
+__m128d _mm_andnot_pd (__m128d a, __m128d b)
+__m128i _mm_andnot_si128 (__m128i a, __m128i b)
+__m128i _mm_avg_epu16 (__m128i a, __m128i b)
+__m128i _mm_avg_epu8 (__m128i a, __m128i b)
+__m128i _mm_bslli_si128 (__m128i a, int imm8)
+__m128i _mm_bsrli_si128 (__m128i a, int imm8)
+__m128 _mm_castpd_ps (__m128d a)
+__m128i _mm_castpd_si128 (__m128d a)
+__m128d _mm_castps_pd (__m128 a)
+__m128i _mm_castps_si128 (__m128 a)
+__m128d _mm_castsi128_pd (__m128i a)
+__m128 _mm_castsi128_ps (__m128i a)
+void _mm_clflush (void const* p)
+__m128i _mm_cmpeq_epi16 (__m128i a, __m128i b)
+__m128i _mm_cmpeq_epi32 (__m128i a, __m128i b)
+__m128i _mm_cmpeq_epi8 (__m128i a, __m128i b)
+__m128d _mm_cmpeq_pd (__m128d a, __m128d b)
+__m128d _mm_cmpeq_sd (__m128d a, __m128d b)
+__m128d _mm_cmpge_pd (__m128d a, __m128d b)
+__m128d _mm_cmpge_sd (__m128d a, __m128d b)
+__m128i _mm_cmpgt_epi16 (__m128i a, __m128i b)
+__m128i _mm_cmpgt_epi32 (__m128i a, __m128i b)
+__m128i _mm_cmpgt_epi8 (__m128i a, __m128i b)
+__m128d _mm_cmpgt_pd (__m128d a, __m128d b)
+__m128d _mm_cmpgt_sd (__m128d a, __m128d b)
+__m128d _mm_cmple_pd (__m128d a, __m128d b)
+__m128d _mm_cmple_sd (__m128d a, __m128d b)
+__m128i _mm_cmplt_epi16 (__m128i a, __m128i b)
+__m128i _mm_cmplt_epi32 (__m128i a, __m128i b)
+__m128i _mm_cmplt_epi8 (__m128i a, __m128i b)
+__m128d _mm_cmplt_pd (__m128d a, __m128d b)
+__m128d _mm_cmplt_sd (__m128d a, __m128d b)
+__m128d _mm_cmpneq_pd (__m128d a, __m128d b)
+__m128d _mm_cmpneq_sd (__m128d a, __m128d b)
+__m128d _mm_cmpnge_pd (__m128d a, __m128d b)
+__m128d _mm_cmpnge_sd (__m128d a, __m128d b)
+__m128d _mm_cmpngt_pd (__m128d a, __m128d b)
+__m128d _mm_cmpngt_sd (__m128d a, __m128d b)
+__m128d _mm_cmpnle_pd (__m128d a, __m128d b)
+__m128d _mm_cmpnle_sd (__m128d a, __m128d b)
+__m128d _mm_cmpnlt_pd (__m128d a, __m128d b)
+__m128d _mm_cmpnlt_sd (__m128d a, __m128d b)
+__m128d _mm_cmpord_pd (__m128d a, __m128d b)
+__m128d _mm_cmpord_sd (__m128d a, __m128d b)
+__m128d _mm_cmpunord_pd (__m128d a, __m128d b)
+__m128d _mm_cmpunord_sd (__m128d a, __m128d b)
+int _mm_comieq_sd (__m128d a, __m128d b)
+int _mm_comige_sd (__m128d a, __m128d b)
+int _mm_comigt_sd (__m128d a, __m128d b)
+int _mm_comile_sd (__m128d a, __m128d b)
+int _mm_comilt_sd (__m128d a, __m128d b)
+int _mm_comineq_sd (__m128d a, __m128d b)
+__m128d _mm_cvtepi32_pd (__m128i a)
+__m128 _mm_cvtepi32_ps (__m128i a)
+__m128i _mm_cvtpd_epi32 (__m128d a)
+__m64 _mm_cvtpd_pi32 (__m128d a)
+__m128 _mm_cvtpd_ps (__m128d a)
+__m128d _mm_cvtpi32_pd (__m64 a)
+__m128i _mm_cvtps_epi32 (__m128 a)
+__m128d _mm_cvtps_pd (__m128 a)
+double _mm_cvtsd_f64 (__m128d a)
+int _mm_cvtsd_si32 (__m128d a)
+__int64 _mm_cvtsd_si64 (__m128d a)
+__int64 _mm_cvtsd_si64x (__m128d a)
+__m128 _mm_cvtsd_ss (__m128 a, __m128d b)
+int _mm_cvtsi128_si32 (__m128i a)
+__int64 _mm_cvtsi128_si64 (__m128i a)
+__int64 _mm_cvtsi128_si64x (__m128i a)
+__m128d _mm_cvtsi32_sd (__m128d a, int b)
+__m128i _mm_cvtsi32_si128 (int a)
+__m128d _mm_cvtsi64_sd (__m128d a, __int64 b)
+__m128i _mm_cvtsi64_si128 (__int64 a)
+__m128d _mm_cvtsi64x_sd (__m128d a, __int64 b)
+__m128i _mm_cvtsi64x_si128 (__int64 a)
+__m128d _mm_cvtss_sd (__m128d a, __m128 b)
+__m128i _mm_cvttpd_epi32 (__m128d a)
+__m64 _mm_cvttpd_pi32 (__m128d a)
+__m128i _mm_cvttps_epi32 (__m128 a)
+int _mm_cvttsd_si32 (__m128d a)
+__int64 _mm_cvttsd_si64 (__m128d a)
+__int64 _mm_cvttsd_si64x (__m128d a)
+__m128d _mm_div_pd (__m128d a, __m128d b)
+__m128d _mm_div_sd (__m128d a, __m128d b)
+int _mm_extract_epi16 (__m128i a, int imm8)
+__m128i _mm_insert_epi16 (__m128i a, int i, int imm8)
+void _mm_lfence (void)
+__m128d _mm_load_pd (double const* mem_addr)
+__m128d _mm_load_pd1 (double const* mem_addr)
+__m128d _mm_load_sd (double const* mem_addr)
+__m128i _mm_load_si128 (__m128i const* mem_addr)
+__m128d _mm_load1_pd (double const* mem_addr)
+__m128d _mm_loadh_pd (__m128d a, double const* mem_addr)
+__m128i _mm_loadl_epi64 (__m128i const* mem_addr)
+__m128d _mm_loadl_pd (__m128d a, double const* mem_addr)
+__m128d _mm_loadr_pd (double const* mem_addr)
+__m128d _mm_loadu_pd (double const* mem_addr)
+__m128i _mm_loadu_si128 (__m128i const* mem_addr)
+__m128i _mm_madd_epi16 (__m128i a, __m128i b)
+void _mm_maskmoveu_si128 (__m128i a, __m128i mask, char* mem_addr)
+__m128i _mm_max_epi16 (__m128i a, __m128i b)
+__m128i _mm_max_epu8 (__m128i a, __m128i b)
+__m128d _mm_max_pd (__m128d a, __m128d b)
+__m128d _mm_max_sd (__m128d a, __m128d b)
+void _mm_mfence (void)
+__m128i _mm_min_epi16 (__m128i a, __m128i b)
+__m128i _mm_min_epu8 (__m128i a, __m128i b)
+__m128d _mm_min_pd (__m128d a, __m128d b)
+__m128d _mm_min_sd (__m128d a, __m128d b)
+__m128i _mm_move_epi64 (__m128i a)
+__m128d _mm_move_sd (__m128d a, __m128d b)
+int _mm_movemask_epi8 (__m128i a)
+int _mm_movemask_pd (__m128d a)
+__m64 _mm_movepi64_pi64 (__m128i a)
+__m128i _mm_movpi64_epi64 (__m64 a)
+__m128i _mm_mul_epu32 (__m128i a, __m128i b)
+__m128d _mm_mul_pd (__m128d a, __m128d b)
+__m128d _mm_mul_sd (__m128d a, __m128d b)
+__m64 _mm_mul_su32 (__m64 a, __m64 b)
+__m128i _mm_mulhi_epi16 (__m128i a, __m128i b)
+__m128i _mm_mulhi_epu16 (__m128i a, __m128i b)
+__m128i _mm_mullo_epi16 (__m128i a, __m128i b)
+__m128d _mm_or_pd (__m128d a, __m128d b)
+__m128i _mm_or_si128 (__m128i a, __m128i b)
+__m128i _mm_packs_epi16 (__m128i a, __m128i b)
+__m128i _mm_packs_epi32 (__m128i a, __m128i b)
+__m128i _mm_packus_epi16 (__m128i a, __m128i b)
+void _mm_pause (void)
+__m128i _mm_sad_epu8 (__m128i a, __m128i b)
+__m128i _mm_set_epi16 (short e7, short e6, short e5, short e4, short e3, short e2, short e1, short e0)
+__m128i _mm_set_epi32 (int e3, int e2, int e1, int e0)
+__m128i _mm_set_epi64 (__m64 e1, __m64 e0)
+__m128i _mm_set_epi64x (__int64 e1, __int64 e0)
+__m128i _mm_set_epi8 (char e15, char e14, char e13, char e12, char e11, char e10, char e9, char e8, char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0)
+__m128d _mm_set_pd (double e1, double e0)
+__m128d _mm_set_pd1 (double a)
+__m128d _mm_set_sd (double a)
+__m128i _mm_set1_epi16 (short a)
+__m128i _mm_set1_epi32 (int a)
+__m128i _mm_set1_epi64 (__m64 a)
+__m128i _mm_set1_epi64x (__int64 a)
+__m128i _mm_set1_epi8 (char a)
+__m128d _mm_set1_pd (double a)
+__m128i _mm_setr_epi16 (short e7, short e6, short e5, short e4, short e3, short e2, short e1, short e0)
+__m128i _mm_setr_epi32 (int e3, int e2, int e1, int e0)
+__m128i _mm_setr_epi64 (__m64 e1, __m64 e0)
+__m128i _mm_setr_epi8 (char e15, char e14, char e13, char e12, char e11, char e10, char e9, char e8, char e7, char e6, char e5, char e4, char e3, char e2, char e1, char e0)
+__m128d _mm_setr_pd (double e1, double e0)
+__m128d _mm_setzero_pd (void)
+__m128i _mm_setzero_si128 ()
+__m128i _mm_shuffle_epi32 (__m128i a, int imm8)
+__m128d _mm_shuffle_pd (__m128d a, __m128d b, int imm8)
+__m128i _mm_shufflehi_epi16 (__m128i a, int imm8)
+__m128i _mm_shufflelo_epi16 (__m128i a, int imm8)
+__m128i _mm_sll_epi16 (__m128i a, __m128i count)
+__m128i _mm_sll_epi32 (__m128i a, __m128i count)
+__m128i _mm_sll_epi64 (__m128i a, __m128i count)
+__m128i _mm_slli_epi16 (__m128i a, int imm8)
+__m128i _mm_slli_epi32 (__m128i a, int imm8)
+__m128i _mm_slli_epi64 (__m128i a, int imm8)
+__m128i _mm_slli_si128 (__m128i a, int imm8)
+__m128d _mm_sqrt_pd (__m128d a)
+__m128d _mm_sqrt_sd (__m128d a, __m128d b)
+__m128i _mm_sra_epi16 (__m128i a, __m128i count)
+__m128i _mm_sra_epi32 (__m128i a, __m128i count)
+__m128i _mm_srai_epi16 (__m128i a, int imm8)
+__m128i _mm_srai_epi32 (__m128i a, int imm8)
+__m128i _mm_srl_epi16 (__m128i a, __m128i count)
+__m128i _mm_srl_epi32 (__m128i a, __m128i count)
+__m128i _mm_srl_epi64 (__m128i a, __m128i count)
+__m128i _mm_srli_epi16 (__m128i a, int imm8)
+__m128i _mm_srli_epi32 (__m128i a, int imm8)
+__m128i _mm_srli_epi64 (__m128i a, int imm8)
+__m128i _mm_srli_si128 (__m128i a, int imm8)
+void _mm_store_pd (double* mem_addr, __m128d a)
+void _mm_store_pd1 (double* mem_addr, __m128d a)
+void _mm_store_sd (double* mem_addr, __m128d a)
+void _mm_store_si128 (__m128i* mem_addr, __m128i a)
+void _mm_store1_pd (double* mem_addr, __m128d a)
+void _mm_storeh_pd (double* mem_addr, __m128d a)
+void _mm_storel_epi64 (__m128i* mem_addr, __m128i a)
+void _mm_storel_pd (double* mem_addr, __m128d a)
+void _mm_storer_pd (double* mem_addr, __m128d a)
+void _mm_storeu_pd (double* mem_addr, __m128d a)
+void _mm_storeu_si128 (__m128i* mem_addr, __m128i a)
+void _mm_stream_pd (double* mem_addr, __m128d a)
+void _mm_stream_si128 (__m128i* mem_addr, __m128i a)
+void _mm_stream_si32 (int* mem_addr, int a)
+void _mm_stream_si64 (__int64* mem_addr, __int64 a)
+__m128i _mm_sub_epi16 (__m128i a, __m128i b)
+__m128i _mm_sub_epi32 (__m128i a, __m128i b)
+__m128i _mm_sub_epi64 (__m128i a, __m128i b)
+__m128i _mm_sub_epi8 (__m128i a, __m128i b)
+__m128d _mm_sub_pd (__m128d a, __m128d b)
+__m128d _mm_sub_sd (__m128d a, __m128d b)
+__m64 _mm_sub_si64 (__m64 a, __m64 b)
+__m128i _mm_subs_epi16 (__m128i a, __m128i b)
+__m128i _mm_subs_epi8 (__m128i a, __m128i b)
+__m128i _mm_subs_epu16 (__m128i a, __m128i b)
+__m128i _mm_subs_epu8 (__m128i a, __m128i b)
+int _mm_ucomieq_sd (__m128d a, __m128d b)
+int _mm_ucomige_sd (__m128d a, __m128d b)
+int _mm_ucomigt_sd (__m128d a, __m128d b)
+int _mm_ucomile_sd (__m128d a, __m128d b)
+int _mm_ucomilt_sd (__m128d a, __m128d b)
+int _mm_ucomineq_sd (__m128d a, __m128d b)
+__m128i _mm_unpackhi_epi16 (__m128i a, __m128i b)
+__m128i _mm_unpackhi_epi32 (__m128i a, __m128i b)
+__m128i _mm_unpackhi_epi64 (__m128i a, __m128i b)
+__m128i _mm_unpackhi_epi8 (__m128i a, __m128i b)
+__m128d _mm_unpackhi_pd (__m128d a, __m128d b)
+__m128i _mm_unpacklo_epi16 (__m128i a, __m128i b)
+__m128i _mm_unpacklo_epi32 (__m128i a, __m128i b)
+__m128i _mm_unpacklo_epi64 (__m128i a, __m128i b)
+__m128i _mm_unpacklo_epi8 (__m128i a, __m128i b)
+__m128d _mm_unpacklo_pd (__m128d a, __m128d b)
+__m128d _mm_xor_pd (__m128d a, __m128d b)
+__m128i _mm_xor_si128 (__m128i a, __m128i b)

+ 12 - 0
xmmintrin.mod/tools/sse3.txt

@@ -0,0 +1,12 @@
+' https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1356,107&techs=SSE3
+__m128d _mm_addsub_pd (__m128d a, __m128d b)
+__m128 _mm_addsub_ps (__m128 a, __m128 b)
+__m128d _mm_hadd_pd (__m128d a, __m128d b)
+__m128 _mm_hadd_ps (__m128 a, __m128 b)
+__m128d _mm_hsub_pd (__m128d a, __m128d b)
+__m128 _mm_hsub_ps (__m128 a, __m128 b)
+__m128i _mm_lddqu_si128 (__m128i const* mem_addr)
+__m128d _mm_loaddup_pd (double const* mem_addr)
+__m128d _mm_movedup_pd (__m128d a)
+__m128 _mm_movehdup_ps (__m128 a)
+__m128 _mm_moveldup_ps (__m128 a)

+ 62 - 0
xmmintrin.mod/tools/sse41.txt

@@ -0,0 +1,62 @@
+' https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1356,107&techs=SSE4_1
+__m128i _mm_blend_epi16 (__m128i a, __m128i b, const int imm8)
+__m128d _mm_blend_pd (__m128d a, __m128d b, const int imm8)
+__m128 _mm_blend_ps (__m128 a, __m128 b, const int imm8)
+__m128i _mm_blendv_epi8 (__m128i a, __m128i b, __m128i mask)
+__m128d _mm_blendv_pd (__m128d a, __m128d b, __m128d mask)
+__m128 _mm_blendv_ps (__m128 a, __m128 b, __m128 mask)
+__m128d _mm_ceil_pd (__m128d a)
+__m128 _mm_ceil_ps (__m128 a)
+__m128d _mm_ceil_sd (__m128d a, __m128d b)
+__m128 _mm_ceil_ss (__m128 a, __m128 b)
+__m128i _mm_cmpeq_epi64 (__m128i a, __m128i b)
+__m128i _mm_cvtepi16_epi32 (__m128i a)
+__m128i _mm_cvtepi16_epi64 (__m128i a)
+__m128i _mm_cvtepi32_epi64 (__m128i a)
+__m128i _mm_cvtepi8_epi16 (__m128i a)
+__m128i _mm_cvtepi8_epi32 (__m128i a)
+__m128i _mm_cvtepi8_epi64 (__m128i a)
+__m128i _mm_cvtepu16_epi32 (__m128i a)
+__m128i _mm_cvtepu16_epi64 (__m128i a)
+__m128i _mm_cvtepu32_epi64 (__m128i a)
+__m128i _mm_cvtepu8_epi16 (__m128i a)
+__m128i _mm_cvtepu8_epi32 (__m128i a)
+__m128i _mm_cvtepu8_epi64 (__m128i a)
+__m128d _mm_dp_pd (__m128d a, __m128d b, const int imm8)
+__m128 _mm_dp_ps (__m128 a, __m128 b, const int imm8)
+int _mm_extract_epi32 (__m128i a, const int imm8)
+__int64 _mm_extract_epi64 (__m128i a, const int imm8)
+int _mm_extract_epi8 (__m128i a, const int imm8)
+int _mm_extract_ps (__m128 a, const int imm8)
+__m128d _mm_floor_pd (__m128d a)
+__m128 _mm_floor_ps (__m128 a)
+__m128d _mm_floor_sd (__m128d a, __m128d b)
+__m128 _mm_floor_ss (__m128 a, __m128 b)
+__m128i _mm_insert_epi32 (__m128i a, int i, const int imm8)
+__m128i _mm_insert_epi64 (__m128i a, __int64 i, const int imm8)
+__m128i _mm_insert_epi8 (__m128i a, int i, const int imm8)
+__m128 _mm_insert_ps (__m128 a, __m128 b, const int imm8)
+__m128i _mm_max_epi32 (__m128i a, __m128i b)
+__m128i _mm_max_epi8 (__m128i a, __m128i b)
+__m128i _mm_max_epu16 (__m128i a, __m128i b)
+__m128i _mm_max_epu32 (__m128i a, __m128i b)
+__m128i _mm_min_epi32 (__m128i a, __m128i b)
+__m128i _mm_min_epi8 (__m128i a, __m128i b)
+__m128i _mm_min_epu16 (__m128i a, __m128i b)
+__m128i _mm_min_epu32 (__m128i a, __m128i b)
+__m128i _mm_minpos_epu16 (__m128i a)
+__m128i _mm_mpsadbw_epu8 (__m128i a, __m128i b, const int imm8)
+__m128i _mm_mul_epi32 (__m128i a, __m128i b)
+__m128i _mm_mullo_epi32 (__m128i a, __m128i b)
+__m128i _mm_packus_epi32 (__m128i a, __m128i b)
+__m128d _mm_round_pd (__m128d a, int rounding)
+__m128 _mm_round_ps (__m128 a, int rounding)
+__m128d _mm_round_sd (__m128d a, __m128d b, int rounding)
+__m128 _mm_round_ss (__m128 a, __m128 b, int rounding)
+__m128i _mm_stream_load_si128 (__m128i* mem_addr)
+int _mm_test_all_ones (__m128i a)
+int _mm_test_all_zeros (__m128i a, __m128i mask)
+int _mm_test_mix_ones_zeros (__m128i a, __m128i mask)
+int _mm_testc_si128 (__m128i a, __m128i b)
+int _mm_testnzc_si128 (__m128i a, __m128i b)
+int _mm_testz_si128 (__m128i a, __m128i b)

+ 20 - 0
xmmintrin.mod/tools/sse42.txt

@@ -0,0 +1,20 @@
+' https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1356,107&techs=SSE4_2
+int _mm_cmpestra (__m128i a, int la, __m128i b, int lb, const int imm8)
+int _mm_cmpestrc (__m128i a, int la, __m128i b, int lb, const int imm8)
+int _mm_cmpestri (__m128i a, int la, __m128i b, int lb, const int imm8)
+__m128i _mm_cmpestrm (__m128i a, int la, __m128i b, int lb, const int imm8)
+int _mm_cmpestro (__m128i a, int la, __m128i b, int lb, const int imm8)
+int _mm_cmpestrs (__m128i a, int la, __m128i b, int lb, const int imm8)
+int _mm_cmpestrz (__m128i a, int la, __m128i b, int lb, const int imm8)
+__m128i _mm_cmpgt_epi64 (__m128i a, __m128i b)
+int _mm_cmpistra (__m128i a, __m128i b, const int imm8)
+int _mm_cmpistrc (__m128i a, __m128i b, const int imm8)
+int _mm_cmpistri (__m128i a, __m128i b, const int imm8)
+__m128i _mm_cmpistrm (__m128i a, __m128i b, const int imm8)
+int _mm_cmpistro (__m128i a, __m128i b, const int imm8)
+int _mm_cmpistrs (__m128i a, __m128i b, const int imm8)
+int _mm_cmpistrz (__m128i a, __m128i b, const int imm8)
+unsigned int _mm_crc32_u16 (unsigned int crc, unsigned short v)
+unsigned int _mm_crc32_u32 (unsigned int crc, unsigned int v)
+unsigned __int64 _mm_crc32_u64 (unsigned __int64 crc, unsigned __int64 v)
+unsigned int _mm_crc32_u8 (unsigned int crc, unsigned char v)
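
The SSE4.2 list is mostly the string-compare intrinsics plus the scalar CRC32 family; the CRC32 entries take unsigned scalar types rather than vectors. A sketch of how those four might be declared, assuming a mapping of unsigned int to UInt, unsigned short to Short, unsigned char to Byte and unsigned __int64 to ULong (this mapping is an assumption, not confirmed by this commit):

' Sketch only - assumed scalar type mapping for the CRC32 intrinsics
Extern
	Function _mm_crc32_u8:UInt(crc:UInt,v:Byte)="_mm_crc32_u8"
	Function _mm_crc32_u16:UInt(crc:UInt,v:Short)="_mm_crc32_u16"
	Function _mm_crc32_u32:UInt(crc:UInt,v:UInt)="_mm_crc32_u32"
	Function _mm_crc32_u64:ULong(crc:ULong,v:ULong)="_mm_crc32_u64"
End Extern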

+ 33 - 0
xmmintrin.mod/tools/ssse3.txt

@@ -0,0 +1,33 @@
+' https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1356,107&techs=SSSE3
+__m128i _mm_abs_epi16 (__m128i a)
+__m128i _mm_abs_epi32 (__m128i a)
+__m128i _mm_abs_epi8 (__m128i a)
+__m64 _mm_abs_pi16 (__m64 a)
+__m64 _mm_abs_pi32 (__m64 a)
+__m64 _mm_abs_pi8 (__m64 a)
+__m128i _mm_alignr_epi8 (__m128i a, __m128i b, int count)
+__m64 _mm_alignr_pi8 (__m64 a, __m64 b, int count)
+__m128i _mm_hadd_epi16 (__m128i a, __m128i b)
+__m128i _mm_hadd_epi32 (__m128i a, __m128i b)
+__m64 _mm_hadd_pi16 (__m64 a, __m64 b)
+__m64 _mm_hadd_pi32 (__m64 a, __m64 b)
+__m128i _mm_hadds_epi16 (__m128i a, __m128i b)
+__m64 _mm_hadds_pi16 (__m64 a, __m64 b)
+__m128i _mm_hsub_epi16 (__m128i a, __m128i b)
+__m128i _mm_hsub_epi32 (__m128i a, __m128i b)
+__m64 _mm_hsub_pi16 (__m64 a, __m64 b)
+__m64 _mm_hsub_pi32 (__m64 a, __m64 b)
+__m128i _mm_hsubs_epi16 (__m128i a, __m128i b)
+__m64 _mm_hsubs_pi16 (__m64 a, __m64 b)
+__m128i _mm_maddubs_epi16 (__m128i a, __m128i b)
+__m64 _mm_maddubs_pi16 (__m64 a, __m64 b)
+__m128i _mm_mulhrs_epi16 (__m128i a, __m128i b)
+__m64 _mm_mulhrs_pi16 (__m64 a, __m64 b)
+__m128i _mm_shuffle_epi8 (__m128i a, __m128i b)
+__m64 _mm_shuffle_pi8 (__m64 a, __m64 b)
+__m128i _mm_sign_epi16 (__m128i a, __m128i b)
+__m128i _mm_sign_epi32 (__m128i a, __m128i b)
+__m128i _mm_sign_epi8 (__m128i a, __m128i b)
+__m64 _mm_sign_pi16 (__m64 a, __m64 b)
+__m64 _mm_sign_pi32 (__m64 a, __m64 b)
+__m64 _mm_sign_pi8 (__m64 a, __m64 b)

+ 20 - 15
xmmintrin.mod/xmmintrin.bmx

@@ -1,23 +1,26 @@
-'  Copyright (C) 2016 David JJ Camp
+' Copyright (c) 2016 David JJ Camp
+' 
+' This software is provided 'as-is', without any express or implied
+' warranty. In no event will the authors be held liable for any damages
+' arising from the use of this software.
 '
-'  This software is provided 'as-is', without any express or implied
-'  warranty.  In no event will the authors be held liable for any damages
-'  arising from the use of this software.
+' Permission is granted to anyone to use this software for any purpose,
+' including commercial applications, and to alter it and redistribute it
+' freely, subject to the following restrictions:
 '
-'  Permission is granted to anyone to use this software for any purpose,
-'  including commercial applications, and to alter it and redistribute it
-'  freely, subject to the following restrictions:
+' 1. The origin of this software must not be misrepresented; you must not
+'    claim that you wrote the original software. If you use this software
+'    in a product, an acknowledgement in the product documentation would be
+'    appreciated but is not required.
+' 2. Altered source versions must be plainly marked as such, and must not be
+'    misrepresented as being the original software.
+' 3. This notice may not be removed or altered from any source distribution.
 '
-'  1. The origin of this software must not be misrepresented; you must not
-'     claim that you wrote the original software. If you use this software
-'     in a product, an acknowledgment in the product documentation would be
-'     appreciated but is not required.
-'  2. Altered source versions must be plainly marked as such, and must not be
-'     misrepresented as being the original software.
-'  3. This notice may not be removed or altered from any source distribution.
-
 SuperStrict
 
+Rem
+bbdoc: SIMD intrinsics for x64.
+End Rem
 Module pub.xmmintrin
 
 ModuleInfo "Version: 1.00"
@@ -25,9 +28,11 @@ ModuleInfo "Author: David JJ Camp"
 ModuleInfo "License: zlib/libpng"
 ModuleInfo "Copyright: David JJ Camp"
 
+?x64
 Import "sse.bmx"
 Import "sse2.bmx"
 Import "sse3.bmx"
 Import "ssse3.bmx"
 Import "sse41.bmx"
 Import "sse42.bmx"
+?
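
With the imports now wrapped in ?x64, the module only builds its bindings on 64-bit targets, so consumer code will typically use the same guard. A minimal usage sketch, assuming the scalar-lane helpers _mm_cvtsi32_ss, _mm_add_ss and _mm_cvtss_f32 are bound as in sse.bmx, and that a Float128 local starts out zeroed (untested):

SuperStrict
Import pub.xmmintrin

?x64
Local zero:Float128                          ' assumed zero-initialized
Local a:Float128 = _mm_cvtsi32_ss(zero, 2)   ' lowest lane = 2.0
Local b:Float128 = _mm_cvtsi32_ss(zero, 3)   ' lowest lane = 3.0
Print _mm_cvtss_f32(_mm_add_ss(a, b))        ' should print 5.0 (formatting may vary)
?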