2
0
Эх сурвалжийг харах

Add most SSE instructions as intrinsics.

git-svn-id: trunk@44274 -
Jeppe Johansen 5 жил өмнө
parent
commit
c20b27ede9

+ 7 - 1
compiler/ninl.pas

@@ -5402,7 +5402,13 @@ implementation
          p: tnode;
          p: tnode;
        begin
        begin
          if count=1 then
          if count=1 then
-           set_varstate(left,vs_read,[vsf_must_be_valid])
+           begin
+             // Sometimes there are more callparanodes
+             if left is tcallparanode then
+               set_varstate(tcallparanode(left).left,vs_read,[vsf_must_be_valid])
+             else
+               set_varstate(left,vs_read,[vsf_must_be_valid])
+           end
          else
          else
            begin
            begin
              p:=left;
              p:=left;

+ 203 - 13
compiler/utils/mkx86inl.pp

@@ -22,15 +22,30 @@ const
 function GetPascalType(const ATyp: string): string;
 function GetPascalType(const ATyp: string): string;
   begin
   begin
     case ATyp of
     case ATyp of
+      'r8':    exit('byte');
+      'rs8':   exit('shortint');
+      'r16':   exit('word');
+      'rs16':  exit('smallint');
       'r32':   exit('longword');
       'r32':   exit('longword');
       'rs32':  exit('longint');
       'rs32':  exit('longint');
       'r64':   exit('qword');
       'r64':   exit('qword');
       'rs64':  exit('int64');
       'rs64':  exit('int64');
+      'reg':   exit('NativeUInt');
+      'sreg':  exit('NativeInt');
       'f32':   exit('single');
       'f32':   exit('single');
+      'f64':   exit('double');
       'mm':    exit('__m64');
       'mm':    exit('__m64');
+      'implicit_xmm0',
       'xmm':   exit('__m128');
       'xmm':   exit('__m128');
       'i32':   exit('longint');
       'i32':   exit('longint');
-      'ptr32': exit('pointer');
+
+      'edi_ptr':   exit('pointer');
+
+      'ptr8',
+      'ptr16',
+      'ptr32',
+      'ptr64',
+      'ptr128': exit('pointer');
     else
     else
       exit(ATyp);
       exit(ATyp);
     end;
     end;
@@ -39,15 +54,30 @@ function GetPascalType(const ATyp: string): string;
 function GetTypeDef(const ATyp: string): string;
 function GetTypeDef(const ATyp: string): string;
   begin
   begin
     case ATyp of
     case ATyp of
+      'r8':    exit('u8inttype');
+      'rs8':   exit('s8inttype');
+      'r16':   exit('u16inttype');
+      'rs16':  exit('s16inttype');
       'r32':   exit('u32inttype');
       'r32':   exit('u32inttype');
       'rs32':  exit('s32inttype');
       'rs32':  exit('s32inttype');
       'r64':   exit('u64inttype');
       'r64':   exit('u64inttype');
       'rs64':  exit('s64inttype');
       'rs64':  exit('s64inttype');
+      'reg':   exit('uinttype');
+      'sreg':  exit('sinttype');
       'f32':   exit('s32floattype');
       'f32':   exit('s32floattype');
+      'f64':   exit('s64floattype');
       'mm':    exit('x86_m64type');
       'mm':    exit('x86_m64type');
+      'implicit_xmm0',
       'xmm':   exit('x86_m128type');
       'xmm':   exit('x86_m128type');
       'i32':   exit('s32inttype');
       'i32':   exit('s32inttype');
-      'ptr32': exit('voidpointertype');
+
+      'edi_ptr':   exit('voidpointertype');
+
+      'ptr8',
+      'ptr16',
+      'ptr32',
+      'ptr64',
+      'ptr128': exit('voidpointertype');
     else
     else
       exit(ATyp);
       exit(ATyp);
     end;
     end;
@@ -56,15 +86,30 @@ function GetTypeDef(const ATyp: string): string;
 function GetOper(const ATyp: string): string;
 function GetOper(const ATyp: string): string;
   begin
   begin
     case ATyp of
     case ATyp of
+      'r8':    exit('_reg');
+      'rs8':   exit('_reg');
+      'r16':   exit('_reg');
+      'rs16':  exit('_reg');
       'r32':   exit('_reg');
       'r32':   exit('_reg');
       'rs32':  exit('_reg');
       'rs32':  exit('_reg');
       'r64':   exit('_reg_reg');
       'r64':   exit('_reg_reg');
       'rs64':  exit('_reg_reg');
       'rs64':  exit('_reg_reg');
+      'reg':   exit('_reg');
+      'sreg':  exit('_reg');
       'f32':   exit('_reg');
       'f32':   exit('_reg');
+      'f64':   exit('_reg');
       'mm':    exit('_reg');
       'mm':    exit('_reg');
       'xmm':   exit('_reg');
       'xmm':   exit('_reg');
       'i32':   exit('_const');
       'i32':   exit('_const');
-      'ptr32': exit('_ref');
+
+      'implicit_xmm0',
+      'edi_ptr':   exit('');
+
+      'ptr8',
+      'ptr16',
+      'ptr32',
+      'ptr64',
+      'ptr128': exit('_ref');
     else
     else
       exit('');
       exit('');
     end;
     end;
@@ -73,15 +118,30 @@ function GetOper(const ATyp: string): string;
 function GetOperand(const ATyp: string; AIndex: longint): string;
 function GetOperand(const ATyp: string; AIndex: longint): string;
   begin
   begin
     case ATyp of
     case ATyp of
+      'r8':    exit(format(',paraarray[%d].location.register', [AIndex]));
+      'rs8':   exit(format(',paraarray[%d].location.register', [AIndex]));
+      'r16':   exit(format(',paraarray[%d].location.register', [AIndex]));
+      'rs16':  exit(format(',paraarray[%d].location.register', [AIndex]));
       'r32':   exit(format(',paraarray[%d].location.register', [AIndex]));
       'r32':   exit(format(',paraarray[%d].location.register', [AIndex]));
       'rs32':  exit(format(',paraarray[%d].location.register', [AIndex]));
       'rs32':  exit(format(',paraarray[%d].location.register', [AIndex]));
       'r64':   exit(format(',paraarray[%d].location.register64.reglo,paraarray[%d].location.register64.reghi', [AIndex,AIndex]));
       'r64':   exit(format(',paraarray[%d].location.register64.reglo,paraarray[%d].location.register64.reghi', [AIndex,AIndex]));
       'rs64':  exit(format(',paraarray[%d].location.register64.reglo,paraarray[%d].location.register64.reghi', [AIndex,AIndex]));
       'rs64':  exit(format(',paraarray[%d].location.register64.reglo,paraarray[%d].location.register64.reghi', [AIndex,AIndex]));
+      'reg':   exit(format(',paraarray[%d].location.register', [AIndex]));
+      'sreg':  exit(format(',paraarray[%d].location.register', [AIndex]));
       'f32':   exit(format(',paraarray[%d].location.register', [AIndex]));
       'f32':   exit(format(',paraarray[%d].location.register', [AIndex]));
+      'f64':   exit(format(',paraarray[%d].location.register', [AIndex]));
       'mm':    exit(format(',paraarray[%d].location.register', [AIndex]));
       'mm':    exit(format(',paraarray[%d].location.register', [AIndex]));
       'xmm':   exit(format(',paraarray[%d].location.register', [AIndex]));
       'xmm':   exit(format(',paraarray[%d].location.register', [AIndex]));
       'i32':   exit(format(',GetConstInt(paraarray[%d])',[AIndex]));
       'i32':   exit(format(',GetConstInt(paraarray[%d])',[AIndex]));
-      'ptr32': exit(format(',paraarray[%d].location.reference', [AIndex]));
+
+      'implicit_xmm0',
+      'edi_ptr': exit('');
+
+      'ptr8',
+      'ptr16',
+      'ptr32',
+      'ptr64',
+      'ptr128': exit(format(',paraarray[%d].location.reference', [AIndex]));
     else
     else
       exit(ATyp);
       exit(ATyp);
     end;
     end;
@@ -91,13 +151,29 @@ function GetOperandLoc(const ATyp: string): string;
   begin
   begin
     result:='';
     result:='';
     case ATyp of
     case ATyp of
+      'r8':   exit(',location.register');
+      'rs8':  exit(',location.register');
+      'r16':  exit(',location.register');
+      'rs16': exit(',location.register');
       'r32':  exit(',location.register');
       'r32':  exit(',location.register');
       'rs32': exit(',location.register');
       'rs32': exit(',location.register');
       'r64':  exit(',location.register64.reglo,location.register64.reghi');
       'r64':  exit(',location.register64.reglo,location.register64.reghi');
       'rs64': exit(',location.register64.reglo,location.register64.reghi');
       'rs64': exit(',location.register64.reglo,location.register64.reghi');
+      'reg':  exit(',location.register');
+      'sreg': exit(',location.register');
       'f32':  exit(',location.register');
       'f32':  exit(',location.register');
+      'f64':  exit(',location.register');
       'mm':   exit(',location.register');
       'mm':   exit(',location.register');
+      'implicit_xmm0',
       'xmm':  exit(',location.register');
       'xmm':  exit(',location.register');
+
+      'edi_ptr': exit(',location.register');
+
+      'ptr8',
+      'ptr16',
+      'ptr32',
+      'ptr64',
+      'ptr128': exit(',location.register');
     end;
     end;
   end;
   end;
 
 
@@ -105,14 +181,47 @@ function GetLocStatement(AIndex: longint; const ATyp: string; AConst: boolean):
   begin
   begin
     result:='';
     result:='';
     case ATyp of
     case ATyp of
+      'r8':    exit(format('hlcg.location_force_reg(current_asmdata.CurrAsmList, paraarray[%d].location, paraarray[%d].resultdef,u8inttype,%s);', [AIndex+1, AIndex+1, BoolToStr(aconst,'true','false')]));
+      'rs8':   exit(format('hlcg.location_force_reg(current_asmdata.CurrAsmList, paraarray[%d].location, paraarray[%d].resultdef,u8inttype,%s);', [AIndex+1, AIndex+1, BoolToStr(aconst,'true','false')]));
+      'r16':   exit(format('hlcg.location_force_reg(current_asmdata.CurrAsmList, paraarray[%d].location, paraarray[%d].resultdef,u16inttype,%s);', [AIndex+1, AIndex+1, BoolToStr(aconst,'true','false')]));
+      'rs16':  exit(format('hlcg.location_force_reg(current_asmdata.CurrAsmList, paraarray[%d].location, paraarray[%d].resultdef,u16inttype,%s);', [AIndex+1, AIndex+1, BoolToStr(aconst,'true','false')]));
       'r32':   exit(format('hlcg.location_force_reg(current_asmdata.CurrAsmList, paraarray[%d].location, paraarray[%d].resultdef,u32inttype,%s);', [AIndex+1, AIndex+1, BoolToStr(aconst,'true','false')]));
       'r32':   exit(format('hlcg.location_force_reg(current_asmdata.CurrAsmList, paraarray[%d].location, paraarray[%d].resultdef,u32inttype,%s);', [AIndex+1, AIndex+1, BoolToStr(aconst,'true','false')]));
       'rs32':  exit(format('hlcg.location_force_reg(current_asmdata.CurrAsmList, paraarray[%d].location, paraarray[%d].resultdef,u32inttype,%s);', [AIndex+1, AIndex+1, BoolToStr(aconst,'true','false')]));
       'rs32':  exit(format('hlcg.location_force_reg(current_asmdata.CurrAsmList, paraarray[%d].location, paraarray[%d].resultdef,u32inttype,%s);', [AIndex+1, AIndex+1, BoolToStr(aconst,'true','false')]));
       'r64':   exit(format('hlcg.location_force_reg(current_asmdata.CurrAsmList, paraarray[%d].location, paraarray[%d].resultdef,u64inttype,%s);', [AIndex+1, AIndex+1, BoolToStr(aconst,'true','false')]));
       'r64':   exit(format('hlcg.location_force_reg(current_asmdata.CurrAsmList, paraarray[%d].location, paraarray[%d].resultdef,u64inttype,%s);', [AIndex+1, AIndex+1, BoolToStr(aconst,'true','false')]));
       'rs64':  exit(format('hlcg.location_force_reg(current_asmdata.CurrAsmList, paraarray[%d].location, paraarray[%d].resultdef,u64inttype,%s);', [AIndex+1, AIndex+1, BoolToStr(aconst,'true','false')]));
       'rs64':  exit(format('hlcg.location_force_reg(current_asmdata.CurrAsmList, paraarray[%d].location, paraarray[%d].resultdef,u64inttype,%s);', [AIndex+1, AIndex+1, BoolToStr(aconst,'true','false')]));
+      'reg':   exit(format('hlcg.location_force_reg(current_asmdata.CurrAsmList, paraarray[%d].location, paraarray[%d].resultdef,uinttype,%s);', [AIndex+1, AIndex+1, BoolToStr(aconst,'true','false')]));
+      'sreg':  exit(format('hlcg.location_force_reg(current_asmdata.CurrAsmList, paraarray[%d].location, paraarray[%d].resultdef,sinttype,%s);', [AIndex+1, AIndex+1, BoolToStr(aconst,'true','false')]));
       'f32':   exit(format('location_force_mmreg(current_asmdata.CurrAsmList, paraarray[%d].location, %s);', [AIndex+1, BoolToStr(aconst,'true','false')]));
       'f32':   exit(format('location_force_mmreg(current_asmdata.CurrAsmList, paraarray[%d].location, %s);', [AIndex+1, BoolToStr(aconst,'true','false')]));
+      'f64':   exit(format('location_force_mmreg(current_asmdata.CurrAsmList, paraarray[%d].location, %s);', [AIndex+1, BoolToStr(aconst,'true','false')]));
       'mm':    exit(format('location_force_mmxreg(current_asmdata.CurrAsmList, paraarray[%d].location, %s);', [AIndex+1, BoolToStr(aconst,'true','false')]));
       'mm':    exit(format('location_force_mmxreg(current_asmdata.CurrAsmList, paraarray[%d].location, %s);', [AIndex+1, BoolToStr(aconst,'true','false')]));
       'xmm':   exit(format('location_force_mmreg(current_asmdata.CurrAsmList, paraarray[%d].location, %s);', [AIndex+1, BoolToStr(aconst,'true','false')]));
       'xmm':   exit(format('location_force_mmreg(current_asmdata.CurrAsmList, paraarray[%d].location, %s);', [AIndex+1, BoolToStr(aconst,'true','false')]));
-      'ptr32': exit(format('location_make_ref(paraarray[%d].location);', [AIndex+1]));
+
+      'implicit_xmm0':
+        exit(format('location_force_mmreg(current_asmdata.CurrAsmList, paraarray[%d].location, %s);'+LineEnding+
+                    '    hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_XMM0);'+LineEnding+
+                    '    hlcg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,paraarray[%d].resultdef,x86_m128type,paraarray[%d].location,NR_XMM0,nil);',
+                      [AIndex+1, BoolToStr(aconst,'true','false'), AIndex+1, AIndex+1]));
+      'edi_ptr':
+        exit(format('hlcg.getcpuregister(current_asmdata.CurrAsmList,{$if defined(cpu64bitalu)}NR_RDI{$else}NR_EDI{$endif});'+LineEnding+
+                    '    hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,paraarray[%d].resultdef,voidpointertype,paraarray[%d].location,{$if defined(cpu64bitalu)}NR_RDI{$else}NR_EDI{$endif});',
+                      [AIndex+1, AIndex+1]));
+
+      'ptr8',
+      'ptr16',
+      'ptr32',
+      'ptr64',
+      'ptr128':exit(format('location_make_ref(paraarray[%d].location);', [AIndex+1]));
+    end;
+  end;
+
+function GetDeallocStatement(AIndex: longint; const ATyp: string): string;
+  begin
+    result:='';
+    case ATyp of
+      'implicit_xmm0':
+        exit('hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_XMM0);');
+      'edi_ptr':
+        exit('hlcg.ungetcpuregister(current_asmdata.CurrAsmList,{$if defined(cpu64bitalu)}NR_RDI{$else}NR_EDI{$endif});');
     end;
     end;
   end;
   end;
 
 
@@ -121,25 +230,55 @@ function GetLoc(const ATyp: string; AWithSize: boolean = true): string;
     result:='';
     result:='';
     if AWithSize then
     if AWithSize then
       case ATyp of
       case ATyp of
+        'r8':    exit('LOC_REGISTER,OS_8');
+        'rs8':   exit('LOC_REGISTER,OS_S8');
+        'r16':   exit('LOC_REGISTER,OS_16');
+        'rs16':  exit('LOC_REGISTER,OS_S16');
         'r32':   exit('LOC_REGISTER,OS_32');
         'r32':   exit('LOC_REGISTER,OS_32');
         'rs32':  exit('LOC_REGISTER,OS_S32');
         'rs32':  exit('LOC_REGISTER,OS_S32');
         'r64':   exit('LOC_REGISTER,OS_64');
         'r64':   exit('LOC_REGISTER,OS_64');
         'rs64':  exit('LOC_REGISTER,OS_S64');
         'rs64':  exit('LOC_REGISTER,OS_S64');
+        'reg':   exit('LOC_REGISTER,OS_INT');
+        'sreg':  exit('LOC_REGISTER,OS_SINT');
         'f32':   exit('LOC_MMREGISTER,OS_M128');
         'f32':   exit('LOC_MMREGISTER,OS_M128');
+        'f64':   exit('LOC_MMREGISTER,OS_M128');
         'mm':    exit('LOC_MMXREGISTER,OS_M64');
         'mm':    exit('LOC_MMXREGISTER,OS_M64');
+        'implicit_xmm0',
         'xmm':   exit('LOC_MMREGISTER,OS_M128');
         'xmm':   exit('LOC_MMREGISTER,OS_M128');
+
+        'edi_ptr':   exit('LOC_REGISTER,OS_INT');
+
+        'ptr8':  exit('LOC_MEM,OS_8');
+        'ptr16': exit('LOC_MEM,OS_16');
         'ptr32': exit('LOC_MEM,OS_32');
         'ptr32': exit('LOC_MEM,OS_32');
+        'ptr64': exit('LOC_MEM,OS_64');
+        'ptr128':exit('LOC_MEM,OS_128');
       end
       end
     else
     else
       case ATyp of
       case ATyp of
+        'r8':    exit('LOC_REGISTER');
+        'rs8':   exit('LOC_REGISTER');
+        'r16':   exit('LOC_REGISTER');
+        'rs16':  exit('LOC_REGISTER');
         'r32':   exit('LOC_REGISTER');
         'r32':   exit('LOC_REGISTER');
         'rs32':  exit('LOC_REGISTER');
         'rs32':  exit('LOC_REGISTER');
         'r64':   exit('LOC_REGISTER');
         'r64':   exit('LOC_REGISTER');
         'rs64':  exit('LOC_REGISTER');
         'rs64':  exit('LOC_REGISTER');
+        'reg':   exit('LOC_REGISTER');
+        'sreg':  exit('LOC_REGISTER');
         'f32':   exit('LOC_MMREGISTER');
         'f32':   exit('LOC_MMREGISTER');
+        'f64':   exit('LOC_MMREGISTER');
         'mm':    exit('LOC_MMXREGISTER');
         'mm':    exit('LOC_MMXREGISTER');
+        'implicit_xmm0',
         'xmm':   exit('LOC_MMREGISTER');
         'xmm':   exit('LOC_MMREGISTER');
-        'ptr32': exit('LOC_MEM');
+
+        'edi_ptr':   exit('LOC_REGISTER');
+
+        'ptr8',
+        'ptr16',
+        'ptr32',
+        'ptr64',
+        'ptr128':exit('LOC_MEM');
       end;
       end;
   end;
   end;
 
 
@@ -147,11 +286,18 @@ function GetLocAllocation(const ATyp: string): string;
   begin
   begin
     result:='';
     result:='';
     case ATyp of
     case ATyp of
+      'r8':  exit('location.register:=cg.getintregister(current_asmdata.CurrAsmList, OS_8);');
+      'rs8': exit('location.register:=cg.getintregister(current_asmdata.CurrAsmList, OS_8);');
+      'r16':  exit('location.register:=cg.getintregister(current_asmdata.CurrAsmList, OS_16);');
+      'rs16': exit('location.register:=cg.getintregister(current_asmdata.CurrAsmList, OS_16);');
       'r32':  exit('location.register:=cg.getintregister(current_asmdata.CurrAsmList, OS_32);');
       'r32':  exit('location.register:=cg.getintregister(current_asmdata.CurrAsmList, OS_32);');
       'rs32': exit('location.register:=cg.getintregister(current_asmdata.CurrAsmList, OS_32);');
       'rs32': exit('location.register:=cg.getintregister(current_asmdata.CurrAsmList, OS_32);');
       'r64':  exit('location.register64.reglo:=cg.getintregister(current_asmdata.CurrAsmList, OS_32); location.register64.reghi:=cg.getintregister(current_asmdata.CurrAsmList, OS_32);');
       'r64':  exit('location.register64.reglo:=cg.getintregister(current_asmdata.CurrAsmList, OS_32); location.register64.reghi:=cg.getintregister(current_asmdata.CurrAsmList, OS_32);');
       'rs64': exit('location.register64.reglo:=cg.getintregister(current_asmdata.CurrAsmList, OS_32); location.register64.reghi:=cg.getintregister(current_asmdata.CurrAsmList, OS_32);');
       'rs64': exit('location.register64.reglo:=cg.getintregister(current_asmdata.CurrAsmList, OS_32); location.register64.reghi:=cg.getintregister(current_asmdata.CurrAsmList, OS_32);');
+      'reg':  exit('location.register:=cg.getintregister(current_asmdata.CurrAsmList, OS_INT);');
+      'sreg': exit('location.register:=cg.getintregister(current_asmdata.CurrAsmList, OS_INT);');
       'f32':  exit('location.register:=cg.getmmregister(current_asmdata.CurrAsmList, OS_M128);');
       'f32':  exit('location.register:=cg.getmmregister(current_asmdata.CurrAsmList, OS_M128);');
+      'f64':  exit('location.register:=cg.getmmregister(current_asmdata.CurrAsmList, OS_M128);');
       'mm':   exit('location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);');
       'mm':   exit('location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);');
       'xmm':  exit('location.register:=cg.getmmregister(current_asmdata.CurrAsmList, OS_M128);');
       'xmm':  exit('location.register:=cg.getmmregister(current_asmdata.CurrAsmList, OS_M128);');
     end;
     end;
@@ -184,7 +330,7 @@ procedure ParseList(const APrefix, AFilename: string);
     outputType: string;
     outputType: string;
     cnt,
     cnt,
     i, intrnum: longint;
     i, intrnum: longint;
-    tmp: String;
+    tmp, condition, target: String;
 
 
   function ParseOperands(AIndex: longint = -1): string;
   function ParseOperands(AIndex: longint = -1): string;
     var
     var
@@ -330,6 +476,18 @@ procedure ParseList(const APrefix, AFilename: string);
         params:=trim(Copy2SymbDel(str,')'));
         params:=trim(Copy2SymbDel(str,')'));
         str:=trim(str);
         str:=trim(str);
 
 
+        // Parse condition and target
+        if pos('|', str)>0 then
+        begin
+          condition:=trim(Copy2SymbDel(str, '|'));
+          target:=trim(str);
+        end
+        else
+        begin
+          condition:=str;
+          target:='';
+        end;
+
         hasOutput:=false;
         hasOutput:=false;
         opercnt:=0;
         opercnt:=0;
         outputType:='';
         outputType:='';
@@ -340,10 +498,13 @@ procedure ParseList(const APrefix, AFilename: string);
         operline:=GetOperLine;
         operline:=GetOperLine;
         // Write typecheck code
         // Write typecheck code
         i:=ftypechk.IndexOf(': //'+operline);
         i:=ftypechk.IndexOf(': //'+operline);
-        if i>=0 then
+        if (i>=0) and (target='') then
           ftypechk.Insert(i,',in_'+APrefix+'_'+instrPart+postfix+_alias)
           ftypechk.Insert(i,',in_'+APrefix+'_'+instrPart+postfix+_alias)
         else
         else
           begin
           begin
+            if target<>'' then
+              ftypechk.add(format('{$ifdef %s}', [target]));
+
             ftypechk.Add('in_'+APrefix+'_'+instrPart+postfix+_alias);
             ftypechk.Add('in_'+APrefix+'_'+instrPart+postfix+_alias);
             ftypechk.Add(': //'+operline);
             ftypechk.Add(': //'+operline);
             ftypechk.Add('  begin');
             ftypechk.Add('  begin');
@@ -352,15 +513,21 @@ procedure ParseList(const APrefix, AFilename: string);
               ftypechk.Add('    resultdef:='+GetTypeDef(outputType)+';')
               ftypechk.Add('    resultdef:='+GetTypeDef(outputType)+';')
             else
             else
               ftypechk.Add('    resultdef:=voidtype;');
               ftypechk.Add('    resultdef:=voidtype;');
-            ftypechk.Add('  end;')
+            ftypechk.Add('  end;');
+
+            if target<>'' then
+              ftypechk.add('{$endif}');
           end;
           end;
 
 
         // Write firstpass code
         // Write firstpass code
         i:=ffirst.IndexOf(': //'+operline);
         i:=ffirst.IndexOf(': //'+operline);
-        if i>=0 then
+        if (i>=0) and (target='') then
           ffirst.Insert(i,',in_'+APrefix+'_'+instrPart+postfix+_alias)
           ffirst.Insert(i,',in_'+APrefix+'_'+instrPart+postfix+_alias)
         else
         else
           begin
           begin
+            if target<>'' then
+              ffirst.add(format('{$ifdef %s}', [target]));
+
             ffirst.Add('in_'+APrefix+'_'+instrPart+postfix+_alias);
             ffirst.Add('in_'+APrefix+'_'+instrPart+postfix+_alias);
             ffirst.Add(': //'+operline);
             ffirst.Add(': //'+operline);
             ffirst.Add('  begin');
             ffirst.Add('  begin');
@@ -369,18 +536,24 @@ procedure ParseList(const APrefix, AFilename: string);
             else
             else
               ffirst.Add('    expectloc:=LOC_VOID;');
               ffirst.Add('    expectloc:=LOC_VOID;');
             ffirst.Add('    result:=nil;');
             ffirst.Add('    result:=nil;');
-            ffirst.Add('  end;')
+            ffirst.Add('  end;');
+
+            if target<>'' then
+              ffirst.add('{$endif}');
           end;
           end;
 
 
         // Write secondpass code
         // Write secondpass code
         i:=fsecond.IndexOf(': //'+operline);
         i:=fsecond.IndexOf(': //'+operline);
-        if i>=0 then
+        if (i>=0) and (target='') then
           begin
           begin
             fsecond.Insert(i+3,'      in_'+APrefix+'_'+instrPart+postfix+_alias+': begin op:=A_'+instrPart+' end;');
             fsecond.Insert(i+3,'      in_'+APrefix+'_'+instrPart+postfix+_alias+': begin op:=A_'+instrPart+' end;');
             fsecond.Insert(i,',in_'+APrefix+'_'+instrPart+postfix+_alias);
             fsecond.Insert(i,',in_'+APrefix+'_'+instrPart+postfix+_alias);
           end
           end
         else
         else
           begin
           begin
+            if target<>'' then
+              fsecond.add(format('{$ifdef %s}', [target]));
+
             fsecond.Add('in_'+APrefix+'_'+instrPart+postfix+_alias);
             fsecond.Add('in_'+APrefix+'_'+instrPart+postfix+_alias);
             fsecond.Add(': //'+operline);
             fsecond.Add(': //'+operline);
             fsecond.Add('  begin');
             fsecond.Add('  begin');
@@ -478,7 +651,18 @@ procedure ParseList(const APrefix, AFilename: string);
 
 
             fsecond.Add('    current_asmdata.CurrAsmList.concat('+operline+');');
             fsecond.Add('    current_asmdata.CurrAsmList.concat('+operline+');');
 
 
-            fsecond.Add('  end;')
+            // Deallocate CPU registers
+            for i := 0 to opercnt-1 do
+              begin
+                tmp:=GetDeallocStatement(cnt, opers[i].typ);
+                if tmp<>'' then
+                  fsecond.add('    '+tmp);
+              end;
+
+            fsecond.Add('  end;');
+
+            if target<>'' then
+              fsecond.add('{$endif}');
           end;
           end;
 
 
         // Write innr
         // Write innr
@@ -486,6 +670,9 @@ procedure ParseList(const APrefix, AFilename: string);
         writeln(fcpumminnr, '  fpc_in_', APrefix,'_',instrPart,postfix+_alias,' = fpc_in_',APrefix,'_mm_first+',intrnum,';');
         writeln(fcpumminnr, '  fpc_in_', APrefix,'_',instrPart,postfix+_alias,' = fpc_in_',APrefix,'_mm_first+',intrnum,';');
 
 
         // Write function
         // Write function
+        if target<>'' then
+          writeln(fprocs, '{$ifdef ',target,'}');
+
         if hasOutput then write(fprocs,'function ') else write(fprocs,'procedure ');
         if hasOutput then write(fprocs,'function ') else write(fprocs,'procedure ');
         write(fprocs,APrefix,'_',instrPart,postfix,'(');
         write(fprocs,APrefix,'_',instrPart,postfix,'(');
 
 
@@ -515,6 +702,9 @@ procedure ParseList(const APrefix, AFilename: string);
         if hasOutput then write(fprocs,': ',GetPascalType(outputType));
         if hasOutput then write(fprocs,': ',GetPascalType(outputType));
         writeln(fprocs,'; [INTERNPROC: fpc_in_',APrefix,'_',instrPart,postfix+_alias,'];');
         writeln(fprocs,'; [INTERNPROC: fpc_in_',APrefix,'_',instrPart,postfix+_alias,'];');
 
 
+        if target<>'' then
+          writeln(fprocs, '{$endif}');
+
         // Str now contains conditionals
         // Str now contains conditionals
 
 
         inc(intrnum);
         inc(intrnum);

+ 493 - 58
compiler/x86/cx86mminnr.inc

@@ -8,61 +8,496 @@
   in_x86_movss_from_val = in_x86_mm_first+7,
   in_x86_movss_from_val = in_x86_mm_first+7,
   in_x86_movlps = in_x86_mm_first+8,
   in_x86_movlps = in_x86_mm_first+8,
   in_x86_movhps = in_x86_mm_first+9,
   in_x86_movhps = in_x86_mm_first+9,
-  in_x86_movlhps = in_x86_mm_first+10,
-  in_x86_movhlps = in_x86_mm_first+11,
-  in_x86_addss = in_x86_mm_first+12,
-  in_x86_subss = in_x86_mm_first+13,
-  in_x86_mulss = in_x86_mm_first+14,
-  in_x86_divss = in_x86_mm_first+15,
-  in_x86_rcpss = in_x86_mm_first+16,
-  in_x86_sqrtss = in_x86_mm_first+17,
-  in_x86_maxss = in_x86_mm_first+18,
-  in_x86_minss = in_x86_mm_first+19,
-  in_x86_rsqrtss = in_x86_mm_first+20,
-  in_x86_addps = in_x86_mm_first+21,
-  in_x86_subps = in_x86_mm_first+22,
-  in_x86_mulps = in_x86_mm_first+23,
-  in_x86_divps = in_x86_mm_first+24,
-  in_x86_rcpps = in_x86_mm_first+25,
-  in_x86_sqrtps = in_x86_mm_first+26,
-  in_x86_maxps = in_x86_mm_first+27,
-  in_x86_minps = in_x86_mm_first+28,
-  in_x86_rsqrtps = in_x86_mm_first+29,
-  in_x86_andps = in_x86_mm_first+30,
-  in_x86_orps = in_x86_mm_first+31,
-  in_x86_xorps = in_x86_mm_first+32,
-  in_x86_andnps = in_x86_mm_first+33,
-  in_x86_cmpss = in_x86_mm_first+34,
-  in_x86_cmpps = in_x86_mm_first+35,
-  in_x86_shufps = in_x86_mm_first+36,
-  in_x86_unpckhps = in_x86_mm_first+37,
-  in_x86_unpcklps = in_x86_mm_first+38,
-  in_x86_cvtsi2ss = in_x86_mm_first+39,
-  in_x86_cvtss2si = in_x86_mm_first+40,
-  in_x86_cvttss2si = in_x86_mm_first+41,
-  in_x86_cvtpi2ps = in_x86_mm_first+42,
-  in_x86_cvtps2pi = in_x86_mm_first+43,
-  in_x86_cvttps2pi = in_x86_mm_first+44,
-  in_x86_pmulhuw_mmx = in_x86_mm_first+45,
-  in_x86_psadbw_mmx = in_x86_mm_first+46,
-  in_x86_pavgb_mmx = in_x86_mm_first+47,
-  in_x86_pavgw_mmx = in_x86_mm_first+48,
-  in_x86_pmaxub_mmx = in_x86_mm_first+49,
-  in_x86_pminub_mmx = in_x86_mm_first+50,
-  in_x86_pmaxsw_mmx = in_x86_mm_first+51,
-  in_x86_pminsw_mmx = in_x86_mm_first+52,
-  in_x86_pextrw_mmx = in_x86_mm_first+53,
-  in_x86_pinsrw_mmx = in_x86_mm_first+54,
-  in_x86_pmovmskb = in_x86_mm_first+55,
-  in_x86_pshufw = in_x86_mm_first+56,
-  in_x86_pmulhuw = in_x86_mm_first+57,
-  in_x86_psadbw = in_x86_mm_first+58,
-  in_x86_pavgb = in_x86_mm_first+59,
-  in_x86_pavgw = in_x86_mm_first+60,
-  in_x86_pmaxub = in_x86_mm_first+61,
-  in_x86_pminub = in_x86_mm_first+62,
-  in_x86_pmaxsw = in_x86_mm_first+63,
-  in_x86_pminsw = in_x86_mm_first+64,
-  in_x86_pextrw = in_x86_mm_first+65,
-  in_x86_pinsrw = in_x86_mm_first+66,
-  in_x86mm_last = in_x86_mm_first+66
+  in_x86_movlps_to_mem = in_x86_mm_first+10,
+  in_x86_movhps_to_mem = in_x86_mm_first+11,
+  in_x86_movlhps = in_x86_mm_first+12,
+  in_x86_movhlps = in_x86_mm_first+13,
+  in_x86_addss = in_x86_mm_first+14,
+  in_x86_addss_from_mem = in_x86_mm_first+15,
+  in_x86_subss = in_x86_mm_first+16,
+  in_x86_subss_from_mem = in_x86_mm_first+17,
+  in_x86_mulss = in_x86_mm_first+18,
+  in_x86_mulss_from_mem = in_x86_mm_first+19,
+  in_x86_divss = in_x86_mm_first+20,
+  in_x86_divss_from_mem = in_x86_mm_first+21,
+  in_x86_rcpss = in_x86_mm_first+22,
+  in_x86_rcpss_from_mem = in_x86_mm_first+23,
+  in_x86_sqrtss = in_x86_mm_first+24,
+  in_x86_sqrtss_from_mem = in_x86_mm_first+25,
+  in_x86_maxss = in_x86_mm_first+26,
+  in_x86_maxss_from_mem = in_x86_mm_first+27,
+  in_x86_minss = in_x86_mm_first+28,
+  in_x86_minss_from_mem = in_x86_mm_first+29,
+  in_x86_rsqrtss = in_x86_mm_first+30,
+  in_x86_rsqrtss_from_mem = in_x86_mm_first+31,
+  in_x86_addps = in_x86_mm_first+32,
+  in_x86_addps_from_mem = in_x86_mm_first+33,
+  in_x86_subps = in_x86_mm_first+34,
+  in_x86_subps_from_mem = in_x86_mm_first+35,
+  in_x86_mulps = in_x86_mm_first+36,
+  in_x86_mulps_from_mem = in_x86_mm_first+37,
+  in_x86_divps = in_x86_mm_first+38,
+  in_x86_divps_from_mem = in_x86_mm_first+39,
+  in_x86_rcpps = in_x86_mm_first+40,
+  in_x86_rcpps_from_mem = in_x86_mm_first+41,
+  in_x86_sqrtps = in_x86_mm_first+42,
+  in_x86_sqrtps_from_mem = in_x86_mm_first+43,
+  in_x86_maxps = in_x86_mm_first+44,
+  in_x86_maxps_from_mem = in_x86_mm_first+45,
+  in_x86_minps = in_x86_mm_first+46,
+  in_x86_minps_from_mem = in_x86_mm_first+47,
+  in_x86_rsqrtps = in_x86_mm_first+48,
+  in_x86_rsqrtps_from_mem = in_x86_mm_first+49,
+  in_x86_andps = in_x86_mm_first+50,
+  in_x86_andps_from_mem = in_x86_mm_first+51,
+  in_x86_orps = in_x86_mm_first+52,
+  in_x86_orps_from_mem = in_x86_mm_first+53,
+  in_x86_xorps = in_x86_mm_first+54,
+  in_x86_xorps_from_mem = in_x86_mm_first+55,
+  in_x86_andnps = in_x86_mm_first+56,
+  in_x86_andnps_from_mem = in_x86_mm_first+57,
+  in_x86_cmpss = in_x86_mm_first+58,
+  in_x86_cmpss_from_mem = in_x86_mm_first+59,
+  in_x86_cmpps = in_x86_mm_first+60,
+  in_x86_cmpps_from_mem = in_x86_mm_first+61,
+  in_x86_shufps = in_x86_mm_first+62,
+  in_x86_shufps_from_mem = in_x86_mm_first+63,
+  in_x86_unpckhps = in_x86_mm_first+64,
+  in_x86_unpckhps_from_mem = in_x86_mm_first+65,
+  in_x86_unpcklps = in_x86_mm_first+66,
+  in_x86_unpcklps_from_mem = in_x86_mm_first+67,
+  in_x86_cvtsi2ss = in_x86_mm_first+68,
+  in_x86_cvtsi2ss_from_mem = in_x86_mm_first+69,
+  in_x86_cvtss2si = in_x86_mm_first+70,
+  in_x86_cvtss2si_from_mem = in_x86_mm_first+71,
+  in_x86_cvttss2si = in_x86_mm_first+72,
+  in_x86_cvttss2si_from_mem = in_x86_mm_first+73,
+  in_x86_cvtpi2ps = in_x86_mm_first+74,
+  in_x86_cvtpi2ps_from_mem = in_x86_mm_first+75,
+  in_x86_cvtps2pi = in_x86_mm_first+76,
+  in_x86_cvtps2pi_from_mem = in_x86_mm_first+77,
+  in_x86_cvttps2pi = in_x86_mm_first+78,
+  in_x86_cvttps2pi_from_mem = in_x86_mm_first+79,
+  in_x86_pmulhuw_mmx = in_x86_mm_first+80,
+  in_x86_pmulhuw_mmx_from_mem = in_x86_mm_first+81,
+  in_x86_psadbw_mmx = in_x86_mm_first+82,
+  in_x86_psadbw_mmx_from_mem = in_x86_mm_first+83,
+  in_x86_pavgb_mmx = in_x86_mm_first+84,
+  in_x86_pavgb_mmx_from_mem = in_x86_mm_first+85,
+  in_x86_pavgw_mmx = in_x86_mm_first+86,
+  in_x86_pavgw_mmx_from_mem = in_x86_mm_first+87,
+  in_x86_pmaxub_mmx = in_x86_mm_first+88,
+  in_x86_pmaxub_mmx_from_mem = in_x86_mm_first+89,
+  in_x86_pminub_mmx = in_x86_mm_first+90,
+  in_x86_pminub_mmx_from_mem = in_x86_mm_first+91,
+  in_x86_pmaxsw_mmx = in_x86_mm_first+92,
+  in_x86_pmaxsw_mmx_from_mem = in_x86_mm_first+93,
+  in_x86_pminsw_mmx = in_x86_mm_first+94,
+  in_x86_pminsw_mmx_from_mem = in_x86_mm_first+95,
+  in_x86_pextrw_mmx = in_x86_mm_first+96,
+  in_x86_pinsrw_mmx = in_x86_mm_first+97,
+  in_x86_pmovmskb_mmx = in_x86_mm_first+98,
+  in_x86_pshufw = in_x86_mm_first+99,
+  in_x86_pshufw_from_mem = in_x86_mm_first+100,
+  in_x86_movapd = in_x86_mm_first+101,
+  in_x86_movapd_to_mem = in_x86_mm_first+102,
+  in_x86_movntpd_to_mem = in_x86_mm_first+103,
+  in_x86_movhpd = in_x86_mm_first+104,
+  in_x86_movhpd_to_mem = in_x86_mm_first+105,
+  in_x86_movlpd = in_x86_mm_first+106,
+  in_x86_movlpd_to_mem = in_x86_mm_first+107,
+  in_x86_movupd = in_x86_mm_first+108,
+  in_x86_movupd_to_mem = in_x86_mm_first+109,
+  in_x86_movmskpd = in_x86_mm_first+110,
+  in_x86_movsd_from_mem = in_x86_mm_first+111,
+  in_x86_movsd_to_mem = in_x86_mm_first+112,
+  in_x86_movsd_to_val = in_x86_mm_first+113,
+  in_x86_movsd_from_val = in_x86_mm_first+114,
+  in_x86_addpd = in_x86_mm_first+115,
+  in_x86_addpd_from_mem = in_x86_mm_first+116,
+  in_x86_addsd = in_x86_mm_first+117,
+  in_x86_addsd_from_mem = in_x86_mm_first+118,
+  in_x86_divpd = in_x86_mm_first+119,
+  in_x86_divpd_from_mem = in_x86_mm_first+120,
+  in_x86_divsd = in_x86_mm_first+121,
+  in_x86_divsd_from_mem = in_x86_mm_first+122,
+  in_x86_maxpd = in_x86_mm_first+123,
+  in_x86_maxpd_from_mem = in_x86_mm_first+124,
+  in_x86_maxsd = in_x86_mm_first+125,
+  in_x86_maxsd_from_mem = in_x86_mm_first+126,
+  in_x86_minpd = in_x86_mm_first+127,
+  in_x86_minpd_from_mem = in_x86_mm_first+128,
+  in_x86_minsd = in_x86_mm_first+129,
+  in_x86_minsd_from_mem = in_x86_mm_first+130,
+  in_x86_mulpd = in_x86_mm_first+131,
+  in_x86_mulpd_from_mem = in_x86_mm_first+132,
+  in_x86_mulsd = in_x86_mm_first+133,
+  in_x86_mulsd_from_mem = in_x86_mm_first+134,
+  in_x86_sqrtpd = in_x86_mm_first+135,
+  in_x86_sqrtpd_from_mem = in_x86_mm_first+136,
+  in_x86_sqrtsd = in_x86_mm_first+137,
+  in_x86_sqrtsd_from_mem = in_x86_mm_first+138,
+  in_x86_subpd = in_x86_mm_first+139,
+  in_x86_subpd_from_mem = in_x86_mm_first+140,
+  in_x86_subsd = in_x86_mm_first+141,
+  in_x86_subsd_from_mem = in_x86_mm_first+142,
+  in_x86_andpd = in_x86_mm_first+143,
+  in_x86_andpd_from_mem = in_x86_mm_first+144,
+  in_x86_andnpd = in_x86_mm_first+145,
+  in_x86_andnpd_from_mem = in_x86_mm_first+146,
+  in_x86_orpd = in_x86_mm_first+147,
+  in_x86_orpd_from_mem = in_x86_mm_first+148,
+  in_x86_xorpd = in_x86_mm_first+149,
+  in_x86_xorpd_from_mem = in_x86_mm_first+150,
+  in_x86_cmppd = in_x86_mm_first+151,
+  in_x86_cmppd_from_mem = in_x86_mm_first+152,
+  in_x86_cmpsd = in_x86_mm_first+153,
+  in_x86_cmpsd_from_mem = in_x86_mm_first+154,
+  in_x86_comisd = in_x86_mm_first+155,
+  in_x86_comisd_from_mem = in_x86_mm_first+156,
+  in_x86_ucomisd = in_x86_mm_first+157,
+  in_x86_ucomisd_from_mem = in_x86_mm_first+158,
+  in_x86_shufpd = in_x86_mm_first+159,
+  in_x86_shufpd_from_mem = in_x86_mm_first+160,
+  in_x86_unpckhpd = in_x86_mm_first+161,
+  in_x86_unpckhpd_from_mem = in_x86_mm_first+162,
+  in_x86_unpcklpd = in_x86_mm_first+163,
+  in_x86_unpcklpd_from_mem = in_x86_mm_first+164,
+  in_x86_cvtdq2pd = in_x86_mm_first+165,
+  in_x86_cvtdq2pd_from_mem = in_x86_mm_first+166,
+  in_x86_cvtdq2ps = in_x86_mm_first+167,
+  in_x86_cvtdq2ps_from_mem = in_x86_mm_first+168,
+  in_x86_cvtpd2dq = in_x86_mm_first+169,
+  in_x86_cvtpd2dq_from_mem = in_x86_mm_first+170,
+  in_x86_cvtpd2pi = in_x86_mm_first+171,
+  in_x86_cvtpd2pi_from_mem = in_x86_mm_first+172,
+  in_x86_cvtpd2ps = in_x86_mm_first+173,
+  in_x86_cvtpd2ps_from_mem = in_x86_mm_first+174,
+  in_x86_cvtpi2pd = in_x86_mm_first+175,
+  in_x86_cvtpi2pd_from_mem = in_x86_mm_first+176,
+  in_x86_cvtps2dq = in_x86_mm_first+177,
+  in_x86_cvtps2dq_from_mem = in_x86_mm_first+178,
+  in_x86_cvtps2pd = in_x86_mm_first+179,
+  in_x86_cvtps2pd_from_mem = in_x86_mm_first+180,
+  in_x86_cvtsd2si = in_x86_mm_first+181,
+  in_x86_cvtsd2si_from_mem = in_x86_mm_first+182,
+  in_x86_cvtsd2ss = in_x86_mm_first+183,
+  in_x86_cvtsd2ss_from_mem = in_x86_mm_first+184,
+  in_x86_cvtsi2sd = in_x86_mm_first+185,
+  in_x86_cvtsi2sd_from_mem = in_x86_mm_first+186,
+  in_x86_cvtss2sd = in_x86_mm_first+187,
+  in_x86_cvtss2sd_from_mem = in_x86_mm_first+188,
+  in_x86_cvttpd2dq = in_x86_mm_first+189,
+  in_x86_cvttpd2dq_from_mem = in_x86_mm_first+190,
+  in_x86_cvttpd2pi = in_x86_mm_first+191,
+  in_x86_cvttpd2pi_from_mem = in_x86_mm_first+192,
+  in_x86_cvttps2dq = in_x86_mm_first+193,
+  in_x86_cvttps2dq_from_mem = in_x86_mm_first+194,
+  in_x86_cvttsd2si = in_x86_mm_first+195,
+  in_x86_cvttsd2si_from_mem = in_x86_mm_first+196,
+  in_x86_movd_from_reg = in_x86_mm_first+197,
+  in_x86_movd_from_mem = in_x86_mm_first+198,
+  in_x86_movd_to_reg = in_x86_mm_first+199,
+  in_x86_movd_to_mem = in_x86_mm_first+200,
+  in_x86_movq_from_mem = in_x86_mm_first+201,
+  in_x86_movq_to_mem = in_x86_mm_first+202,
+  in_x86_pmovmskb = in_x86_mm_first+203,
+  in_x86_pextrw_sse2 = in_x86_mm_first+204,
+  in_x86_pinsrw_sse2 = in_x86_mm_first+205,
+  in_x86_pinsrw_from_mem = in_x86_mm_first+206,
+  in_x86_packssdw = in_x86_mm_first+207,
+  in_x86_packssdw_from_mem = in_x86_mm_first+208,
+  in_x86_packsswb = in_x86_mm_first+209,
+  in_x86_packsswb_from_mem = in_x86_mm_first+210,
+  in_x86_packuswb = in_x86_mm_first+211,
+  in_x86_packuswb_from_mem = in_x86_mm_first+212,
+  in_x86_paddb = in_x86_mm_first+213,
+  in_x86_paddb_from_mem = in_x86_mm_first+214,
+  in_x86_paddw = in_x86_mm_first+215,
+  in_x86_paddw_from_mem = in_x86_mm_first+216,
+  in_x86_paddd = in_x86_mm_first+217,
+  in_x86_paddd_from_mem = in_x86_mm_first+218,
+  in_x86_paddq = in_x86_mm_first+219,
+  in_x86_paddq_from_mem = in_x86_mm_first+220,
+  in_x86_paddsb = in_x86_mm_first+221,
+  in_x86_paddsb_from_mem = in_x86_mm_first+222,
+  in_x86_paddsw = in_x86_mm_first+223,
+  in_x86_paddsw_from_mem = in_x86_mm_first+224,
+  in_x86_paddusb = in_x86_mm_first+225,
+  in_x86_paddusb_from_mem = in_x86_mm_first+226,
+  in_x86_paddusw = in_x86_mm_first+227,
+  in_x86_paddusw_from_mem = in_x86_mm_first+228,
+  in_x86_pand = in_x86_mm_first+229,
+  in_x86_pand_from_mem = in_x86_mm_first+230,
+  in_x86_pandn = in_x86_mm_first+231,
+  in_x86_pandn_from_mem = in_x86_mm_first+232,
+  in_x86_por = in_x86_mm_first+233,
+  in_x86_por_from_mem = in_x86_mm_first+234,
+  in_x86_pxor = in_x86_mm_first+235,
+  in_x86_pxor_from_mem = in_x86_mm_first+236,
+  in_x86_pcmpeqb = in_x86_mm_first+237,
+  in_x86_pcmpeqb_from_mem = in_x86_mm_first+238,
+  in_x86_pcmpeqw = in_x86_mm_first+239,
+  in_x86_pcmpeqw_from_mem = in_x86_mm_first+240,
+  in_x86_pcmpeqd = in_x86_mm_first+241,
+  in_x86_pcmpeqd_from_mem = in_x86_mm_first+242,
+  in_x86_pcmpgtb = in_x86_mm_first+243,
+  in_x86_pcmpgtb_from_mem = in_x86_mm_first+244,
+  in_x86_pcmpgtw = in_x86_mm_first+245,
+  in_x86_pcmpgtw_from_mem = in_x86_mm_first+246,
+  in_x86_pcmpgtd = in_x86_mm_first+247,
+  in_x86_pcmpgtd_from_mem = in_x86_mm_first+248,
+  in_x86_pmullw = in_x86_mm_first+249,
+  in_x86_pmullw_from_mem = in_x86_mm_first+250,
+  in_x86_pmulhw = in_x86_mm_first+251,
+  in_x86_pmulhw_from_mem = in_x86_mm_first+252,
+  in_x86_pmulhuw_sse2 = in_x86_mm_first+253,
+  in_x86_pmulhuw_from_mem = in_x86_mm_first+254,
+  in_x86_pmuludq = in_x86_mm_first+255,
+  in_x86_pmuludq_from_mem = in_x86_mm_first+256,
+  in_x86_psllw_sse2 = in_x86_mm_first+257,
+  in_x86_psllw_from_mem = in_x86_mm_first+258,
+  in_x86_psllw_sse2_imm = in_x86_mm_first+259,
+  in_x86_pslld_sse2 = in_x86_mm_first+260,
+  in_x86_pslld_from_mem = in_x86_mm_first+261,
+  in_x86_pslld_sse2_imm = in_x86_mm_first+262,
+  in_x86_psllq_sse2 = in_x86_mm_first+263,
+  in_x86_psllq_from_mem = in_x86_mm_first+264,
+  in_x86_psllq_sse2_imm = in_x86_mm_first+265,
+  in_x86_psrad_sse2 = in_x86_mm_first+266,
+  in_x86_psrad_from_mem = in_x86_mm_first+267,
+  in_x86_psrad_sse2_imm = in_x86_mm_first+268,
+  in_x86_psraw_sse2 = in_x86_mm_first+269,
+  in_x86_psraw_from_mem = in_x86_mm_first+270,
+  in_x86_psraw_sse2_imm = in_x86_mm_first+271,
+  in_x86_psrlw_sse2 = in_x86_mm_first+272,
+  in_x86_psrlw_from_mem = in_x86_mm_first+273,
+  in_x86_psrlw_sse2_imm = in_x86_mm_first+274,
+  in_x86_psrld_sse2 = in_x86_mm_first+275,
+  in_x86_psrld_from_mem = in_x86_mm_first+276,
+  in_x86_psrld_sse2_imm = in_x86_mm_first+277,
+  in_x86_psrlq_sse2 = in_x86_mm_first+278,
+  in_x86_psrlq_from_mem = in_x86_mm_first+279,
+  in_x86_psrlq_sse2_imm = in_x86_mm_first+280,
+  in_x86_psubb = in_x86_mm_first+281,
+  in_x86_psubb_from_mem = in_x86_mm_first+282,
+  in_x86_psubw = in_x86_mm_first+283,
+  in_x86_psubw_from_mem = in_x86_mm_first+284,
+  in_x86_psubd = in_x86_mm_first+285,
+  in_x86_psubd_from_mem = in_x86_mm_first+286,
+  in_x86_psubq = in_x86_mm_first+287,
+  in_x86_psubq_from_mem = in_x86_mm_first+288,
+  in_x86_psubsb = in_x86_mm_first+289,
+  in_x86_psubsb_from_mem = in_x86_mm_first+290,
+  in_x86_psubsw = in_x86_mm_first+291,
+  in_x86_psubsw_from_mem = in_x86_mm_first+292,
+  in_x86_pmaddwd = in_x86_mm_first+293,
+  in_x86_pmaddwd_from_mem = in_x86_mm_first+294,
+  in_x86_psubusb = in_x86_mm_first+295,
+  in_x86_psubusb_from_mem = in_x86_mm_first+296,
+  in_x86_psubusw = in_x86_mm_first+297,
+  in_x86_psubusw_from_mem = in_x86_mm_first+298,
+  in_x86_punpckhbw = in_x86_mm_first+299,
+  in_x86_punpckhbw_from_mem = in_x86_mm_first+300,
+  in_x86_punpckhwd = in_x86_mm_first+301,
+  in_x86_punpckhwd_from_mem = in_x86_mm_first+302,
+  in_x86_punpckhdq = in_x86_mm_first+303,
+  in_x86_punpckhdq_from_mem = in_x86_mm_first+304,
+  in_x86_punpcklbw = in_x86_mm_first+305,
+  in_x86_punpcklbw_from_mem = in_x86_mm_first+306,
+  in_x86_punpcklwd = in_x86_mm_first+307,
+  in_x86_punpcklwd_from_mem = in_x86_mm_first+308,
+  in_x86_punpckldq = in_x86_mm_first+309,
+  in_x86_punpckldq_from_mem = in_x86_mm_first+310,
+  in_x86_pavgb_sse2 = in_x86_mm_first+311,
+  in_x86_pavgb_from_mem = in_x86_mm_first+312,
+  in_x86_pavgw_sse2 = in_x86_mm_first+313,
+  in_x86_pavgw_from_mem = in_x86_mm_first+314,
+  in_x86_pminub_sse2 = in_x86_mm_first+315,
+  in_x86_pminub_from_mem = in_x86_mm_first+316,
+  in_x86_pminsw_sse2 = in_x86_mm_first+317,
+  in_x86_pminsw_from_mem = in_x86_mm_first+318,
+  in_x86_pmaxsw_sse2 = in_x86_mm_first+319,
+  in_x86_pmaxsw_from_mem = in_x86_mm_first+320,
+  in_x86_pmaxub_sse2 = in_x86_mm_first+321,
+  in_x86_pmaxub_from_mem = in_x86_mm_first+322,
+  in_x86_psadbw_sse2 = in_x86_mm_first+323,
+  in_x86_psadbw_from_mem = in_x86_mm_first+324,
+  in_x86_maskmovdqu = in_x86_mm_first+325,
+  in_x86_movdq2q = in_x86_mm_first+326,
+  in_x86_movdqa_from_mem = in_x86_mm_first+327,
+  in_x86_movdqa = in_x86_mm_first+328,
+  in_x86_movdqu_from_mem = in_x86_mm_first+329,
+  in_x86_movdqu = in_x86_mm_first+330,
+  in_x86_movq2dq = in_x86_mm_first+331,
+  in_x86_movntdq = in_x86_mm_first+332,
+  in_x86_pshufhw = in_x86_mm_first+333,
+  in_x86_pshuflw = in_x86_mm_first+334,
+  in_x86_pshufd = in_x86_mm_first+335,
+  in_x86_pshufhw_from_mem = in_x86_mm_first+336,
+  in_x86_pshuflw_from_mem = in_x86_mm_first+337,
+  in_x86_pshufd_from_mem = in_x86_mm_first+338,
+  in_x86_pslldq = in_x86_mm_first+339,
+  in_x86_psrldq = in_x86_mm_first+340,
+  in_x86_punpckhqdq = in_x86_mm_first+341,
+  in_x86_punpckhqdq_from_mem = in_x86_mm_first+342,
+  in_x86_punpcklqdq = in_x86_mm_first+343,
+  in_x86_punpcklqdq_from_mem = in_x86_mm_first+344,
+  in_x86_addsubps = in_x86_mm_first+345,
+  in_x86_addsubps_from_mem = in_x86_mm_first+346,
+  in_x86_addsubpd = in_x86_mm_first+347,
+  in_x86_addsubpd_from_mem = in_x86_mm_first+348,
+  in_x86_movddup = in_x86_mm_first+349,
+  in_x86_movddup_from_mem = in_x86_mm_first+350,
+  in_x86_movsldup = in_x86_mm_first+351,
+  in_x86_movsldup_from_mem = in_x86_mm_first+352,
+  in_x86_movshdup = in_x86_mm_first+353,
+  in_x86_movshdup_from_mem = in_x86_mm_first+354,
+  in_x86_haddps = in_x86_mm_first+355,
+  in_x86_haddps_from_mem = in_x86_mm_first+356,
+  in_x86_haddpd = in_x86_mm_first+357,
+  in_x86_haddpd_from_mem = in_x86_mm_first+358,
+  in_x86_hsubps = in_x86_mm_first+359,
+  in_x86_hsubps_from_mem = in_x86_mm_first+360,
+  in_x86_hsubpd = in_x86_mm_first+361,
+  in_x86_hsubpd_from_mem = in_x86_mm_first+362,
+  in_x86_lddqu = in_x86_mm_first+363,
+  in_x86_psignb = in_x86_mm_first+364,
+  in_x86_psignb_from_mem = in_x86_mm_first+365,
+  in_x86_psignw = in_x86_mm_first+366,
+  in_x86_psignw_from_mem = in_x86_mm_first+367,
+  in_x86_psignd = in_x86_mm_first+368,
+  in_x86_psignd_from_mem = in_x86_mm_first+369,
+  in_x86_pshufb = in_x86_mm_first+370,
+  in_x86_pshufb_from_mem = in_x86_mm_first+371,
+  in_x86_pmulhrsw = in_x86_mm_first+372,
+  in_x86_pmulhrsw_from_mem = in_x86_mm_first+373,
+  in_x86_pmaddubsw = in_x86_mm_first+374,
+  in_x86_pmaddubsw_from_mem = in_x86_mm_first+375,
+  in_x86_phsubw = in_x86_mm_first+376,
+  in_x86_phsubw_from_mem = in_x86_mm_first+377,
+  in_x86_phsubsw = in_x86_mm_first+378,
+  in_x86_phsubsw_from_mem = in_x86_mm_first+379,
+  in_x86_phsubd = in_x86_mm_first+380,
+  in_x86_phsubd_from_mem = in_x86_mm_first+381,
+  in_x86_phaddsw = in_x86_mm_first+382,
+  in_x86_phaddsw_from_mem = in_x86_mm_first+383,
+  in_x86_phaddw = in_x86_mm_first+384,
+  in_x86_phaddw_from_mem = in_x86_mm_first+385,
+  in_x86_phaddd = in_x86_mm_first+386,
+  in_x86_phaddd_from_mem = in_x86_mm_first+387,
+  in_x86_palignr = in_x86_mm_first+388,
+  in_x86_palignr_from_mem = in_x86_mm_first+389,
+  in_x86_pabsb = in_x86_mm_first+390,
+  in_x86_pabsb_from_mem = in_x86_mm_first+391,
+  in_x86_pabsw = in_x86_mm_first+392,
+  in_x86_pabsw_from_mem = in_x86_mm_first+393,
+  in_x86_pabsd = in_x86_mm_first+394,
+  in_x86_pabsd_from_mem = in_x86_mm_first+395,
+  in_x86_dpps = in_x86_mm_first+396,
+  in_x86_dpps_from_mem = in_x86_mm_first+397,
+  in_x86_dppd = in_x86_mm_first+398,
+  in_x86_dppd_from_mem = in_x86_mm_first+399,
+  in_x86_blendps = in_x86_mm_first+400,
+  in_x86_blendps_from_mem = in_x86_mm_first+401,
+  in_x86_blendvps = in_x86_mm_first+402,
+  in_x86_blendvps_from_mem = in_x86_mm_first+403,
+  in_x86_blendpd = in_x86_mm_first+404,
+  in_x86_blendpd_from_mem = in_x86_mm_first+405,
+  in_x86_blendvpd = in_x86_mm_first+406,
+  in_x86_blendvpd_from_mem = in_x86_mm_first+407,
+  in_x86_roundps = in_x86_mm_first+408,
+  in_x86_roundps_from_mem = in_x86_mm_first+409,
+  in_x86_roundss = in_x86_mm_first+410,
+  in_x86_roundss_from_mem = in_x86_mm_first+411,
+  in_x86_roundpd = in_x86_mm_first+412,
+  in_x86_roundpd_from_mem = in_x86_mm_first+413,
+  in_x86_roundsd = in_x86_mm_first+414,
+  in_x86_roundsd_from_mem = in_x86_mm_first+415,
+  in_x86_insertps = in_x86_mm_first+416,
+  in_x86_insertps_from_mem = in_x86_mm_first+417,
+  in_x86_extractps = in_x86_mm_first+418,
+  in_x86_extractps_from_mem = in_x86_mm_first+419,
+  in_x86_mpsadbw = in_x86_mm_first+420,
+  in_x86_mpsadbw_from_mem = in_x86_mm_first+421,
+  in_x86_phminposuw = in_x86_mm_first+422,
+  in_x86_phminposuw_from_mem = in_x86_mm_first+423,
+  in_x86_pmulld = in_x86_mm_first+424,
+  in_x86_pmulld_from_mem = in_x86_mm_first+425,
+  in_x86_pmuldq = in_x86_mm_first+426,
+  in_x86_pmuldq_from_mem = in_x86_mm_first+427,
+  in_x86_pblendvb = in_x86_mm_first+428,
+  in_x86_pblendvb_from_mem = in_x86_mm_first+429,
+  in_x86_pblendw = in_x86_mm_first+430,
+  in_x86_pblendw_from_mem = in_x86_mm_first+431,
+  in_x86_pminsb = in_x86_mm_first+432,
+  in_x86_pminsb_from_mem = in_x86_mm_first+433,
+  in_x86_pminuw = in_x86_mm_first+434,
+  in_x86_pminuw_from_mem = in_x86_mm_first+435,
+  in_x86_pminsd = in_x86_mm_first+436,
+  in_x86_pminsd_from_mem = in_x86_mm_first+437,
+  in_x86_pminud = in_x86_mm_first+438,
+  in_x86_pminud_from_mem = in_x86_mm_first+439,
+  in_x86_pmaxsb = in_x86_mm_first+440,
+  in_x86_pmaxsb_from_mem = in_x86_mm_first+441,
+  in_x86_pmaxuw = in_x86_mm_first+442,
+  in_x86_pmaxuw_from_mem = in_x86_mm_first+443,
+  in_x86_pmaxsd = in_x86_mm_first+444,
+  in_x86_pmaxsd_from_mem = in_x86_mm_first+445,
+  in_x86_pmaxud = in_x86_mm_first+446,
+  in_x86_pmaxud_from_mem = in_x86_mm_first+447,
+  in_x86_pinsrb = in_x86_mm_first+448,
+  in_x86_pinsrb_from_mem = in_x86_mm_first+449,
+  in_x86_pinsrd = in_x86_mm_first+450,
+  in_x86_pinsrd_from_mem = in_x86_mm_first+451,
+  in_x86_pinsrq = in_x86_mm_first+452,
+  in_x86_pinsrq_from_mem = in_x86_mm_first+453,
+  in_x86_pextrb = in_x86_mm_first+454,
+  in_x86_pextrb_to_mem = in_x86_mm_first+455,
+  in_x86_pextrw_sse41_to_mem = in_x86_mm_first+456,
+  in_x86_pextrd = in_x86_mm_first+457,
+  in_x86_pextrd_to_mem = in_x86_mm_first+458,
+  in_x86_pextrq = in_x86_mm_first+459,
+  in_x86_pextrq_to_mem = in_x86_mm_first+460,
+  in_x86_pmovsxbw = in_x86_mm_first+461,
+  in_x86_pmovsxbw_from_mem = in_x86_mm_first+462,
+  in_x86_pmovzxbw = in_x86_mm_first+463,
+  in_x86_pmovzxbw_from_mem = in_x86_mm_first+464,
+  in_x86_pmovsxbd = in_x86_mm_first+465,
+  in_x86_pmovsxbd_from_mem = in_x86_mm_first+466,
+  in_x86_pmovzxbd = in_x86_mm_first+467,
+  in_x86_pmovzxbd_from_mem = in_x86_mm_first+468,
+  in_x86_pmovsxbq = in_x86_mm_first+469,
+  in_x86_pmovsxbq_from_mem = in_x86_mm_first+470,
+  in_x86_pmovzxbq = in_x86_mm_first+471,
+  in_x86_pmovzxbq_from_mem = in_x86_mm_first+472,
+  in_x86_pmovsxwd = in_x86_mm_first+473,
+  in_x86_pmovsxwd_from_mem = in_x86_mm_first+474,
+  in_x86_pmovzxwd = in_x86_mm_first+475,
+  in_x86_pmovzxwd_from_mem = in_x86_mm_first+476,
+  in_x86_pmovsxwq = in_x86_mm_first+477,
+  in_x86_pmovsxwq_from_mem = in_x86_mm_first+478,
+  in_x86_pmovzxwq = in_x86_mm_first+479,
+  in_x86_pmovzxwq_from_mem = in_x86_mm_first+480,
+  in_x86_pmovsxdq = in_x86_mm_first+481,
+  in_x86_pmovsxdq_from_mem = in_x86_mm_first+482,
+  in_x86_pmovzxdq = in_x86_mm_first+483,
+  in_x86_pmovzxdq_from_mem = in_x86_mm_first+484,
+  in_x86_ptest = in_x86_mm_first+485,
+  in_x86_ptest_from_mem = in_x86_mm_first+486,
+  in_x86_pcmpeqq = in_x86_mm_first+487,
+  in_x86_pcmpeqq_from_mem = in_x86_mm_first+488,
+  in_x86_packusdw = in_x86_mm_first+489,
+  in_x86_packusdw_from_mem = in_x86_mm_first+490,
+  in_x86_movntdqa = in_x86_mm_first+491,
+  in_x86_pcmpestri = in_x86_mm_first+492,
+  in_x86_pcmpestri_from_mem = in_x86_mm_first+493,
+  in_x86_pcmpestrm = in_x86_mm_first+494,
+  in_x86_pcmpestrm_from_mem = in_x86_mm_first+495,
+  in_x86_pcmpistri = in_x86_mm_first+496,
+  in_x86_pcmpistri_from_mem = in_x86_mm_first+497,
+  in_x86_pcmpistrm = in_x86_mm_first+498,
+  in_x86_pcmpistrm_from_mem = in_x86_mm_first+499,
+  in_x86_pcmpgtq = in_x86_mm_first+500,
+  in_x86_pcmpgtq_from_mem = in_x86_mm_first+501,
+  in_x86mm_last = in_x86_mm_first+501

+ 488 - 24
compiler/x86/x86intr.dat

@@ -1,81 +1,545 @@
-movss(out r0: xmm; r1: ptr32)
-movaps(out r0: xmm; r1: ptr32)
-movups(out r0: xmm; r1: ptr32)
+; SSE
+movss(out r0: xmm;  r1: ptr32)
+movaps(out r0: xmm; r1: ptr128)
+movups(out r0: xmm; r1: ptr128)
 
 
-movss[to_mem](r0: ptr32; r1: xmm)
-movaps[to_mem](r0: ptr32; r1: xmm)
-movups[to_mem](r0: ptr32; r1: xmm)
+movss[to_mem](r0:  ptr32; r1: xmm)
+movaps[to_mem](r0: ptr128; r1: xmm)
+movups[to_mem](r0: ptr128; r1: xmm)
 
 
 movss[to_val](out r0: f32; r1: xmm)
 movss[to_val](out r0: f32; r1: xmm)
 movss[from_val](out r0: xmm; r1: f32)
 movss[from_val](out r0: xmm; r1: f32)
 
 
-movlps(var r0: xmm; r1: ptr32)
-movhps(var r0: xmm; r1: ptr32)
+movlps(var r0: xmm; r1: ptr64)
+movhps(var r0: xmm; r1: ptr64)
+movlps[to_mem](r0: ptr64; r1: xmm)
+movhps[to_mem](r0: ptr64; r1: xmm)
 
 
 movlhps(var r0: xmm; r1: xmm)
 movlhps(var r0: xmm; r1: xmm)
 movhlps(var r0: xmm; r1: xmm)
 movhlps(var r0: xmm; r1: xmm)
 
 
 addss(var r0: xmm; r1: xmm)
 addss(var r0: xmm; r1: xmm)
+addss[from_mem](var r0: xmm; r1: ptr32)
 subss(var r0: xmm; r1: xmm)
 subss(var r0: xmm; r1: xmm)
+subss[from_mem](var r0: xmm; r1: ptr32)
 mulss(var r0: xmm; r1: xmm)
 mulss(var r0: xmm; r1: xmm)
+mulss[from_mem](var r0: xmm; r1: ptr32)
 divss(var r0: xmm; r1: xmm)
 divss(var r0: xmm; r1: xmm)
+divss[from_mem](var r0: xmm; r1: ptr32)
 rcpss(var r0: xmm; r1: xmm)
 rcpss(var r0: xmm; r1: xmm)
+rcpss[from_mem](var r0: xmm; r1: ptr32)
 sqrtss(var r0: xmm; r1: xmm)
 sqrtss(var r0: xmm; r1: xmm)
+sqrtss[from_mem](var r0: xmm; r1: ptr32)
 maxss(var r0: xmm; r1: xmm)
 maxss(var r0: xmm; r1: xmm)
+maxss[from_mem](var r0: xmm; r1: ptr32)
 minss(var r0: xmm; r1: xmm)
 minss(var r0: xmm; r1: xmm)
+minss[from_mem](var r0: xmm; r1: ptr32)
 rsqrtss(var r0: xmm; r1: xmm)
 rsqrtss(var r0: xmm; r1: xmm)
+rsqrtss[from_mem](var r0: xmm; r1: ptr32)
 
 
 addps(var r0: xmm; r1: xmm)
 addps(var r0: xmm; r1: xmm)
+addps[from_mem](var r0: xmm; r1: ptr128)
 subps(var r0: xmm; r1: xmm)
 subps(var r0: xmm; r1: xmm)
+subps[from_mem](var r0: xmm; r1: ptr128)
 mulps(var r0: xmm; r1: xmm)
 mulps(var r0: xmm; r1: xmm)
+mulps[from_mem](var r0: xmm; r1: ptr128)
 divps(var r0: xmm; r1: xmm)
 divps(var r0: xmm; r1: xmm)
+divps[from_mem](var r0: xmm; r1: ptr128)
 rcpps(var r0: xmm; r1: xmm)
 rcpps(var r0: xmm; r1: xmm)
+rcpps[from_mem](var r0: xmm; r1: ptr128)
 sqrtps(var r0: xmm; r1: xmm)
 sqrtps(var r0: xmm; r1: xmm)
+sqrtps[from_mem](var r0: xmm; r1: ptr128)
 maxps(var r0: xmm; r1: xmm)
 maxps(var r0: xmm; r1: xmm)
+maxps[from_mem](var r0: xmm; r1: ptr128)
 minps(var r0: xmm; r1: xmm)
 minps(var r0: xmm; r1: xmm)
+minps[from_mem](var r0: xmm; r1: ptr128)
 rsqrtps(var r0: xmm; r1: xmm)
 rsqrtps(var r0: xmm; r1: xmm)
+rsqrtps[from_mem](var r0: xmm; r1: ptr128)
 
 
 andps(var r0: xmm; r1: xmm)
 andps(var r0: xmm; r1: xmm)
+andps[from_mem](var r0: xmm; r1: ptr128)
 orps(var r0: xmm; r1: xmm)
 orps(var r0: xmm; r1: xmm)
+orps[from_mem](var r0: xmm; r1: ptr128)
 xorps(var r0: xmm; r1: xmm)
 xorps(var r0: xmm; r1: xmm)
+xorps[from_mem](var r0: xmm; r1: ptr128)
 andnps(var r0: xmm; r1: xmm)
 andnps(var r0: xmm; r1: xmm)
+andnps[from_mem](var r0: xmm; r1: ptr128)
 
 
 cmpss(var r0: xmm; r1: xmm; imm: i32)             (imm in [0..7])
 cmpss(var r0: xmm; r1: xmm; imm: i32)             (imm in [0..7])
+cmpss[from_mem](var r0: xmm; r1: ptr32; imm: i32)           (imm in [0..7])
 cmpps(var r0: xmm; r1: xmm; imm: i32)             (imm in [0..7])
 cmpps(var r0: xmm; r1: xmm; imm: i32)             (imm in [0..7])
+cmpps[from_mem](var r0: xmm; r1: ptr128; imm: i32)          (imm in [0..7])
 
 
 shufps(var r0: xmm; r1: xmm; imm: i32)            (imm in [0..$ff])
 shufps(var r0: xmm; r1: xmm; imm: i32)            (imm in [0..$ff])
+shufps[from_mem](var r0: xmm; r1: ptr128; imm: i32)            (imm in [0..$ff])
 unpckhps(var r0: xmm; r1: xmm)
 unpckhps(var r0: xmm; r1: xmm)
+unpckhps[from_mem](var r0: xmm; r1: ptr128)
 unpcklps(var r0: xmm; r1: xmm)
 unpcklps(var r0: xmm; r1: xmm)
+unpcklps[from_mem](var r0: xmm; r1: ptr128)
 
 
-cvtsi2ss(var r0: xmm; r1: r32)
-cvtss2si(out r0: r32; r1: xmm)
-cvttss2si(out r0: r32; r1: xmm)
+cvtsi2ss(var r0: xmm; r1: reg)
+cvtsi2ss[from_mem](var r0: xmm; r1: ptr32)
+cvtss2si(out r0: reg; r1: xmm)
+cvtss2si[from_mem](out r0: reg; r1: ptr32)
+cvttss2si(out r0: reg; r1: xmm)
+cvttss2si[from_mem](out r0: reg; r1: ptr32)
 
 
 cvtpi2ps(var r0: xmm; r1: mm)
 cvtpi2ps(var r0: xmm; r1: mm)
+cvtpi2ps[from_mem](var r0: xmm; r1: ptr64)
 cvtps2pi(out r0: mm; r1: xmm)
 cvtps2pi(out r0: mm; r1: xmm)
+cvtps2pi[from_mem](out r0: mm; r1: ptr64)
 cvttps2pi(out r0: mm; r1: xmm)
 cvttps2pi(out r0: mm; r1: xmm)
+cvttps2pi[from_mem](out r0: mm; r1: ptr64)
 
 
 pmulhuw[mmx](var r0: mm; r1: mm)
 pmulhuw[mmx](var r0: mm; r1: mm)
+pmulhuw[mmx_from_mem](var r0: mm; r1: ptr64)
 psadbw[mmx](var r0: mm; r1: mm)
 psadbw[mmx](var r0: mm; r1: mm)
+psadbw[mmx_from_mem](var r0: mm; r1: ptr64)
 pavgb[mmx](var r0: mm; r1: mm)
 pavgb[mmx](var r0: mm; r1: mm)
+pavgb[mmx_from_mem](var r0: mm; r1: ptr64)
 pavgw[mmx](var r0: mm; r1: mm)
 pavgw[mmx](var r0: mm; r1: mm)
+pavgw[mmx_from_mem](var r0: mm; r1: ptr64)
 pmaxub[mmx](var r0: mm; r1: mm)
 pmaxub[mmx](var r0: mm; r1: mm)
+pmaxub[mmx_from_mem](var r0: mm; r1: ptr64)
 pminub[mmx](var r0: mm; r1: mm)
 pminub[mmx](var r0: mm; r1: mm)
+pminub[mmx_from_mem](var r0: mm; r1: ptr64)
 pmaxsw[mmx](var r0: mm; r1: mm)
 pmaxsw[mmx](var r0: mm; r1: mm)
+pmaxsw[mmx_from_mem](var r0: mm; r1: ptr64)
 pminsw[mmx](var r0: mm; r1: mm)
 pminsw[mmx](var r0: mm; r1: mm)
-pextrw[mmx](out r0: r32; r1: mm; imm: i32)             (imm in [0..3])
-pinsrw[mmx](var r0: mm; r1: r32; imm: i32)             (imm in [0..3])
+pminsw[mmx_from_mem](var r0: mm; r1: ptr64)
+pextrw[mmx](out r0: reg; r1: mm; imm: i32)             (imm in [0..3])
+pinsrw[mmx](var r0: mm; r1: reg; imm: i32)             (imm in [0..3])
 
 
-pmovmskb(out r0: r32; r1: mm)
+pmovmskb[mmx](out r0: reg; r1: mm)
 pshufw(out r0: mm; r1: mm; imm: i32)                   (imm in [0..$ff])
 pshufw(out r0: mm; r1: mm; imm: i32)                   (imm in [0..$ff])
+pshufw[from_mem](out r0: mm; r1: ptr64; imm: i32)                   (imm in [0..$ff])
 
 
-pmulhuw(var r0: xmm; r1: xmm)
-psadbw(var r0: xmm; r1: xmm)
-pavgb(var r0: xmm; r1: xmm)
-pavgw(var r0: xmm; r1: xmm)
-pmaxub(var r0: xmm; r1: xmm)
-pminub(var r0: xmm; r1: xmm)
-pmaxsw(var r0: xmm; r1: xmm)
-pminsw(var r0: xmm; r1: xmm)
-pextrw(out r0: r32; r1: xmm; imm: i32)            (imm in [0..7])
-pinsrw(var r0: xmm; r1: r32; imm: i32)            (imm in [0..7])         
+; SSE2 data movement instructions
+movapd(out r0: xmm; r1: ptr128)
+movapd[to_mem](r0: ptr128; r1: xmm)
+movntpd[to_mem](r0: ptr128; r1: xmm)
+movhpd(out r0: xmm; r1: ptr64)
+movhpd[to_mem](r0: ptr64; r1: xmm)
+movlpd(out r0: xmm; r1: ptr64)
+movlpd[to_mem](r0: ptr64; r1: xmm)
+movupd(out r0: xmm; r1: ptr128)
+movupd[to_mem](r0: ptr128; r1: xmm)
+movmskpd(out r0: r32; r1: xmm)
+movsd[from_mem](out r0: xmm; r1: ptr64)
+movsd[to_mem](r0: ptr64; r1: xmm)
+
+movsd[to_val](out r0: f64; r1: xmm)
+movsd[from_val](out r0: xmm; r1: f64)
+
+; SSE2 packed arithmetic instructions
+addpd(var r0: xmm; r1: xmm)
+addpd[from_mem](var r0: xmm; r1: ptr128)
+addsd(var r0: xmm; r1: xmm)
+addsd[from_mem](var r0: xmm; r1: ptr64)
+divpd(var r0: xmm; r1: xmm)
+divpd[from_mem](var r0: xmm; r1: ptr128)
+divsd(var r0: xmm; r1: xmm)
+divsd[from_mem](var r0: xmm; r1: ptr64)
+maxpd(var r0: xmm; r1: xmm)
+maxpd[from_mem](var r0: xmm; r1: ptr128)
+maxsd(var r0: xmm; r1: xmm)
+maxsd[from_mem](var r0: xmm; r1: ptr64)
+minpd(var r0: xmm; r1: xmm)
+minpd[from_mem](var r0: xmm; r1: ptr128)
+minsd(var r0: xmm; r1: xmm)
+minsd[from_mem](var r0: xmm; r1: ptr64)
+mulpd(var r0: xmm; r1: xmm)
+mulpd[from_mem](var r0: xmm; r1: ptr128)
+mulsd(var r0: xmm; r1: xmm)
+mulsd[from_mem](var r0: xmm; r1: ptr64)
+sqrtpd(out r0: xmm; r1: xmm)
+sqrtpd[from_mem](out r0: xmm; r1: ptr128)
+sqrtsd(out r0: xmm; r1: xmm)
+sqrtsd[from_mem](out r0: xmm; r1: ptr64)
+subpd(var r0: xmm; r1: xmm)
+subpd[from_mem](var r0: xmm; r1: ptr128)
+subsd(var r0: xmm; r1: xmm)
+subsd[from_mem](var r0: xmm; r1: ptr64)
+
+; SSE2 logical instructions
+andpd(var r0: xmm; r1: xmm)
+andpd[from_mem](var r0: xmm; r1: ptr128)
+andnpd(var r0: xmm; r1: xmm)
+andnpd[from_mem](var r0: xmm; r1: ptr128)
+orpd(var r0: xmm; r1: xmm)
+orpd[from_mem](var r0: xmm; r1: ptr128)
+xorpd(var r0: xmm; r1: xmm)
+xorpd[from_mem](var r0: xmm; r1: ptr128)
+
+; SSE2 compare instructions
+cmppd(var r0: xmm; r1: xmm; imm: i32)
+cmppd[from_mem](var r0: xmm; r1: ptr128; imm: i32)
+cmpsd(var r0: xmm; r1: xmm; imm: i32)
+cmpsd[from_mem](var r0: xmm; r1: ptr64; imm: i32)
+comisd(var r0: xmm; r1: xmm)
+comisd[from_mem](var r0: xmm; r1: ptr64)
+ucomisd(var r0: xmm; r1: xmm)
+ucomisd[from_mem](var r0: xmm; r1: ptr64)
+
+; SSE2 shuffle and unpack instructions
+shufpd(var r0: xmm; r1: xmm; imm: i32)
+shufpd[from_mem](var r0: xmm; r1: ptr128; imm: i32)
+unpckhpd(var r0: xmm; r1: xmm)
+unpckhpd[from_mem](var r0: xmm; r1: ptr128)
+unpcklpd(var r0: xmm; r1: xmm)
+unpcklpd[from_mem](var r0: xmm; r1: ptr128)
+
+; SSE2 conversion instructions
+cvtdq2pd(var r0: xmm; r1: xmm)
+cvtdq2pd[from_mem](var r0: xmm; r1: ptr64)
+cvtdq2ps(var r0: xmm; r1: xmm)
+cvtdq2ps[from_mem](var r0: xmm; r1: ptr128)
+cvtpd2dq(var r0: xmm; r1: xmm)
+cvtpd2dq[from_mem](var r0: xmm; r1: ptr128)
+cvtpd2pi(var r0: mm; r1: xmm)
+cvtpd2pi[from_mem](var r0: mm; r1: ptr128)
+cvtpd2ps(var r0: xmm; r1: xmm)
+cvtpd2ps[from_mem](var r0: xmm; r1: ptr128)
+cvtpi2pd(var r0: xmm; r1: mm)
+cvtpi2pd[from_mem](var r0: xmm; r1: ptr64)
+cvtps2dq(var r0: xmm; r1: xmm)
+cvtps2dq[from_mem](var r0: xmm; r1: ptr128)
+cvtps2pd(var r0: xmm; r1: xmm)
+cvtps2pd[from_mem](var r0: xmm; r1: ptr64)
+cvtsd2si(var r0: sreg; r1: xmm)
+cvtsd2si[from_mem](var r0: sreg; r1: ptr64)
+cvtsd2ss(var r0: xmm; r1: xmm)
+cvtsd2ss[from_mem](var r0: xmm; r1: ptr64)
+cvtsi2sd(var r0: xmm; r1: r32)
+cvtsi2sd[from_mem](var r0: xmm; r1: ptr32)
+cvtss2sd(var r0: xmm; r1: xmm)
+cvtss2sd[from_mem](var r0: xmm; r1: ptr32)
+cvttpd2dq(var r0: xmm; r1: xmm)
+cvttpd2dq[from_mem](var r0: xmm; r1: ptr128)
+cvttpd2pi(var r0: mm; r1: xmm)
+cvttpd2pi[from_mem](var r0: mm; r1: ptr128)
+cvttps2dq(var r0: xmm; r1: xmm)
+cvttps2dq[from_mem](var r0: xmm; r1: ptr128)
+cvttsd2si(var r0: sreg; r1: xmm)
+cvttsd2si[from_mem](var r0: sreg; r1: ptr64)
+
+; SSE2 MMX-like instructions
+movd[from_reg](out r0: xmm; r1: r32)
+movd[from_mem](out r0: xmm; r1: ptr32)
+movd[to_reg](out r0: r32; r1: xmm)
+movd[to_mem](r0: ptr32; r1: xmm)
+movq[from_mem](out r0: xmm; r1: ptr64)
+movq[to_mem](r0: ptr64; r1: xmm)
+pmovmskb(var r0: r32; r1: xmm)
+pextrw[sse2](out r0: r16; r1: xmm; imm: i32)
+pinsrw[sse2](var r0: xmm; r1: r32; imm: i32)
+pinsrw[from_mem](var r0: xmm; r1: ptr16; imm: i32)
+packssdw(var r0: xmm; r1: xmm)
+packssdw[from_mem](var r0: xmm; r1: ptr128)
+packsswb(var r0: xmm; r1: xmm)
+packsswb[from_mem](var r0: xmm; r1: ptr128)
+packuswb(var r0: xmm; r1: xmm)
+packuswb[from_mem](var r0: xmm; r1: ptr128)
+paddb(var r0: xmm; r1: xmm)
+paddb[from_mem](var r0: xmm; r1: ptr128)
+paddw(var r0: xmm; r1: xmm)
+paddw[from_mem](var r0: xmm; r1: ptr128)
+paddd(var r0: xmm; r1: xmm)
+paddd[from_mem](var r0: xmm; r1: ptr128)
+paddq(var r0: xmm; r1: xmm)
+paddq[from_mem](var r0: xmm; r1: ptr128)
+paddsb(var r0: xmm; r1: xmm)
+paddsb[from_mem](var r0: xmm; r1: ptr128)
+paddsw(var r0: xmm; r1: xmm)
+paddsw[from_mem](var r0: xmm; r1: ptr128)
+paddusb(var r0: xmm; r1: xmm)
+paddusb[from_mem](var r0: xmm; r1: ptr128)
+paddusw(var r0: xmm; r1: xmm)
+paddusw[from_mem](var r0: xmm; r1: ptr128)
+pand(var r0: xmm; r1: xmm)
+pand[from_mem](var r0: xmm; r1: ptr128)
+pandn(var r0: xmm; r1: xmm)
+pandn[from_mem](var r0: xmm; r1: ptr128)
+por(var r0: xmm; r1: xmm)
+por[from_mem](var r0: xmm; r1: ptr128)
+pxor(var r0: xmm; r1: xmm)
+pxor[from_mem](var r0: xmm; r1: ptr128)
+pcmpeqb(var r0: xmm; r1: xmm)
+pcmpeqb[from_mem](var r0: xmm; r1: ptr128)
+pcmpeqw(var r0: xmm; r1: xmm)
+pcmpeqw[from_mem](var r0: xmm; r1: ptr128)
+pcmpeqd(var r0: xmm; r1: xmm)
+pcmpeqd[from_mem](var r0: xmm; r1: ptr128)
+pcmpgtb(var r0: xmm; r1: xmm)
+pcmpgtb[from_mem](var r0: xmm; r1: ptr128)
+pcmpgtw(var r0: xmm; r1: xmm)
+pcmpgtw[from_mem](var r0: xmm; r1: ptr128)
+pcmpgtd(var r0: xmm; r1: xmm)
+pcmpgtd[from_mem](var r0: xmm; r1: ptr128)
+pmullw(var r0: xmm; r1: xmm)
+pmullw[from_mem](var r0: xmm; r1: ptr128)
+pmulhw(var r0: xmm; r1: xmm)
+pmulhw[from_mem](var r0: xmm; r1: ptr128)
+pmulhuw[sse2](var r0: xmm; r1: xmm)
+pmulhuw[from_mem](var r0: xmm; r1: ptr128)
+pmuludq(var r0: xmm; r1: xmm)
+pmuludq[from_mem](var r0: xmm; r1: ptr128)
+psllw[sse2](var r0: xmm; r1: xmm)
+psllw[from_mem](var r0: xmm; r1: ptr128)
+psllw[sse2_imm](var r0: xmm; imm: i32)
+pslld[sse2](var r0: xmm; r1: xmm)
+pslld[from_mem](var r0: xmm; r1: ptr128)
+pslld[sse2_imm](var r0: xmm; imm: i32)
+psllq[sse2](var r0: xmm; r1: xmm)
+psllq[from_mem](var r0: xmm; r1: ptr128)
+psllq[sse2_imm](var r0: xmm; imm: i32)
+psrad[sse2](var r0: xmm; r1: xmm)
+psrad[from_mem](var r0: xmm; r1: ptr128)
+psrad[sse2_imm](var r0: xmm; imm: i32)
+psraw[sse2](var r0: xmm; r1: xmm)
+psraw[from_mem](var r0: xmm; r1: ptr128)
+psraw[sse2_imm](var r0: xmm; imm: i32)
+psrlw[sse2](var r0: xmm; r1: xmm)
+psrlw[from_mem](var r0: xmm; r1: ptr128)
+psrlw[sse2_imm](var r0: xmm; imm: i32)
+psrld[sse2](var r0: xmm; r1: xmm)
+psrld[from_mem](var r0: xmm; r1: ptr128)
+psrld[sse2_imm](var r0: xmm; imm: i32)
+psrlq[sse2](var r0: xmm; r1: xmm)
+psrlq[from_mem](var r0: xmm; r1: ptr128)
+psrlq[sse2_imm](var r0: xmm; imm: i32)
+psubb(var r0: xmm; r1: xmm)
+psubb[from_mem](var r0: xmm; r1: ptr128)
+psubw(var r0: xmm; r1: xmm)
+psubw[from_mem](var r0: xmm; r1: ptr128)
+psubd(var r0: xmm; r1: xmm)
+psubd[from_mem](var r0: xmm; r1: ptr128)
+psubq(var r0: xmm; r1: xmm)
+psubq[from_mem](var r0: xmm; r1: ptr128)
+psubsb(var r0: xmm; r1: xmm)
+psubsb[from_mem](var r0: xmm; r1: ptr128)
+psubsw(var r0: xmm; r1: xmm)
+psubsw[from_mem](var r0: xmm; r1: ptr128)
+pmaddwd(var r0: xmm; r1: xmm)
+pmaddwd[from_mem](var r0: xmm; r1: ptr128)
+psubusb(var r0: xmm; r1: xmm)
+psubusb[from_mem](var r0: xmm; r1: ptr128)
+psubusw(var r0: xmm; r1: xmm)
+psubusw[from_mem](var r0: xmm; r1: ptr128)
+punpckhbw(var r0: xmm; r1: xmm)
+punpckhbw[from_mem](var r0: xmm; r1: ptr128)
+punpckhwd(var r0: xmm; r1: xmm)
+punpckhwd[from_mem](var r0: xmm; r1: ptr128)
+punpckhdq(var r0: xmm; r1: xmm)
+punpckhdq[from_mem](var r0: xmm; r1: ptr128)
+punpcklbw(var r0: xmm; r1: xmm)
+punpcklbw[from_mem](var r0: xmm; r1: ptr128)
+punpcklwd(var r0: xmm; r1: xmm)
+punpcklwd[from_mem](var r0: xmm; r1: ptr128)
+punpckldq(var r0: xmm; r1: xmm)
+punpckldq[from_mem](var r0: xmm; r1: ptr128)
+pavgb[sse2](var r0: xmm; r1: xmm)
+pavgb[from_mem](var r0: xmm; r1: ptr128)
+pavgw[sse2](var r0: xmm; r1: xmm)
+pavgw[from_mem](var r0: xmm; r1: ptr128)
+pminub[sse2](var r0: xmm; r1: xmm)
+pminub[from_mem](var r0: xmm; r1: ptr128)
+pminsw[sse2](var r0: xmm; r1: xmm)
+pminsw[from_mem](var r0: xmm; r1: ptr128)
+pmaxsw[sse2](var r0: xmm; r1: xmm)
+pmaxsw[from_mem](var r0: xmm; r1: ptr128)
+pmaxub[sse2](var r0: xmm; r1: xmm)
+pmaxub[from_mem](var r0: xmm; r1: ptr128)
+psadbw[sse2](var r0: xmm; r1: xmm)
+psadbw[from_mem](var r0: xmm; r1: ptr128)
+
+; SSE2 integer instructions
+maskmovdqu(addr: edi_ptr; r0: xmm; r1: xmm)
+movdq2q(out r0: mm; r1: xmm)
+movdqa[from_mem](out r0: xmm; r1: ptr128)
+movdqa(r0: ptr128; r1: xmm)
+movdqu[from_mem](out r0: xmm; r1: ptr128)
+movdqu(r0: ptr128; r1: xmm)
+movq2dq(out r0: xmm; r1: mm)
+movntdq(r0: ptr128; r1: xmm)
+pshufhw(out r0: xmm; r1: xmm; imm: i32)
+pshuflw(out r0: xmm; r1: xmm; imm: i32)
+pshufd(out r0: xmm; r1: xmm; imm: i32)
+pshufhw[from_mem](out r0: xmm; r1: ptr128; imm: i32)
+pshuflw[from_mem](out r0: xmm; r1: ptr128; imm: i32)
+pshufd[from_mem](out r0: xmm; r1: ptr128; imm: i32)
+pslldq(var r0: xmm; imm: i32)
+psrldq(var r0: xmm; imm: i32)
+punpckhqdq(var r0: xmm; r1: xmm)
+punpckhqdq[from_mem](var r0: xmm; r1: ptr128)
+punpcklqdq(var r0: xmm; r1: xmm)
+punpcklqdq[from_mem](var r0: xmm; r1: ptr128)
+
+; SSE3 SIMD floating-point instructions
+addsubps(var r0: xmm; r1: xmm)
+addsubps[from_mem](var r0: xmm; r1: ptr128)
+addsubpd(var r0: xmm; r1: xmm)
+addsubpd[from_mem](var r0: xmm; r1: ptr128)
+movddup(out r0: xmm; r1: xmm)
+movddup[from_mem](out r0: xmm; r1: ptr64)
+movsldup(out r0: xmm; r1: xmm)
+movsldup[from_mem](out r0: xmm; r1: ptr128)
+movshdup(out r0: xmm; r1: xmm)
+movshdup[from_mem](out r0: xmm; r1: ptr128)
+haddps(var r0: xmm; r1: xmm)
+haddps[from_mem](var r0: xmm; r1: ptr128)
+haddpd(var r0: xmm; r1: xmm)
+haddpd[from_mem](var r0: xmm; r1: ptr128)
+hsubps(var r0: xmm; r1: xmm)
+hsubps[from_mem](var r0: xmm; r1: ptr128)
+hsubpd(var r0: xmm; r1: xmm)
+hsubpd[from_mem](var r0: xmm; r1: ptr128)
+
+; SSE3 SIMD integer instructions
+lddqu(out r0: xmm; r1: ptr128)
+
+; SSSE3
+psignb(var r0: xmm; r1: xmm)
+psignb[from_mem](var r0: xmm; r1: ptr128)
+psignw(var r0: xmm; r1: xmm)
+psignw[from_mem](var r0: xmm; r1: ptr128)
+psignd(var r0: xmm; r1: xmm)
+psignd[from_mem](var r0: xmm; r1: ptr128)
+pshufb(var r0: xmm; r1: xmm)
+pshufb[from_mem](var r0: xmm; r1: ptr128)
+pmulhrsw(var r0: xmm; r1: xmm)
+pmulhrsw[from_mem](var r0: xmm; r1: ptr128)
+pmaddubsw(var r0: xmm; r1: xmm)
+pmaddubsw[from_mem](var r0: xmm; r1: ptr128)
+phsubw(var r0: xmm; r1: xmm)
+phsubw[from_mem](var r0: xmm; r1: ptr128)
+phsubsw(var r0: xmm; r1: xmm)
+phsubsw[from_mem](var r0: xmm; r1: ptr128)
+phsubd(var r0: xmm; r1: xmm)
+phsubd[from_mem](var r0: xmm; r1: ptr128)
+phaddsw(var r0: xmm; r1: xmm)
+phaddsw[from_mem](var r0: xmm; r1: ptr128)
+phaddw(var r0: xmm; r1: xmm)
+phaddw[from_mem](var r0: xmm; r1: ptr128)
+phaddd(var r0: xmm; r1: xmm)
+phaddd[from_mem](var r0: xmm; r1: ptr128)
+palignr(var r0: xmm; r1: xmm; imm: i32)
+palignr[from_mem](var r0: xmm; r1: ptr128; imm: i32)
+pabsb(out r0: xmm; r1: xmm)
+pabsb[from_mem](out r0: xmm; r1: ptr128)
+pabsw(out r0: xmm; r1: xmm)
+pabsw[from_mem](out r0: xmm; r1: ptr128)
+pabsd(out r0: xmm; r1: xmm)
+pabsd[from_mem](out r0: xmm; r1: ptr128)
+
+; SSE4.1 SIMD floating-point instructions
+dpps(var r0: xmm; r1: xmm; imm: i32)
+dpps[from_mem](var r0: xmm; r1: ptr128; imm: i32)
+dppd(var r0: xmm; r1: xmm; imm: i32)
+dppd[from_mem](var r0: xmm; r1: ptr128; imm: i32)
+blendps(var r0: xmm; r1: xmm; imm: i32)
+blendps[from_mem](var r0: xmm; r1: ptr128; imm: i32)
+blendvps(var r0: xmm; r1: xmm; mask: implicit_xmm0)
+blendvps[from_mem](var r0: xmm; r1: ptr128; mask: implicit_xmm0)
+blendpd(var r0: xmm; r1: xmm; imm: i32)
+blendpd[from_mem](var r0: xmm; r1: ptr128; imm: i32)
+blendvpd(var r0: xmm; r1: xmm; mask: implicit_xmm0)
+blendvpd[from_mem](var r0: xmm; r1: ptr128; mask: implicit_xmm0)
+roundps(out r0: xmm; r1: xmm; imm: i32)
+roundps[from_mem](out r0: xmm; r1: ptr128; imm: i32)
+roundss(out r0: xmm; r1: xmm; imm: i32)
+roundss[from_mem](out r0: xmm; r1: ptr32; imm: i32)
+roundpd(out r0: xmm; r1: xmm; imm: i32)
+roundpd[from_mem](out r0: xmm; r1: ptr128; imm: i32)
+roundsd(out r0: xmm; r1: xmm; imm: i32)
+roundsd[from_mem](out r0: xmm; r1: ptr64; imm: i32)
+insertps(var r0: xmm; r1: xmm; imm: i32)
+insertps[from_mem](var r0: xmm; r1: ptr32; imm: i32)
+extractps(out r0: r32; r1: xmm; imm: i32)
+extractps[from_mem](r0: ptr32; r1: xmm; imm: i32)
+
+; SSE4.1 SIMD integer instructions
+mpsadbw(var r0: xmm; r1: xmm; imm: i32)
+mpsadbw[from_mem](var r0: xmm; r1: ptr128; imm: i32)
+phminposuw(var r0: xmm; r1: xmm)
+phminposuw[from_mem](var r0: xmm; r1: ptr128)
+pmulld(var r0: xmm; r1: xmm)
+pmulld[from_mem](var r0: xmm; r1: ptr128)
+pmuldq(var r0: xmm; r1: xmm)
+pmuldq[from_mem](var r0: xmm; r1: ptr128)
+pblendvb(var r0: xmm; r1: xmm; mask: implicit_xmm0)
+pblendvb[from_mem](var r0: xmm; r1: ptr128; mask: implicit_xmm0)
+pblendw(var r0: xmm; r1: xmm; imm: i32)
+pblendw[from_mem](var r0: xmm; r1: ptr128; imm: i32)
+pminsb(var r0: xmm; r1: xmm)
+pminsb[from_mem](var r0: xmm; r1: ptr128)
+pminuw(var r0: xmm; r1: xmm)
+pminuw[from_mem](var r0: xmm; r1: ptr128)
+pminsd(var r0: xmm; r1: xmm)
+pminsd[from_mem](var r0: xmm; r1: ptr128)
+pminud(var r0: xmm; r1: xmm)
+pminud[from_mem](var r0: xmm; r1: ptr128)
+pmaxsb(var r0: xmm; r1: xmm)
+pmaxsb[from_mem](var r0: xmm; r1: ptr128)
+pmaxuw(var r0: xmm; r1: xmm)
+pmaxuw[from_mem](var r0: xmm; r1: ptr128)
+pmaxsd(var r0: xmm; r1: xmm)
+pmaxsd[from_mem](var r0: xmm; r1: ptr128)
+pmaxud(var r0: xmm; r1: xmm)
+pmaxud[from_mem](var r0: xmm; r1: ptr128)
+pinsrb(var r0: xmm; r1: r32; imm: i32)
+pinsrb[from_mem](var r0: xmm; r1: ptr8; imm: i32)
+pinsrd(var r0: xmm; r1: r32; imm: i32)
+pinsrd[from_mem](var r0: xmm; r1: ptr32; imm: i32)
+pinsrq(var r0: xmm; r1: reg; imm: i32)                  |X86_64
+pinsrq[from_mem](var r0: xmm; r1: ptr64; imm: i32)      |X86_64
+pextrb(out r0: r8; r1: xmm; imm: i32)
+pextrb[to_mem](r0: ptr8; r1: xmm; imm: i32)
+pextrw[sse41_to_mem](r0: ptr16; r1: xmm; imm: i32)
+pextrd(out r0: r32; r1: xmm; imm: i32)
+pextrd[to_mem](r0: ptr32; r1: xmm; imm: i32)
+pextrq(out r0: r32; r1: xmm; imm: i32)                  |X86_64
+pextrq[to_mem](r0: ptr64; r1: xmm; imm: i32)            |X86_64
+pmovsxbw(out r0: xmm; r1: xmm)
+pmovsxbw[from_mem](out r0: xmm; r1: ptr64)
+pmovzxbw(out r0: xmm; r1: xmm)
+pmovzxbw[from_mem](out r0: xmm; r1: ptr64)
+pmovsxbd(out r0: xmm; r1: xmm)
+pmovsxbd[from_mem](out r0: xmm; r1: ptr32)
+pmovzxbd(out r0: xmm; r1: xmm)
+pmovzxbd[from_mem](out r0: xmm; r1: ptr32)
+pmovsxbq(out r0: xmm; r1: xmm)
+pmovsxbq[from_mem](out r0: xmm; r1: ptr16)
+pmovzxbq(out r0: xmm; r1: xmm)
+pmovzxbq[from_mem](out r0: xmm; r1: ptr16)
+pmovsxwd(out r0: xmm; r1: xmm)
+pmovsxwd[from_mem](out r0: xmm; r1: ptr64)
+pmovzxwd(out r0: xmm; r1: xmm)
+pmovzxwd[from_mem](out r0: xmm; r1: ptr64)
+pmovsxwq(out r0: xmm; r1: xmm)
+pmovsxwq[from_mem](out r0: xmm; r1: ptr32)
+pmovzxwq(out r0: xmm; r1: xmm)
+pmovzxwq[from_mem](out r0: xmm; r1: ptr32)
+pmovsxdq(out r0: xmm; r1: xmm)
+pmovsxdq[from_mem](out r0: xmm; r1: ptr64)
+pmovzxdq(out r0: xmm; r1: xmm)
+pmovzxdq[from_mem](out r0: xmm; r1: ptr64)
+ptest(var r0: xmm; r1: xmm)
+ptest[from_mem](var r0: xmm; r1: ptr128)
+pcmpeqq(var r0: xmm; r1: xmm)
+pcmpeqq[from_mem](var r0: xmm; r1: ptr128)
+packusdw(var r0: xmm; r1: xmm)
+packusdw[from_mem](var r0: xmm; r1: ptr128)
+movntdqa(out r0: xmm; r1: ptr128)
+
+; SSE4.2
+pcmpestri(var r0: xmm; r1: xmm; imm: i32)
+pcmpestri[from_mem](var r0: xmm; r1: ptr128; imm: i32)
+pcmpestrm(var r0: xmm; r1: xmm; imm: i32)
+pcmpestrm[from_mem](var r0: xmm; r1: ptr128; imm: i32)
+pcmpistri(var r0: xmm; r1: xmm; imm: i32)
+pcmpistri[from_mem](var r0: xmm; r1: ptr128; imm: i32)
+pcmpistrm(var r0: xmm; r1: xmm; imm: i32)
+pcmpistrm[from_mem](var r0: xmm; r1: ptr128; imm: i32)
+pcmpgtq(var r0: xmm; r1: xmm)
+pcmpgtq[from_mem](var r0: xmm; r1: ptr128)

+ 690 - 22
compiler/x86/x86mmfirst.inc

@@ -1,19 +1,59 @@
 in_x86_movss
 in_x86_movss
-,in_x86_movaps
-,in_x86_movups
+,in_x86_movapd
+,in_x86_movhpd
+,in_x86_movlpd
+,in_x86_movupd
+,in_x86_movsd_from_mem
+,in_x86_movd_from_mem
+,in_x86_pmovsxbd_from_mem
+,in_x86_pmovzxbd_from_mem
+,in_x86_pmovsxwq_from_mem
+,in_x86_pmovzxwq_from_mem
 : //out r0:xmm;r1:ptr32;
 : //out r0:xmm;r1:ptr32;
   begin
   begin
     expectloc:=LOC_MMREGISTER;
     expectloc:=LOC_MMREGISTER;
     result:=nil;
     result:=nil;
   end;
   end;
+in_x86_movaps
+,in_x86_movups
+,in_x86_sqrtpd_from_mem
+,in_x86_movdqa_from_mem
+,in_x86_movdqu_from_mem
+,in_x86_movsldup_from_mem
+,in_x86_movshdup_from_mem
+,in_x86_lddqu
+,in_x86_pabsb_from_mem
+,in_x86_pabsw_from_mem
+,in_x86_pabsd_from_mem
+,in_x86_movntdqa
+: //out r0:xmm;r1:ptr128;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
 in_x86_movss_to_mem
 in_x86_movss_to_mem
-,in_x86_movaps_to_mem
-,in_x86_movups_to_mem
+,in_x86_movapd_to_mem
+,in_x86_movntpd_to_mem
+,in_x86_movhpd_to_mem
+,in_x86_movlpd_to_mem
+,in_x86_movupd_to_mem
+,in_x86_movsd_to_mem
+,in_x86_movd_to_mem
 : //r0:ptr32;r1:xmm;
 : //r0:ptr32;r1:xmm;
   begin
   begin
     expectloc:=LOC_VOID;
     expectloc:=LOC_VOID;
     result:=nil;
     result:=nil;
   end;
   end;
+in_x86_movaps_to_mem
+,in_x86_movups_to_mem
+,in_x86_movdqa
+,in_x86_movdqu
+,in_x86_movntdq
+: //r0:ptr128;r1:xmm;
+  begin
+    expectloc:=LOC_VOID;
+    result:=nil;
+  end;
 in_x86_movss_to_val
 in_x86_movss_to_val
 : //out r0:f32;r1:xmm;
 : //out r0:f32;r1:xmm;
   begin
   begin
@@ -28,11 +68,32 @@ in_x86_movss_from_val
   end;
   end;
 in_x86_movlps
 in_x86_movlps
 ,in_x86_movhps
 ,in_x86_movhps
-: //var r0:xmm;r1:ptr32;
+,in_x86_cvtpi2ps_from_mem
+,in_x86_addsd_from_mem
+,in_x86_divsd_from_mem
+,in_x86_maxsd_from_mem
+,in_x86_minsd_from_mem
+,in_x86_mulsd_from_mem
+,in_x86_subsd_from_mem
+,in_x86_comisd_from_mem
+,in_x86_ucomisd_from_mem
+,in_x86_cvtdq2pd_from_mem
+,in_x86_cvtpi2pd_from_mem
+,in_x86_cvtps2pd_from_mem
+,in_x86_cvtsd2ss_from_mem
+: //var r0:xmm;r1:ptr64;
   begin
   begin
     expectloc:=LOC_MMREGISTER;
     expectloc:=LOC_MMREGISTER;
     result:=nil;
     result:=nil;
   end;
   end;
+in_x86_movlps_to_mem
+,in_x86_movhps_to_mem
+,in_x86_movq_to_mem
+: //r0:ptr64;r1:xmm;
+  begin
+    expectloc:=LOC_VOID;
+    result:=nil;
+  end;
 in_x86_movlhps
 in_x86_movlhps
 ,in_x86_movhlps
 ,in_x86_movhlps
 ,in_x86_addss
 ,in_x86_addss
@@ -59,41 +120,349 @@ in_x86_movlhps
 ,in_x86_andnps
 ,in_x86_andnps
 ,in_x86_unpckhps
 ,in_x86_unpckhps
 ,in_x86_unpcklps
 ,in_x86_unpcklps
-,in_x86_pmulhuw
-,in_x86_psadbw
-,in_x86_pavgb
-,in_x86_pavgw
-,in_x86_pmaxub
-,in_x86_pminub
-,in_x86_pmaxsw
-,in_x86_pminsw
+,in_x86_addpd
+,in_x86_addsd
+,in_x86_divpd
+,in_x86_divsd
+,in_x86_maxpd
+,in_x86_maxsd
+,in_x86_minpd
+,in_x86_minsd
+,in_x86_mulpd
+,in_x86_mulsd
+,in_x86_subpd
+,in_x86_subsd
+,in_x86_andpd
+,in_x86_andnpd
+,in_x86_orpd
+,in_x86_xorpd
+,in_x86_comisd
+,in_x86_ucomisd
+,in_x86_unpckhpd
+,in_x86_unpcklpd
+,in_x86_cvtdq2pd
+,in_x86_cvtdq2ps
+,in_x86_cvtpd2dq
+,in_x86_cvtpd2ps
+,in_x86_cvtps2dq
+,in_x86_cvtps2pd
+,in_x86_cvtsd2ss
+,in_x86_cvtss2sd
+,in_x86_cvttpd2dq
+,in_x86_cvttps2dq
+,in_x86_packssdw
+,in_x86_packsswb
+,in_x86_packuswb
+,in_x86_paddb
+,in_x86_paddw
+,in_x86_paddd
+,in_x86_paddq
+,in_x86_paddsb
+,in_x86_paddsw
+,in_x86_paddusb
+,in_x86_paddusw
+,in_x86_pand
+,in_x86_pandn
+,in_x86_por
+,in_x86_pxor
+,in_x86_pcmpeqb
+,in_x86_pcmpeqw
+,in_x86_pcmpeqd
+,in_x86_pcmpgtb
+,in_x86_pcmpgtw
+,in_x86_pcmpgtd
+,in_x86_pmullw
+,in_x86_pmulhw
+,in_x86_pmulhuw_sse2
+,in_x86_pmuludq
+,in_x86_psllw_sse2
+,in_x86_pslld_sse2
+,in_x86_psllq_sse2
+,in_x86_psrad_sse2
+,in_x86_psraw_sse2
+,in_x86_psrlw_sse2
+,in_x86_psrld_sse2
+,in_x86_psrlq_sse2
+,in_x86_psubb
+,in_x86_psubw
+,in_x86_psubd
+,in_x86_psubq
+,in_x86_psubsb
+,in_x86_psubsw
+,in_x86_pmaddwd
+,in_x86_psubusb
+,in_x86_psubusw
+,in_x86_punpckhbw
+,in_x86_punpckhwd
+,in_x86_punpckhdq
+,in_x86_punpcklbw
+,in_x86_punpcklwd
+,in_x86_punpckldq
+,in_x86_pavgb_sse2
+,in_x86_pavgw_sse2
+,in_x86_pminub_sse2
+,in_x86_pminsw_sse2
+,in_x86_pmaxsw_sse2
+,in_x86_pmaxub_sse2
+,in_x86_psadbw_sse2
+,in_x86_punpckhqdq
+,in_x86_punpcklqdq
+,in_x86_addsubps
+,in_x86_addsubpd
+,in_x86_haddps
+,in_x86_haddpd
+,in_x86_hsubps
+,in_x86_hsubpd
+,in_x86_psignb
+,in_x86_psignw
+,in_x86_psignd
+,in_x86_pshufb
+,in_x86_pmulhrsw
+,in_x86_pmaddubsw
+,in_x86_phsubw
+,in_x86_phsubsw
+,in_x86_phsubd
+,in_x86_phaddsw
+,in_x86_phaddw
+,in_x86_phaddd
+,in_x86_phminposuw
+,in_x86_pmulld
+,in_x86_pmuldq
+,in_x86_pminsb
+,in_x86_pminuw
+,in_x86_pminsd
+,in_x86_pminud
+,in_x86_pmaxsb
+,in_x86_pmaxuw
+,in_x86_pmaxsd
+,in_x86_pmaxud
+,in_x86_ptest
+,in_x86_pcmpeqq
+,in_x86_packusdw
+,in_x86_pcmpgtq
 : //var r0:xmm;r1:xmm;
 : //var r0:xmm;r1:xmm;
   begin
   begin
     expectloc:=LOC_MMREGISTER;
     expectloc:=LOC_MMREGISTER;
     result:=nil;
     result:=nil;
   end;
   end;
+in_x86_addss_from_mem
+,in_x86_subss_from_mem
+,in_x86_mulss_from_mem
+,in_x86_divss_from_mem
+,in_x86_rcpss_from_mem
+,in_x86_sqrtss_from_mem
+,in_x86_maxss_from_mem
+,in_x86_minss_from_mem
+,in_x86_rsqrtss_from_mem
+,in_x86_cvtsi2ss_from_mem
+,in_x86_cvtsi2sd_from_mem
+,in_x86_cvtss2sd_from_mem
+: //var r0:xmm;r1:ptr32;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_addps_from_mem
+,in_x86_subps_from_mem
+,in_x86_mulps_from_mem
+,in_x86_divps_from_mem
+,in_x86_rcpps_from_mem
+,in_x86_sqrtps_from_mem
+,in_x86_maxps_from_mem
+,in_x86_minps_from_mem
+,in_x86_rsqrtps_from_mem
+,in_x86_andps_from_mem
+,in_x86_orps_from_mem
+,in_x86_xorps_from_mem
+,in_x86_andnps_from_mem
+,in_x86_unpckhps_from_mem
+,in_x86_unpcklps_from_mem
+,in_x86_addpd_from_mem
+,in_x86_divpd_from_mem
+,in_x86_maxpd_from_mem
+,in_x86_minpd_from_mem
+,in_x86_mulpd_from_mem
+,in_x86_subpd_from_mem
+,in_x86_andpd_from_mem
+,in_x86_andnpd_from_mem
+,in_x86_orpd_from_mem
+,in_x86_xorpd_from_mem
+,in_x86_unpckhpd_from_mem
+,in_x86_unpcklpd_from_mem
+,in_x86_cvtdq2ps_from_mem
+,in_x86_cvtpd2dq_from_mem
+,in_x86_cvtpd2ps_from_mem
+,in_x86_cvtps2dq_from_mem
+,in_x86_cvttpd2dq_from_mem
+,in_x86_cvttps2dq_from_mem
+,in_x86_packssdw_from_mem
+,in_x86_packsswb_from_mem
+,in_x86_packuswb_from_mem
+,in_x86_paddb_from_mem
+,in_x86_paddw_from_mem
+,in_x86_paddd_from_mem
+,in_x86_paddq_from_mem
+,in_x86_paddsb_from_mem
+,in_x86_paddsw_from_mem
+,in_x86_paddusb_from_mem
+,in_x86_paddusw_from_mem
+,in_x86_pand_from_mem
+,in_x86_pandn_from_mem
+,in_x86_por_from_mem
+,in_x86_pxor_from_mem
+,in_x86_pcmpeqb_from_mem
+,in_x86_pcmpeqw_from_mem
+,in_x86_pcmpeqd_from_mem
+,in_x86_pcmpgtb_from_mem
+,in_x86_pcmpgtw_from_mem
+,in_x86_pcmpgtd_from_mem
+,in_x86_pmullw_from_mem
+,in_x86_pmulhw_from_mem
+,in_x86_pmulhuw_from_mem
+,in_x86_pmuludq_from_mem
+,in_x86_psllw_from_mem
+,in_x86_pslld_from_mem
+,in_x86_psllq_from_mem
+,in_x86_psrad_from_mem
+,in_x86_psraw_from_mem
+,in_x86_psrlw_from_mem
+,in_x86_psrld_from_mem
+,in_x86_psrlq_from_mem
+,in_x86_psubb_from_mem
+,in_x86_psubw_from_mem
+,in_x86_psubd_from_mem
+,in_x86_psubq_from_mem
+,in_x86_psubsb_from_mem
+,in_x86_psubsw_from_mem
+,in_x86_pmaddwd_from_mem
+,in_x86_psubusb_from_mem
+,in_x86_psubusw_from_mem
+,in_x86_punpckhbw_from_mem
+,in_x86_punpckhwd_from_mem
+,in_x86_punpckhdq_from_mem
+,in_x86_punpcklbw_from_mem
+,in_x86_punpcklwd_from_mem
+,in_x86_punpckldq_from_mem
+,in_x86_pavgb_from_mem
+,in_x86_pavgw_from_mem
+,in_x86_pminub_from_mem
+,in_x86_pminsw_from_mem
+,in_x86_pmaxsw_from_mem
+,in_x86_pmaxub_from_mem
+,in_x86_psadbw_from_mem
+,in_x86_punpckhqdq_from_mem
+,in_x86_punpcklqdq_from_mem
+,in_x86_addsubps_from_mem
+,in_x86_addsubpd_from_mem
+,in_x86_haddps_from_mem
+,in_x86_haddpd_from_mem
+,in_x86_hsubps_from_mem
+,in_x86_hsubpd_from_mem
+,in_x86_psignb_from_mem
+,in_x86_psignw_from_mem
+,in_x86_psignd_from_mem
+,in_x86_pshufb_from_mem
+,in_x86_pmulhrsw_from_mem
+,in_x86_pmaddubsw_from_mem
+,in_x86_phsubw_from_mem
+,in_x86_phsubsw_from_mem
+,in_x86_phsubd_from_mem
+,in_x86_phaddsw_from_mem
+,in_x86_phaddw_from_mem
+,in_x86_phaddd_from_mem
+,in_x86_phminposuw_from_mem
+,in_x86_pmulld_from_mem
+,in_x86_pmuldq_from_mem
+,in_x86_pminsb_from_mem
+,in_x86_pminuw_from_mem
+,in_x86_pminsd_from_mem
+,in_x86_pminud_from_mem
+,in_x86_pmaxsb_from_mem
+,in_x86_pmaxuw_from_mem
+,in_x86_pmaxsd_from_mem
+,in_x86_pmaxud_from_mem
+,in_x86_ptest_from_mem
+,in_x86_pcmpeqq_from_mem
+,in_x86_packusdw_from_mem
+,in_x86_pcmpgtq_from_mem
+: //var r0:xmm;r1:ptr128;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
 in_x86_cmpss
 in_x86_cmpss
 ,in_x86_cmpps
 ,in_x86_cmpps
 ,in_x86_shufps
 ,in_x86_shufps
+,in_x86_cmppd
+,in_x86_cmpsd
+,in_x86_shufpd
+,in_x86_palignr
+,in_x86_dpps
+,in_x86_dppd
+,in_x86_blendps
+,in_x86_blendpd
+,in_x86_insertps
+,in_x86_mpsadbw
+,in_x86_pblendw
+,in_x86_pcmpestri
+,in_x86_pcmpestrm
+,in_x86_pcmpistri
+,in_x86_pcmpistrm
 : //var r0:xmm;r1:xmm;imm:i32;
 : //var r0:xmm;r1:xmm;imm:i32;
   begin
   begin
     expectloc:=LOC_MMREGISTER;
     expectloc:=LOC_MMREGISTER;
     result:=nil;
     result:=nil;
   end;
   end;
+in_x86_cmpss_from_mem
+,in_x86_insertps_from_mem
+,in_x86_pinsrd_from_mem
+: //var r0:xmm;r1:ptr32;imm:i32;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_cmpps_from_mem
+,in_x86_shufps_from_mem
+,in_x86_cmppd_from_mem
+,in_x86_shufpd_from_mem
+,in_x86_palignr_from_mem
+,in_x86_dpps_from_mem
+,in_x86_dppd_from_mem
+,in_x86_blendps_from_mem
+,in_x86_blendpd_from_mem
+,in_x86_mpsadbw_from_mem
+,in_x86_pblendw_from_mem
+,in_x86_pcmpestri_from_mem
+,in_x86_pcmpestrm_from_mem
+,in_x86_pcmpistri_from_mem
+,in_x86_pcmpistrm_from_mem
+: //var r0:xmm;r1:ptr128;imm:i32;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
 in_x86_cvtsi2ss
 in_x86_cvtsi2ss
-: //var r0:xmm;r1:r32;
+: //var r0:xmm;r1:reg;
   begin
   begin
     expectloc:=LOC_MMREGISTER;
     expectloc:=LOC_MMREGISTER;
     result:=nil;
     result:=nil;
   end;
   end;
 in_x86_cvtss2si
 in_x86_cvtss2si
 ,in_x86_cvttss2si
 ,in_x86_cvttss2si
-: //out r0:r32;r1:xmm;
+: //out r0:reg;r1:xmm;
+  begin
+    expectloc:=LOC_REGISTER;
+    result:=nil;
+  end;
+in_x86_cvtss2si_from_mem
+,in_x86_cvttss2si_from_mem
+: //out r0:reg;r1:ptr32;
   begin
   begin
     expectloc:=LOC_REGISTER;
     expectloc:=LOC_REGISTER;
     result:=nil;
     result:=nil;
   end;
   end;
 in_x86_cvtpi2ps
 in_x86_cvtpi2ps
+,in_x86_cvtpi2pd
 : //var r0:xmm;r1:mm;
 : //var r0:xmm;r1:mm;
   begin
   begin
     expectloc:=LOC_MMREGISTER;
     expectloc:=LOC_MMREGISTER;
@@ -101,11 +470,19 @@ in_x86_cvtpi2ps
   end;
   end;
 in_x86_cvtps2pi
 in_x86_cvtps2pi
 ,in_x86_cvttps2pi
 ,in_x86_cvttps2pi
+,in_x86_movdq2q
 : //out r0:mm;r1:xmm;
 : //out r0:mm;r1:xmm;
   begin
   begin
     expectloc:=LOC_MMXREGISTER;
     expectloc:=LOC_MMXREGISTER;
     result:=nil;
     result:=nil;
   end;
   end;
+in_x86_cvtps2pi_from_mem
+,in_x86_cvttps2pi_from_mem
+: //out r0:mm;r1:ptr64;
+  begin
+    expectloc:=LOC_MMXREGISTER;
+    result:=nil;
+  end;
 in_x86_pmulhuw_mmx
 in_x86_pmulhuw_mmx
 ,in_x86_psadbw_mmx
 ,in_x86_psadbw_mmx
 ,in_x86_pavgb_mmx
 ,in_x86_pavgb_mmx
@@ -119,20 +496,33 @@ in_x86_pmulhuw_mmx
     expectloc:=LOC_MMXREGISTER;
     expectloc:=LOC_MMXREGISTER;
     result:=nil;
     result:=nil;
   end;
   end;
+in_x86_pmulhuw_mmx_from_mem
+,in_x86_psadbw_mmx_from_mem
+,in_x86_pavgb_mmx_from_mem
+,in_x86_pavgw_mmx_from_mem
+,in_x86_pmaxub_mmx_from_mem
+,in_x86_pminub_mmx_from_mem
+,in_x86_pmaxsw_mmx_from_mem
+,in_x86_pminsw_mmx_from_mem
+: //var r0:mm;r1:ptr64;
+  begin
+    expectloc:=LOC_MMXREGISTER;
+    result:=nil;
+  end;
 in_x86_pextrw_mmx
 in_x86_pextrw_mmx
-: //out r0:r32;r1:mm;imm:i32;
+: //out r0:reg;r1:mm;imm:i32;
   begin
   begin
     expectloc:=LOC_REGISTER;
     expectloc:=LOC_REGISTER;
     result:=nil;
     result:=nil;
   end;
   end;
 in_x86_pinsrw_mmx
 in_x86_pinsrw_mmx
-: //var r0:mm;r1:r32;imm:i32;
+: //var r0:mm;r1:reg;imm:i32;
   begin
   begin
     expectloc:=LOC_MMXREGISTER;
     expectloc:=LOC_MMXREGISTER;
     result:=nil;
     result:=nil;
   end;
   end;
-in_x86_pmovmskb
-: //out r0:r32;r1:mm;
+in_x86_pmovmskb_mmx
+: //out r0:reg;r1:mm;
   begin
   begin
     expectloc:=LOC_REGISTER;
     expectloc:=LOC_REGISTER;
     result:=nil;
     result:=nil;
@@ -143,15 +533,293 @@ in_x86_pshufw
     expectloc:=LOC_MMXREGISTER;
     expectloc:=LOC_MMXREGISTER;
     result:=nil;
     result:=nil;
   end;
   end;
-in_x86_pextrw
-: //out r0:r32;r1:xmm;imm:i32;
+in_x86_pshufw_from_mem
+: //out r0:mm;r1:ptr64;imm:i32;
+  begin
+    expectloc:=LOC_MMXREGISTER;
+    result:=nil;
+  end;
+in_x86_movmskpd
+,in_x86_movd_to_reg
+: //out r0:r32;r1:xmm;
   begin
   begin
     expectloc:=LOC_REGISTER;
     expectloc:=LOC_REGISTER;
     result:=nil;
     result:=nil;
   end;
   end;
-in_x86_pinsrw
+in_x86_movsd_to_val
+: //out r0:f64;r1:xmm;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_movsd_from_val
+: //out r0:xmm;r1:f64;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_sqrtpd
+,in_x86_sqrtsd
+,in_x86_movddup
+,in_x86_movsldup
+,in_x86_movshdup
+,in_x86_pabsb
+,in_x86_pabsw
+,in_x86_pabsd
+,in_x86_pmovsxbw
+,in_x86_pmovzxbw
+,in_x86_pmovsxbd
+,in_x86_pmovzxbd
+,in_x86_pmovsxbq
+,in_x86_pmovzxbq
+,in_x86_pmovsxwd
+,in_x86_pmovzxwd
+,in_x86_pmovsxwq
+,in_x86_pmovzxwq
+,in_x86_pmovsxdq
+,in_x86_pmovzxdq
+: //out r0:xmm;r1:xmm;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_sqrtsd_from_mem
+,in_x86_movq_from_mem
+,in_x86_movddup_from_mem
+,in_x86_pmovsxbw_from_mem
+,in_x86_pmovzxbw_from_mem
+,in_x86_pmovsxwd_from_mem
+,in_x86_pmovzxwd_from_mem
+,in_x86_pmovsxdq_from_mem
+,in_x86_pmovzxdq_from_mem
+: //out r0:xmm;r1:ptr64;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_cmpsd_from_mem
+: //var r0:xmm;r1:ptr64;imm:i32;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_cvtpd2pi
+,in_x86_cvttpd2pi
+: //var r0:mm;r1:xmm;
+  begin
+    expectloc:=LOC_MMXREGISTER;
+    result:=nil;
+  end;
+in_x86_cvtpd2pi_from_mem
+,in_x86_cvttpd2pi_from_mem
+: //var r0:mm;r1:ptr128;
+  begin
+    expectloc:=LOC_MMXREGISTER;
+    result:=nil;
+  end;
+in_x86_cvtsd2si
+,in_x86_cvttsd2si
+: //var r0:sreg;r1:xmm;
+  begin
+    expectloc:=LOC_REGISTER;
+    result:=nil;
+  end;
+in_x86_cvtsd2si_from_mem
+,in_x86_cvttsd2si_from_mem
+: //var r0:sreg;r1:ptr64;
+  begin
+    expectloc:=LOC_REGISTER;
+    result:=nil;
+  end;
+in_x86_cvtsi2sd
+: //var r0:xmm;r1:r32;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_movd_from_reg
+: //out r0:xmm;r1:r32;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_pmovmskb
+: //var r0:r32;r1:xmm;
+  begin
+    expectloc:=LOC_REGISTER;
+    result:=nil;
+  end;
+in_x86_pextrw_sse2
+: //out r0:r16;r1:xmm;imm:i32;
+  begin
+    expectloc:=LOC_REGISTER;
+    result:=nil;
+  end;
+in_x86_pinsrw_sse2
+,in_x86_pinsrb
+,in_x86_pinsrd
 : //var r0:xmm;r1:r32;imm:i32;
 : //var r0:xmm;r1:r32;imm:i32;
   begin
   begin
     expectloc:=LOC_MMREGISTER;
     expectloc:=LOC_MMREGISTER;
     result:=nil;
     result:=nil;
   end;
   end;
+in_x86_pinsrw_from_mem
+: //var r0:xmm;r1:ptr16;imm:i32;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_psllw_sse2_imm
+,in_x86_pslld_sse2_imm
+,in_x86_psllq_sse2_imm
+,in_x86_psrad_sse2_imm
+,in_x86_psraw_sse2_imm
+,in_x86_psrlw_sse2_imm
+,in_x86_psrld_sse2_imm
+,in_x86_psrlq_sse2_imm
+,in_x86_pslldq
+,in_x86_psrldq
+: //var r0:xmm;imm:i32;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_maskmovdqu
+: //addr:edi_ptr;r0:xmm;r1:xmm;
+  begin
+    expectloc:=LOC_VOID;
+    result:=nil;
+  end;
+in_x86_movq2dq
+: //out r0:xmm;r1:mm;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_pshufhw
+,in_x86_pshuflw
+,in_x86_pshufd
+,in_x86_roundps
+,in_x86_roundss
+,in_x86_roundpd
+,in_x86_roundsd
+: //out r0:xmm;r1:xmm;imm:i32;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_pshufhw_from_mem
+,in_x86_pshuflw_from_mem
+,in_x86_pshufd_from_mem
+,in_x86_roundps_from_mem
+,in_x86_roundpd_from_mem
+: //out r0:xmm;r1:ptr128;imm:i32;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_blendvps
+,in_x86_blendvpd
+,in_x86_pblendvb
+: //var r0:xmm;r1:xmm;mask:implicit_xmm0;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_blendvps_from_mem
+,in_x86_blendvpd_from_mem
+,in_x86_pblendvb_from_mem
+: //var r0:xmm;r1:ptr128;mask:implicit_xmm0;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_roundss_from_mem
+: //out r0:xmm;r1:ptr32;imm:i32;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_roundsd_from_mem
+: //out r0:xmm;r1:ptr64;imm:i32;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+in_x86_extractps
+,in_x86_pextrd
+: //out r0:r32;r1:xmm;imm:i32;
+  begin
+    expectloc:=LOC_REGISTER;
+    result:=nil;
+  end;
+in_x86_extractps_from_mem
+,in_x86_pextrd_to_mem
+: //r0:ptr32;r1:xmm;imm:i32;
+  begin
+    expectloc:=LOC_VOID;
+    result:=nil;
+  end;
+in_x86_pinsrb_from_mem
+: //var r0:xmm;r1:ptr8;imm:i32;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+{$ifdef X86_64}
+in_x86_pinsrq
+: //var r0:xmm;r1:reg;imm:i32;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+{$endif}
+{$ifdef X86_64}
+in_x86_pinsrq_from_mem
+: //var r0:xmm;r1:ptr64;imm:i32;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;
+{$endif}
+in_x86_pextrb
+: //out r0:r8;r1:xmm;imm:i32;
+  begin
+    expectloc:=LOC_REGISTER;
+    result:=nil;
+  end;
+in_x86_pextrb_to_mem
+: //r0:ptr8;r1:xmm;imm:i32;
+  begin
+    expectloc:=LOC_VOID;
+    result:=nil;
+  end;
+in_x86_pextrw_sse41_to_mem
+: //r0:ptr16;r1:xmm;imm:i32;
+  begin
+    expectloc:=LOC_VOID;
+    result:=nil;
+  end;
+{$ifdef X86_64}
+in_x86_pextrq
+: //out r0:r32;r1:xmm;imm:i32;
+  begin
+    expectloc:=LOC_REGISTER;
+    result:=nil;
+  end;
+{$endif}
+{$ifdef X86_64}
+in_x86_pextrq_to_mem
+: //r0:ptr64;r1:xmm;imm:i32;
+  begin
+    expectloc:=LOC_VOID;
+    result:=nil;
+  end;
+{$endif}
+in_x86_pmovsxbq_from_mem
+,in_x86_pmovzxbq_from_mem
+: //out r0:xmm;r1:ptr16;
+  begin
+    expectloc:=LOC_MMREGISTER;
+    result:=nil;
+  end;

Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 869 - 31
compiler/x86/x86mmsecond.inc


+ 693 - 25
compiler/x86/x86mmtype.inc

@@ -1,19 +1,59 @@
 in_x86_movss
 in_x86_movss
-,in_x86_movaps
-,in_x86_movups
+,in_x86_movapd
+,in_x86_movhpd
+,in_x86_movlpd
+,in_x86_movupd
+,in_x86_movsd_from_mem
+,in_x86_movd_from_mem
+,in_x86_pmovsxbd_from_mem
+,in_x86_pmovzxbd_from_mem
+,in_x86_pmovsxwq_from_mem
+,in_x86_pmovzxwq_from_mem
 : //out r0:xmm;r1:ptr32;
 : //out r0:xmm;r1:ptr32;
   begin
   begin
     CheckParameters(1);
     CheckParameters(1);
     resultdef:=x86_m128type;
     resultdef:=x86_m128type;
   end;
   end;
+in_x86_movaps
+,in_x86_movups
+,in_x86_sqrtpd_from_mem
+,in_x86_movdqa_from_mem
+,in_x86_movdqu_from_mem
+,in_x86_movsldup_from_mem
+,in_x86_movshdup_from_mem
+,in_x86_lddqu
+,in_x86_pabsb_from_mem
+,in_x86_pabsw_from_mem
+,in_x86_pabsd_from_mem
+,in_x86_movntdqa
+: //out r0:xmm;r1:ptr128;
+  begin
+    CheckParameters(1);
+    resultdef:=x86_m128type;
+  end;
 in_x86_movss_to_mem
 in_x86_movss_to_mem
-,in_x86_movaps_to_mem
-,in_x86_movups_to_mem
+,in_x86_movapd_to_mem
+,in_x86_movntpd_to_mem
+,in_x86_movhpd_to_mem
+,in_x86_movlpd_to_mem
+,in_x86_movupd_to_mem
+,in_x86_movsd_to_mem
+,in_x86_movd_to_mem
 : //r0:ptr32;r1:xmm;
 : //r0:ptr32;r1:xmm;
   begin
   begin
     CheckParameters(2);
     CheckParameters(2);
     resultdef:=voidtype;
     resultdef:=voidtype;
   end;
   end;
+in_x86_movaps_to_mem
+,in_x86_movups_to_mem
+,in_x86_movdqa
+,in_x86_movdqu
+,in_x86_movntdq
+: //r0:ptr128;r1:xmm;
+  begin
+    CheckParameters(2);
+    resultdef:=voidtype;
+  end;
 in_x86_movss_to_val
 in_x86_movss_to_val
 : //out r0:f32;r1:xmm;
 : //out r0:f32;r1:xmm;
   begin
   begin
@@ -28,11 +68,32 @@ in_x86_movss_from_val
   end;
   end;
 in_x86_movlps
 in_x86_movlps
 ,in_x86_movhps
 ,in_x86_movhps
-: //var r0:xmm;r1:ptr32;
+,in_x86_cvtpi2ps_from_mem
+,in_x86_addsd_from_mem
+,in_x86_divsd_from_mem
+,in_x86_maxsd_from_mem
+,in_x86_minsd_from_mem
+,in_x86_mulsd_from_mem
+,in_x86_subsd_from_mem
+,in_x86_comisd_from_mem
+,in_x86_ucomisd_from_mem
+,in_x86_cvtdq2pd_from_mem
+,in_x86_cvtpi2pd_from_mem
+,in_x86_cvtps2pd_from_mem
+,in_x86_cvtsd2ss_from_mem
+: //var r0:xmm;r1:ptr64;
   begin
   begin
     CheckParameters(2);
     CheckParameters(2);
     resultdef:=x86_m128type;
     resultdef:=x86_m128type;
   end;
   end;
+in_x86_movlps_to_mem
+,in_x86_movhps_to_mem
+,in_x86_movq_to_mem
+: //r0:ptr64;r1:xmm;
+  begin
+    CheckParameters(2);
+    resultdef:=voidtype;
+  end;
 in_x86_movlhps
 in_x86_movlhps
 ,in_x86_movhlps
 ,in_x86_movhlps
 ,in_x86_addss
 ,in_x86_addss
@@ -59,41 +120,349 @@ in_x86_movlhps
 ,in_x86_andnps
 ,in_x86_andnps
 ,in_x86_unpckhps
 ,in_x86_unpckhps
 ,in_x86_unpcklps
 ,in_x86_unpcklps
-,in_x86_pmulhuw
-,in_x86_psadbw
-,in_x86_pavgb
-,in_x86_pavgw
-,in_x86_pmaxub
-,in_x86_pminub
-,in_x86_pmaxsw
-,in_x86_pminsw
+,in_x86_addpd
+,in_x86_addsd
+,in_x86_divpd
+,in_x86_divsd
+,in_x86_maxpd
+,in_x86_maxsd
+,in_x86_minpd
+,in_x86_minsd
+,in_x86_mulpd
+,in_x86_mulsd
+,in_x86_subpd
+,in_x86_subsd
+,in_x86_andpd
+,in_x86_andnpd
+,in_x86_orpd
+,in_x86_xorpd
+,in_x86_comisd
+,in_x86_ucomisd
+,in_x86_unpckhpd
+,in_x86_unpcklpd
+,in_x86_cvtdq2pd
+,in_x86_cvtdq2ps
+,in_x86_cvtpd2dq
+,in_x86_cvtpd2ps
+,in_x86_cvtps2dq
+,in_x86_cvtps2pd
+,in_x86_cvtsd2ss
+,in_x86_cvtss2sd
+,in_x86_cvttpd2dq
+,in_x86_cvttps2dq
+,in_x86_packssdw
+,in_x86_packsswb
+,in_x86_packuswb
+,in_x86_paddb
+,in_x86_paddw
+,in_x86_paddd
+,in_x86_paddq
+,in_x86_paddsb
+,in_x86_paddsw
+,in_x86_paddusb
+,in_x86_paddusw
+,in_x86_pand
+,in_x86_pandn
+,in_x86_por
+,in_x86_pxor
+,in_x86_pcmpeqb
+,in_x86_pcmpeqw
+,in_x86_pcmpeqd
+,in_x86_pcmpgtb
+,in_x86_pcmpgtw
+,in_x86_pcmpgtd
+,in_x86_pmullw
+,in_x86_pmulhw
+,in_x86_pmulhuw_sse2
+,in_x86_pmuludq
+,in_x86_psllw_sse2
+,in_x86_pslld_sse2
+,in_x86_psllq_sse2
+,in_x86_psrad_sse2
+,in_x86_psraw_sse2
+,in_x86_psrlw_sse2
+,in_x86_psrld_sse2
+,in_x86_psrlq_sse2
+,in_x86_psubb
+,in_x86_psubw
+,in_x86_psubd
+,in_x86_psubq
+,in_x86_psubsb
+,in_x86_psubsw
+,in_x86_pmaddwd
+,in_x86_psubusb
+,in_x86_psubusw
+,in_x86_punpckhbw
+,in_x86_punpckhwd
+,in_x86_punpckhdq
+,in_x86_punpcklbw
+,in_x86_punpcklwd
+,in_x86_punpckldq
+,in_x86_pavgb_sse2
+,in_x86_pavgw_sse2
+,in_x86_pminub_sse2
+,in_x86_pminsw_sse2
+,in_x86_pmaxsw_sse2
+,in_x86_pmaxub_sse2
+,in_x86_psadbw_sse2
+,in_x86_punpckhqdq
+,in_x86_punpcklqdq
+,in_x86_addsubps
+,in_x86_addsubpd
+,in_x86_haddps
+,in_x86_haddpd
+,in_x86_hsubps
+,in_x86_hsubpd
+,in_x86_psignb
+,in_x86_psignw
+,in_x86_psignd
+,in_x86_pshufb
+,in_x86_pmulhrsw
+,in_x86_pmaddubsw
+,in_x86_phsubw
+,in_x86_phsubsw
+,in_x86_phsubd
+,in_x86_phaddsw
+,in_x86_phaddw
+,in_x86_phaddd
+,in_x86_phminposuw
+,in_x86_pmulld
+,in_x86_pmuldq
+,in_x86_pminsb
+,in_x86_pminuw
+,in_x86_pminsd
+,in_x86_pminud
+,in_x86_pmaxsb
+,in_x86_pmaxuw
+,in_x86_pmaxsd
+,in_x86_pmaxud
+,in_x86_ptest
+,in_x86_pcmpeqq
+,in_x86_packusdw
+,in_x86_pcmpgtq
 : //var r0:xmm;r1:xmm;
 : //var r0:xmm;r1:xmm;
   begin
   begin
     CheckParameters(2);
     CheckParameters(2);
     resultdef:=x86_m128type;
     resultdef:=x86_m128type;
   end;
   end;
+in_x86_addss_from_mem
+,in_x86_subss_from_mem
+,in_x86_mulss_from_mem
+,in_x86_divss_from_mem
+,in_x86_rcpss_from_mem
+,in_x86_sqrtss_from_mem
+,in_x86_maxss_from_mem
+,in_x86_minss_from_mem
+,in_x86_rsqrtss_from_mem
+,in_x86_cvtsi2ss_from_mem
+,in_x86_cvtsi2sd_from_mem
+,in_x86_cvtss2sd_from_mem
+: //var r0:xmm;r1:ptr32;
+  begin
+    CheckParameters(2);
+    resultdef:=x86_m128type;
+  end;
+in_x86_addps_from_mem
+,in_x86_subps_from_mem
+,in_x86_mulps_from_mem
+,in_x86_divps_from_mem
+,in_x86_rcpps_from_mem
+,in_x86_sqrtps_from_mem
+,in_x86_maxps_from_mem
+,in_x86_minps_from_mem
+,in_x86_rsqrtps_from_mem
+,in_x86_andps_from_mem
+,in_x86_orps_from_mem
+,in_x86_xorps_from_mem
+,in_x86_andnps_from_mem
+,in_x86_unpckhps_from_mem
+,in_x86_unpcklps_from_mem
+,in_x86_addpd_from_mem
+,in_x86_divpd_from_mem
+,in_x86_maxpd_from_mem
+,in_x86_minpd_from_mem
+,in_x86_mulpd_from_mem
+,in_x86_subpd_from_mem
+,in_x86_andpd_from_mem
+,in_x86_andnpd_from_mem
+,in_x86_orpd_from_mem
+,in_x86_xorpd_from_mem
+,in_x86_unpckhpd_from_mem
+,in_x86_unpcklpd_from_mem
+,in_x86_cvtdq2ps_from_mem
+,in_x86_cvtpd2dq_from_mem
+,in_x86_cvtpd2ps_from_mem
+,in_x86_cvtps2dq_from_mem
+,in_x86_cvttpd2dq_from_mem
+,in_x86_cvttps2dq_from_mem
+,in_x86_packssdw_from_mem
+,in_x86_packsswb_from_mem
+,in_x86_packuswb_from_mem
+,in_x86_paddb_from_mem
+,in_x86_paddw_from_mem
+,in_x86_paddd_from_mem
+,in_x86_paddq_from_mem
+,in_x86_paddsb_from_mem
+,in_x86_paddsw_from_mem
+,in_x86_paddusb_from_mem
+,in_x86_paddusw_from_mem
+,in_x86_pand_from_mem
+,in_x86_pandn_from_mem
+,in_x86_por_from_mem
+,in_x86_pxor_from_mem
+,in_x86_pcmpeqb_from_mem
+,in_x86_pcmpeqw_from_mem
+,in_x86_pcmpeqd_from_mem
+,in_x86_pcmpgtb_from_mem
+,in_x86_pcmpgtw_from_mem
+,in_x86_pcmpgtd_from_mem
+,in_x86_pmullw_from_mem
+,in_x86_pmulhw_from_mem
+,in_x86_pmulhuw_from_mem
+,in_x86_pmuludq_from_mem
+,in_x86_psllw_from_mem
+,in_x86_pslld_from_mem
+,in_x86_psllq_from_mem
+,in_x86_psrad_from_mem
+,in_x86_psraw_from_mem
+,in_x86_psrlw_from_mem
+,in_x86_psrld_from_mem
+,in_x86_psrlq_from_mem
+,in_x86_psubb_from_mem
+,in_x86_psubw_from_mem
+,in_x86_psubd_from_mem
+,in_x86_psubq_from_mem
+,in_x86_psubsb_from_mem
+,in_x86_psubsw_from_mem
+,in_x86_pmaddwd_from_mem
+,in_x86_psubusb_from_mem
+,in_x86_psubusw_from_mem
+,in_x86_punpckhbw_from_mem
+,in_x86_punpckhwd_from_mem
+,in_x86_punpckhdq_from_mem
+,in_x86_punpcklbw_from_mem
+,in_x86_punpcklwd_from_mem
+,in_x86_punpckldq_from_mem
+,in_x86_pavgb_from_mem
+,in_x86_pavgw_from_mem
+,in_x86_pminub_from_mem
+,in_x86_pminsw_from_mem
+,in_x86_pmaxsw_from_mem
+,in_x86_pmaxub_from_mem
+,in_x86_psadbw_from_mem
+,in_x86_punpckhqdq_from_mem
+,in_x86_punpcklqdq_from_mem
+,in_x86_addsubps_from_mem
+,in_x86_addsubpd_from_mem
+,in_x86_haddps_from_mem
+,in_x86_haddpd_from_mem
+,in_x86_hsubps_from_mem
+,in_x86_hsubpd_from_mem
+,in_x86_psignb_from_mem
+,in_x86_psignw_from_mem
+,in_x86_psignd_from_mem
+,in_x86_pshufb_from_mem
+,in_x86_pmulhrsw_from_mem
+,in_x86_pmaddubsw_from_mem
+,in_x86_phsubw_from_mem
+,in_x86_phsubsw_from_mem
+,in_x86_phsubd_from_mem
+,in_x86_phaddsw_from_mem
+,in_x86_phaddw_from_mem
+,in_x86_phaddd_from_mem
+,in_x86_phminposuw_from_mem
+,in_x86_pmulld_from_mem
+,in_x86_pmuldq_from_mem
+,in_x86_pminsb_from_mem
+,in_x86_pminuw_from_mem
+,in_x86_pminsd_from_mem
+,in_x86_pminud_from_mem
+,in_x86_pmaxsb_from_mem
+,in_x86_pmaxuw_from_mem
+,in_x86_pmaxsd_from_mem
+,in_x86_pmaxud_from_mem
+,in_x86_ptest_from_mem
+,in_x86_pcmpeqq_from_mem
+,in_x86_packusdw_from_mem
+,in_x86_pcmpgtq_from_mem
+: //var r0:xmm;r1:ptr128;
+  begin
+    CheckParameters(2);
+    resultdef:=x86_m128type;
+  end;
 in_x86_cmpss
 in_x86_cmpss
 ,in_x86_cmpps
 ,in_x86_cmpps
 ,in_x86_shufps
 ,in_x86_shufps
+,in_x86_cmppd
+,in_x86_cmpsd
+,in_x86_shufpd
+,in_x86_palignr
+,in_x86_dpps
+,in_x86_dppd
+,in_x86_blendps
+,in_x86_blendpd
+,in_x86_insertps
+,in_x86_mpsadbw
+,in_x86_pblendw
+,in_x86_pcmpestri
+,in_x86_pcmpestrm
+,in_x86_pcmpistri
+,in_x86_pcmpistrm
 : //var r0:xmm;r1:xmm;imm:i32;
 : //var r0:xmm;r1:xmm;imm:i32;
   begin
   begin
     CheckParameters(3);
     CheckParameters(3);
     resultdef:=x86_m128type;
     resultdef:=x86_m128type;
   end;
   end;
+in_x86_cmpss_from_mem
+,in_x86_insertps_from_mem
+,in_x86_pinsrd_from_mem
+: //var r0:xmm;r1:ptr32;imm:i32;
+  begin
+    CheckParameters(3);
+    resultdef:=x86_m128type;
+  end;
+in_x86_cmpps_from_mem
+,in_x86_shufps_from_mem
+,in_x86_cmppd_from_mem
+,in_x86_shufpd_from_mem
+,in_x86_palignr_from_mem
+,in_x86_dpps_from_mem
+,in_x86_dppd_from_mem
+,in_x86_blendps_from_mem
+,in_x86_blendpd_from_mem
+,in_x86_mpsadbw_from_mem
+,in_x86_pblendw_from_mem
+,in_x86_pcmpestri_from_mem
+,in_x86_pcmpestrm_from_mem
+,in_x86_pcmpistri_from_mem
+,in_x86_pcmpistrm_from_mem
+: //var r0:xmm;r1:ptr128;imm:i32;
+  begin
+    CheckParameters(3);
+    resultdef:=x86_m128type;
+  end;
 in_x86_cvtsi2ss
 in_x86_cvtsi2ss
-: //var r0:xmm;r1:r32;
+: //var r0:xmm;r1:reg;
   begin
   begin
     CheckParameters(2);
     CheckParameters(2);
     resultdef:=x86_m128type;
     resultdef:=x86_m128type;
   end;
   end;
 in_x86_cvtss2si
 in_x86_cvtss2si
 ,in_x86_cvttss2si
 ,in_x86_cvttss2si
-: //out r0:r32;r1:xmm;
+: //out r0:reg;r1:xmm;
   begin
   begin
     CheckParameters(1);
     CheckParameters(1);
-    resultdef:=u32inttype;
+    resultdef:=uinttype;
+  end;
+in_x86_cvtss2si_from_mem
+,in_x86_cvttss2si_from_mem
+: //out r0:reg;r1:ptr32;
+  begin
+    CheckParameters(1);
+    resultdef:=uinttype;
   end;
   end;
 in_x86_cvtpi2ps
 in_x86_cvtpi2ps
+,in_x86_cvtpi2pd
 : //var r0:xmm;r1:mm;
 : //var r0:xmm;r1:mm;
   begin
   begin
     CheckParameters(2);
     CheckParameters(2);
@@ -101,11 +470,19 @@ in_x86_cvtpi2ps
   end;
   end;
 in_x86_cvtps2pi
 in_x86_cvtps2pi
 ,in_x86_cvttps2pi
 ,in_x86_cvttps2pi
+,in_x86_movdq2q
 : //out r0:mm;r1:xmm;
 : //out r0:mm;r1:xmm;
   begin
   begin
     CheckParameters(1);
     CheckParameters(1);
     resultdef:=x86_m64type;
     resultdef:=x86_m64type;
   end;
   end;
+in_x86_cvtps2pi_from_mem
+,in_x86_cvttps2pi_from_mem
+: //out r0:mm;r1:ptr64;
+  begin
+    CheckParameters(1);
+    resultdef:=x86_m64type;
+  end;
 in_x86_pmulhuw_mmx
 in_x86_pmulhuw_mmx
 ,in_x86_psadbw_mmx
 ,in_x86_psadbw_mmx
 ,in_x86_pavgb_mmx
 ,in_x86_pavgb_mmx
@@ -119,23 +496,36 @@ in_x86_pmulhuw_mmx
     CheckParameters(2);
     CheckParameters(2);
     resultdef:=x86_m64type;
     resultdef:=x86_m64type;
   end;
   end;
+in_x86_pmulhuw_mmx_from_mem
+,in_x86_psadbw_mmx_from_mem
+,in_x86_pavgb_mmx_from_mem
+,in_x86_pavgw_mmx_from_mem
+,in_x86_pmaxub_mmx_from_mem
+,in_x86_pminub_mmx_from_mem
+,in_x86_pmaxsw_mmx_from_mem
+,in_x86_pminsw_mmx_from_mem
+: //var r0:mm;r1:ptr64;
+  begin
+    CheckParameters(2);
+    resultdef:=x86_m64type;
+  end;
 in_x86_pextrw_mmx
 in_x86_pextrw_mmx
-: //out r0:r32;r1:mm;imm:i32;
+: //out r0:reg;r1:mm;imm:i32;
   begin
   begin
     CheckParameters(2);
     CheckParameters(2);
-    resultdef:=u32inttype;
+    resultdef:=uinttype;
   end;
   end;
 in_x86_pinsrw_mmx
 in_x86_pinsrw_mmx
-: //var r0:mm;r1:r32;imm:i32;
+: //var r0:mm;r1:reg;imm:i32;
   begin
   begin
     CheckParameters(3);
     CheckParameters(3);
     resultdef:=x86_m64type;
     resultdef:=x86_m64type;
   end;
   end;
-in_x86_pmovmskb
-: //out r0:r32;r1:mm;
+in_x86_pmovmskb_mmx
+: //out r0:reg;r1:mm;
   begin
   begin
     CheckParameters(1);
     CheckParameters(1);
-    resultdef:=u32inttype;
+    resultdef:=uinttype;
   end;
   end;
 in_x86_pshufw
 in_x86_pshufw
 : //out r0:mm;r1:mm;imm:i32;
 : //out r0:mm;r1:mm;imm:i32;
@@ -143,15 +533,293 @@ in_x86_pshufw
     CheckParameters(2);
     CheckParameters(2);
     resultdef:=x86_m64type;
     resultdef:=x86_m64type;
   end;
   end;
-in_x86_pextrw
-: //out r0:r32;r1:xmm;imm:i32;
+in_x86_pshufw_from_mem
+: //out r0:mm;r1:ptr64;imm:i32;
+  begin
+    CheckParameters(2);
+    resultdef:=x86_m64type;
+  end;
+in_x86_movmskpd
+,in_x86_movd_to_reg
+: //out r0:r32;r1:xmm;
+  begin
+    CheckParameters(1);
+    resultdef:=u32inttype;
+  end;
+in_x86_movsd_to_val
+: //out r0:f64;r1:xmm;
+  begin
+    CheckParameters(1);
+    resultdef:=s64floattype;
+  end;
+in_x86_movsd_from_val
+: //out r0:xmm;r1:f64;
+  begin
+    CheckParameters(1);
+    resultdef:=x86_m128type;
+  end;
+in_x86_sqrtpd
+,in_x86_sqrtsd
+,in_x86_movddup
+,in_x86_movsldup
+,in_x86_movshdup
+,in_x86_pabsb
+,in_x86_pabsw
+,in_x86_pabsd
+,in_x86_pmovsxbw
+,in_x86_pmovzxbw
+,in_x86_pmovsxbd
+,in_x86_pmovzxbd
+,in_x86_pmovsxbq
+,in_x86_pmovzxbq
+,in_x86_pmovsxwd
+,in_x86_pmovzxwd
+,in_x86_pmovsxwq
+,in_x86_pmovzxwq
+,in_x86_pmovsxdq
+,in_x86_pmovzxdq
+: //out r0:xmm;r1:xmm;
+  begin
+    CheckParameters(1);
+    resultdef:=x86_m128type;
+  end;
+in_x86_sqrtsd_from_mem
+,in_x86_movq_from_mem
+,in_x86_movddup_from_mem
+,in_x86_pmovsxbw_from_mem
+,in_x86_pmovzxbw_from_mem
+,in_x86_pmovsxwd_from_mem
+,in_x86_pmovzxwd_from_mem
+,in_x86_pmovsxdq_from_mem
+,in_x86_pmovzxdq_from_mem
+: //out r0:xmm;r1:ptr64;
+  begin
+    CheckParameters(1);
+    resultdef:=x86_m128type;
+  end;
+in_x86_cmpsd_from_mem
+: //var r0:xmm;r1:ptr64;imm:i32;
+  begin
+    CheckParameters(3);
+    resultdef:=x86_m128type;
+  end;
+in_x86_cvtpd2pi
+,in_x86_cvttpd2pi
+: //var r0:mm;r1:xmm;
+  begin
+    CheckParameters(2);
+    resultdef:=x86_m64type;
+  end;
+in_x86_cvtpd2pi_from_mem
+,in_x86_cvttpd2pi_from_mem
+: //var r0:mm;r1:ptr128;
+  begin
+    CheckParameters(2);
+    resultdef:=x86_m64type;
+  end;
+in_x86_cvtsd2si
+,in_x86_cvttsd2si
+: //var r0:sreg;r1:xmm;
+  begin
+    CheckParameters(2);
+    resultdef:=sinttype;
+  end;
+in_x86_cvtsd2si_from_mem
+,in_x86_cvttsd2si_from_mem
+: //var r0:sreg;r1:ptr64;
+  begin
+    CheckParameters(2);
+    resultdef:=sinttype;
+  end;
+in_x86_cvtsi2sd
+: //var r0:xmm;r1:r32;
+  begin
+    CheckParameters(2);
+    resultdef:=x86_m128type;
+  end;
+in_x86_movd_from_reg
+: //out r0:xmm;r1:r32;
+  begin
+    CheckParameters(1);
+    resultdef:=x86_m128type;
+  end;
+in_x86_pmovmskb
+: //var r0:r32;r1:xmm;
   begin
   begin
     CheckParameters(2);
     CheckParameters(2);
     resultdef:=u32inttype;
     resultdef:=u32inttype;
   end;
   end;
-in_x86_pinsrw
+in_x86_pextrw_sse2
+: //out r0:r16;r1:xmm;imm:i32;
+  begin
+    CheckParameters(2);
+    resultdef:=u16inttype;
+  end;
+in_x86_pinsrw_sse2
+,in_x86_pinsrb
+,in_x86_pinsrd
 : //var r0:xmm;r1:r32;imm:i32;
 : //var r0:xmm;r1:r32;imm:i32;
   begin
   begin
     CheckParameters(3);
     CheckParameters(3);
     resultdef:=x86_m128type;
     resultdef:=x86_m128type;
   end;
   end;
+in_x86_pinsrw_from_mem
+: //var r0:xmm;r1:ptr16;imm:i32;
+  begin
+    CheckParameters(3);
+    resultdef:=x86_m128type;
+  end;
+in_x86_psllw_sse2_imm
+,in_x86_pslld_sse2_imm
+,in_x86_psllq_sse2_imm
+,in_x86_psrad_sse2_imm
+,in_x86_psraw_sse2_imm
+,in_x86_psrlw_sse2_imm
+,in_x86_psrld_sse2_imm
+,in_x86_psrlq_sse2_imm
+,in_x86_pslldq
+,in_x86_psrldq
+: //var r0:xmm;imm:i32;
+  begin
+    CheckParameters(2);
+    resultdef:=x86_m128type;
+  end;
+in_x86_maskmovdqu
+: //addr:edi_ptr;r0:xmm;r1:xmm;
+  begin
+    CheckParameters(3);
+    resultdef:=voidtype;
+  end;
+in_x86_movq2dq
+: //out r0:xmm;r1:mm;
+  begin
+    CheckParameters(1);
+    resultdef:=x86_m128type;
+  end;
+in_x86_pshufhw
+,in_x86_pshuflw
+,in_x86_pshufd
+,in_x86_roundps
+,in_x86_roundss
+,in_x86_roundpd
+,in_x86_roundsd
+: //out r0:xmm;r1:xmm;imm:i32;
+  begin
+    CheckParameters(2);
+    resultdef:=x86_m128type;
+  end;
+in_x86_pshufhw_from_mem
+,in_x86_pshuflw_from_mem
+,in_x86_pshufd_from_mem
+,in_x86_roundps_from_mem
+,in_x86_roundpd_from_mem
+: //out r0:xmm;r1:ptr128;imm:i32;
+  begin
+    CheckParameters(2);
+    resultdef:=x86_m128type;
+  end;
+in_x86_blendvps
+,in_x86_blendvpd
+,in_x86_pblendvb
+: //var r0:xmm;r1:xmm;mask:implicit_xmm0;
+  begin
+    CheckParameters(3);
+    resultdef:=x86_m128type;
+  end;
+in_x86_blendvps_from_mem
+,in_x86_blendvpd_from_mem
+,in_x86_pblendvb_from_mem
+: //var r0:xmm;r1:ptr128;mask:implicit_xmm0;
+  begin
+    CheckParameters(3);
+    resultdef:=x86_m128type;
+  end;
+in_x86_roundss_from_mem
+: //out r0:xmm;r1:ptr32;imm:i32;
+  begin
+    CheckParameters(2);
+    resultdef:=x86_m128type;
+  end;
+in_x86_roundsd_from_mem
+: //out r0:xmm;r1:ptr64;imm:i32;
+  begin
+    CheckParameters(2);
+    resultdef:=x86_m128type;
+  end;
+in_x86_extractps
+,in_x86_pextrd
+: //out r0:r32;r1:xmm;imm:i32;
+  begin
+    CheckParameters(2);
+    resultdef:=u32inttype;
+  end;
+in_x86_extractps_from_mem
+,in_x86_pextrd_to_mem
+: //r0:ptr32;r1:xmm;imm:i32;
+  begin
+    CheckParameters(3);
+    resultdef:=voidtype;
+  end;
+in_x86_pinsrb_from_mem
+: //var r0:xmm;r1:ptr8;imm:i32;
+  begin
+    CheckParameters(3);
+    resultdef:=x86_m128type;
+  end;
+{$ifdef X86_64}
+in_x86_pinsrq
+: //var r0:xmm;r1:reg;imm:i32;
+  begin
+    CheckParameters(3);
+    resultdef:=x86_m128type;
+  end;
+{$endif}
+{$ifdef X86_64}
+in_x86_pinsrq_from_mem
+: //var r0:xmm;r1:ptr64;imm:i32;
+  begin
+    CheckParameters(3);
+    resultdef:=x86_m128type;
+  end;
+{$endif}
+in_x86_pextrb
+: //out r0:r8;r1:xmm;imm:i32;
+  begin
+    CheckParameters(2);
+    resultdef:=u8inttype;
+  end;
+in_x86_pextrb_to_mem
+: //r0:ptr8;r1:xmm;imm:i32;
+  begin
+    CheckParameters(3);
+    resultdef:=voidtype;
+  end;
+in_x86_pextrw_sse41_to_mem
+: //r0:ptr16;r1:xmm;imm:i32;
+  begin
+    CheckParameters(3);
+    resultdef:=voidtype;
+  end;
+{$ifdef X86_64}
+in_x86_pextrq
+: //out r0:r32;r1:xmm;imm:i32;
+  begin
+    CheckParameters(2);
+    resultdef:=u32inttype;
+  end;
+{$endif}
+{$ifdef X86_64}
+in_x86_pextrq_to_mem
+: //r0:ptr64;r1:xmm;imm:i32;
+  begin
+    CheckParameters(3);
+    resultdef:=voidtype;
+  end;
+{$endif}
+in_x86_pmovsxbq_from_mem
+,in_x86_pmovzxbq_from_mem
+: //out r0:xmm;r1:ptr16;
+  begin
+    CheckParameters(1);
+    resultdef:=x86_m128type;
+  end;

+ 492 - 57
rtl/i386/cpumminnr.inc

@@ -8,60 +8,495 @@
   fpc_in_x86_movss_from_val = fpc_in_x86_mm_first+7;
   fpc_in_x86_movss_from_val = fpc_in_x86_mm_first+7;
   fpc_in_x86_movlps = fpc_in_x86_mm_first+8;
   fpc_in_x86_movlps = fpc_in_x86_mm_first+8;
   fpc_in_x86_movhps = fpc_in_x86_mm_first+9;
   fpc_in_x86_movhps = fpc_in_x86_mm_first+9;
-  fpc_in_x86_movlhps = fpc_in_x86_mm_first+10;
-  fpc_in_x86_movhlps = fpc_in_x86_mm_first+11;
-  fpc_in_x86_addss = fpc_in_x86_mm_first+12;
-  fpc_in_x86_subss = fpc_in_x86_mm_first+13;
-  fpc_in_x86_mulss = fpc_in_x86_mm_first+14;
-  fpc_in_x86_divss = fpc_in_x86_mm_first+15;
-  fpc_in_x86_rcpss = fpc_in_x86_mm_first+16;
-  fpc_in_x86_sqrtss = fpc_in_x86_mm_first+17;
-  fpc_in_x86_maxss = fpc_in_x86_mm_first+18;
-  fpc_in_x86_minss = fpc_in_x86_mm_first+19;
-  fpc_in_x86_rsqrtss = fpc_in_x86_mm_first+20;
-  fpc_in_x86_addps = fpc_in_x86_mm_first+21;
-  fpc_in_x86_subps = fpc_in_x86_mm_first+22;
-  fpc_in_x86_mulps = fpc_in_x86_mm_first+23;
-  fpc_in_x86_divps = fpc_in_x86_mm_first+24;
-  fpc_in_x86_rcpps = fpc_in_x86_mm_first+25;
-  fpc_in_x86_sqrtps = fpc_in_x86_mm_first+26;
-  fpc_in_x86_maxps = fpc_in_x86_mm_first+27;
-  fpc_in_x86_minps = fpc_in_x86_mm_first+28;
-  fpc_in_x86_rsqrtps = fpc_in_x86_mm_first+29;
-  fpc_in_x86_andps = fpc_in_x86_mm_first+30;
-  fpc_in_x86_orps = fpc_in_x86_mm_first+31;
-  fpc_in_x86_xorps = fpc_in_x86_mm_first+32;
-  fpc_in_x86_andnps = fpc_in_x86_mm_first+33;
-  fpc_in_x86_cmpss = fpc_in_x86_mm_first+34;
-  fpc_in_x86_cmpps = fpc_in_x86_mm_first+35;
-  fpc_in_x86_shufps = fpc_in_x86_mm_first+36;
-  fpc_in_x86_unpckhps = fpc_in_x86_mm_first+37;
-  fpc_in_x86_unpcklps = fpc_in_x86_mm_first+38;
-  fpc_in_x86_cvtsi2ss = fpc_in_x86_mm_first+39;
-  fpc_in_x86_cvtss2si = fpc_in_x86_mm_first+40;
-  fpc_in_x86_cvttss2si = fpc_in_x86_mm_first+41;
-  fpc_in_x86_cvtpi2ps = fpc_in_x86_mm_first+42;
-  fpc_in_x86_cvtps2pi = fpc_in_x86_mm_first+43;
-  fpc_in_x86_cvttps2pi = fpc_in_x86_mm_first+44;
-  fpc_in_x86_pmulhuw_mmx = fpc_in_x86_mm_first+45;
-  fpc_in_x86_psadbw_mmx = fpc_in_x86_mm_first+46;
-  fpc_in_x86_pavgb_mmx = fpc_in_x86_mm_first+47;
-  fpc_in_x86_pavgw_mmx = fpc_in_x86_mm_first+48;
-  fpc_in_x86_pmaxub_mmx = fpc_in_x86_mm_first+49;
-  fpc_in_x86_pminub_mmx = fpc_in_x86_mm_first+50;
-  fpc_in_x86_pmaxsw_mmx = fpc_in_x86_mm_first+51;
-  fpc_in_x86_pminsw_mmx = fpc_in_x86_mm_first+52;
-  fpc_in_x86_pextrw_mmx = fpc_in_x86_mm_first+53;
-  fpc_in_x86_pinsrw_mmx = fpc_in_x86_mm_first+54;
-  fpc_in_x86_pmovmskb = fpc_in_x86_mm_first+55;
-  fpc_in_x86_pshufw = fpc_in_x86_mm_first+56;
-  fpc_in_x86_pmulhuw = fpc_in_x86_mm_first+57;
-  fpc_in_x86_psadbw = fpc_in_x86_mm_first+58;
-  fpc_in_x86_pavgb = fpc_in_x86_mm_first+59;
-  fpc_in_x86_pavgw = fpc_in_x86_mm_first+60;
-  fpc_in_x86_pmaxub = fpc_in_x86_mm_first+61;
-  fpc_in_x86_pminub = fpc_in_x86_mm_first+62;
-  fpc_in_x86_pmaxsw = fpc_in_x86_mm_first+63;
-  fpc_in_x86_pminsw = fpc_in_x86_mm_first+64;
-  fpc_in_x86_pextrw = fpc_in_x86_mm_first+65;
-  fpc_in_x86_pinsrw = fpc_in_x86_mm_first+66;
+  fpc_in_x86_movlps_to_mem = fpc_in_x86_mm_first+10;
+  fpc_in_x86_movhps_to_mem = fpc_in_x86_mm_first+11;
+  fpc_in_x86_movlhps = fpc_in_x86_mm_first+12;
+  fpc_in_x86_movhlps = fpc_in_x86_mm_first+13;
+  fpc_in_x86_addss = fpc_in_x86_mm_first+14;
+  fpc_in_x86_addss_from_mem = fpc_in_x86_mm_first+15;
+  fpc_in_x86_subss = fpc_in_x86_mm_first+16;
+  fpc_in_x86_subss_from_mem = fpc_in_x86_mm_first+17;
+  fpc_in_x86_mulss = fpc_in_x86_mm_first+18;
+  fpc_in_x86_mulss_from_mem = fpc_in_x86_mm_first+19;
+  fpc_in_x86_divss = fpc_in_x86_mm_first+20;
+  fpc_in_x86_divss_from_mem = fpc_in_x86_mm_first+21;
+  fpc_in_x86_rcpss = fpc_in_x86_mm_first+22;
+  fpc_in_x86_rcpss_from_mem = fpc_in_x86_mm_first+23;
+  fpc_in_x86_sqrtss = fpc_in_x86_mm_first+24;
+  fpc_in_x86_sqrtss_from_mem = fpc_in_x86_mm_first+25;
+  fpc_in_x86_maxss = fpc_in_x86_mm_first+26;
+  fpc_in_x86_maxss_from_mem = fpc_in_x86_mm_first+27;
+  fpc_in_x86_minss = fpc_in_x86_mm_first+28;
+  fpc_in_x86_minss_from_mem = fpc_in_x86_mm_first+29;
+  fpc_in_x86_rsqrtss = fpc_in_x86_mm_first+30;
+  fpc_in_x86_rsqrtss_from_mem = fpc_in_x86_mm_first+31;
+  fpc_in_x86_addps = fpc_in_x86_mm_first+32;
+  fpc_in_x86_addps_from_mem = fpc_in_x86_mm_first+33;
+  fpc_in_x86_subps = fpc_in_x86_mm_first+34;
+  fpc_in_x86_subps_from_mem = fpc_in_x86_mm_first+35;
+  fpc_in_x86_mulps = fpc_in_x86_mm_first+36;
+  fpc_in_x86_mulps_from_mem = fpc_in_x86_mm_first+37;
+  fpc_in_x86_divps = fpc_in_x86_mm_first+38;
+  fpc_in_x86_divps_from_mem = fpc_in_x86_mm_first+39;
+  fpc_in_x86_rcpps = fpc_in_x86_mm_first+40;
+  fpc_in_x86_rcpps_from_mem = fpc_in_x86_mm_first+41;
+  fpc_in_x86_sqrtps = fpc_in_x86_mm_first+42;
+  fpc_in_x86_sqrtps_from_mem = fpc_in_x86_mm_first+43;
+  fpc_in_x86_maxps = fpc_in_x86_mm_first+44;
+  fpc_in_x86_maxps_from_mem = fpc_in_x86_mm_first+45;
+  fpc_in_x86_minps = fpc_in_x86_mm_first+46;
+  fpc_in_x86_minps_from_mem = fpc_in_x86_mm_first+47;
+  fpc_in_x86_rsqrtps = fpc_in_x86_mm_first+48;
+  fpc_in_x86_rsqrtps_from_mem = fpc_in_x86_mm_first+49;
+  fpc_in_x86_andps = fpc_in_x86_mm_first+50;
+  fpc_in_x86_andps_from_mem = fpc_in_x86_mm_first+51;
+  fpc_in_x86_orps = fpc_in_x86_mm_first+52;
+  fpc_in_x86_orps_from_mem = fpc_in_x86_mm_first+53;
+  fpc_in_x86_xorps = fpc_in_x86_mm_first+54;
+  fpc_in_x86_xorps_from_mem = fpc_in_x86_mm_first+55;
+  fpc_in_x86_andnps = fpc_in_x86_mm_first+56;
+  fpc_in_x86_andnps_from_mem = fpc_in_x86_mm_first+57;
+  fpc_in_x86_cmpss = fpc_in_x86_mm_first+58;
+  fpc_in_x86_cmpss_from_mem = fpc_in_x86_mm_first+59;
+  fpc_in_x86_cmpps = fpc_in_x86_mm_first+60;
+  fpc_in_x86_cmpps_from_mem = fpc_in_x86_mm_first+61;
+  fpc_in_x86_shufps = fpc_in_x86_mm_first+62;
+  fpc_in_x86_shufps_from_mem = fpc_in_x86_mm_first+63;
+  fpc_in_x86_unpckhps = fpc_in_x86_mm_first+64;
+  fpc_in_x86_unpckhps_from_mem = fpc_in_x86_mm_first+65;
+  fpc_in_x86_unpcklps = fpc_in_x86_mm_first+66;
+  fpc_in_x86_unpcklps_from_mem = fpc_in_x86_mm_first+67;
+  fpc_in_x86_cvtsi2ss = fpc_in_x86_mm_first+68;
+  fpc_in_x86_cvtsi2ss_from_mem = fpc_in_x86_mm_first+69;
+  fpc_in_x86_cvtss2si = fpc_in_x86_mm_first+70;
+  fpc_in_x86_cvtss2si_from_mem = fpc_in_x86_mm_first+71;
+  fpc_in_x86_cvttss2si = fpc_in_x86_mm_first+72;
+  fpc_in_x86_cvttss2si_from_mem = fpc_in_x86_mm_first+73;
+  fpc_in_x86_cvtpi2ps = fpc_in_x86_mm_first+74;
+  fpc_in_x86_cvtpi2ps_from_mem = fpc_in_x86_mm_first+75;
+  fpc_in_x86_cvtps2pi = fpc_in_x86_mm_first+76;
+  fpc_in_x86_cvtps2pi_from_mem = fpc_in_x86_mm_first+77;
+  fpc_in_x86_cvttps2pi = fpc_in_x86_mm_first+78;
+  fpc_in_x86_cvttps2pi_from_mem = fpc_in_x86_mm_first+79;
+  fpc_in_x86_pmulhuw_mmx = fpc_in_x86_mm_first+80;
+  fpc_in_x86_pmulhuw_mmx_from_mem = fpc_in_x86_mm_first+81;
+  fpc_in_x86_psadbw_mmx = fpc_in_x86_mm_first+82;
+  fpc_in_x86_psadbw_mmx_from_mem = fpc_in_x86_mm_first+83;
+  fpc_in_x86_pavgb_mmx = fpc_in_x86_mm_first+84;
+  fpc_in_x86_pavgb_mmx_from_mem = fpc_in_x86_mm_first+85;
+  fpc_in_x86_pavgw_mmx = fpc_in_x86_mm_first+86;
+  fpc_in_x86_pavgw_mmx_from_mem = fpc_in_x86_mm_first+87;
+  fpc_in_x86_pmaxub_mmx = fpc_in_x86_mm_first+88;
+  fpc_in_x86_pmaxub_mmx_from_mem = fpc_in_x86_mm_first+89;
+  fpc_in_x86_pminub_mmx = fpc_in_x86_mm_first+90;
+  fpc_in_x86_pminub_mmx_from_mem = fpc_in_x86_mm_first+91;
+  fpc_in_x86_pmaxsw_mmx = fpc_in_x86_mm_first+92;
+  fpc_in_x86_pmaxsw_mmx_from_mem = fpc_in_x86_mm_first+93;
+  fpc_in_x86_pminsw_mmx = fpc_in_x86_mm_first+94;
+  fpc_in_x86_pminsw_mmx_from_mem = fpc_in_x86_mm_first+95;
+  fpc_in_x86_pextrw_mmx = fpc_in_x86_mm_first+96;
+  fpc_in_x86_pinsrw_mmx = fpc_in_x86_mm_first+97;
+  fpc_in_x86_pmovmskb_mmx = fpc_in_x86_mm_first+98;
+  fpc_in_x86_pshufw = fpc_in_x86_mm_first+99;
+  fpc_in_x86_pshufw_from_mem = fpc_in_x86_mm_first+100;
+  fpc_in_x86_movapd = fpc_in_x86_mm_first+101;
+  fpc_in_x86_movapd_to_mem = fpc_in_x86_mm_first+102;
+  fpc_in_x86_movntpd_to_mem = fpc_in_x86_mm_first+103;
+  fpc_in_x86_movhpd = fpc_in_x86_mm_first+104;
+  fpc_in_x86_movhpd_to_mem = fpc_in_x86_mm_first+105;
+  fpc_in_x86_movlpd = fpc_in_x86_mm_first+106;
+  fpc_in_x86_movlpd_to_mem = fpc_in_x86_mm_first+107;
+  fpc_in_x86_movupd = fpc_in_x86_mm_first+108;
+  fpc_in_x86_movupd_to_mem = fpc_in_x86_mm_first+109;
+  fpc_in_x86_movmskpd = fpc_in_x86_mm_first+110;
+  fpc_in_x86_movsd_from_mem = fpc_in_x86_mm_first+111;
+  fpc_in_x86_movsd_to_mem = fpc_in_x86_mm_first+112;
+  fpc_in_x86_movsd_to_val = fpc_in_x86_mm_first+113;
+  fpc_in_x86_movsd_from_val = fpc_in_x86_mm_first+114;
+  fpc_in_x86_addpd = fpc_in_x86_mm_first+115;
+  fpc_in_x86_addpd_from_mem = fpc_in_x86_mm_first+116;
+  fpc_in_x86_addsd = fpc_in_x86_mm_first+117;
+  fpc_in_x86_addsd_from_mem = fpc_in_x86_mm_first+118;
+  fpc_in_x86_divpd = fpc_in_x86_mm_first+119;
+  fpc_in_x86_divpd_from_mem = fpc_in_x86_mm_first+120;
+  fpc_in_x86_divsd = fpc_in_x86_mm_first+121;
+  fpc_in_x86_divsd_from_mem = fpc_in_x86_mm_first+122;
+  fpc_in_x86_maxpd = fpc_in_x86_mm_first+123;
+  fpc_in_x86_maxpd_from_mem = fpc_in_x86_mm_first+124;
+  fpc_in_x86_maxsd = fpc_in_x86_mm_first+125;
+  fpc_in_x86_maxsd_from_mem = fpc_in_x86_mm_first+126;
+  fpc_in_x86_minpd = fpc_in_x86_mm_first+127;
+  fpc_in_x86_minpd_from_mem = fpc_in_x86_mm_first+128;
+  fpc_in_x86_minsd = fpc_in_x86_mm_first+129;
+  fpc_in_x86_minsd_from_mem = fpc_in_x86_mm_first+130;
+  fpc_in_x86_mulpd = fpc_in_x86_mm_first+131;
+  fpc_in_x86_mulpd_from_mem = fpc_in_x86_mm_first+132;
+  fpc_in_x86_mulsd = fpc_in_x86_mm_first+133;
+  fpc_in_x86_mulsd_from_mem = fpc_in_x86_mm_first+134;
+  fpc_in_x86_sqrtpd = fpc_in_x86_mm_first+135;
+  fpc_in_x86_sqrtpd_from_mem = fpc_in_x86_mm_first+136;
+  fpc_in_x86_sqrtsd = fpc_in_x86_mm_first+137;
+  fpc_in_x86_sqrtsd_from_mem = fpc_in_x86_mm_first+138;
+  fpc_in_x86_subpd = fpc_in_x86_mm_first+139;
+  fpc_in_x86_subpd_from_mem = fpc_in_x86_mm_first+140;
+  fpc_in_x86_subsd = fpc_in_x86_mm_first+141;
+  fpc_in_x86_subsd_from_mem = fpc_in_x86_mm_first+142;
+  fpc_in_x86_andpd = fpc_in_x86_mm_first+143;
+  fpc_in_x86_andpd_from_mem = fpc_in_x86_mm_first+144;
+  fpc_in_x86_andnpd = fpc_in_x86_mm_first+145;
+  fpc_in_x86_andnpd_from_mem = fpc_in_x86_mm_first+146;
+  fpc_in_x86_orpd = fpc_in_x86_mm_first+147;
+  fpc_in_x86_orpd_from_mem = fpc_in_x86_mm_first+148;
+  fpc_in_x86_xorpd = fpc_in_x86_mm_first+149;
+  fpc_in_x86_xorpd_from_mem = fpc_in_x86_mm_first+150;
+  fpc_in_x86_cmppd = fpc_in_x86_mm_first+151;
+  fpc_in_x86_cmppd_from_mem = fpc_in_x86_mm_first+152;
+  fpc_in_x86_cmpsd = fpc_in_x86_mm_first+153;
+  fpc_in_x86_cmpsd_from_mem = fpc_in_x86_mm_first+154;
+  fpc_in_x86_comisd = fpc_in_x86_mm_first+155;
+  fpc_in_x86_comisd_from_mem = fpc_in_x86_mm_first+156;
+  fpc_in_x86_ucomisd = fpc_in_x86_mm_first+157;
+  fpc_in_x86_ucomisd_from_mem = fpc_in_x86_mm_first+158;
+  fpc_in_x86_shufpd = fpc_in_x86_mm_first+159;
+  fpc_in_x86_shufpd_from_mem = fpc_in_x86_mm_first+160;
+  fpc_in_x86_unpckhpd = fpc_in_x86_mm_first+161;
+  fpc_in_x86_unpckhpd_from_mem = fpc_in_x86_mm_first+162;
+  fpc_in_x86_unpcklpd = fpc_in_x86_mm_first+163;
+  fpc_in_x86_unpcklpd_from_mem = fpc_in_x86_mm_first+164;
+  fpc_in_x86_cvtdq2pd = fpc_in_x86_mm_first+165;
+  fpc_in_x86_cvtdq2pd_from_mem = fpc_in_x86_mm_first+166;
+  fpc_in_x86_cvtdq2ps = fpc_in_x86_mm_first+167;
+  fpc_in_x86_cvtdq2ps_from_mem = fpc_in_x86_mm_first+168;
+  fpc_in_x86_cvtpd2dq = fpc_in_x86_mm_first+169;
+  fpc_in_x86_cvtpd2dq_from_mem = fpc_in_x86_mm_first+170;
+  fpc_in_x86_cvtpd2pi = fpc_in_x86_mm_first+171;
+  fpc_in_x86_cvtpd2pi_from_mem = fpc_in_x86_mm_first+172;
+  fpc_in_x86_cvtpd2ps = fpc_in_x86_mm_first+173;
+  fpc_in_x86_cvtpd2ps_from_mem = fpc_in_x86_mm_first+174;
+  fpc_in_x86_cvtpi2pd = fpc_in_x86_mm_first+175;
+  fpc_in_x86_cvtpi2pd_from_mem = fpc_in_x86_mm_first+176;
+  fpc_in_x86_cvtps2dq = fpc_in_x86_mm_first+177;
+  fpc_in_x86_cvtps2dq_from_mem = fpc_in_x86_mm_first+178;
+  fpc_in_x86_cvtps2pd = fpc_in_x86_mm_first+179;
+  fpc_in_x86_cvtps2pd_from_mem = fpc_in_x86_mm_first+180;
+  fpc_in_x86_cvtsd2si = fpc_in_x86_mm_first+181;
+  fpc_in_x86_cvtsd2si_from_mem = fpc_in_x86_mm_first+182;
+  fpc_in_x86_cvtsd2ss = fpc_in_x86_mm_first+183;
+  fpc_in_x86_cvtsd2ss_from_mem = fpc_in_x86_mm_first+184;
+  fpc_in_x86_cvtsi2sd = fpc_in_x86_mm_first+185;
+  fpc_in_x86_cvtsi2sd_from_mem = fpc_in_x86_mm_first+186;
+  fpc_in_x86_cvtss2sd = fpc_in_x86_mm_first+187;
+  fpc_in_x86_cvtss2sd_from_mem = fpc_in_x86_mm_first+188;
+  fpc_in_x86_cvttpd2dq = fpc_in_x86_mm_first+189;
+  fpc_in_x86_cvttpd2dq_from_mem = fpc_in_x86_mm_first+190;
+  fpc_in_x86_cvttpd2pi = fpc_in_x86_mm_first+191;
+  fpc_in_x86_cvttpd2pi_from_mem = fpc_in_x86_mm_first+192;
+  fpc_in_x86_cvttps2dq = fpc_in_x86_mm_first+193;
+  fpc_in_x86_cvttps2dq_from_mem = fpc_in_x86_mm_first+194;
+  fpc_in_x86_cvttsd2si = fpc_in_x86_mm_first+195;
+  fpc_in_x86_cvttsd2si_from_mem = fpc_in_x86_mm_first+196;
+  fpc_in_x86_movd_from_reg = fpc_in_x86_mm_first+197; // movd gets four ids, one per direction suffix: from_reg, from_mem, to_reg, to_mem
+  fpc_in_x86_movd_from_mem = fpc_in_x86_mm_first+198;
+  fpc_in_x86_movd_to_reg = fpc_in_x86_mm_first+199;
+  fpc_in_x86_movd_to_mem = fpc_in_x86_mm_first+200;
+  fpc_in_x86_movq_from_mem = fpc_in_x86_mm_first+201; // movq only has the two memory-direction ids in this list
+  fpc_in_x86_movq_to_mem = fpc_in_x86_mm_first+202;
+  fpc_in_x86_pmovmskb = fpc_in_x86_mm_first+203; // unsuffixed id; the __m64 overload in cpummprocs.inc binds to fpc_in_x86_pmovmskb_mmx instead
+  fpc_in_x86_pextrw_sse2 = fpc_in_x86_mm_first+204; // _sse2 tag; the __m64 overload in cpummprocs.inc binds to fpc_in_x86_pextrw_mmx
+  fpc_in_x86_pinsrw_sse2 = fpc_in_x86_mm_first+205;
+  fpc_in_x86_pinsrw_from_mem = fpc_in_x86_mm_first+206; // note: the _from_mem id carries no _sse2 tag
+  fpc_in_x86_packssdw = fpc_in_x86_mm_first+207; // packed-integer ids: register-form id followed by its _from_mem id, numbered consecutively
+  fpc_in_x86_packssdw_from_mem = fpc_in_x86_mm_first+208;
+  fpc_in_x86_packsswb = fpc_in_x86_mm_first+209;
+  fpc_in_x86_packsswb_from_mem = fpc_in_x86_mm_first+210;
+  fpc_in_x86_packuswb = fpc_in_x86_mm_first+211;
+  fpc_in_x86_packuswb_from_mem = fpc_in_x86_mm_first+212;
+  fpc_in_x86_paddb = fpc_in_x86_mm_first+213;
+  fpc_in_x86_paddb_from_mem = fpc_in_x86_mm_first+214;
+  fpc_in_x86_paddw = fpc_in_x86_mm_first+215;
+  fpc_in_x86_paddw_from_mem = fpc_in_x86_mm_first+216;
+  fpc_in_x86_paddd = fpc_in_x86_mm_first+217;
+  fpc_in_x86_paddd_from_mem = fpc_in_x86_mm_first+218;
+  fpc_in_x86_paddq = fpc_in_x86_mm_first+219;
+  fpc_in_x86_paddq_from_mem = fpc_in_x86_mm_first+220;
+  fpc_in_x86_paddsb = fpc_in_x86_mm_first+221;
+  fpc_in_x86_paddsb_from_mem = fpc_in_x86_mm_first+222;
+  fpc_in_x86_paddsw = fpc_in_x86_mm_first+223;
+  fpc_in_x86_paddsw_from_mem = fpc_in_x86_mm_first+224;
+  fpc_in_x86_paddusb = fpc_in_x86_mm_first+225;
+  fpc_in_x86_paddusb_from_mem = fpc_in_x86_mm_first+226;
+  fpc_in_x86_paddusw = fpc_in_x86_mm_first+227;
+  fpc_in_x86_paddusw_from_mem = fpc_in_x86_mm_first+228;
+  fpc_in_x86_pand = fpc_in_x86_mm_first+229;
+  fpc_in_x86_pand_from_mem = fpc_in_x86_mm_first+230;
+  fpc_in_x86_pandn = fpc_in_x86_mm_first+231;
+  fpc_in_x86_pandn_from_mem = fpc_in_x86_mm_first+232;
+  fpc_in_x86_por = fpc_in_x86_mm_first+233;
+  fpc_in_x86_por_from_mem = fpc_in_x86_mm_first+234;
+  fpc_in_x86_pxor = fpc_in_x86_mm_first+235;
+  fpc_in_x86_pxor_from_mem = fpc_in_x86_mm_first+236;
+  fpc_in_x86_pcmpeqb = fpc_in_x86_mm_first+237;
+  fpc_in_x86_pcmpeqb_from_mem = fpc_in_x86_mm_first+238;
+  fpc_in_x86_pcmpeqw = fpc_in_x86_mm_first+239;
+  fpc_in_x86_pcmpeqw_from_mem = fpc_in_x86_mm_first+240;
+  fpc_in_x86_pcmpeqd = fpc_in_x86_mm_first+241;
+  fpc_in_x86_pcmpeqd_from_mem = fpc_in_x86_mm_first+242;
+  fpc_in_x86_pcmpgtb = fpc_in_x86_mm_first+243;
+  fpc_in_x86_pcmpgtb_from_mem = fpc_in_x86_mm_first+244;
+  fpc_in_x86_pcmpgtw = fpc_in_x86_mm_first+245;
+  fpc_in_x86_pcmpgtw_from_mem = fpc_in_x86_mm_first+246;
+  fpc_in_x86_pcmpgtd = fpc_in_x86_mm_first+247;
+  fpc_in_x86_pcmpgtd_from_mem = fpc_in_x86_mm_first+248;
+  fpc_in_x86_pmullw = fpc_in_x86_mm_first+249;
+  fpc_in_x86_pmullw_from_mem = fpc_in_x86_mm_first+250;
+  fpc_in_x86_pmulhw = fpc_in_x86_mm_first+251;
+  fpc_in_x86_pmulhw_from_mem = fpc_in_x86_mm_first+252;
+  fpc_in_x86_pmulhuw_sse2 = fpc_in_x86_mm_first+253; // _sse2 tag; the __m64 overload in cpummprocs.inc binds to fpc_in_x86_pmulhuw_mmx instead
+  fpc_in_x86_pmulhuw_from_mem = fpc_in_x86_mm_first+254;
+  fpc_in_x86_pmuludq = fpc_in_x86_mm_first+255;
+  fpc_in_x86_pmuludq_from_mem = fpc_in_x86_mm_first+256;
+  fpc_in_x86_psllw_sse2 = fpc_in_x86_mm_first+257; // shift ids come in threes: register form, _from_mem, and an _imm-suffixed id (NOTE(review): presumably the immediate-count form - confirm)
+  fpc_in_x86_psllw_from_mem = fpc_in_x86_mm_first+258;
+  fpc_in_x86_psllw_sse2_imm = fpc_in_x86_mm_first+259;
+  fpc_in_x86_pslld_sse2 = fpc_in_x86_mm_first+260;
+  fpc_in_x86_pslld_from_mem = fpc_in_x86_mm_first+261;
+  fpc_in_x86_pslld_sse2_imm = fpc_in_x86_mm_first+262;
+  fpc_in_x86_psllq_sse2 = fpc_in_x86_mm_first+263;
+  fpc_in_x86_psllq_from_mem = fpc_in_x86_mm_first+264;
+  fpc_in_x86_psllq_sse2_imm = fpc_in_x86_mm_first+265;
+  fpc_in_x86_psrad_sse2 = fpc_in_x86_mm_first+266;
+  fpc_in_x86_psrad_from_mem = fpc_in_x86_mm_first+267;
+  fpc_in_x86_psrad_sse2_imm = fpc_in_x86_mm_first+268;
+  fpc_in_x86_psraw_sse2 = fpc_in_x86_mm_first+269;
+  fpc_in_x86_psraw_from_mem = fpc_in_x86_mm_first+270;
+  fpc_in_x86_psraw_sse2_imm = fpc_in_x86_mm_first+271;
+  fpc_in_x86_psrlw_sse2 = fpc_in_x86_mm_first+272;
+  fpc_in_x86_psrlw_from_mem = fpc_in_x86_mm_first+273;
+  fpc_in_x86_psrlw_sse2_imm = fpc_in_x86_mm_first+274;
+  fpc_in_x86_psrld_sse2 = fpc_in_x86_mm_first+275;
+  fpc_in_x86_psrld_from_mem = fpc_in_x86_mm_first+276;
+  fpc_in_x86_psrld_sse2_imm = fpc_in_x86_mm_first+277;
+  fpc_in_x86_psrlq_sse2 = fpc_in_x86_mm_first+278;
+  fpc_in_x86_psrlq_from_mem = fpc_in_x86_mm_first+279;
+  fpc_in_x86_psrlq_sse2_imm = fpc_in_x86_mm_first+280;
+  fpc_in_x86_psubb = fpc_in_x86_mm_first+281; // back to register/_from_mem pairs from here on
+  fpc_in_x86_psubb_from_mem = fpc_in_x86_mm_first+282;
+  fpc_in_x86_psubw = fpc_in_x86_mm_first+283;
+  fpc_in_x86_psubw_from_mem = fpc_in_x86_mm_first+284;
+  fpc_in_x86_psubd = fpc_in_x86_mm_first+285;
+  fpc_in_x86_psubd_from_mem = fpc_in_x86_mm_first+286;
+  fpc_in_x86_psubq = fpc_in_x86_mm_first+287;
+  fpc_in_x86_psubq_from_mem = fpc_in_x86_mm_first+288;
+  fpc_in_x86_psubsb = fpc_in_x86_mm_first+289;
+  fpc_in_x86_psubsb_from_mem = fpc_in_x86_mm_first+290;
+  fpc_in_x86_psubsw = fpc_in_x86_mm_first+291;
+  fpc_in_x86_psubsw_from_mem = fpc_in_x86_mm_first+292;
+  fpc_in_x86_pmaddwd = fpc_in_x86_mm_first+293;
+  fpc_in_x86_pmaddwd_from_mem = fpc_in_x86_mm_first+294;
+  fpc_in_x86_psubusb = fpc_in_x86_mm_first+295;
+  fpc_in_x86_psubusb_from_mem = fpc_in_x86_mm_first+296;
+  fpc_in_x86_psubusw = fpc_in_x86_mm_first+297;
+  fpc_in_x86_psubusw_from_mem = fpc_in_x86_mm_first+298;
+  fpc_in_x86_punpckhbw = fpc_in_x86_mm_first+299;
+  fpc_in_x86_punpckhbw_from_mem = fpc_in_x86_mm_first+300;
+  fpc_in_x86_punpckhwd = fpc_in_x86_mm_first+301;
+  fpc_in_x86_punpckhwd_from_mem = fpc_in_x86_mm_first+302;
+  fpc_in_x86_punpckhdq = fpc_in_x86_mm_first+303;
+  fpc_in_x86_punpckhdq_from_mem = fpc_in_x86_mm_first+304;
+  fpc_in_x86_punpcklbw = fpc_in_x86_mm_first+305;
+  fpc_in_x86_punpcklbw_from_mem = fpc_in_x86_mm_first+306;
+  fpc_in_x86_punpcklwd = fpc_in_x86_mm_first+307;
+  fpc_in_x86_punpcklwd_from_mem = fpc_in_x86_mm_first+308;
+  fpc_in_x86_punpckldq = fpc_in_x86_mm_first+309;
+  fpc_in_x86_punpckldq_from_mem = fpc_in_x86_mm_first+310;
+  fpc_in_x86_pavgb_sse2 = fpc_in_x86_mm_first+311; // the _sse2-tagged ids below share their base names with the _mmx-tagged ids used by the __m64 overloads in cpummprocs.inc
+  fpc_in_x86_pavgb_from_mem = fpc_in_x86_mm_first+312;
+  fpc_in_x86_pavgw_sse2 = fpc_in_x86_mm_first+313;
+  fpc_in_x86_pavgw_from_mem = fpc_in_x86_mm_first+314;
+  fpc_in_x86_pminub_sse2 = fpc_in_x86_mm_first+315;
+  fpc_in_x86_pminub_from_mem = fpc_in_x86_mm_first+316;
+  fpc_in_x86_pminsw_sse2 = fpc_in_x86_mm_first+317;
+  fpc_in_x86_pminsw_from_mem = fpc_in_x86_mm_first+318;
+  fpc_in_x86_pmaxsw_sse2 = fpc_in_x86_mm_first+319;
+  fpc_in_x86_pmaxsw_from_mem = fpc_in_x86_mm_first+320;
+  fpc_in_x86_pmaxub_sse2 = fpc_in_x86_mm_first+321;
+  fpc_in_x86_pmaxub_from_mem = fpc_in_x86_mm_first+322;
+  fpc_in_x86_psadbw_sse2 = fpc_in_x86_mm_first+323;
+  fpc_in_x86_psadbw_from_mem = fpc_in_x86_mm_first+324;
+  fpc_in_x86_maskmovdqu = fpc_in_x86_mm_first+325; // from here to psrldq the strict register/_from_mem pairing does not hold: several ids have no _from_mem twin
+  fpc_in_x86_movdq2q = fpc_in_x86_mm_first+326;
+  fpc_in_x86_movdqa_from_mem = fpc_in_x86_mm_first+327; // for movdqa/movdqu the _from_mem id comes first, then the unsuffixed id
+  fpc_in_x86_movdqa = fpc_in_x86_mm_first+328;
+  fpc_in_x86_movdqu_from_mem = fpc_in_x86_mm_first+329;
+  fpc_in_x86_movdqu = fpc_in_x86_mm_first+330;
+  fpc_in_x86_movq2dq = fpc_in_x86_mm_first+331;
+  fpc_in_x86_movntdq = fpc_in_x86_mm_first+332;
+  fpc_in_x86_pshufhw = fpc_in_x86_mm_first+333; // the three pshuf* register-form ids are grouped first; their _from_mem ids follow as a second group
+  fpc_in_x86_pshuflw = fpc_in_x86_mm_first+334;
+  fpc_in_x86_pshufd = fpc_in_x86_mm_first+335;
+  fpc_in_x86_pshufhw_from_mem = fpc_in_x86_mm_first+336;
+  fpc_in_x86_pshuflw_from_mem = fpc_in_x86_mm_first+337;
+  fpc_in_x86_pshufd_from_mem = fpc_in_x86_mm_first+338;
+  fpc_in_x86_pslldq = fpc_in_x86_mm_first+339; // single ids, no _from_mem or _imm variants in this list
+  fpc_in_x86_psrldq = fpc_in_x86_mm_first+340;
+  fpc_in_x86_punpckhqdq = fpc_in_x86_mm_first+341;
+  fpc_in_x86_punpckhqdq_from_mem = fpc_in_x86_mm_first+342;
+  fpc_in_x86_punpcklqdq = fpc_in_x86_mm_first+343;
+  fpc_in_x86_punpcklqdq_from_mem = fpc_in_x86_mm_first+344;
+  fpc_in_x86_addsubps = fpc_in_x86_mm_first+345; // NOTE(review): addsub*/mov*dup/hadd*/hsub*/lddqu look like the SSE3 additions - confirm against the generator input
+  fpc_in_x86_addsubps_from_mem = fpc_in_x86_mm_first+346;
+  fpc_in_x86_addsubpd = fpc_in_x86_mm_first+347;
+  fpc_in_x86_addsubpd_from_mem = fpc_in_x86_mm_first+348;
+  fpc_in_x86_movddup = fpc_in_x86_mm_first+349;
+  fpc_in_x86_movddup_from_mem = fpc_in_x86_mm_first+350;
+  fpc_in_x86_movsldup = fpc_in_x86_mm_first+351;
+  fpc_in_x86_movsldup_from_mem = fpc_in_x86_mm_first+352;
+  fpc_in_x86_movshdup = fpc_in_x86_mm_first+353;
+  fpc_in_x86_movshdup_from_mem = fpc_in_x86_mm_first+354;
+  fpc_in_x86_haddps = fpc_in_x86_mm_first+355;
+  fpc_in_x86_haddps_from_mem = fpc_in_x86_mm_first+356;
+  fpc_in_x86_haddpd = fpc_in_x86_mm_first+357;
+  fpc_in_x86_haddpd_from_mem = fpc_in_x86_mm_first+358;
+  fpc_in_x86_hsubps = fpc_in_x86_mm_first+359;
+  fpc_in_x86_hsubps_from_mem = fpc_in_x86_mm_first+360;
+  fpc_in_x86_hsubpd = fpc_in_x86_mm_first+361;
+  fpc_in_x86_hsubpd_from_mem = fpc_in_x86_mm_first+362;
+  fpc_in_x86_lddqu = fpc_in_x86_mm_first+363; // single id: no register-form/_from_mem split for this one
+  fpc_in_x86_psignb = fpc_in_x86_mm_first+364; // NOTE(review): psign*/pshufb/pmulhrsw/pmaddubsw/phsub*/phadd*/palignr/pabs* look like the SSSE3 additions - confirm against the generator input
+  fpc_in_x86_psignb_from_mem = fpc_in_x86_mm_first+365;
+  fpc_in_x86_psignw = fpc_in_x86_mm_first+366;
+  fpc_in_x86_psignw_from_mem = fpc_in_x86_mm_first+367;
+  fpc_in_x86_psignd = fpc_in_x86_mm_first+368;
+  fpc_in_x86_psignd_from_mem = fpc_in_x86_mm_first+369;
+  fpc_in_x86_pshufb = fpc_in_x86_mm_first+370;
+  fpc_in_x86_pshufb_from_mem = fpc_in_x86_mm_first+371;
+  fpc_in_x86_pmulhrsw = fpc_in_x86_mm_first+372;
+  fpc_in_x86_pmulhrsw_from_mem = fpc_in_x86_mm_first+373;
+  fpc_in_x86_pmaddubsw = fpc_in_x86_mm_first+374;
+  fpc_in_x86_pmaddubsw_from_mem = fpc_in_x86_mm_first+375;
+  fpc_in_x86_phsubw = fpc_in_x86_mm_first+376;
+  fpc_in_x86_phsubw_from_mem = fpc_in_x86_mm_first+377;
+  fpc_in_x86_phsubsw = fpc_in_x86_mm_first+378;
+  fpc_in_x86_phsubsw_from_mem = fpc_in_x86_mm_first+379;
+  fpc_in_x86_phsubd = fpc_in_x86_mm_first+380;
+  fpc_in_x86_phsubd_from_mem = fpc_in_x86_mm_first+381;
+  fpc_in_x86_phaddsw = fpc_in_x86_mm_first+382;
+  fpc_in_x86_phaddsw_from_mem = fpc_in_x86_mm_first+383;
+  fpc_in_x86_phaddw = fpc_in_x86_mm_first+384;
+  fpc_in_x86_phaddw_from_mem = fpc_in_x86_mm_first+385;
+  fpc_in_x86_phaddd = fpc_in_x86_mm_first+386;
+  fpc_in_x86_phaddd_from_mem = fpc_in_x86_mm_first+387;
+  fpc_in_x86_palignr = fpc_in_x86_mm_first+388;
+  fpc_in_x86_palignr_from_mem = fpc_in_x86_mm_first+389;
+  fpc_in_x86_pabsb = fpc_in_x86_mm_first+390;
+  fpc_in_x86_pabsb_from_mem = fpc_in_x86_mm_first+391;
+  fpc_in_x86_pabsw = fpc_in_x86_mm_first+392;
+  fpc_in_x86_pabsw_from_mem = fpc_in_x86_mm_first+393;
+  fpc_in_x86_pabsd = fpc_in_x86_mm_first+394;
+  fpc_in_x86_pabsd_from_mem = fpc_in_x86_mm_first+395;
+  fpc_in_x86_dpps = fpc_in_x86_mm_first+396; // NOTE(review): dpps through movntdqa look like the SSE4.1 additions - confirm against the generator input
+  fpc_in_x86_dpps_from_mem = fpc_in_x86_mm_first+397;
+  fpc_in_x86_dppd = fpc_in_x86_mm_first+398;
+  fpc_in_x86_dppd_from_mem = fpc_in_x86_mm_first+399;
+  fpc_in_x86_blendps = fpc_in_x86_mm_first+400;
+  fpc_in_x86_blendps_from_mem = fpc_in_x86_mm_first+401;
+  fpc_in_x86_blendvps = fpc_in_x86_mm_first+402;
+  fpc_in_x86_blendvps_from_mem = fpc_in_x86_mm_first+403;
+  fpc_in_x86_blendpd = fpc_in_x86_mm_first+404;
+  fpc_in_x86_blendpd_from_mem = fpc_in_x86_mm_first+405;
+  fpc_in_x86_blendvpd = fpc_in_x86_mm_first+406;
+  fpc_in_x86_blendvpd_from_mem = fpc_in_x86_mm_first+407;
+  fpc_in_x86_roundps = fpc_in_x86_mm_first+408;
+  fpc_in_x86_roundps_from_mem = fpc_in_x86_mm_first+409;
+  fpc_in_x86_roundss = fpc_in_x86_mm_first+410;
+  fpc_in_x86_roundss_from_mem = fpc_in_x86_mm_first+411;
+  fpc_in_x86_roundpd = fpc_in_x86_mm_first+412;
+  fpc_in_x86_roundpd_from_mem = fpc_in_x86_mm_first+413;
+  fpc_in_x86_roundsd = fpc_in_x86_mm_first+414;
+  fpc_in_x86_roundsd_from_mem = fpc_in_x86_mm_first+415;
+  fpc_in_x86_insertps = fpc_in_x86_mm_first+416;
+  fpc_in_x86_insertps_from_mem = fpc_in_x86_mm_first+417;
+  fpc_in_x86_extractps = fpc_in_x86_mm_first+418;
+  fpc_in_x86_extractps_from_mem = fpc_in_x86_mm_first+419; // named _from_mem here, whereas the pextr* ids below use _to_mem
+  fpc_in_x86_mpsadbw = fpc_in_x86_mm_first+420;
+  fpc_in_x86_mpsadbw_from_mem = fpc_in_x86_mm_first+421;
+  fpc_in_x86_phminposuw = fpc_in_x86_mm_first+422;
+  fpc_in_x86_phminposuw_from_mem = fpc_in_x86_mm_first+423;
+  fpc_in_x86_pmulld = fpc_in_x86_mm_first+424;
+  fpc_in_x86_pmulld_from_mem = fpc_in_x86_mm_first+425;
+  fpc_in_x86_pmuldq = fpc_in_x86_mm_first+426;
+  fpc_in_x86_pmuldq_from_mem = fpc_in_x86_mm_first+427;
+  fpc_in_x86_pblendvb = fpc_in_x86_mm_first+428;
+  fpc_in_x86_pblendvb_from_mem = fpc_in_x86_mm_first+429;
+  fpc_in_x86_pblendw = fpc_in_x86_mm_first+430;
+  fpc_in_x86_pblendw_from_mem = fpc_in_x86_mm_first+431;
+  fpc_in_x86_pminsb = fpc_in_x86_mm_first+432;
+  fpc_in_x86_pminsb_from_mem = fpc_in_x86_mm_first+433;
+  fpc_in_x86_pminuw = fpc_in_x86_mm_first+434;
+  fpc_in_x86_pminuw_from_mem = fpc_in_x86_mm_first+435;
+  fpc_in_x86_pminsd = fpc_in_x86_mm_first+436;
+  fpc_in_x86_pminsd_from_mem = fpc_in_x86_mm_first+437;
+  fpc_in_x86_pminud = fpc_in_x86_mm_first+438;
+  fpc_in_x86_pminud_from_mem = fpc_in_x86_mm_first+439;
+  fpc_in_x86_pmaxsb = fpc_in_x86_mm_first+440;
+  fpc_in_x86_pmaxsb_from_mem = fpc_in_x86_mm_first+441;
+  fpc_in_x86_pmaxuw = fpc_in_x86_mm_first+442;
+  fpc_in_x86_pmaxuw_from_mem = fpc_in_x86_mm_first+443;
+  fpc_in_x86_pmaxsd = fpc_in_x86_mm_first+444;
+  fpc_in_x86_pmaxsd_from_mem = fpc_in_x86_mm_first+445;
+  fpc_in_x86_pmaxud = fpc_in_x86_mm_first+446;
+  fpc_in_x86_pmaxud_from_mem = fpc_in_x86_mm_first+447;
+  fpc_in_x86_pinsrb = fpc_in_x86_mm_first+448;
+  fpc_in_x86_pinsrb_from_mem = fpc_in_x86_mm_first+449;
+  fpc_in_x86_pinsrd = fpc_in_x86_mm_first+450;
+  fpc_in_x86_pinsrd_from_mem = fpc_in_x86_mm_first+451;
+  fpc_in_x86_pinsrq = fpc_in_x86_mm_first+452;
+  fpc_in_x86_pinsrq_from_mem = fpc_in_x86_mm_first+453;
+  fpc_in_x86_pextrb = fpc_in_x86_mm_first+454; // pextr* ids pair the register form with a _to_mem id rather than _from_mem
+  fpc_in_x86_pextrb_to_mem = fpc_in_x86_mm_first+455;
+  fpc_in_x86_pextrw_sse41_to_mem = fpc_in_x86_mm_first+456; // only a _to_mem id carries the _sse41 tag; the register-form pextrw id in this list is fpc_in_x86_pextrw_sse2
+  fpc_in_x86_pextrd = fpc_in_x86_mm_first+457;
+  fpc_in_x86_pextrd_to_mem = fpc_in_x86_mm_first+458;
+  fpc_in_x86_pextrq = fpc_in_x86_mm_first+459;
+  fpc_in_x86_pextrq_to_mem = fpc_in_x86_mm_first+460;
+  fpc_in_x86_pmovsxbw = fpc_in_x86_mm_first+461; // pmovsx*/pmovzx* ids alternate sx, zx for each element-size combination
+  fpc_in_x86_pmovsxbw_from_mem = fpc_in_x86_mm_first+462;
+  fpc_in_x86_pmovzxbw = fpc_in_x86_mm_first+463;
+  fpc_in_x86_pmovzxbw_from_mem = fpc_in_x86_mm_first+464;
+  fpc_in_x86_pmovsxbd = fpc_in_x86_mm_first+465;
+  fpc_in_x86_pmovsxbd_from_mem = fpc_in_x86_mm_first+466;
+  fpc_in_x86_pmovzxbd = fpc_in_x86_mm_first+467;
+  fpc_in_x86_pmovzxbd_from_mem = fpc_in_x86_mm_first+468;
+  fpc_in_x86_pmovsxbq = fpc_in_x86_mm_first+469;
+  fpc_in_x86_pmovsxbq_from_mem = fpc_in_x86_mm_first+470;
+  fpc_in_x86_pmovzxbq = fpc_in_x86_mm_first+471;
+  fpc_in_x86_pmovzxbq_from_mem = fpc_in_x86_mm_first+472;
+  fpc_in_x86_pmovsxwd = fpc_in_x86_mm_first+473;
+  fpc_in_x86_pmovsxwd_from_mem = fpc_in_x86_mm_first+474;
+  fpc_in_x86_pmovzxwd = fpc_in_x86_mm_first+475;
+  fpc_in_x86_pmovzxwd_from_mem = fpc_in_x86_mm_first+476;
+  fpc_in_x86_pmovsxwq = fpc_in_x86_mm_first+477;
+  fpc_in_x86_pmovsxwq_from_mem = fpc_in_x86_mm_first+478;
+  fpc_in_x86_pmovzxwq = fpc_in_x86_mm_first+479;
+  fpc_in_x86_pmovzxwq_from_mem = fpc_in_x86_mm_first+480;
+  fpc_in_x86_pmovsxdq = fpc_in_x86_mm_first+481;
+  fpc_in_x86_pmovsxdq_from_mem = fpc_in_x86_mm_first+482;
+  fpc_in_x86_pmovzxdq = fpc_in_x86_mm_first+483;
+  fpc_in_x86_pmovzxdq_from_mem = fpc_in_x86_mm_first+484;
+  fpc_in_x86_ptest = fpc_in_x86_mm_first+485;
+  fpc_in_x86_ptest_from_mem = fpc_in_x86_mm_first+486;
+  fpc_in_x86_pcmpeqq = fpc_in_x86_mm_first+487;
+  fpc_in_x86_pcmpeqq_from_mem = fpc_in_x86_mm_first+488;
+  fpc_in_x86_packusdw = fpc_in_x86_mm_first+489;
+  fpc_in_x86_packusdw_from_mem = fpc_in_x86_mm_first+490;
+  fpc_in_x86_movntdqa = fpc_in_x86_mm_first+491; // single id, no _from_mem twin
+  fpc_in_x86_pcmpestri = fpc_in_x86_mm_first+492; // NOTE(review): pcmp?str? and pcmpgtq look like the SSE4.2 additions - confirm against the generator input
+  fpc_in_x86_pcmpestri_from_mem = fpc_in_x86_mm_first+493;
+  fpc_in_x86_pcmpestrm = fpc_in_x86_mm_first+494;
+  fpc_in_x86_pcmpestrm_from_mem = fpc_in_x86_mm_first+495;
+  fpc_in_x86_pcmpistri = fpc_in_x86_mm_first+496;
+  fpc_in_x86_pcmpistri_from_mem = fpc_in_x86_mm_first+497;
+  fpc_in_x86_pcmpistrm = fpc_in_x86_mm_first+498;
+  fpc_in_x86_pcmpistrm_from_mem = fpc_in_x86_mm_first+499;
+  fpc_in_x86_pcmpgtq = fpc_in_x86_mm_first+500;
+  fpc_in_x86_pcmpgtq_from_mem = fpc_in_x86_mm_first+501; // last id added by this hunk

+ 459 - 16
rtl/i386/cpummprocs.inc

@@ -8,60 +8,503 @@ function x86_movss(r1: __m128): single; [INTERNPROC: fpc_in_x86_movss_to_val];
 function x86_movss(r1: single): __m128; [INTERNPROC: fpc_in_x86_movss_from_val];
 function x86_movss(r1: single): __m128; [INTERNPROC: fpc_in_x86_movss_from_val];
 function x86_movlps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movlps];
 function x86_movlps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movlps];
 function x86_movhps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movhps];
 function x86_movhps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movhps];
+procedure x86_movlps(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movlps_to_mem]; // new procedure overload: pointer comes first, no result, bound to the _to_mem id
+procedure x86_movhps(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movhps_to_mem];
 function x86_movlhps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_movlhps];
 function x86_movlhps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_movlhps];
 function x86_movhlps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_movhlps];
 function x86_movhlps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_movhlps];
 function x86_addss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addss];
 function x86_addss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addss];
+function x86_addss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_addss_from_mem]; // pattern for every added line in this run: same name, second operand passed as pointer, bound to the matching _from_mem id
 function x86_subss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_subss];
 function x86_subss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_subss];
+function x86_subss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_subss_from_mem];
 function x86_mulss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_mulss];
 function x86_mulss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_mulss];
+function x86_mulss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_mulss_from_mem];
 function x86_divss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_divss];
 function x86_divss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_divss];
+function x86_divss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_divss_from_mem];
 function x86_rcpss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_rcpss];
 function x86_rcpss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_rcpss];
+function x86_rcpss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_rcpss_from_mem];
 function x86_sqrtss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_sqrtss];
 function x86_sqrtss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_sqrtss];
+function x86_sqrtss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_sqrtss_from_mem];
 function x86_maxss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_maxss];
 function x86_maxss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_maxss];
+function x86_maxss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_maxss_from_mem];
 function x86_minss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_minss];
 function x86_minss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_minss];
+function x86_minss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_minss_from_mem];
 function x86_rsqrtss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_rsqrtss];
 function x86_rsqrtss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_rsqrtss];
+function x86_rsqrtss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_rsqrtss_from_mem];
 function x86_addps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addps];
 function x86_addps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addps];
+function x86_addps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_addps_from_mem];
 function x86_subps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_subps];
 function x86_subps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_subps];
+function x86_subps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_subps_from_mem];
 function x86_mulps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_mulps];
 function x86_mulps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_mulps];
+function x86_mulps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_mulps_from_mem];
 function x86_divps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_divps];
 function x86_divps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_divps];
+function x86_divps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_divps_from_mem];
 function x86_rcpps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_rcpps];
 function x86_rcpps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_rcpps];
+function x86_rcpps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_rcpps_from_mem];
 function x86_sqrtps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_sqrtps];
 function x86_sqrtps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_sqrtps];
+function x86_sqrtps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_sqrtps_from_mem];
 function x86_maxps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_maxps];
 function x86_maxps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_maxps];
+function x86_maxps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_maxps_from_mem];
 function x86_minps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_minps];
 function x86_minps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_minps];
+function x86_minps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_minps_from_mem];
 function x86_rsqrtps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_rsqrtps];
 function x86_rsqrtps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_rsqrtps];
+function x86_rsqrtps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_rsqrtps_from_mem];
 function x86_andps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_andps];
 function x86_andps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_andps];
+function x86_andps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_andps_from_mem];
 function x86_orps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_orps];
 function x86_orps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_orps];
+function x86_orps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_orps_from_mem];
 function x86_xorps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_xorps];
 function x86_xorps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_xorps];
+function x86_xorps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_xorps_from_mem];
 function x86_andnps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_andnps];
 function x86_andnps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_andnps];
+function x86_andnps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_andnps_from_mem];
 function x86_cmpss(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpss];
 function x86_cmpss(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpss];
+function x86_cmpss(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpss_from_mem]; // pointer overloads keep the trailing imm: longint parameter of the register form
 function x86_cmpps(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpps];
 function x86_cmpps(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpps];
+function x86_cmpps(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpps_from_mem];
 function x86_shufps(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_shufps];
 function x86_shufps(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_shufps];
+function x86_shufps(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_shufps_from_mem];
 function x86_unpckhps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_unpckhps];
 function x86_unpckhps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_unpckhps];
+function x86_unpckhps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_unpckhps_from_mem];
 function x86_unpcklps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_unpcklps];
 function x86_unpcklps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_unpcklps];
-function x86_cvtsi2ss(r0: __m128; r1: longword): __m128; [INTERNPROC: fpc_in_x86_cvtsi2ss];
-function x86_cvtss2si(r1: __m128): longword; [INTERNPROC: fpc_in_x86_cvtss2si];
-function x86_cvttss2si(r1: __m128): longword; [INTERNPROC: fpc_in_x86_cvttss2si];
+function x86_unpcklps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_unpcklps_from_mem];
+function x86_cvtsi2ss(r0: __m128; r1: NativeUInt): __m128; [INTERNPROC: fpc_in_x86_cvtsi2ss]; // integer operand type changed from longword (removed line above) to NativeUInt
+function x86_cvtsi2ss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtsi2ss_from_mem];
+function x86_cvtss2si(r1: __m128): NativeUInt; [INTERNPROC: fpc_in_x86_cvtss2si]; // result type changed from longword to NativeUInt
+function x86_cvtss2si(r1: pointer): NativeUInt; [INTERNPROC: fpc_in_x86_cvtss2si_from_mem];
+function x86_cvttss2si(r1: __m128): NativeUInt; [INTERNPROC: fpc_in_x86_cvttss2si];
+function x86_cvttss2si(r1: pointer): NativeUInt; [INTERNPROC: fpc_in_x86_cvttss2si_from_mem];
 function x86_cvtpi2ps(r0: __m128; r1: __m64): __m128; [INTERNPROC: fpc_in_x86_cvtpi2ps];
 function x86_cvtpi2ps(r0: __m128; r1: __m64): __m128; [INTERNPROC: fpc_in_x86_cvtpi2ps];
+function x86_cvtpi2ps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtpi2ps_from_mem];
 function x86_cvtps2pi(r1: __m128): __m64; [INTERNPROC: fpc_in_x86_cvtps2pi];
 function x86_cvtps2pi(r1: __m128): __m64; [INTERNPROC: fpc_in_x86_cvtps2pi];
+function x86_cvtps2pi(r1: pointer): __m64; [INTERNPROC: fpc_in_x86_cvtps2pi_from_mem];
 function x86_cvttps2pi(r1: __m128): __m64; [INTERNPROC: fpc_in_x86_cvttps2pi];
 function x86_cvttps2pi(r1: __m128): __m64; [INTERNPROC: fpc_in_x86_cvttps2pi];
+function x86_cvttps2pi(r1: pointer): __m64; [INTERNPROC: fpc_in_x86_cvttps2pi_from_mem];
 function x86_pmulhuw(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pmulhuw_mmx];
 function x86_pmulhuw(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pmulhuw_mmx];
+function x86_pmulhuw(r0: __m64; r1: pointer): __m64; [INTERNPROC: fpc_in_x86_pmulhuw_mmx_from_mem]; // __m64 pointer overloads bind to <name>_mmx_from_mem ids
 function x86_psadbw(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_psadbw_mmx];
 function x86_psadbw(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_psadbw_mmx];
+function x86_psadbw(r0: __m64; r1: pointer): __m64; [INTERNPROC: fpc_in_x86_psadbw_mmx_from_mem];
 function x86_pavgb(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pavgb_mmx];
 function x86_pavgb(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pavgb_mmx];
+function x86_pavgb(r0: __m64; r1: pointer): __m64; [INTERNPROC: fpc_in_x86_pavgb_mmx_from_mem];
 function x86_pavgw(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pavgw_mmx];
 function x86_pavgw(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pavgw_mmx];
+function x86_pavgw(r0: __m64; r1: pointer): __m64; [INTERNPROC: fpc_in_x86_pavgw_mmx_from_mem];
 function x86_pmaxub(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pmaxub_mmx];
 function x86_pmaxub(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pmaxub_mmx];
+function x86_pmaxub(r0: __m64; r1: pointer): __m64; [INTERNPROC: fpc_in_x86_pmaxub_mmx_from_mem];
 function x86_pminub(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pminub_mmx];
 function x86_pminub(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pminub_mmx];
+function x86_pminub(r0: __m64; r1: pointer): __m64; [INTERNPROC: fpc_in_x86_pminub_mmx_from_mem];
 function x86_pmaxsw(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pmaxsw_mmx];
 function x86_pmaxsw(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pmaxsw_mmx];
+function x86_pmaxsw(r0: __m64; r1: pointer): __m64; [INTERNPROC: fpc_in_x86_pmaxsw_mmx_from_mem];
 function x86_pminsw(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pminsw_mmx];
 function x86_pminsw(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pminsw_mmx];
-function x86_pextrw(r1: __m64; imm: longint): longword; [INTERNPROC: fpc_in_x86_pextrw_mmx];
-function x86_pinsrw(r0: __m64; r1: longword; imm: longint): __m64; [INTERNPROC: fpc_in_x86_pinsrw_mmx];
-function x86_pmovmskb(r1: __m64): longword; [INTERNPROC: fpc_in_x86_pmovmskb];
+function x86_pminsw(r0: __m64; r1: pointer): __m64; [INTERNPROC: fpc_in_x86_pminsw_mmx_from_mem];
+function x86_pextrw(r1: __m64; imm: longint): NativeUInt; [INTERNPROC: fpc_in_x86_pextrw_mmx]; // result type changed from longword to NativeUInt; no pointer overload is added for pextrw/pinsrw/pmovmskb here
+function x86_pinsrw(r0: __m64; r1: NativeUInt; imm: longint): __m64; [INTERNPROC: fpc_in_x86_pinsrw_mmx];
+function x86_pmovmskb(r1: __m64): NativeUInt; [INTERNPROC: fpc_in_x86_pmovmskb_mmx]; // now bound to the _mmx-suffixed id; the removed line used fpc_in_x86_pmovmskb
 function x86_pshufw(r1: __m64; imm: longint): __m64; [INTERNPROC: fpc_in_x86_pshufw];
 function x86_pshufw(r1: __m64; imm: longint): __m64; [INTERNPROC: fpc_in_x86_pshufw];
-function x86_pmulhuw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmulhuw];
-function x86_psadbw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psadbw];
-function x86_pavgb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pavgb];
-function x86_pavgw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pavgw];
-function x86_pmaxub(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmaxub];
-function x86_pminub(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pminub];
-function x86_pmaxsw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmaxsw];
-function x86_pminsw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pminsw];
-function x86_pextrw(r1: __m128; imm: longint): longword; [INTERNPROC: fpc_in_x86_pextrw];
-function x86_pinsrw(r0: __m128; r1: longword; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrw];
+function x86_pshufw(r1: pointer; imm: longint): __m64; [INTERNPROC: fpc_in_x86_pshufw_from_mem]; // the __m128 overloads removed just above used unsuffixed ids; NOTE(review): presumably re-declared further down with the _sse2 ids - confirm
+function x86_movapd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movapd]; // pointer-in/__m128-out function; its id has no _from_mem suffix, unlike x86_movsd below
+procedure x86_movapd(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movapd_to_mem]; // procedure overload with the pointer first, bound to the _to_mem id
+procedure x86_movntpd(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movntpd_to_mem]; // only a _to_mem procedure is declared for movntpd
+function x86_movhpd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movhpd]; // NOTE(review): takes only a pointer, whereas x86_movlps/x86_movhps also take an r0: __m128 operand - confirm intended
+procedure x86_movhpd(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movhpd_to_mem];
+function x86_movlpd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movlpd];
+procedure x86_movlpd(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movlpd_to_mem];
+function x86_movupd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movupd];
+procedure x86_movupd(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movupd_to_mem];
+function x86_movmskpd(r1: __m128): longword; [INTERNPROC: fpc_in_x86_movmskpd]; // returns longword, whereas the other integer-returning overloads added in this hunk return NativeUInt
+function x86_movsd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movsd_from_mem]; // four movsd overloads: from_mem, to_mem, to_val (returns double), from_val (takes double)
+procedure x86_movsd(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movsd_to_mem];
+function x86_movsd(r1: __m128): double; [INTERNPROC: fpc_in_x86_movsd_to_val];
+function x86_movsd(r1: double): __m128; [INTERNPROC: fpc_in_x86_movsd_from_val];
+function x86_addpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addpd]; // pattern for this group: a register form (r0, r1: __m128) plus a _from_mem overload whose r1 is a pointer
+function x86_addpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_addpd_from_mem];
+function x86_addsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addsd];
+function x86_addsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_addsd_from_mem];
+function x86_divpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_divpd];
+function x86_divpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_divpd_from_mem];
+function x86_divsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_divsd];
+function x86_divsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_divsd_from_mem];
+function x86_maxpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_maxpd];
+function x86_maxpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_maxpd_from_mem];
+function x86_maxsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_maxsd];
+function x86_maxsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_maxsd_from_mem];
+function x86_minpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_minpd];
+function x86_minpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_minpd_from_mem];
+function x86_minsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_minsd];
+function x86_minsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_minsd_from_mem];
+function x86_mulpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_mulpd];
+function x86_mulpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_mulpd_from_mem];
+function x86_mulsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_mulsd];
+function x86_mulsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_mulsd_from_mem];
+function x86_sqrtpd(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_sqrtpd]; // single-source forms take only r1 (vector or pointer)
+function x86_sqrtpd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_sqrtpd_from_mem];
+function x86_sqrtsd(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_sqrtsd];
+function x86_sqrtsd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_sqrtsd_from_mem];
+function x86_subpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_subpd];
+function x86_subpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_subpd_from_mem];
+function x86_subsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_subsd];
+function x86_subsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_subsd_from_mem];
+function x86_andpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_andpd]; // same pairing as before: register form followed by a _from_mem form with r1: pointer
+function x86_andpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_andpd_from_mem];
+function x86_andnpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_andnpd];
+function x86_andnpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_andnpd_from_mem];
+function x86_orpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_orpd];
+function x86_orpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_orpd_from_mem];
+function x86_xorpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_xorpd];
+function x86_xorpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_xorpd_from_mem];
+function x86_cmppd(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmppd]; // cmppd, cmpsd and shufpd carry an extra imm: longint argument
+function x86_cmppd(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmppd_from_mem];
+function x86_cmpsd(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpsd];
+function x86_cmpsd(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpsd_from_mem];
+function x86_comisd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_comisd]; // NOTE(review): per the Intel SDM COMISD and UCOMISD report through EFLAGS; confirm what the __m128 result declared here is meant to carry
+function x86_comisd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_comisd_from_mem];
+function x86_ucomisd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_ucomisd];
+function x86_ucomisd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_ucomisd_from_mem];
+function x86_shufpd(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_shufpd];
+function x86_shufpd(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_shufpd_from_mem];
+function x86_unpckhpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_unpckhpd];
+function x86_unpckhpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_unpckhpd_from_mem];
+function x86_unpcklpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_unpcklpd];
+function x86_unpcklpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_unpcklpd_from_mem];
+function x86_cvtdq2pd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvtdq2pd]; // conversion group: operand and result types vary per declaration (__m64, __m128, NativeInt, longword, pointer)
+function x86_cvtdq2pd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtdq2pd_from_mem];
+function x86_cvtdq2ps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvtdq2ps];
+function x86_cvtdq2ps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtdq2ps_from_mem];
+function x86_cvtpd2dq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvtpd2dq];
+function x86_cvtpd2dq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtpd2dq_from_mem];
+function x86_cvtpd2pi(r0: __m64; r1: __m128): __m64; [INTERNPROC: fpc_in_x86_cvtpd2pi]; // r0 and the result use the 64-bit __m64 type here
+function x86_cvtpd2pi(r0: __m64; r1: pointer): __m64; [INTERNPROC: fpc_in_x86_cvtpd2pi_from_mem];
+function x86_cvtpd2ps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvtpd2ps];
+function x86_cvtpd2ps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtpd2ps_from_mem];
+function x86_cvtpi2pd(r0: __m128; r1: __m64): __m128; [INTERNPROC: fpc_in_x86_cvtpi2pd];
+function x86_cvtpi2pd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtpi2pd_from_mem];
+function x86_cvtps2dq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvtps2dq];
+function x86_cvtps2dq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtps2dq_from_mem];
+function x86_cvtps2pd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvtps2pd];
+function x86_cvtps2pd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtps2pd_from_mem];
+function x86_cvtsd2si(r0: NativeInt; r1: __m128): NativeInt; [INTERNPROC: fpc_in_x86_cvtsd2si]; // NOTE(review): r0 has the same type as the result; presumably it stands for the destination register - confirm that callers really have to pass it
+function x86_cvtsd2si(r0: NativeInt; r1: pointer): NativeInt; [INTERNPROC: fpc_in_x86_cvtsd2si_from_mem];
+function x86_cvtsd2ss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvtsd2ss];
+function x86_cvtsd2ss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtsd2ss_from_mem];
+function x86_cvtsi2sd(r0: __m128; r1: longword): __m128; [INTERNPROC: fpc_in_x86_cvtsi2sd]; // NOTE(review): r1 is unsigned (longword) although the mnemonic suggests a signed integer source - confirm against the generator data
+function x86_cvtsi2sd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtsi2sd_from_mem];
+function x86_cvtss2sd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvtss2sd];
+function x86_cvtss2sd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtss2sd_from_mem];
+function x86_cvttpd2dq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvttpd2dq];
+function x86_cvttpd2dq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvttpd2dq_from_mem];
+function x86_cvttpd2pi(r0: __m64; r1: __m128): __m64; [INTERNPROC: fpc_in_x86_cvttpd2pi];
+function x86_cvttpd2pi(r0: __m64; r1: pointer): __m64; [INTERNPROC: fpc_in_x86_cvttpd2pi_from_mem];
+function x86_cvttps2dq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvttps2dq];
+function x86_cvttps2dq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvttps2dq_from_mem];
+function x86_cvttsd2si(r0: NativeInt; r1: __m128): NativeInt; [INTERNPROC: fpc_in_x86_cvttsd2si]; // same shape as x86_cvtsd2si: NativeInt r0 in, NativeInt result
+function x86_cvttsd2si(r0: NativeInt; r1: pointer): NativeInt; [INTERNPROC: fpc_in_x86_cvttsd2si_from_mem];
+function x86_movd(r1: longword): __m128; [INTERNPROC: fpc_in_x86_movd_from_reg]; // x86_movd has four overloads: from an integer value, from memory, to an integer value, to memory
+function x86_movd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movd_from_mem];
+function x86_movd(r1: __m128): longword; [INTERNPROC: fpc_in_x86_movd_to_reg];
+procedure x86_movd(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movd_to_mem];
+function x86_movq(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movq_from_mem]; // x86_movq is declared here only in its load and store memory forms
+procedure x86_movq(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movq_to_mem];
+function x86_pmovmskb(r0: longword; r1: __m128): longword; [INTERNPROC: fpc_in_x86_pmovmskb]; // NOTE(review): takes an r0: longword input in addition to the vector, unlike x86_movmskpd which takes only r1 - confirm this is intended
+function x86_pextrw(r1: __m128; imm: longint): word; [INTERNPROC: fpc_in_x86_pextrw_sse2]; // bound to the _sse2-suffixed intrinsic number; result is a 16-bit word
+function x86_pinsrw(r0: __m128; r1: longword; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrw_sse2]; // value to insert is passed as longword, or read through a pointer in the _from_mem form
+function x86_pinsrw(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrw_from_mem];
+function x86_packssdw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_packssdw]; // integer vector group: each mnemonic has a register form and a _from_mem form with r1: pointer
+function x86_packssdw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_packssdw_from_mem];
+function x86_packsswb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_packsswb];
+function x86_packsswb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_packsswb_from_mem];
+function x86_packuswb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_packuswb];
+function x86_packuswb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_packuswb_from_mem];
+function x86_paddb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_paddb];
+function x86_paddb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_paddb_from_mem];
+function x86_paddw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_paddw];
+function x86_paddw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_paddw_from_mem];
+function x86_paddd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_paddd];
+function x86_paddd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_paddd_from_mem];
+function x86_paddq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_paddq];
+function x86_paddq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_paddq_from_mem];
+function x86_paddsb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_paddsb];
+function x86_paddsb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_paddsb_from_mem];
+function x86_paddsw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_paddsw];
+function x86_paddsw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_paddsw_from_mem];
+function x86_paddusb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_paddusb];
+function x86_paddusb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_paddusb_from_mem];
+function x86_paddusw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_paddusw];
+function x86_paddusw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_paddusw_from_mem];
+function x86_pand(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pand];
+function x86_pand(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pand_from_mem];
+function x86_pandn(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pandn];
+function x86_pandn(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pandn_from_mem];
+function x86_por(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_por];
+function x86_por(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_por_from_mem];
+function x86_pxor(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pxor];
+function x86_pxor(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pxor_from_mem];
+function x86_pcmpeqb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pcmpeqb];
+function x86_pcmpeqb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pcmpeqb_from_mem];
+function x86_pcmpeqw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pcmpeqw];
+function x86_pcmpeqw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pcmpeqw_from_mem];
+function x86_pcmpeqd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pcmpeqd];
+function x86_pcmpeqd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pcmpeqd_from_mem];
+function x86_pcmpgtb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pcmpgtb];
+function x86_pcmpgtb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pcmpgtb_from_mem];
+function x86_pcmpgtw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pcmpgtw];
+function x86_pcmpgtw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pcmpgtw_from_mem];
+function x86_pcmpgtd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pcmpgtd];
+function x86_pcmpgtd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pcmpgtd_from_mem];
+function x86_pmullw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmullw];
+function x86_pmullw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmullw_from_mem];
+function x86_pmulhw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmulhw];
+function x86_pmulhw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmulhw_from_mem];
+function x86_pmulhuw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmulhuw_sse2]; // register form binds to the _sse2-suffixed number while the memory form keeps the plain name
+function x86_pmulhuw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmulhuw_from_mem];
+function x86_pmuludq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmuludq];
+function x86_pmuludq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmuludq_from_mem];
+function x86_psllw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psllw_sse2]; // shift group: three overloads per mnemonic - second operand as a vector, as a pointer (_from_mem), or as an immediate (_sse2_imm, imm: longint)
+function x86_psllw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psllw_from_mem];
+function x86_psllw(r0: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_psllw_sse2_imm];
+function x86_pslld(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pslld_sse2];
+function x86_pslld(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pslld_from_mem];
+function x86_pslld(r0: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pslld_sse2_imm];
+function x86_psllq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psllq_sse2];
+function x86_psllq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psllq_from_mem];
+function x86_psllq(r0: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_psllq_sse2_imm];
+function x86_psrad(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psrad_sse2];
+function x86_psrad(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psrad_from_mem];
+function x86_psrad(r0: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_psrad_sse2_imm];
+function x86_psraw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psraw_sse2];
+function x86_psraw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psraw_from_mem];
+function x86_psraw(r0: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_psraw_sse2_imm];
+function x86_psrlw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psrlw_sse2];
+function x86_psrlw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psrlw_from_mem];
+function x86_psrlw(r0: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_psrlw_sse2_imm];
+function x86_psrld(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psrld_sse2];
+function x86_psrld(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psrld_from_mem];
+function x86_psrld(r0: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_psrld_sse2_imm];
+function x86_psrlq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psrlq_sse2];
+function x86_psrlq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psrlq_from_mem];
+function x86_psrlq(r0: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_psrlq_sse2_imm];
+function x86_psubb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psubb]; // register form plus _from_mem form (r1: pointer) for every mnemonic in this group
+function x86_psubb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psubb_from_mem];
+function x86_psubw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psubw];
+function x86_psubw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psubw_from_mem];
+function x86_psubd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psubd];
+function x86_psubd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psubd_from_mem];
+function x86_psubq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psubq];
+function x86_psubq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psubq_from_mem];
+function x86_psubsb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psubsb];
+function x86_psubsb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psubsb_from_mem];
+function x86_psubsw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psubsw];
+function x86_psubsw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psubsw_from_mem];
+function x86_pmaddwd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmaddwd];
+function x86_pmaddwd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmaddwd_from_mem];
+function x86_psubusb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psubusb];
+function x86_psubusb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psubusb_from_mem];
+function x86_psubusw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psubusw];
+function x86_psubusw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psubusw_from_mem];
+function x86_punpckhbw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_punpckhbw];
+function x86_punpckhbw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_punpckhbw_from_mem];
+function x86_punpckhwd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_punpckhwd];
+function x86_punpckhwd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_punpckhwd_from_mem];
+function x86_punpckhdq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_punpckhdq];
+function x86_punpckhdq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_punpckhdq_from_mem];
+function x86_punpcklbw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_punpcklbw];
+function x86_punpcklbw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_punpcklbw_from_mem];
+function x86_punpcklwd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_punpcklwd];
+function x86_punpcklwd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_punpcklwd_from_mem];
+function x86_punpckldq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_punpckldq];
+function x86_punpckldq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_punpckldq_from_mem];
+function x86_pavgb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pavgb_sse2]; // from here to the end of the group the register forms bind to _sse2-suffixed numbers; the _from_mem forms keep the plain name
+function x86_pavgb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pavgb_from_mem];
+function x86_pavgw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pavgw_sse2];
+function x86_pavgw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pavgw_from_mem];
+function x86_pminub(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pminub_sse2];
+function x86_pminub(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pminub_from_mem];
+function x86_pminsw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pminsw_sse2];
+function x86_pminsw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pminsw_from_mem];
+function x86_pmaxsw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmaxsw_sse2];
+function x86_pmaxsw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmaxsw_from_mem];
+function x86_pmaxub(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmaxub_sse2];
+function x86_pmaxub(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmaxub_from_mem];
+function x86_psadbw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psadbw_sse2];
+function x86_psadbw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psadbw_from_mem];
+procedure x86_maskmovdqu(addr: pointer; r0, r1: __m128); [INTERNPROC: fpc_in_x86_maskmovdqu]; // three-argument form: an address plus two vector operands, no result
+function x86_movdq2q(r1: __m128): __m64; [INTERNPROC: fpc_in_x86_movdq2q];
+function x86_movdqa(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movdqa_from_mem];
+procedure x86_movdqa(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movdqa]; // NOTE(review): store-to-memory procedure bound to the unsuffixed number, whereas x86_movupd and x86_movd use a _to_mem suffix for the same shape - confirm naming
+function x86_movdqu(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movdqu_from_mem];
+procedure x86_movdqu(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movdqu]; // same unsuffixed store naming as x86_movdqa
+function x86_movq2dq(r1: __m64): __m128; [INTERNPROC: fpc_in_x86_movq2dq];
+procedure x86_movntdq(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movntdq]; // store-only declaration: pointer destination, vector source
+function x86_pshufhw(r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pshufhw]; // shuffle forms take only a source (r1) and an immediate; the register variants are listed first, then the _from_mem variants
+function x86_pshuflw(r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pshuflw];
+function x86_pshufd(r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pshufd];
+function x86_pshufhw(r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pshufhw_from_mem];
+function x86_pshuflw(r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pshuflw_from_mem];
+function x86_pshufd(r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pshufd_from_mem];
+function x86_pslldq(r0: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pslldq]; // pslldq and psrldq are declared only with an immediate second operand
+function x86_psrldq(r0: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_psrldq];
+function x86_punpckhqdq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_punpckhqdq];
+function x86_punpckhqdq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_punpckhqdq_from_mem];
+function x86_punpcklqdq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_punpcklqdq];
+function x86_punpcklqdq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_punpcklqdq_from_mem];
+function x86_addsubps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addsubps]; // same register/_from_mem pairing as the preceding groups
+function x86_addsubps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_addsubps_from_mem];
+function x86_addsubpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addsubpd];
+function x86_addsubpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_addsubpd_from_mem];
+function x86_movddup(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_movddup]; // the three dup forms are single-source: r1 as a vector or as a pointer
+function x86_movddup(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movddup_from_mem];
+function x86_movsldup(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_movsldup];
+function x86_movsldup(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movsldup_from_mem];
+function x86_movshdup(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_movshdup];
+function x86_movshdup(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movshdup_from_mem];
+function x86_haddps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_haddps];
+function x86_haddps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_haddps_from_mem];
+function x86_haddpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_haddpd];
+function x86_haddpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_haddpd_from_mem];
+function x86_hsubps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_hsubps];
+function x86_hsubps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_hsubps_from_mem];
+function x86_hsubpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_hsubpd];
+function x86_hsubpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_hsubpd_from_mem];
+function x86_lddqu(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_lddqu]; // load-only declaration: pointer in, vector out; the intrinsic name carries no _from_mem suffix
+function x86_psignb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psignb]; // register form plus _from_mem form (r1: pointer) for every mnemonic in this group
+function x86_psignb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psignb_from_mem];
+function x86_psignw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psignw];
+function x86_psignw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psignw_from_mem];
+function x86_psignd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psignd];
+function x86_psignd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psignd_from_mem];
+function x86_pshufb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pshufb];
+function x86_pshufb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pshufb_from_mem];
+function x86_pmulhrsw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmulhrsw];
+function x86_pmulhrsw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmulhrsw_from_mem];
+function x86_pmaddubsw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmaddubsw];
+function x86_pmaddubsw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmaddubsw_from_mem];
+function x86_phsubw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_phsubw];
+function x86_phsubw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_phsubw_from_mem];
+function x86_phsubsw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_phsubsw];
+function x86_phsubsw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_phsubsw_from_mem];
+function x86_phsubd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_phsubd];
+function x86_phsubd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_phsubd_from_mem];
+function x86_phaddsw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_phaddsw];
+function x86_phaddsw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_phaddsw_from_mem];
+function x86_phaddw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_phaddw];
+function x86_phaddw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_phaddw_from_mem];
+function x86_phaddd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_phaddd];
+function x86_phaddd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_phaddd_from_mem];
+function x86_palignr(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_palignr]; // palignr adds an imm: longint argument to the two-operand shape
+function x86_palignr(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_palignr_from_mem];
+function x86_pabsb(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pabsb]; // the pabs forms are single-source: r1 only
+function x86_pabsb(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pabsb_from_mem];
+function x86_pabsw(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pabsw];
+function x86_pabsw(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pabsw_from_mem];
+function x86_pabsd(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pabsd];
+function x86_pabsd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pabsd_from_mem];
+function x86_dpps(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_dpps]; // immediate-controlled forms in this group pass the immediate as imm: longint
+function x86_dpps(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_dpps_from_mem];
+function x86_dppd(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_dppd];
+function x86_dppd(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_dppd_from_mem];
+function x86_blendps(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_blendps];
+function x86_blendps(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_blendps_from_mem];
+function x86_blendvps(r0, r1: __m128; mask: __m128): __m128; [INTERNPROC: fpc_in_x86_blendvps]; // variable blends take an explicit mask: __m128 argument; presumably this is the implicit_xmm0 operand type of the generator - confirm
+function x86_blendvps(r0: __m128; r1: pointer; mask: __m128): __m128; [INTERNPROC: fpc_in_x86_blendvps_from_mem];
+function x86_blendpd(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_blendpd];
+function x86_blendpd(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_blendpd_from_mem];
+function x86_blendvpd(r0, r1: __m128; mask: __m128): __m128; [INTERNPROC: fpc_in_x86_blendvpd];
+function x86_blendvpd(r0: __m128; r1: pointer; mask: __m128): __m128; [INTERNPROC: fpc_in_x86_blendvpd_from_mem];
+function x86_roundps(r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundps]; // round forms are single-source plus an immediate
+function x86_roundps(r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundps_from_mem];
+function x86_roundss(r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundss];
+function x86_roundss(r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundss_from_mem];
+function x86_roundpd(r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundpd];
+function x86_roundpd(r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundpd_from_mem];
+function x86_roundsd(r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundsd];
+function x86_roundsd(r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundsd_from_mem];
+function x86_insertps(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_insertps];
+function x86_insertps(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_insertps_from_mem];
+function x86_extractps(r1: __m128; imm: longint): longword; [INTERNPROC: fpc_in_x86_extractps]; // value-returning form: result is a longword
+procedure x86_extractps(r0: pointer; r1: __m128; imm: longint); [INTERNPROC: fpc_in_x86_extractps_from_mem]; // NOTE(review): this procedure takes a destination pointer and has no result, yet its number is named _from_mem; other store forms such as x86_pextrb use _to_mem - confirm
+function x86_mpsadbw(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_mpsadbw];
+function x86_mpsadbw(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_mpsadbw_from_mem];
+function x86_phminposuw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_phminposuw];
+function x86_phminposuw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_phminposuw_from_mem];
+function x86_pmulld(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmulld];
+function x86_pmulld(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmulld_from_mem];
+function x86_pmuldq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmuldq];
+function x86_pmuldq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmuldq_from_mem];
+function x86_pblendvb(r0, r1: __m128; mask: __m128): __m128; [INTERNPROC: fpc_in_x86_pblendvb];
+function x86_pblendvb(r0: __m128; r1: pointer; mask: __m128): __m128; [INTERNPROC: fpc_in_x86_pblendvb_from_mem];
+function x86_pblendw(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pblendw];
+function x86_pblendw(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pblendw_from_mem];
+function x86_pminsb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pminsb];
+function x86_pminsb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pminsb_from_mem];
+function x86_pminuw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pminuw];
+function x86_pminuw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pminuw_from_mem];
+function x86_pminsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pminsd];
+function x86_pminsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pminsd_from_mem];
+function x86_pminud(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pminud];
+function x86_pminud(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pminud_from_mem];
+function x86_pmaxsb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmaxsb];
+function x86_pmaxsb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmaxsb_from_mem];
+function x86_pmaxuw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmaxuw];
+function x86_pmaxuw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmaxuw_from_mem];
+function x86_pmaxsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmaxsd];
+function x86_pmaxsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmaxsd_from_mem];
+function x86_pmaxud(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmaxud];
+function x86_pmaxud(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmaxud_from_mem];
+function x86_pinsrb(r0: __m128; r1: longword; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrb]; // scalar inserts pass the value as longword, or read it through a pointer in the _from_mem form
+function x86_pinsrb(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrb_from_mem];
+function x86_pinsrd(r0: __m128; r1: longword; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrd];
+function x86_pinsrd(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrd_from_mem];
+{$ifdef X86_64} // the pinsrq and pextrq declarations are only compiled when X86_64 is defined; each one is wrapped in its own ifdef/endif pair
+function x86_pinsrq(r0: __m128; r1: NativeUInt; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrq]; // value to insert is passed as NativeUInt
+{$endif}
+{$ifdef X86_64}
+function x86_pinsrq(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrq_from_mem];
+{$endif}
+function x86_pextrb(r1: __m128; imm: longint): byte; [INTERNPROC: fpc_in_x86_pextrb]; // extract forms come as a value-returning function and a _to_mem procedure writing through r0
+procedure x86_pextrb(r0: pointer; r1: __m128; imm: longint); [INTERNPROC: fpc_in_x86_pextrb_to_mem];
+procedure x86_pextrw(r0: pointer; r1: __m128; imm: longint); [INTERNPROC: fpc_in_x86_pextrw_sse41_to_mem]; // only the store form is declared here, bound to the _sse41_to_mem number; the value-returning x86_pextrw overload uses the _sse2 number
+function x86_pextrd(r1: __m128; imm: longint): longword; [INTERNPROC: fpc_in_x86_pextrd];
+procedure x86_pextrd(r0: pointer; r1: __m128; imm: longint); [INTERNPROC: fpc_in_x86_pextrd_to_mem];
+{$ifdef X86_64}
+function x86_pextrq(r1: __m128; imm: longint): longword; [INTERNPROC: fpc_in_x86_pextrq]; // NOTE(review): result is longword although x86_pinsrq passes its element as NativeUInt - the result looks too narrow for a quadword, confirm against the generator data
+{$endif}
+{$ifdef X86_64}
+procedure x86_pextrq(r0: pointer; r1: __m128; imm: longint); [INTERNPROC: fpc_in_x86_pextrq_to_mem];
+{$endif}
+function x86_pmovsxbw(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovsxbw]; // the pmovsx and pmovzx forms are single-source: a register form and a _from_mem form
+function x86_pmovsxbw(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovsxbw_from_mem];
+function x86_pmovzxbw(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovzxbw];
+function x86_pmovzxbw(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovzxbw_from_mem];
+function x86_pmovsxbd(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovsxbd];
+function x86_pmovsxbd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovsxbd_from_mem];
+function x86_pmovzxbd(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovzxbd];
+function x86_pmovzxbd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovzxbd_from_mem];
+function x86_pmovsxbq(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovsxbq];
+function x86_pmovsxbq(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovsxbq_from_mem];
+function x86_pmovzxbq(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovzxbq];
+function x86_pmovzxbq(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovzxbq_from_mem];
+function x86_pmovsxwd(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovsxwd];
+function x86_pmovsxwd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovsxwd_from_mem];
+function x86_pmovzxwd(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovzxwd];
+function x86_pmovzxwd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovzxwd_from_mem];
+function x86_pmovsxwq(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovsxwq];
+function x86_pmovsxwq(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovsxwq_from_mem];
+function x86_pmovzxwq(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovzxwq];
+function x86_pmovzxwq(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovzxwq_from_mem];
+function x86_pmovsxdq(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovsxdq];
+function x86_pmovsxdq(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovsxdq_from_mem];
+function x86_pmovzxdq(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovzxdq];
+function x86_pmovzxdq(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovzxdq_from_mem];
+function x86_ptest(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_ptest]; // NOTE(review): per the Intel SDM PTEST only sets flags; confirm what the __m128 result declared here represents
+function x86_ptest(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_ptest_from_mem];
+function x86_pcmpeqq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pcmpeqq];
+function x86_pcmpeqq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pcmpeqq_from_mem];
+function x86_packusdw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_packusdw];
+function x86_packusdw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_packusdw_from_mem];
+function x86_movntdqa(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movntdqa]; // load-only declaration: pointer in, vector out; the intrinsic name carries no _from_mem suffix
+function x86_pcmpestri(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pcmpestri]; // NOTE(review): all four string-compare mnemonics are declared with a __m128 result; per the Intel SDM the index variants produce their result in ECX - confirm the declared result type
+function x86_pcmpestri(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pcmpestri_from_mem];
+function x86_pcmpestrm(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pcmpestrm];
+function x86_pcmpestrm(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pcmpestrm_from_mem];
+function x86_pcmpistri(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pcmpistri];
+function x86_pcmpistri(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pcmpistri_from_mem];
+function x86_pcmpistrm(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pcmpistrm];
+function x86_pcmpistrm(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pcmpistrm_from_mem];
+function x86_pcmpgtq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pcmpgtq];
+function x86_pcmpgtq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pcmpgtq_from_mem];

+ 2 - 0
rtl/x86_64/cpuinnr.inc

@@ -28,4 +28,6 @@
   fpc_in_x86_get_gs   = fpc_in_cpu_first+13;
   fpc_in_x86_get_gs   = fpc_in_cpu_first+13;
   
   
    { include automatically generated numbers }
    { include automatically generated numbers }
+   {$ifdef VER3_3_1}
    {$i cpumminnr.inc}
    {$i cpumminnr.inc}
+   {$endif}

+ 493 - 57
rtl/x86_64/cpumminnr.inc

@@ -8,60 +8,496 @@
   fpc_in_x86_movss_from_val = fpc_in_x86_mm_first+7;
   fpc_in_x86_movss_from_val = fpc_in_x86_mm_first+7;
   fpc_in_x86_movlps = fpc_in_x86_mm_first+8;
   fpc_in_x86_movlps = fpc_in_x86_mm_first+8;
   fpc_in_x86_movhps = fpc_in_x86_mm_first+9;
   fpc_in_x86_movhps = fpc_in_x86_mm_first+9;
-  fpc_in_x86_movlhps = fpc_in_x86_mm_first+10;
-  fpc_in_x86_movhlps = fpc_in_x86_mm_first+11;
-  fpc_in_x86_addss = fpc_in_x86_mm_first+12;
-  fpc_in_x86_subss = fpc_in_x86_mm_first+13;
-  fpc_in_x86_mulss = fpc_in_x86_mm_first+14;
-  fpc_in_x86_divss = fpc_in_x86_mm_first+15;
-  fpc_in_x86_rcpss = fpc_in_x86_mm_first+16;
-  fpc_in_x86_sqrtss = fpc_in_x86_mm_first+17;
-  fpc_in_x86_maxss = fpc_in_x86_mm_first+18;
-  fpc_in_x86_minss = fpc_in_x86_mm_first+19;
-  fpc_in_x86_rsqrtss = fpc_in_x86_mm_first+20;
-  fpc_in_x86_addps = fpc_in_x86_mm_first+21;
-  fpc_in_x86_subps = fpc_in_x86_mm_first+22;
-  fpc_in_x86_mulps = fpc_in_x86_mm_first+23;
-  fpc_in_x86_divps = fpc_in_x86_mm_first+24;
-  fpc_in_x86_rcpps = fpc_in_x86_mm_first+25;
-  fpc_in_x86_sqrtps = fpc_in_x86_mm_first+26;
-  fpc_in_x86_maxps = fpc_in_x86_mm_first+27;
-  fpc_in_x86_minps = fpc_in_x86_mm_first+28;
-  fpc_in_x86_rsqrtps = fpc_in_x86_mm_first+29;
-  fpc_in_x86_andps = fpc_in_x86_mm_first+30;
-  fpc_in_x86_orps = fpc_in_x86_mm_first+31;
-  fpc_in_x86_xorps = fpc_in_x86_mm_first+32;
-  fpc_in_x86_andnps = fpc_in_x86_mm_first+33;
-  fpc_in_x86_cmpss = fpc_in_x86_mm_first+34;
-  fpc_in_x86_cmpps = fpc_in_x86_mm_first+35;
-  fpc_in_x86_shufps = fpc_in_x86_mm_first+36;
-  fpc_in_x86_unpckhps = fpc_in_x86_mm_first+37;
-  fpc_in_x86_unpcklps = fpc_in_x86_mm_first+38;
-  fpc_in_x86_cvtsi2ss = fpc_in_x86_mm_first+39;
-  fpc_in_x86_cvtss2si = fpc_in_x86_mm_first+40;
-  fpc_in_x86_cvttss2si = fpc_in_x86_mm_first+41;
-  fpc_in_x86_cvtpi2ps = fpc_in_x86_mm_first+42;
-  fpc_in_x86_cvtps2pi = fpc_in_x86_mm_first+43;
-  fpc_in_x86_cvttps2pi = fpc_in_x86_mm_first+44;
-  fpc_in_x86_pmulhuw_mmx = fpc_in_x86_mm_first+45;
-  fpc_in_x86_psadbw_mmx = fpc_in_x86_mm_first+46;
-  fpc_in_x86_pavgb_mmx = fpc_in_x86_mm_first+47;
-  fpc_in_x86_pavgw_mmx = fpc_in_x86_mm_first+48;
-  fpc_in_x86_pmaxub_mmx = fpc_in_x86_mm_first+49;
-  fpc_in_x86_pminub_mmx = fpc_in_x86_mm_first+50;
-  fpc_in_x86_pmaxsw_mmx = fpc_in_x86_mm_first+51;
-  fpc_in_x86_pminsw_mmx = fpc_in_x86_mm_first+52;
-  fpc_in_x86_pextrw_mmx = fpc_in_x86_mm_first+53;
-  fpc_in_x86_pinsrw_mmx = fpc_in_x86_mm_first+54;
-  fpc_in_x86_pmovmskb = fpc_in_x86_mm_first+55;
-  fpc_in_x86_pshufw = fpc_in_x86_mm_first+56;
-  fpc_in_x86_pmulhuw = fpc_in_x86_mm_first+57;
-  fpc_in_x86_psadbw = fpc_in_x86_mm_first+58;
-  fpc_in_x86_pavgb = fpc_in_x86_mm_first+59;
-  fpc_in_x86_pavgw = fpc_in_x86_mm_first+60;
-  fpc_in_x86_pmaxub = fpc_in_x86_mm_first+61;
-  fpc_in_x86_pminub = fpc_in_x86_mm_first+62;
-  fpc_in_x86_pmaxsw = fpc_in_x86_mm_first+63;
-  fpc_in_x86_pminsw = fpc_in_x86_mm_first+64;
-  fpc_in_x86_pextrw = fpc_in_x86_mm_first+65;
-  fpc_in_x86_pinsrw = fpc_in_x86_mm_first+66;
+  fpc_in_x86_movlps_to_mem = fpc_in_x86_mm_first+10;
+  fpc_in_x86_movhps_to_mem = fpc_in_x86_mm_first+11;
+  fpc_in_x86_movlhps = fpc_in_x86_mm_first+12;
+  fpc_in_x86_movhlps = fpc_in_x86_mm_first+13;
+  fpc_in_x86_addss = fpc_in_x86_mm_first+14;
+  fpc_in_x86_addss_from_mem = fpc_in_x86_mm_first+15;
+  fpc_in_x86_subss = fpc_in_x86_mm_first+16;
+  fpc_in_x86_subss_from_mem = fpc_in_x86_mm_first+17;
+  fpc_in_x86_mulss = fpc_in_x86_mm_first+18;
+  fpc_in_x86_mulss_from_mem = fpc_in_x86_mm_first+19;
+  fpc_in_x86_divss = fpc_in_x86_mm_first+20;
+  fpc_in_x86_divss_from_mem = fpc_in_x86_mm_first+21;
+  fpc_in_x86_rcpss = fpc_in_x86_mm_first+22;
+  fpc_in_x86_rcpss_from_mem = fpc_in_x86_mm_first+23;
+  fpc_in_x86_sqrtss = fpc_in_x86_mm_first+24;
+  fpc_in_x86_sqrtss_from_mem = fpc_in_x86_mm_first+25;
+  fpc_in_x86_maxss = fpc_in_x86_mm_first+26;
+  fpc_in_x86_maxss_from_mem = fpc_in_x86_mm_first+27;
+  fpc_in_x86_minss = fpc_in_x86_mm_first+28;
+  fpc_in_x86_minss_from_mem = fpc_in_x86_mm_first+29;
+  fpc_in_x86_rsqrtss = fpc_in_x86_mm_first+30;
+  fpc_in_x86_rsqrtss_from_mem = fpc_in_x86_mm_first+31;
+  fpc_in_x86_addps = fpc_in_x86_mm_first+32;
+  fpc_in_x86_addps_from_mem = fpc_in_x86_mm_first+33;
+  fpc_in_x86_subps = fpc_in_x86_mm_first+34;
+  fpc_in_x86_subps_from_mem = fpc_in_x86_mm_first+35;
+  fpc_in_x86_mulps = fpc_in_x86_mm_first+36;
+  fpc_in_x86_mulps_from_mem = fpc_in_x86_mm_first+37;
+  fpc_in_x86_divps = fpc_in_x86_mm_first+38;
+  fpc_in_x86_divps_from_mem = fpc_in_x86_mm_first+39;
+  fpc_in_x86_rcpps = fpc_in_x86_mm_first+40;
+  fpc_in_x86_rcpps_from_mem = fpc_in_x86_mm_first+41;
+  fpc_in_x86_sqrtps = fpc_in_x86_mm_first+42;
+  fpc_in_x86_sqrtps_from_mem = fpc_in_x86_mm_first+43;
+  fpc_in_x86_maxps = fpc_in_x86_mm_first+44;
+  fpc_in_x86_maxps_from_mem = fpc_in_x86_mm_first+45;
+  fpc_in_x86_minps = fpc_in_x86_mm_first+46;
+  fpc_in_x86_minps_from_mem = fpc_in_x86_mm_first+47;
+  fpc_in_x86_rsqrtps = fpc_in_x86_mm_first+48;
+  fpc_in_x86_rsqrtps_from_mem = fpc_in_x86_mm_first+49;
+  fpc_in_x86_andps = fpc_in_x86_mm_first+50;
+  fpc_in_x86_andps_from_mem = fpc_in_x86_mm_first+51;
+  fpc_in_x86_orps = fpc_in_x86_mm_first+52;
+  fpc_in_x86_orps_from_mem = fpc_in_x86_mm_first+53;
+  fpc_in_x86_xorps = fpc_in_x86_mm_first+54;
+  fpc_in_x86_xorps_from_mem = fpc_in_x86_mm_first+55;
+  fpc_in_x86_andnps = fpc_in_x86_mm_first+56;
+  fpc_in_x86_andnps_from_mem = fpc_in_x86_mm_first+57;
+  fpc_in_x86_cmpss = fpc_in_x86_mm_first+58;
+  fpc_in_x86_cmpss_from_mem = fpc_in_x86_mm_first+59;
+  fpc_in_x86_cmpps = fpc_in_x86_mm_first+60;
+  fpc_in_x86_cmpps_from_mem = fpc_in_x86_mm_first+61;
+  fpc_in_x86_shufps = fpc_in_x86_mm_first+62;
+  fpc_in_x86_shufps_from_mem = fpc_in_x86_mm_first+63;
+  fpc_in_x86_unpckhps = fpc_in_x86_mm_first+64;
+  fpc_in_x86_unpckhps_from_mem = fpc_in_x86_mm_first+65;
+  fpc_in_x86_unpcklps = fpc_in_x86_mm_first+66;
+  fpc_in_x86_unpcklps_from_mem = fpc_in_x86_mm_first+67;
+  fpc_in_x86_cvtsi2ss = fpc_in_x86_mm_first+68;
+  fpc_in_x86_cvtsi2ss_from_mem = fpc_in_x86_mm_first+69;
+  fpc_in_x86_cvtss2si = fpc_in_x86_mm_first+70;
+  fpc_in_x86_cvtss2si_from_mem = fpc_in_x86_mm_first+71;
+  fpc_in_x86_cvttss2si = fpc_in_x86_mm_first+72;
+  fpc_in_x86_cvttss2si_from_mem = fpc_in_x86_mm_first+73;
+  fpc_in_x86_cvtpi2ps = fpc_in_x86_mm_first+74;
+  fpc_in_x86_cvtpi2ps_from_mem = fpc_in_x86_mm_first+75;
+  fpc_in_x86_cvtps2pi = fpc_in_x86_mm_first+76;
+  fpc_in_x86_cvtps2pi_from_mem = fpc_in_x86_mm_first+77;
+  fpc_in_x86_cvttps2pi = fpc_in_x86_mm_first+78;
+  fpc_in_x86_cvttps2pi_from_mem = fpc_in_x86_mm_first+79;
+  fpc_in_x86_pmulhuw_mmx = fpc_in_x86_mm_first+80;
+  fpc_in_x86_pmulhuw_mmx_from_mem = fpc_in_x86_mm_first+81;
+  fpc_in_x86_psadbw_mmx = fpc_in_x86_mm_first+82;
+  fpc_in_x86_psadbw_mmx_from_mem = fpc_in_x86_mm_first+83;
+  fpc_in_x86_pavgb_mmx = fpc_in_x86_mm_first+84;
+  fpc_in_x86_pavgb_mmx_from_mem = fpc_in_x86_mm_first+85;
+  fpc_in_x86_pavgw_mmx = fpc_in_x86_mm_first+86;
+  fpc_in_x86_pavgw_mmx_from_mem = fpc_in_x86_mm_first+87;
+  fpc_in_x86_pmaxub_mmx = fpc_in_x86_mm_first+88;
+  fpc_in_x86_pmaxub_mmx_from_mem = fpc_in_x86_mm_first+89;
+  fpc_in_x86_pminub_mmx = fpc_in_x86_mm_first+90;
+  fpc_in_x86_pminub_mmx_from_mem = fpc_in_x86_mm_first+91;
+  fpc_in_x86_pmaxsw_mmx = fpc_in_x86_mm_first+92;
+  fpc_in_x86_pmaxsw_mmx_from_mem = fpc_in_x86_mm_first+93;
+  fpc_in_x86_pminsw_mmx = fpc_in_x86_mm_first+94;
+  fpc_in_x86_pminsw_mmx_from_mem = fpc_in_x86_mm_first+95;
+  fpc_in_x86_pextrw_mmx = fpc_in_x86_mm_first+96;
+  fpc_in_x86_pinsrw_mmx = fpc_in_x86_mm_first+97;
+  fpc_in_x86_pmovmskb_mmx = fpc_in_x86_mm_first+98;
+  fpc_in_x86_pshufw = fpc_in_x86_mm_first+99;
+  fpc_in_x86_pshufw_from_mem = fpc_in_x86_mm_first+100;
+  fpc_in_x86_movapd = fpc_in_x86_mm_first+101;
+  fpc_in_x86_movapd_to_mem = fpc_in_x86_mm_first+102;
+  fpc_in_x86_movntpd_to_mem = fpc_in_x86_mm_first+103;
+  fpc_in_x86_movhpd = fpc_in_x86_mm_first+104;
+  fpc_in_x86_movhpd_to_mem = fpc_in_x86_mm_first+105;
+  fpc_in_x86_movlpd = fpc_in_x86_mm_first+106;
+  fpc_in_x86_movlpd_to_mem = fpc_in_x86_mm_first+107;
+  fpc_in_x86_movupd = fpc_in_x86_mm_first+108;
+  fpc_in_x86_movupd_to_mem = fpc_in_x86_mm_first+109;
+  fpc_in_x86_movmskpd = fpc_in_x86_mm_first+110;
+  fpc_in_x86_movsd_from_mem = fpc_in_x86_mm_first+111;
+  fpc_in_x86_movsd_to_mem = fpc_in_x86_mm_first+112;
+  fpc_in_x86_movsd_to_val = fpc_in_x86_mm_first+113;
+  fpc_in_x86_movsd_from_val = fpc_in_x86_mm_first+114;
+  fpc_in_x86_addpd = fpc_in_x86_mm_first+115;
+  fpc_in_x86_addpd_from_mem = fpc_in_x86_mm_first+116;
+  fpc_in_x86_addsd = fpc_in_x86_mm_first+117;
+  fpc_in_x86_addsd_from_mem = fpc_in_x86_mm_first+118;
+  fpc_in_x86_divpd = fpc_in_x86_mm_first+119;
+  fpc_in_x86_divpd_from_mem = fpc_in_x86_mm_first+120;
+  fpc_in_x86_divsd = fpc_in_x86_mm_first+121;
+  fpc_in_x86_divsd_from_mem = fpc_in_x86_mm_first+122;
+  fpc_in_x86_maxpd = fpc_in_x86_mm_first+123;
+  fpc_in_x86_maxpd_from_mem = fpc_in_x86_mm_first+124;
+  fpc_in_x86_maxsd = fpc_in_x86_mm_first+125;
+  fpc_in_x86_maxsd_from_mem = fpc_in_x86_mm_first+126;
+  fpc_in_x86_minpd = fpc_in_x86_mm_first+127;
+  fpc_in_x86_minpd_from_mem = fpc_in_x86_mm_first+128;
+  fpc_in_x86_minsd = fpc_in_x86_mm_first+129;
+  fpc_in_x86_minsd_from_mem = fpc_in_x86_mm_first+130;
+  fpc_in_x86_mulpd = fpc_in_x86_mm_first+131;
+  fpc_in_x86_mulpd_from_mem = fpc_in_x86_mm_first+132;
+  fpc_in_x86_mulsd = fpc_in_x86_mm_first+133;
+  fpc_in_x86_mulsd_from_mem = fpc_in_x86_mm_first+134;
+  fpc_in_x86_sqrtpd = fpc_in_x86_mm_first+135;
+  fpc_in_x86_sqrtpd_from_mem = fpc_in_x86_mm_first+136;
+  fpc_in_x86_sqrtsd = fpc_in_x86_mm_first+137;
+  fpc_in_x86_sqrtsd_from_mem = fpc_in_x86_mm_first+138;
+  fpc_in_x86_subpd = fpc_in_x86_mm_first+139;
+  fpc_in_x86_subpd_from_mem = fpc_in_x86_mm_first+140;
+  fpc_in_x86_subsd = fpc_in_x86_mm_first+141;
+  fpc_in_x86_subsd_from_mem = fpc_in_x86_mm_first+142;
+  fpc_in_x86_andpd = fpc_in_x86_mm_first+143;
+  fpc_in_x86_andpd_from_mem = fpc_in_x86_mm_first+144;
+  fpc_in_x86_andnpd = fpc_in_x86_mm_first+145;
+  fpc_in_x86_andnpd_from_mem = fpc_in_x86_mm_first+146;
+  fpc_in_x86_orpd = fpc_in_x86_mm_first+147;
+  fpc_in_x86_orpd_from_mem = fpc_in_x86_mm_first+148;
+  fpc_in_x86_xorpd = fpc_in_x86_mm_first+149;
+  fpc_in_x86_xorpd_from_mem = fpc_in_x86_mm_first+150;
+  fpc_in_x86_cmppd = fpc_in_x86_mm_first+151;
+  fpc_in_x86_cmppd_from_mem = fpc_in_x86_mm_first+152;
+  fpc_in_x86_cmpsd = fpc_in_x86_mm_first+153;
+  fpc_in_x86_cmpsd_from_mem = fpc_in_x86_mm_first+154;
+  fpc_in_x86_comisd = fpc_in_x86_mm_first+155;
+  fpc_in_x86_comisd_from_mem = fpc_in_x86_mm_first+156;
+  fpc_in_x86_ucomisd = fpc_in_x86_mm_first+157;
+  fpc_in_x86_ucomisd_from_mem = fpc_in_x86_mm_first+158;
+  fpc_in_x86_shufpd = fpc_in_x86_mm_first+159;
+  fpc_in_x86_shufpd_from_mem = fpc_in_x86_mm_first+160;
+  fpc_in_x86_unpckhpd = fpc_in_x86_mm_first+161;
+  fpc_in_x86_unpckhpd_from_mem = fpc_in_x86_mm_first+162;
+  fpc_in_x86_unpcklpd = fpc_in_x86_mm_first+163;
+  fpc_in_x86_unpcklpd_from_mem = fpc_in_x86_mm_first+164;
+  fpc_in_x86_cvtdq2pd = fpc_in_x86_mm_first+165;
+  fpc_in_x86_cvtdq2pd_from_mem = fpc_in_x86_mm_first+166;
+  fpc_in_x86_cvtdq2ps = fpc_in_x86_mm_first+167;
+  fpc_in_x86_cvtdq2ps_from_mem = fpc_in_x86_mm_first+168;
+  fpc_in_x86_cvtpd2dq = fpc_in_x86_mm_first+169;
+  fpc_in_x86_cvtpd2dq_from_mem = fpc_in_x86_mm_first+170;
+  fpc_in_x86_cvtpd2pi = fpc_in_x86_mm_first+171;
+  fpc_in_x86_cvtpd2pi_from_mem = fpc_in_x86_mm_first+172;
+  fpc_in_x86_cvtpd2ps = fpc_in_x86_mm_first+173;
+  fpc_in_x86_cvtpd2ps_from_mem = fpc_in_x86_mm_first+174;
+  fpc_in_x86_cvtpi2pd = fpc_in_x86_mm_first+175;
+  fpc_in_x86_cvtpi2pd_from_mem = fpc_in_x86_mm_first+176;
+  fpc_in_x86_cvtps2dq = fpc_in_x86_mm_first+177;
+  fpc_in_x86_cvtps2dq_from_mem = fpc_in_x86_mm_first+178;
+  fpc_in_x86_cvtps2pd = fpc_in_x86_mm_first+179;
+  fpc_in_x86_cvtps2pd_from_mem = fpc_in_x86_mm_first+180;
+  fpc_in_x86_cvtsd2si = fpc_in_x86_mm_first+181;
+  fpc_in_x86_cvtsd2si_from_mem = fpc_in_x86_mm_first+182;
+  fpc_in_x86_cvtsd2ss = fpc_in_x86_mm_first+183;
+  fpc_in_x86_cvtsd2ss_from_mem = fpc_in_x86_mm_first+184;
+  fpc_in_x86_cvtsi2sd = fpc_in_x86_mm_first+185;
+  fpc_in_x86_cvtsi2sd_from_mem = fpc_in_x86_mm_first+186;
+  fpc_in_x86_cvtss2sd = fpc_in_x86_mm_first+187;
+  fpc_in_x86_cvtss2sd_from_mem = fpc_in_x86_mm_first+188;
+  fpc_in_x86_cvttpd2dq = fpc_in_x86_mm_first+189;
+  fpc_in_x86_cvttpd2dq_from_mem = fpc_in_x86_mm_first+190;
+  fpc_in_x86_cvttpd2pi = fpc_in_x86_mm_first+191;
+  fpc_in_x86_cvttpd2pi_from_mem = fpc_in_x86_mm_first+192;
+  fpc_in_x86_cvttps2dq = fpc_in_x86_mm_first+193;
+  fpc_in_x86_cvttps2dq_from_mem = fpc_in_x86_mm_first+194;
+  fpc_in_x86_cvttsd2si = fpc_in_x86_mm_first+195;
+  fpc_in_x86_cvttsd2si_from_mem = fpc_in_x86_mm_first+196;
+  fpc_in_x86_movd_from_reg = fpc_in_x86_mm_first+197;
+  fpc_in_x86_movd_from_mem = fpc_in_x86_mm_first+198;
+  fpc_in_x86_movd_to_reg = fpc_in_x86_mm_first+199;
+  fpc_in_x86_movd_to_mem = fpc_in_x86_mm_first+200;
+  fpc_in_x86_movq_from_mem = fpc_in_x86_mm_first+201;
+  fpc_in_x86_movq_to_mem = fpc_in_x86_mm_first+202;
+  fpc_in_x86_pmovmskb = fpc_in_x86_mm_first+203;
+  fpc_in_x86_pextrw_sse2 = fpc_in_x86_mm_first+204;
+  fpc_in_x86_pinsrw_sse2 = fpc_in_x86_mm_first+205;
+  fpc_in_x86_pinsrw_from_mem = fpc_in_x86_mm_first+206;
+  fpc_in_x86_packssdw = fpc_in_x86_mm_first+207;
+  fpc_in_x86_packssdw_from_mem = fpc_in_x86_mm_first+208;
+  fpc_in_x86_packsswb = fpc_in_x86_mm_first+209;
+  fpc_in_x86_packsswb_from_mem = fpc_in_x86_mm_first+210;
+  fpc_in_x86_packuswb = fpc_in_x86_mm_first+211;
+  fpc_in_x86_packuswb_from_mem = fpc_in_x86_mm_first+212;
+  fpc_in_x86_paddb = fpc_in_x86_mm_first+213;
+  fpc_in_x86_paddb_from_mem = fpc_in_x86_mm_first+214;
+  fpc_in_x86_paddw = fpc_in_x86_mm_first+215;
+  fpc_in_x86_paddw_from_mem = fpc_in_x86_mm_first+216;
+  fpc_in_x86_paddd = fpc_in_x86_mm_first+217;
+  fpc_in_x86_paddd_from_mem = fpc_in_x86_mm_first+218;
+  fpc_in_x86_paddq = fpc_in_x86_mm_first+219;
+  fpc_in_x86_paddq_from_mem = fpc_in_x86_mm_first+220;
+  fpc_in_x86_paddsb = fpc_in_x86_mm_first+221;
+  fpc_in_x86_paddsb_from_mem = fpc_in_x86_mm_first+222;
+  fpc_in_x86_paddsw = fpc_in_x86_mm_first+223;
+  fpc_in_x86_paddsw_from_mem = fpc_in_x86_mm_first+224;
+  fpc_in_x86_paddusb = fpc_in_x86_mm_first+225;
+  fpc_in_x86_paddusb_from_mem = fpc_in_x86_mm_first+226;
+  fpc_in_x86_paddusw = fpc_in_x86_mm_first+227;
+  fpc_in_x86_paddusw_from_mem = fpc_in_x86_mm_first+228;
+  fpc_in_x86_pand = fpc_in_x86_mm_first+229;
+  fpc_in_x86_pand_from_mem = fpc_in_x86_mm_first+230;
+  fpc_in_x86_pandn = fpc_in_x86_mm_first+231;
+  fpc_in_x86_pandn_from_mem = fpc_in_x86_mm_first+232;
+  fpc_in_x86_por = fpc_in_x86_mm_first+233;
+  fpc_in_x86_por_from_mem = fpc_in_x86_mm_first+234;
+  fpc_in_x86_pxor = fpc_in_x86_mm_first+235;
+  fpc_in_x86_pxor_from_mem = fpc_in_x86_mm_first+236;
+  fpc_in_x86_pcmpeqb = fpc_in_x86_mm_first+237;
+  fpc_in_x86_pcmpeqb_from_mem = fpc_in_x86_mm_first+238;
+  fpc_in_x86_pcmpeqw = fpc_in_x86_mm_first+239;
+  fpc_in_x86_pcmpeqw_from_mem = fpc_in_x86_mm_first+240;
+  fpc_in_x86_pcmpeqd = fpc_in_x86_mm_first+241;
+  fpc_in_x86_pcmpeqd_from_mem = fpc_in_x86_mm_first+242;
+  fpc_in_x86_pcmpgtb = fpc_in_x86_mm_first+243;
+  fpc_in_x86_pcmpgtb_from_mem = fpc_in_x86_mm_first+244;
+  fpc_in_x86_pcmpgtw = fpc_in_x86_mm_first+245;
+  fpc_in_x86_pcmpgtw_from_mem = fpc_in_x86_mm_first+246;
+  fpc_in_x86_pcmpgtd = fpc_in_x86_mm_first+247;
+  fpc_in_x86_pcmpgtd_from_mem = fpc_in_x86_mm_first+248;
+  fpc_in_x86_pmullw = fpc_in_x86_mm_first+249;
+  fpc_in_x86_pmullw_from_mem = fpc_in_x86_mm_first+250;
+  fpc_in_x86_pmulhw = fpc_in_x86_mm_first+251;
+  fpc_in_x86_pmulhw_from_mem = fpc_in_x86_mm_first+252;
+  fpc_in_x86_pmulhuw_sse2 = fpc_in_x86_mm_first+253;
+  fpc_in_x86_pmulhuw_from_mem = fpc_in_x86_mm_first+254;
+  fpc_in_x86_pmuludq = fpc_in_x86_mm_first+255;
+  fpc_in_x86_pmuludq_from_mem = fpc_in_x86_mm_first+256;
+  fpc_in_x86_psllw_sse2 = fpc_in_x86_mm_first+257;
+  fpc_in_x86_psllw_from_mem = fpc_in_x86_mm_first+258;
+  fpc_in_x86_psllw_sse2_imm = fpc_in_x86_mm_first+259;
+  fpc_in_x86_pslld_sse2 = fpc_in_x86_mm_first+260;
+  fpc_in_x86_pslld_from_mem = fpc_in_x86_mm_first+261;
+  fpc_in_x86_pslld_sse2_imm = fpc_in_x86_mm_first+262;
+  fpc_in_x86_psllq_sse2 = fpc_in_x86_mm_first+263;
+  fpc_in_x86_psllq_from_mem = fpc_in_x86_mm_first+264;
+  fpc_in_x86_psllq_sse2_imm = fpc_in_x86_mm_first+265;
+  fpc_in_x86_psrad_sse2 = fpc_in_x86_mm_first+266;
+  fpc_in_x86_psrad_from_mem = fpc_in_x86_mm_first+267;
+  fpc_in_x86_psrad_sse2_imm = fpc_in_x86_mm_first+268;
+  fpc_in_x86_psraw_sse2 = fpc_in_x86_mm_first+269;
+  fpc_in_x86_psraw_from_mem = fpc_in_x86_mm_first+270;
+  fpc_in_x86_psraw_sse2_imm = fpc_in_x86_mm_first+271;
+  fpc_in_x86_psrlw_sse2 = fpc_in_x86_mm_first+272;
+  fpc_in_x86_psrlw_from_mem = fpc_in_x86_mm_first+273;
+  fpc_in_x86_psrlw_sse2_imm = fpc_in_x86_mm_first+274;
+  fpc_in_x86_psrld_sse2 = fpc_in_x86_mm_first+275;
+  fpc_in_x86_psrld_from_mem = fpc_in_x86_mm_first+276;
+  fpc_in_x86_psrld_sse2_imm = fpc_in_x86_mm_first+277;
+  fpc_in_x86_psrlq_sse2 = fpc_in_x86_mm_first+278;
+  fpc_in_x86_psrlq_from_mem = fpc_in_x86_mm_first+279;
+  fpc_in_x86_psrlq_sse2_imm = fpc_in_x86_mm_first+280;
+  fpc_in_x86_psubb = fpc_in_x86_mm_first+281;
+  fpc_in_x86_psubb_from_mem = fpc_in_x86_mm_first+282;
+  fpc_in_x86_psubw = fpc_in_x86_mm_first+283;
+  fpc_in_x86_psubw_from_mem = fpc_in_x86_mm_first+284;
+  fpc_in_x86_psubd = fpc_in_x86_mm_first+285;
+  fpc_in_x86_psubd_from_mem = fpc_in_x86_mm_first+286;
+  fpc_in_x86_psubq = fpc_in_x86_mm_first+287;
+  fpc_in_x86_psubq_from_mem = fpc_in_x86_mm_first+288;
+  fpc_in_x86_psubsb = fpc_in_x86_mm_first+289;
+  fpc_in_x86_psubsb_from_mem = fpc_in_x86_mm_first+290;
+  fpc_in_x86_psubsw = fpc_in_x86_mm_first+291;
+  fpc_in_x86_psubsw_from_mem = fpc_in_x86_mm_first+292;
+  fpc_in_x86_pmaddwd = fpc_in_x86_mm_first+293;
+  fpc_in_x86_pmaddwd_from_mem = fpc_in_x86_mm_first+294;
+  fpc_in_x86_psubusb = fpc_in_x86_mm_first+295;
+  fpc_in_x86_psubusb_from_mem = fpc_in_x86_mm_first+296;
+  fpc_in_x86_psubusw = fpc_in_x86_mm_first+297;
+  fpc_in_x86_psubusw_from_mem = fpc_in_x86_mm_first+298;
+  fpc_in_x86_punpckhbw = fpc_in_x86_mm_first+299;
+  fpc_in_x86_punpckhbw_from_mem = fpc_in_x86_mm_first+300;
+  fpc_in_x86_punpckhwd = fpc_in_x86_mm_first+301;
+  fpc_in_x86_punpckhwd_from_mem = fpc_in_x86_mm_first+302;
+  fpc_in_x86_punpckhdq = fpc_in_x86_mm_first+303;
+  fpc_in_x86_punpckhdq_from_mem = fpc_in_x86_mm_first+304;
+  fpc_in_x86_punpcklbw = fpc_in_x86_mm_first+305;
+  fpc_in_x86_punpcklbw_from_mem = fpc_in_x86_mm_first+306;
+  fpc_in_x86_punpcklwd = fpc_in_x86_mm_first+307;
+  fpc_in_x86_punpcklwd_from_mem = fpc_in_x86_mm_first+308;
+  fpc_in_x86_punpckldq = fpc_in_x86_mm_first+309;
+  fpc_in_x86_punpckldq_from_mem = fpc_in_x86_mm_first+310;
+  fpc_in_x86_pavgb_sse2 = fpc_in_x86_mm_first+311;
+  fpc_in_x86_pavgb_from_mem = fpc_in_x86_mm_first+312;
+  fpc_in_x86_pavgw_sse2 = fpc_in_x86_mm_first+313;
+  fpc_in_x86_pavgw_from_mem = fpc_in_x86_mm_first+314;
+  fpc_in_x86_pminub_sse2 = fpc_in_x86_mm_first+315;
+  fpc_in_x86_pminub_from_mem = fpc_in_x86_mm_first+316;
+  fpc_in_x86_pminsw_sse2 = fpc_in_x86_mm_first+317;
+  fpc_in_x86_pminsw_from_mem = fpc_in_x86_mm_first+318;
+  fpc_in_x86_pmaxsw_sse2 = fpc_in_x86_mm_first+319;
+  fpc_in_x86_pmaxsw_from_mem = fpc_in_x86_mm_first+320;
+  fpc_in_x86_pmaxub_sse2 = fpc_in_x86_mm_first+321;
+  fpc_in_x86_pmaxub_from_mem = fpc_in_x86_mm_first+322;
+  fpc_in_x86_psadbw_sse2 = fpc_in_x86_mm_first+323;
+  fpc_in_x86_psadbw_from_mem = fpc_in_x86_mm_first+324;
+  fpc_in_x86_maskmovdqu = fpc_in_x86_mm_first+325;
+  fpc_in_x86_movdq2q = fpc_in_x86_mm_first+326;
+  fpc_in_x86_movdqa_from_mem = fpc_in_x86_mm_first+327;
+  fpc_in_x86_movdqa = fpc_in_x86_mm_first+328;
+  fpc_in_x86_movdqu_from_mem = fpc_in_x86_mm_first+329;
+  fpc_in_x86_movdqu = fpc_in_x86_mm_first+330;
+  fpc_in_x86_movq2dq = fpc_in_x86_mm_first+331;
+  fpc_in_x86_movntdq = fpc_in_x86_mm_first+332;
+  fpc_in_x86_pshufhw = fpc_in_x86_mm_first+333;
+  fpc_in_x86_pshuflw = fpc_in_x86_mm_first+334;
+  fpc_in_x86_pshufd = fpc_in_x86_mm_first+335;
+  fpc_in_x86_pshufhw_from_mem = fpc_in_x86_mm_first+336;
+  fpc_in_x86_pshuflw_from_mem = fpc_in_x86_mm_first+337;
+  fpc_in_x86_pshufd_from_mem = fpc_in_x86_mm_first+338;
+  fpc_in_x86_pslldq = fpc_in_x86_mm_first+339;
+  fpc_in_x86_psrldq = fpc_in_x86_mm_first+340;
+  fpc_in_x86_punpckhqdq = fpc_in_x86_mm_first+341;
+  fpc_in_x86_punpckhqdq_from_mem = fpc_in_x86_mm_first+342;
+  fpc_in_x86_punpcklqdq = fpc_in_x86_mm_first+343;
+  fpc_in_x86_punpcklqdq_from_mem = fpc_in_x86_mm_first+344;
+  fpc_in_x86_addsubps = fpc_in_x86_mm_first+345;
+  fpc_in_x86_addsubps_from_mem = fpc_in_x86_mm_first+346;
+  fpc_in_x86_addsubpd = fpc_in_x86_mm_first+347;
+  fpc_in_x86_addsubpd_from_mem = fpc_in_x86_mm_first+348;
+  fpc_in_x86_movddup = fpc_in_x86_mm_first+349;
+  fpc_in_x86_movddup_from_mem = fpc_in_x86_mm_first+350;
+  fpc_in_x86_movsldup = fpc_in_x86_mm_first+351;
+  fpc_in_x86_movsldup_from_mem = fpc_in_x86_mm_first+352;
+  fpc_in_x86_movshdup = fpc_in_x86_mm_first+353;
+  fpc_in_x86_movshdup_from_mem = fpc_in_x86_mm_first+354;
+  fpc_in_x86_haddps = fpc_in_x86_mm_first+355;
+  fpc_in_x86_haddps_from_mem = fpc_in_x86_mm_first+356;
+  fpc_in_x86_haddpd = fpc_in_x86_mm_first+357;
+  fpc_in_x86_haddpd_from_mem = fpc_in_x86_mm_first+358;
+  fpc_in_x86_hsubps = fpc_in_x86_mm_first+359;
+  fpc_in_x86_hsubps_from_mem = fpc_in_x86_mm_first+360;
+  fpc_in_x86_hsubpd = fpc_in_x86_mm_first+361;
+  fpc_in_x86_hsubpd_from_mem = fpc_in_x86_mm_first+362;
+  fpc_in_x86_lddqu = fpc_in_x86_mm_first+363;
+  fpc_in_x86_psignb = fpc_in_x86_mm_first+364;
+  fpc_in_x86_psignb_from_mem = fpc_in_x86_mm_first+365;
+  fpc_in_x86_psignw = fpc_in_x86_mm_first+366;
+  fpc_in_x86_psignw_from_mem = fpc_in_x86_mm_first+367;
+  fpc_in_x86_psignd = fpc_in_x86_mm_first+368;
+  fpc_in_x86_psignd_from_mem = fpc_in_x86_mm_first+369;
+  fpc_in_x86_pshufb = fpc_in_x86_mm_first+370;
+  fpc_in_x86_pshufb_from_mem = fpc_in_x86_mm_first+371;
+  fpc_in_x86_pmulhrsw = fpc_in_x86_mm_first+372;
+  fpc_in_x86_pmulhrsw_from_mem = fpc_in_x86_mm_first+373;
+  fpc_in_x86_pmaddubsw = fpc_in_x86_mm_first+374;
+  fpc_in_x86_pmaddubsw_from_mem = fpc_in_x86_mm_first+375;
+  fpc_in_x86_phsubw = fpc_in_x86_mm_first+376;
+  fpc_in_x86_phsubw_from_mem = fpc_in_x86_mm_first+377;
+  fpc_in_x86_phsubsw = fpc_in_x86_mm_first+378;
+  fpc_in_x86_phsubsw_from_mem = fpc_in_x86_mm_first+379;
+  fpc_in_x86_phsubd = fpc_in_x86_mm_first+380;
+  fpc_in_x86_phsubd_from_mem = fpc_in_x86_mm_first+381;
+  fpc_in_x86_phaddsw = fpc_in_x86_mm_first+382;
+  fpc_in_x86_phaddsw_from_mem = fpc_in_x86_mm_first+383;
+  fpc_in_x86_phaddw = fpc_in_x86_mm_first+384;
+  fpc_in_x86_phaddw_from_mem = fpc_in_x86_mm_first+385;
+  fpc_in_x86_phaddd = fpc_in_x86_mm_first+386;
+  fpc_in_x86_phaddd_from_mem = fpc_in_x86_mm_first+387;
+  fpc_in_x86_palignr = fpc_in_x86_mm_first+388;
+  fpc_in_x86_palignr_from_mem = fpc_in_x86_mm_first+389;
+  fpc_in_x86_pabsb = fpc_in_x86_mm_first+390;
+  fpc_in_x86_pabsb_from_mem = fpc_in_x86_mm_first+391;
+  fpc_in_x86_pabsw = fpc_in_x86_mm_first+392;
+  fpc_in_x86_pabsw_from_mem = fpc_in_x86_mm_first+393;
+  fpc_in_x86_pabsd = fpc_in_x86_mm_first+394;
+  fpc_in_x86_pabsd_from_mem = fpc_in_x86_mm_first+395;
+  fpc_in_x86_dpps = fpc_in_x86_mm_first+396;
+  fpc_in_x86_dpps_from_mem = fpc_in_x86_mm_first+397;
+  fpc_in_x86_dppd = fpc_in_x86_mm_first+398;
+  fpc_in_x86_dppd_from_mem = fpc_in_x86_mm_first+399;
+  fpc_in_x86_blendps = fpc_in_x86_mm_first+400;
+  fpc_in_x86_blendps_from_mem = fpc_in_x86_mm_first+401;
+  fpc_in_x86_blendvps = fpc_in_x86_mm_first+402;
+  fpc_in_x86_blendvps_from_mem = fpc_in_x86_mm_first+403;
+  fpc_in_x86_blendpd = fpc_in_x86_mm_first+404;
+  fpc_in_x86_blendpd_from_mem = fpc_in_x86_mm_first+405;
+  fpc_in_x86_blendvpd = fpc_in_x86_mm_first+406;
+  fpc_in_x86_blendvpd_from_mem = fpc_in_x86_mm_first+407;
+  fpc_in_x86_roundps = fpc_in_x86_mm_first+408;
+  fpc_in_x86_roundps_from_mem = fpc_in_x86_mm_first+409;
+  fpc_in_x86_roundss = fpc_in_x86_mm_first+410;
+  fpc_in_x86_roundss_from_mem = fpc_in_x86_mm_first+411;
+  fpc_in_x86_roundpd = fpc_in_x86_mm_first+412;
+  fpc_in_x86_roundpd_from_mem = fpc_in_x86_mm_first+413;
+  fpc_in_x86_roundsd = fpc_in_x86_mm_first+414;
+  fpc_in_x86_roundsd_from_mem = fpc_in_x86_mm_first+415;
+  fpc_in_x86_insertps = fpc_in_x86_mm_first+416;
+  fpc_in_x86_insertps_from_mem = fpc_in_x86_mm_first+417;
+  fpc_in_x86_extractps = fpc_in_x86_mm_first+418;
+  fpc_in_x86_extractps_from_mem = fpc_in_x86_mm_first+419;
+  fpc_in_x86_mpsadbw = fpc_in_x86_mm_first+420;
+  fpc_in_x86_mpsadbw_from_mem = fpc_in_x86_mm_first+421;
+  fpc_in_x86_phminposuw = fpc_in_x86_mm_first+422;
+  fpc_in_x86_phminposuw_from_mem = fpc_in_x86_mm_first+423;
+  fpc_in_x86_pmulld = fpc_in_x86_mm_first+424;
+  fpc_in_x86_pmulld_from_mem = fpc_in_x86_mm_first+425;
+  fpc_in_x86_pmuldq = fpc_in_x86_mm_first+426;
+  fpc_in_x86_pmuldq_from_mem = fpc_in_x86_mm_first+427;
+  fpc_in_x86_pblendvb = fpc_in_x86_mm_first+428;
+  fpc_in_x86_pblendvb_from_mem = fpc_in_x86_mm_first+429;
+  fpc_in_x86_pblendw = fpc_in_x86_mm_first+430;
+  fpc_in_x86_pblendw_from_mem = fpc_in_x86_mm_first+431;
+  fpc_in_x86_pminsb = fpc_in_x86_mm_first+432;
+  fpc_in_x86_pminsb_from_mem = fpc_in_x86_mm_first+433;
+  fpc_in_x86_pminuw = fpc_in_x86_mm_first+434;
+  fpc_in_x86_pminuw_from_mem = fpc_in_x86_mm_first+435;
+  fpc_in_x86_pminsd = fpc_in_x86_mm_first+436;
+  fpc_in_x86_pminsd_from_mem = fpc_in_x86_mm_first+437;
+  fpc_in_x86_pminud = fpc_in_x86_mm_first+438;
+  fpc_in_x86_pminud_from_mem = fpc_in_x86_mm_first+439;
+  fpc_in_x86_pmaxsb = fpc_in_x86_mm_first+440;
+  fpc_in_x86_pmaxsb_from_mem = fpc_in_x86_mm_first+441;
+  fpc_in_x86_pmaxuw = fpc_in_x86_mm_first+442;
+  fpc_in_x86_pmaxuw_from_mem = fpc_in_x86_mm_first+443;
+  fpc_in_x86_pmaxsd = fpc_in_x86_mm_first+444;
+  fpc_in_x86_pmaxsd_from_mem = fpc_in_x86_mm_first+445;
+  fpc_in_x86_pmaxud = fpc_in_x86_mm_first+446;
+  fpc_in_x86_pmaxud_from_mem = fpc_in_x86_mm_first+447;
+  fpc_in_x86_pinsrb = fpc_in_x86_mm_first+448;
+  fpc_in_x86_pinsrb_from_mem = fpc_in_x86_mm_first+449;
+  fpc_in_x86_pinsrd = fpc_in_x86_mm_first+450;
+  fpc_in_x86_pinsrd_from_mem = fpc_in_x86_mm_first+451;
+  fpc_in_x86_pinsrq = fpc_in_x86_mm_first+452;
+  fpc_in_x86_pinsrq_from_mem = fpc_in_x86_mm_first+453;
+  fpc_in_x86_pextrb = fpc_in_x86_mm_first+454;
+  fpc_in_x86_pextrb_to_mem = fpc_in_x86_mm_first+455;
+  fpc_in_x86_pextrw_sse41 = fpc_in_x86_mm_first+456;
+  fpc_in_x86_pextrw_sse41_to_mem = fpc_in_x86_mm_first+457;
+  fpc_in_x86_pextrd = fpc_in_x86_mm_first+458;
+  fpc_in_x86_pextrd_to_mem = fpc_in_x86_mm_first+459;
+  fpc_in_x86_pextrq = fpc_in_x86_mm_first+460;
+  fpc_in_x86_pextrq_to_mem = fpc_in_x86_mm_first+461;
+  fpc_in_x86_pmovsxbw = fpc_in_x86_mm_first+462;
+  fpc_in_x86_pmovsxbw_from_mem = fpc_in_x86_mm_first+463;
+  fpc_in_x86_pmovzxbw = fpc_in_x86_mm_first+464;
+  fpc_in_x86_pmovzxbw_from_mem = fpc_in_x86_mm_first+465;
+  fpc_in_x86_pmovsxbd = fpc_in_x86_mm_first+466;
+  fpc_in_x86_pmovsxbd_from_mem = fpc_in_x86_mm_first+467;
+  fpc_in_x86_pmovzxbd = fpc_in_x86_mm_first+468;
+  fpc_in_x86_pmovzxbd_from_mem = fpc_in_x86_mm_first+469;
+  fpc_in_x86_pmovsxbq = fpc_in_x86_mm_first+470;
+  fpc_in_x86_pmovsxbq_from_mem = fpc_in_x86_mm_first+471;
+  fpc_in_x86_pmovzxbq = fpc_in_x86_mm_first+472;
+  fpc_in_x86_pmovzxbq_from_mem = fpc_in_x86_mm_first+473;
+  fpc_in_x86_pmovsxwd = fpc_in_x86_mm_first+474;
+  fpc_in_x86_pmovsxwd_from_mem = fpc_in_x86_mm_first+475;
+  fpc_in_x86_pmovzxwd = fpc_in_x86_mm_first+476;
+  fpc_in_x86_pmovzxwd_from_mem = fpc_in_x86_mm_first+477;
+  fpc_in_x86_pmovsxwq = fpc_in_x86_mm_first+478;
+  fpc_in_x86_pmovsxwq_from_mem = fpc_in_x86_mm_first+479;
+  fpc_in_x86_pmovzxwq = fpc_in_x86_mm_first+480;
+  fpc_in_x86_pmovzxwq_from_mem = fpc_in_x86_mm_first+481;
+  fpc_in_x86_pmovsxdq = fpc_in_x86_mm_first+482;
+  fpc_in_x86_pmovsxdq_from_mem = fpc_in_x86_mm_first+483;
+  fpc_in_x86_pmovzxdq = fpc_in_x86_mm_first+484;
+  fpc_in_x86_pmovzxdq_from_mem = fpc_in_x86_mm_first+485;
+  fpc_in_x86_ptest = fpc_in_x86_mm_first+486;
+  fpc_in_x86_ptest_from_mem = fpc_in_x86_mm_first+487;
+  fpc_in_x86_pcmpeqq = fpc_in_x86_mm_first+488;
+  fpc_in_x86_pcmpeqq_from_mem = fpc_in_x86_mm_first+489;
+  fpc_in_x86_packusdw = fpc_in_x86_mm_first+490;
+  fpc_in_x86_packusdw_from_mem = fpc_in_x86_mm_first+491;
+  fpc_in_x86_movntdqa = fpc_in_x86_mm_first+492;
+  fpc_in_x86_pcmpestri = fpc_in_x86_mm_first+493;
+  fpc_in_x86_pcmpestri_from_mem = fpc_in_x86_mm_first+494;
+  fpc_in_x86_pcmpestrm = fpc_in_x86_mm_first+495;
+  fpc_in_x86_pcmpestrm_from_mem = fpc_in_x86_mm_first+496;
+  fpc_in_x86_pcmpistri = fpc_in_x86_mm_first+497;
+  fpc_in_x86_pcmpistri_from_mem = fpc_in_x86_mm_first+498;
+  fpc_in_x86_pcmpistrm = fpc_in_x86_mm_first+499;
+  fpc_in_x86_pcmpistrm_from_mem = fpc_in_x86_mm_first+500;
+  fpc_in_x86_pcmpgtq = fpc_in_x86_mm_first+501;
+  fpc_in_x86_pcmpgtq_from_mem = fpc_in_x86_mm_first+502;

+ 452 - 17
rtl/x86_64/cpummprocs.inc

@@ -8,61 +8,496 @@ function x86_movss(r1: __m128): single; [INTERNPROC: fpc_in_x86_movss_to_val];
 function x86_movss(r1: single): __m128; [INTERNPROC: fpc_in_x86_movss_from_val];
 function x86_movss(r1: single): __m128; [INTERNPROC: fpc_in_x86_movss_from_val];
 function x86_movlps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movlps];
 function x86_movlps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movlps];
 function x86_movhps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movhps];
 function x86_movhps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movhps];
+procedure x86_movlps(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movlps_to_mem];
+procedure x86_movhps(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movhps_to_mem];
 function x86_movlhps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_movlhps];
 function x86_movlhps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_movlhps];
 function x86_movhlps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_movhlps];
 function x86_movhlps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_movhlps];
 function x86_addss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addss];
 function x86_addss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addss];
+function x86_addss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_addss_from_mem];
 function x86_subss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_subss];
 function x86_subss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_subss];
+function x86_subss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_subss_from_mem];
 function x86_mulss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_mulss];
 function x86_mulss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_mulss];
+function x86_mulss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_mulss_from_mem];
 function x86_divss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_divss];
 function x86_divss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_divss];
+function x86_divss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_divss_from_mem];
 function x86_rcpss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_rcpss];
 function x86_rcpss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_rcpss];
+function x86_rcpss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_rcpss_from_mem];
 function x86_sqrtss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_sqrtss];
 function x86_sqrtss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_sqrtss];
+function x86_sqrtss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_sqrtss_from_mem];
 function x86_maxss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_maxss];
 function x86_maxss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_maxss];
+function x86_maxss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_maxss_from_mem];
 function x86_minss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_minss];
 function x86_minss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_minss];
+function x86_minss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_minss_from_mem];
 function x86_rsqrtss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_rsqrtss];
 function x86_rsqrtss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_rsqrtss];
+function x86_rsqrtss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_rsqrtss_from_mem];
 function x86_addps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addps];
 function x86_addps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addps];
+function x86_addps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_addps_from_mem];
 function x86_subps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_subps];
 function x86_subps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_subps];
+function x86_subps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_subps_from_mem];
 function x86_mulps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_mulps];
 function x86_mulps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_mulps];
+function x86_mulps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_mulps_from_mem];
 function x86_divps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_divps];
 function x86_divps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_divps];
+function x86_divps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_divps_from_mem];
 function x86_rcpps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_rcpps];
 function x86_rcpps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_rcpps];
+function x86_rcpps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_rcpps_from_mem];
 function x86_sqrtps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_sqrtps];
 function x86_sqrtps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_sqrtps];
+function x86_sqrtps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_sqrtps_from_mem];
 function x86_maxps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_maxps];
 function x86_maxps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_maxps];
+function x86_maxps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_maxps_from_mem];
 function x86_minps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_minps];
 function x86_minps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_minps];
+function x86_minps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_minps_from_mem];
 function x86_rsqrtps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_rsqrtps];
 function x86_rsqrtps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_rsqrtps];
+function x86_rsqrtps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_rsqrtps_from_mem];
 function x86_andps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_andps];
 function x86_andps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_andps];
+function x86_andps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_andps_from_mem];
 function x86_orps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_orps];
 function x86_orps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_orps];
+function x86_orps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_orps_from_mem];
 function x86_xorps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_xorps];
 function x86_xorps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_xorps];
+function x86_xorps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_xorps_from_mem];
 function x86_andnps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_andnps];
 function x86_andnps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_andnps];
+function x86_andnps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_andnps_from_mem];
 function x86_cmpss(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpss];
 function x86_cmpss(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpss];
+function x86_cmpss(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpss_from_mem];
 function x86_cmpps(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpps];
 function x86_cmpps(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpps];
+function x86_cmpps(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpps_from_mem];
 function x86_shufps(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_shufps];
 function x86_shufps(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_shufps];
+function x86_shufps(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_shufps_from_mem];
 function x86_unpckhps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_unpckhps];
 function x86_unpckhps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_unpckhps];
+function x86_unpckhps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_unpckhps_from_mem];
 function x86_unpcklps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_unpcklps];
 function x86_unpcklps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_unpcklps];
-function x86_cvtsi2ss(r0: __m128; r1: longword): __m128; [INTERNPROC: fpc_in_x86_cvtsi2ss];
-function x86_cvtss2si(r1: __m128): longword; [INTERNPROC: fpc_in_x86_cvtss2si];
-function x86_cvttss2si(r1: __m128): longword; [INTERNPROC: fpc_in_x86_cvttss2si];
+function x86_unpcklps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_unpcklps_from_mem];
+function x86_cvtsi2ss(r0: __m128; r1: NativeUInt): __m128; [INTERNPROC: fpc_in_x86_cvtsi2ss];
+function x86_cvtsi2ss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtsi2ss_from_mem];
+function x86_cvtss2si(r1: __m128): NativeUInt; [INTERNPROC: fpc_in_x86_cvtss2si];
+function x86_cvtss2si(r1: pointer): NativeUInt; [INTERNPROC: fpc_in_x86_cvtss2si_from_mem];
+function x86_cvttss2si(r1: __m128): NativeUInt; [INTERNPROC: fpc_in_x86_cvttss2si];
+function x86_cvttss2si(r1: pointer): NativeUInt; [INTERNPROC: fpc_in_x86_cvttss2si_from_mem];
 function x86_cvtpi2ps(r0: __m128; r1: __m64): __m128; [INTERNPROC: fpc_in_x86_cvtpi2ps];
 function x86_cvtpi2ps(r0: __m128; r1: __m64): __m128; [INTERNPROC: fpc_in_x86_cvtpi2ps];
+function x86_cvtpi2ps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtpi2ps_from_mem];
 function x86_cvtps2pi(r1: __m128): __m64; [INTERNPROC: fpc_in_x86_cvtps2pi];
 function x86_cvtps2pi(r1: __m128): __m64; [INTERNPROC: fpc_in_x86_cvtps2pi];
+function x86_cvtps2pi(r1: pointer): __m64; [INTERNPROC: fpc_in_x86_cvtps2pi_from_mem];
 function x86_cvttps2pi(r1: __m128): __m64; [INTERNPROC: fpc_in_x86_cvttps2pi];
 function x86_cvttps2pi(r1: __m128): __m64; [INTERNPROC: fpc_in_x86_cvttps2pi];
+function x86_cvttps2pi(r1: pointer): __m64; [INTERNPROC: fpc_in_x86_cvttps2pi_from_mem];
 function x86_pmulhuw(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pmulhuw_mmx];
 function x86_pmulhuw(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pmulhuw_mmx];
+function x86_pmulhuw(r0: __m64; r1: pointer): __m64; [INTERNPROC: fpc_in_x86_pmulhuw_mmx_from_mem];
 function x86_psadbw(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_psadbw_mmx];
 function x86_psadbw(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_psadbw_mmx];
+function x86_psadbw(r0: __m64; r1: pointer): __m64; [INTERNPROC: fpc_in_x86_psadbw_mmx_from_mem];
 function x86_pavgb(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pavgb_mmx];
 function x86_pavgb(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pavgb_mmx];
+function x86_pavgb(r0: __m64; r1: pointer): __m64; [INTERNPROC: fpc_in_x86_pavgb_mmx_from_mem];
 function x86_pavgw(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pavgw_mmx];
 function x86_pavgw(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pavgw_mmx];
+function x86_pavgw(r0: __m64; r1: pointer): __m64; [INTERNPROC: fpc_in_x86_pavgw_mmx_from_mem];
 function x86_pmaxub(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pmaxub_mmx];
 function x86_pmaxub(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pmaxub_mmx];
+function x86_pmaxub(r0: __m64; r1: pointer): __m64; [INTERNPROC: fpc_in_x86_pmaxub_mmx_from_mem];
 function x86_pminub(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pminub_mmx];
 function x86_pminub(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pminub_mmx];
+function x86_pminub(r0: __m64; r1: pointer): __m64; [INTERNPROC: fpc_in_x86_pminub_mmx_from_mem];
 function x86_pmaxsw(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pmaxsw_mmx];
 function x86_pmaxsw(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pmaxsw_mmx];
+function x86_pmaxsw(r0: __m64; r1: pointer): __m64; [INTERNPROC: fpc_in_x86_pmaxsw_mmx_from_mem];
 function x86_pminsw(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pminsw_mmx];
 function x86_pminsw(r0, r1: __m64): __m64; [INTERNPROC: fpc_in_x86_pminsw_mmx];
-function x86_pextrw(r1: __m64; imm: longint): longword; [INTERNPROC: fpc_in_x86_pextrw_mmx];
-function x86_pinsrw(r0: __m64; r1: longword; imm: longint): __m64; [INTERNPROC: fpc_in_x86_pinsrw_mmx];
-function x86_pmovmskb(r1: __m64): longword; [INTERNPROC: fpc_in_x86_pmovmskb];
+function x86_pminsw(r0: __m64; r1: pointer): __m64; [INTERNPROC: fpc_in_x86_pminsw_mmx_from_mem];
+function x86_pextrw(r1: __m64; imm: longint): NativeUInt; [INTERNPROC: fpc_in_x86_pextrw_mmx];
+function x86_pinsrw(r0: __m64; r1: NativeUInt; imm: longint): __m64; [INTERNPROC: fpc_in_x86_pinsrw_mmx];
+function x86_pmovmskb(r1: __m64): NativeUInt; [INTERNPROC: fpc_in_x86_pmovmskb_mmx];
 function x86_pshufw(r1: __m64; imm: longint): __m64; [INTERNPROC: fpc_in_x86_pshufw];
 function x86_pshufw(r1: __m64; imm: longint): __m64; [INTERNPROC: fpc_in_x86_pshufw];
-function x86_pmulhuw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmulhuw];
-function x86_psadbw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psadbw];
-function x86_pavgb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pavgb];
-function x86_pavgw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pavgw];
-function x86_pmaxub(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmaxub];
-function x86_pminub(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pminub];
-function x86_pmaxsw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmaxsw];
-function x86_pminsw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pminsw];
-function x86_pextrw(r1: __m128; imm: longint): longword; [INTERNPROC: fpc_in_x86_pextrw];
-function x86_pinsrw(r0: __m128; r1: longword; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrw];
-
+function x86_pshufw(r1: pointer; imm: longint): __m64; [INTERNPROC: fpc_in_x86_pshufw_from_mem];
+function x86_movapd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movapd];
+procedure x86_movapd(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movapd_to_mem];
+procedure x86_movntpd(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movntpd_to_mem];
+function x86_movhpd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movhpd];
+procedure x86_movhpd(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movhpd_to_mem];
+function x86_movlpd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movlpd];
+procedure x86_movlpd(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movlpd_to_mem];
+function x86_movupd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movupd];
+procedure x86_movupd(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movupd_to_mem];
+function x86_movmskpd(r1: __m128): longword; [INTERNPROC: fpc_in_x86_movmskpd];
+function x86_movsd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movsd_from_mem];
+procedure x86_movsd(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movsd_to_mem];
+function x86_movsd(r1: __m128): double; [INTERNPROC: fpc_in_x86_movsd_to_val];
+function x86_movsd(r1: double): __m128; [INTERNPROC: fpc_in_x86_movsd_from_val];
+function x86_addpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addpd];
+function x86_addpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_addpd_from_mem];
+function x86_addsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addsd];
+function x86_addsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_addsd_from_mem];
+function x86_divpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_divpd];
+function x86_divpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_divpd_from_mem];
+function x86_divsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_divsd];
+function x86_divsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_divsd_from_mem];
+function x86_maxpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_maxpd];
+function x86_maxpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_maxpd_from_mem];
+function x86_maxsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_maxsd];
+function x86_maxsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_maxsd_from_mem];
+function x86_minpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_minpd];
+function x86_minpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_minpd_from_mem];
+function x86_minsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_minsd];
+function x86_minsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_minsd_from_mem];
+function x86_mulpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_mulpd];
+function x86_mulpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_mulpd_from_mem];
+function x86_mulsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_mulsd];
+function x86_mulsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_mulsd_from_mem];
+function x86_sqrtpd(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_sqrtpd];
+function x86_sqrtpd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_sqrtpd_from_mem];
+function x86_sqrtsd(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_sqrtsd];
+function x86_sqrtsd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_sqrtsd_from_mem];
+function x86_subpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_subpd];
+function x86_subpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_subpd_from_mem];
+function x86_subsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_subsd];
+function x86_subsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_subsd_from_mem];
+function x86_andpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_andpd];
+function x86_andpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_andpd_from_mem];
+function x86_andnpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_andnpd];
+function x86_andnpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_andnpd_from_mem];
+function x86_orpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_orpd];
+function x86_orpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_orpd_from_mem];
+function x86_xorpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_xorpd];
+function x86_xorpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_xorpd_from_mem];
+function x86_cmppd(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmppd];
+function x86_cmppd(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmppd_from_mem];
+function x86_cmpsd(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpsd];
+function x86_cmpsd(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_cmpsd_from_mem];
+function x86_comisd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_comisd];
+function x86_comisd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_comisd_from_mem];
+function x86_ucomisd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_ucomisd];
+function x86_ucomisd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_ucomisd_from_mem];
+function x86_shufpd(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_shufpd];
+function x86_shufpd(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_shufpd_from_mem];
+function x86_unpckhpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_unpckhpd];
+function x86_unpckhpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_unpckhpd_from_mem];
+function x86_unpcklpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_unpcklpd];
+function x86_unpcklpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_unpcklpd_from_mem];
+function x86_cvtdq2pd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvtdq2pd];
+function x86_cvtdq2pd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtdq2pd_from_mem];
+function x86_cvtdq2ps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvtdq2ps];
+function x86_cvtdq2ps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtdq2ps_from_mem];
+function x86_cvtpd2dq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvtpd2dq];
+function x86_cvtpd2dq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtpd2dq_from_mem];
+function x86_cvtpd2pi(r0: __m64; r1: __m128): __m64; [INTERNPROC: fpc_in_x86_cvtpd2pi];
+function x86_cvtpd2pi(r0: __m64; r1: pointer): __m64; [INTERNPROC: fpc_in_x86_cvtpd2pi_from_mem];
+function x86_cvtpd2ps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvtpd2ps];
+function x86_cvtpd2ps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtpd2ps_from_mem];
+function x86_cvtpi2pd(r0: __m128; r1: __m64): __m128; [INTERNPROC: fpc_in_x86_cvtpi2pd];
+function x86_cvtpi2pd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtpi2pd_from_mem];
+function x86_cvtps2dq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvtps2dq];
+function x86_cvtps2dq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtps2dq_from_mem];
+function x86_cvtps2pd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvtps2pd];
+function x86_cvtps2pd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtps2pd_from_mem];
+function x86_cvtsd2si(r0: NativeInt; r1: __m128): NativeInt; [INTERNPROC: fpc_in_x86_cvtsd2si];
+function x86_cvtsd2si(r0: NativeInt; r1: pointer): NativeInt; [INTERNPROC: fpc_in_x86_cvtsd2si_from_mem];
+function x86_cvtsd2ss(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvtsd2ss];
+function x86_cvtsd2ss(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtsd2ss_from_mem];
+function x86_cvtsi2sd(r0: __m128; r1: longword): __m128; [INTERNPROC: fpc_in_x86_cvtsi2sd];
+function x86_cvtsi2sd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtsi2sd_from_mem];
+function x86_cvtss2sd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvtss2sd];
+function x86_cvtss2sd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvtss2sd_from_mem];
+function x86_cvttpd2dq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvttpd2dq];
+function x86_cvttpd2dq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvttpd2dq_from_mem];
+function x86_cvttpd2pi(r0: __m64; r1: __m128): __m64; [INTERNPROC: fpc_in_x86_cvttpd2pi];
+function x86_cvttpd2pi(r0: __m64; r1: pointer): __m64; [INTERNPROC: fpc_in_x86_cvttpd2pi_from_mem];
+function x86_cvttps2dq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_cvttps2dq];
+function x86_cvttps2dq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_cvttps2dq_from_mem];
+function x86_cvttsd2si(r0: NativeInt; r1: __m128): NativeInt; [INTERNPROC: fpc_in_x86_cvttsd2si];
+function x86_cvttsd2si(r0: NativeInt; r1: pointer): NativeInt; [INTERNPROC: fpc_in_x86_cvttsd2si_from_mem];
+function x86_movd(r1: longword): __m128; [INTERNPROC: fpc_in_x86_movd_from_reg];
+function x86_movd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movd_from_mem];
+function x86_movd(r1: __m128): longword; [INTERNPROC: fpc_in_x86_movd_to_reg];
+procedure x86_movd(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movd_to_mem];
+function x86_movq(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movq_from_mem];
+procedure x86_movq(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movq_to_mem];
+function x86_pmovmskb(r0: longword; r1: __m128): longword; [INTERNPROC: fpc_in_x86_pmovmskb];
+function x86_pextrw(r1: __m128; imm: longint): word; [INTERNPROC: fpc_in_x86_pextrw_sse2];
+function x86_pinsrw(r0: __m128; r1: longword; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrw_sse2];
+function x86_pinsrw(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrw_from_mem];
+function x86_packssdw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_packssdw];
+function x86_packssdw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_packssdw_from_mem];
+function x86_packsswb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_packsswb];
+function x86_packsswb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_packsswb_from_mem];
+function x86_packuswb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_packuswb];
+function x86_packuswb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_packuswb_from_mem];
+function x86_paddb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_paddb];
+function x86_paddb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_paddb_from_mem];
+function x86_paddw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_paddw];
+function x86_paddw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_paddw_from_mem];
+function x86_paddd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_paddd];
+function x86_paddd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_paddd_from_mem];
+function x86_paddq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_paddq];
+function x86_paddq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_paddq_from_mem];
+function x86_paddsb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_paddsb];
+function x86_paddsb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_paddsb_from_mem];
+function x86_paddsw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_paddsw];
+function x86_paddsw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_paddsw_from_mem];
+function x86_paddusb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_paddusb];
+function x86_paddusb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_paddusb_from_mem];
+function x86_paddusw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_paddusw];
+function x86_paddusw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_paddusw_from_mem];
+function x86_pand(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pand];
+function x86_pand(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pand_from_mem];
+function x86_pandn(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pandn];
+function x86_pandn(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pandn_from_mem];
+function x86_por(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_por];
+function x86_por(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_por_from_mem];
+function x86_pxor(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pxor];
+function x86_pxor(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pxor_from_mem];
+function x86_pcmpeqb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pcmpeqb];
+function x86_pcmpeqb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pcmpeqb_from_mem];
+function x86_pcmpeqw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pcmpeqw];
+function x86_pcmpeqw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pcmpeqw_from_mem];
+function x86_pcmpeqd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pcmpeqd];
+function x86_pcmpeqd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pcmpeqd_from_mem];
+function x86_pcmpgtb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pcmpgtb];
+function x86_pcmpgtb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pcmpgtb_from_mem];
+function x86_pcmpgtw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pcmpgtw];
+function x86_pcmpgtw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pcmpgtw_from_mem];
+function x86_pcmpgtd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pcmpgtd];
+function x86_pcmpgtd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pcmpgtd_from_mem];
+function x86_pmullw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmullw];
+function x86_pmullw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmullw_from_mem];
+function x86_pmulhw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmulhw];
+function x86_pmulhw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmulhw_from_mem];
+function x86_pmulhuw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmulhuw_sse2];
+function x86_pmulhuw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmulhuw_from_mem];
+function x86_pmuludq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmuludq];
+function x86_pmuludq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmuludq_from_mem];
+function x86_psllw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psllw_sse2];
+function x86_psllw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psllw_from_mem];
+function x86_psllw(r0: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_psllw_sse2_imm];
+function x86_pslld(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pslld_sse2];
+function x86_pslld(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pslld_from_mem];
+function x86_pslld(r0: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pslld_sse2_imm];
+function x86_psllq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psllq_sse2];
+function x86_psllq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psllq_from_mem];
+function x86_psllq(r0: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_psllq_sse2_imm];
+function x86_psrad(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psrad_sse2];
+function x86_psrad(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psrad_from_mem];
+function x86_psrad(r0: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_psrad_sse2_imm];
+function x86_psraw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psraw_sse2];
+function x86_psraw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psraw_from_mem];
+function x86_psraw(r0: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_psraw_sse2_imm];
+function x86_psrlw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psrlw_sse2];
+function x86_psrlw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psrlw_from_mem];
+function x86_psrlw(r0: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_psrlw_sse2_imm];
+function x86_psrld(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psrld_sse2];
+function x86_psrld(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psrld_from_mem];
+function x86_psrld(r0: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_psrld_sse2_imm];
+function x86_psrlq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psrlq_sse2];
+function x86_psrlq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psrlq_from_mem];
+function x86_psrlq(r0: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_psrlq_sse2_imm];
+function x86_psubb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psubb];
+function x86_psubb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psubb_from_mem];
+function x86_psubw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psubw];
+function x86_psubw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psubw_from_mem];
+function x86_psubd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psubd];
+function x86_psubd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psubd_from_mem];
+function x86_psubq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psubq];
+function x86_psubq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psubq_from_mem];
+function x86_psubsb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psubsb]; // pattern used by the declarations below: a register overload (r0, r1: __m128) ...
+function x86_psubsb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psubsb_from_mem]; // ... followed by a "_from_mem" overload whose second operand r1 is a pointer
+function x86_psubsw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psubsw];
+function x86_psubsw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psubsw_from_mem];
+function x86_pmaddwd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmaddwd];
+function x86_pmaddwd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmaddwd_from_mem];
+function x86_psubusb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psubusb];
+function x86_psubusb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psubusb_from_mem];
+function x86_psubusw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psubusw];
+function x86_psubusw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psubusw_from_mem];
+function x86_punpckhbw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_punpckhbw]; // punpckh*/punpckl*: high/low unpack variants for the bw, wd and dq element pairings
+function x86_punpckhbw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_punpckhbw_from_mem];
+function x86_punpckhwd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_punpckhwd];
+function x86_punpckhwd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_punpckhwd_from_mem];
+function x86_punpckhdq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_punpckhdq];
+function x86_punpckhdq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_punpckhdq_from_mem];
+function x86_punpcklbw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_punpcklbw];
+function x86_punpcklbw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_punpcklbw_from_mem];
+function x86_punpcklwd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_punpcklwd];
+function x86_punpcklwd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_punpcklwd_from_mem];
+function x86_punpckldq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_punpckldq];
+function x86_punpckldq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_punpckldq_from_mem];
+function x86_pavgb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pavgb_sse2]; // NOTE(review): from here through x86_psadbw the register form binds to an id with an "_sse2" suffix ...
+function x86_pavgb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pavgb_from_mem]; // ... while the memory form's id has no such suffix - confirm this asymmetry is intended and the id is not shared with another overload
+function x86_pavgw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pavgw_sse2];
+function x86_pavgw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pavgw_from_mem];
+function x86_pminub(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pminub_sse2];
+function x86_pminub(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pminub_from_mem];
+function x86_pminsw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pminsw_sse2];
+function x86_pminsw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pminsw_from_mem];
+function x86_pmaxsw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmaxsw_sse2];
+function x86_pmaxsw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmaxsw_from_mem];
+function x86_pmaxub(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmaxub_sse2];
+function x86_pmaxub(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmaxub_from_mem];
+function x86_psadbw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psadbw_sse2];
+function x86_psadbw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psadbw_from_mem];
+procedure x86_maskmovdqu(addr: pointer; r0, r1: __m128); [INTERNPROC: fpc_in_x86_maskmovdqu]; // no result; NOTE(review): addr presumably models the instruction's implicit (E/R)DI store address - confirm
+function x86_movdq2q(r1: __m128): __m64; [INTERNPROC: fpc_in_x86_movdq2q]; // __m128 in, __m64 out
+function x86_movdqa(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movdqa_from_mem]; // load overload: pointer in, __m128 out
+procedure x86_movdqa(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movdqa]; // store overload: no result; r0 is presumably the destination address
+function x86_movdqu(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movdqu_from_mem]; // load overload, same shape as x86_movdqa
+procedure x86_movdqu(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movdqu]; // store overload, same shape as x86_movdqa
+function x86_movq2dq(r1: __m64): __m128; [INTERNPROC: fpc_in_x86_movq2dq]; // __m64 in, __m128 out
+procedure x86_movntdq(r0: pointer; r1: __m128); [INTERNPROC: fpc_in_x86_movntdq]; // store-only: no load overload is declared
+function x86_pshufhw(r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pshufhw]; // shuffles: one source plus imm; NOTE(review): imm presumably has to be a compile-time constant - confirm
+function x86_pshuflw(r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pshuflw];
+function x86_pshufd(r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pshufd];
+function x86_pshufhw(r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pshufhw_from_mem]; // memory-source overloads of the three shuffles above
+function x86_pshuflw(r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pshuflw_from_mem];
+function x86_pshufd(r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pshufd_from_mem];
+function x86_pslldq(r0: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pslldq]; // shift by immediate: register overload only
+function x86_psrldq(r0: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_psrldq];
+function x86_punpckhqdq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_punpckhqdq];
+function x86_punpckhqdq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_punpckhqdq_from_mem];
+function x86_punpcklqdq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_punpcklqdq];
+function x86_punpcklqdq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_punpcklqdq_from_mem];
+function x86_addsubps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addsubps]; // addsub*/movddup/movs?dup/hadd*/hsub*/lddqu: the SSE3 mnemonics, each with register and "_from_mem" overloads
+function x86_addsubps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_addsubps_from_mem];
+function x86_addsubpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_addsubpd];
+function x86_addsubpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_addsubpd_from_mem];
+function x86_movddup(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_movddup]; // duplicate-moves take a single source (register or pointer)
+function x86_movddup(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movddup_from_mem];
+function x86_movsldup(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_movsldup];
+function x86_movsldup(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movsldup_from_mem];
+function x86_movshdup(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_movshdup];
+function x86_movshdup(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movshdup_from_mem];
+function x86_haddps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_haddps];
+function x86_haddps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_haddps_from_mem];
+function x86_haddpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_haddpd];
+function x86_haddpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_haddpd_from_mem];
+function x86_hsubps(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_hsubps];
+function x86_hsubps(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_hsubps_from_mem];
+function x86_hsubpd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_hsubpd];
+function x86_hsubpd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_hsubpd_from_mem];
+function x86_lddqu(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_lddqu]; // pointer-only: no register overload is declared, and the id carries no "_from_mem" suffix
+function x86_psignb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psignb]; // psign*/pshufb/pmulhrsw/pmaddubsw/phsub*/phadd*/palignr/pabs*: the SSSE3 mnemonics, declared here for __m128 operands
+function x86_psignb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psignb_from_mem];
+function x86_psignw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psignw];
+function x86_psignw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psignw_from_mem];
+function x86_psignd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_psignd];
+function x86_psignd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_psignd_from_mem];
+function x86_pshufb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pshufb];
+function x86_pshufb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pshufb_from_mem];
+function x86_pmulhrsw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmulhrsw];
+function x86_pmulhrsw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmulhrsw_from_mem];
+function x86_pmaddubsw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmaddubsw];
+function x86_pmaddubsw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmaddubsw_from_mem];
+function x86_phsubw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_phsubw];
+function x86_phsubw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_phsubw_from_mem];
+function x86_phsubsw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_phsubsw];
+function x86_phsubsw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_phsubsw_from_mem];
+function x86_phsubd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_phsubd];
+function x86_phsubd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_phsubd_from_mem];
+function x86_phaddsw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_phaddsw];
+function x86_phaddsw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_phaddsw_from_mem];
+function x86_phaddw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_phaddw];
+function x86_phaddw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_phaddw_from_mem];
+function x86_phaddd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_phaddd];
+function x86_phaddd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_phaddd_from_mem];
+function x86_palignr(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_palignr]; // two sources plus an immediate
+function x86_palignr(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_palignr_from_mem];
+function x86_pabsb(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pabsb]; // pabs*: unary, single source r1 (register or pointer)
+function x86_pabsb(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pabsb_from_mem];
+function x86_pabsw(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pabsw];
+function x86_pabsw(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pabsw_from_mem];
+function x86_pabsd(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pabsd];
+function x86_pabsd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pabsd_from_mem];
+function x86_dpps(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_dpps]; // start of the SSE4.1 mnemonics: dot product, blend, round, insert/extract, mpsadbw, ...
+function x86_dpps(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_dpps_from_mem];
+function x86_dppd(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_dppd];
+function x86_dppd(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_dppd_from_mem];
+function x86_blendps(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_blendps]; // immediate-controlled blend
+function x86_blendps(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_blendps_from_mem];
+function x86_blendvps(r0, r1: __m128; mask: __m128): __m128; [INTERNPROC: fpc_in_x86_blendvps]; // variable blend: mask is a third __m128 argument; NOTE(review): presumably the instruction's implicit XMM0 operand - confirm
+function x86_blendvps(r0: __m128; r1: pointer; mask: __m128): __m128; [INTERNPROC: fpc_in_x86_blendvps_from_mem];
+function x86_blendpd(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_blendpd];
+function x86_blendpd(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_blendpd_from_mem];
+function x86_blendvpd(r0, r1: __m128; mask: __m128): __m128; [INTERNPROC: fpc_in_x86_blendvpd];
+function x86_blendvpd(r0: __m128; r1: pointer; mask: __m128): __m128; [INTERNPROC: fpc_in_x86_blendvpd_from_mem];
+function x86_roundps(r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundps]; // round*: single source plus an immediate
+function x86_roundps(r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundps_from_mem];
+function x86_roundss(r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundss];
+function x86_roundss(r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundss_from_mem];
+function x86_roundpd(r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundpd];
+function x86_roundpd(r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundpd_from_mem];
+function x86_roundsd(r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundsd];
+function x86_roundsd(r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_roundsd_from_mem];
+function x86_insertps(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_insertps];
+function x86_insertps(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_insertps_from_mem];
+function x86_extractps(r1: __m128; imm: longint): longword; [INTERNPROC: fpc_in_x86_extractps]; // register form returns the extracted value as longword
+procedure x86_extractps(r0: pointer; r1: __m128; imm: longint); [INTERNPROC: fpc_in_x86_extractps_from_mem]; // NOTE(review): no result and a pointer r0, i.e. a store shape, yet the id ends in "_from_mem"; the x86_pextr* store forms use "_to_mem" - confirm the naming
+function x86_mpsadbw(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_mpsadbw];
+function x86_mpsadbw(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_mpsadbw_from_mem];
+function x86_phminposuw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_phminposuw]; // NOTE(review): declared with two inputs, but PHMINPOSUW has a single source operand - confirm r0 is really needed
+function x86_phminposuw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_phminposuw_from_mem];
+function x86_pmulld(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmulld];
+function x86_pmulld(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmulld_from_mem];
+function x86_pmuldq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmuldq];
+function x86_pmuldq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmuldq_from_mem];
+function x86_pblendvb(r0, r1: __m128; mask: __m128): __m128; [INTERNPROC: fpc_in_x86_pblendvb]; // same three-argument mask shape as x86_blendvps
+function x86_pblendvb(r0: __m128; r1: pointer; mask: __m128): __m128; [INTERNPROC: fpc_in_x86_pblendvb_from_mem];
+function x86_pblendw(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pblendw];
+function x86_pblendw(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pblendw_from_mem];
+function x86_pminsb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pminsb]; // packed min/max for the sb, uw, sd and ud element variants; register and "_from_mem" overloads for each
+function x86_pminsb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pminsb_from_mem];
+function x86_pminuw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pminuw];
+function x86_pminuw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pminuw_from_mem];
+function x86_pminsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pminsd];
+function x86_pminsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pminsd_from_mem];
+function x86_pminud(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pminud];
+function x86_pminud(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pminud_from_mem];
+function x86_pmaxsb(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmaxsb];
+function x86_pmaxsb(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmaxsb_from_mem];
+function x86_pmaxuw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmaxuw];
+function x86_pmaxuw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmaxuw_from_mem];
+function x86_pmaxsd(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmaxsd];
+function x86_pmaxsd(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmaxsd_from_mem];
+function x86_pmaxud(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmaxud];
+function x86_pmaxud(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmaxud_from_mem];
+function x86_pinsrb(r0: __m128; r1: longword; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrb]; // pinsr*: r1 is a scalar (or a pointer in the "_from_mem" form) combined with vector r0 under control of imm
+function x86_pinsrb(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrb_from_mem];
+function x86_pinsrd(r0: __m128; r1: longword; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrd];
+function x86_pinsrd(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrd_from_mem];
+function x86_pinsrq(r0: __m128; r1: NativeUInt; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrq]; // the quadword insert takes its scalar as NativeUInt rather than longword
+function x86_pinsrq(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pinsrq_from_mem];
+function x86_pextrb(r1: __m128; imm: longint): byte; [INTERNPROC: fpc_in_x86_pextrb]; // pextr*: register form returns the scalar, "_to_mem" form is a procedure taking a pointer r0
+procedure x86_pextrb(r0: pointer; r1: __m128; imm: longint); [INTERNPROC: fpc_in_x86_pextrb_to_mem];
+//function x86_pextrw(r1: __m128; imm: longint): word; [INTERNPROC: fpc_in_x86_pextrw_sse41];
+procedure x86_pextrw(r0: pointer; r1: __m128; imm: longint); [INTERNPROC: fpc_in_x86_pextrw_sse41_to_mem]; // only the pointer form of x86_pextrw is active here; the register form on the previous line is commented out
+function x86_pextrd(r1: __m128; imm: longint): longword; [INTERNPROC: fpc_in_x86_pextrd];
+procedure x86_pextrd(r0: pointer; r1: __m128; imm: longint); [INTERNPROC: fpc_in_x86_pextrd_to_mem];
+function x86_pextrq(r1: __m128; imm: longint): longword; [INTERNPROC: fpc_in_x86_pextrq]; // NOTE(review): result is longword although PEXTRQ extracts a 64-bit quadword (x86_pinsrq takes NativeUInt) - likely truncates the upper half; confirm and fix in the generator data
+procedure x86_pextrq(r0: pointer; r1: __m128; imm: longint); [INTERNPROC: fpc_in_x86_pextrq_to_mem];
+function x86_pmovsxbw(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovsxbw]; // pmovsx*/pmovzx*: sign-/zero-extending packed conversions; one source r1, given as register or as pointer ("_from_mem")
+function x86_pmovsxbw(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovsxbw_from_mem];
+function x86_pmovzxbw(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovzxbw];
+function x86_pmovzxbw(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovzxbw_from_mem];
+function x86_pmovsxbd(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovsxbd];
+function x86_pmovsxbd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovsxbd_from_mem];
+function x86_pmovzxbd(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovzxbd];
+function x86_pmovzxbd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovzxbd_from_mem];
+function x86_pmovsxbq(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovsxbq];
+function x86_pmovsxbq(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovsxbq_from_mem];
+function x86_pmovzxbq(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovzxbq];
+function x86_pmovzxbq(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovzxbq_from_mem];
+function x86_pmovsxwd(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovsxwd];
+function x86_pmovsxwd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovsxwd_from_mem];
+function x86_pmovzxwd(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovzxwd];
+function x86_pmovzxwd(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovzxwd_from_mem];
+function x86_pmovsxwq(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovsxwq];
+function x86_pmovsxwq(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovsxwq_from_mem];
+function x86_pmovzxwq(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovzxwq];
+function x86_pmovzxwq(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovzxwq_from_mem];
+function x86_pmovsxdq(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovsxdq];
+function x86_pmovsxdq(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovsxdq_from_mem];
+function x86_pmovzxdq(r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pmovzxdq];
+function x86_pmovzxdq(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pmovzxdq_from_mem];
+function x86_ptest(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_ptest]; // NOTE(review): PTEST only sets ZF/CF and writes no XMM register, so a __m128 result looks wrong - confirm what this intrinsic is meant to return
+function x86_ptest(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_ptest_from_mem]; // same concern as the register form above
+function x86_pcmpeqq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pcmpeqq];
+function x86_pcmpeqq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pcmpeqq_from_mem];
+function x86_packusdw(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_packusdw];
+function x86_packusdw(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_packusdw_from_mem];
+function x86_movntdqa(r1: pointer): __m128; [INTERNPROC: fpc_in_x86_movntdqa]; // pointer-only load: no register overload is declared, and the id carries no "_from_mem" suffix
+function x86_pcmpestri(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pcmpestri]; // NOTE(review): the pcmp?stri instructions deliver an index in ECX and the pcmp?strm ones a mask in XMM0; the explicit-length ("e") forms also read EAX/EDX ...
+function x86_pcmpestri(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pcmpestri_from_mem]; // ... none of which appears in these signatures - confirm that a __m128 result and (r0, r1, imm) model the instructions correctly
+function x86_pcmpestrm(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pcmpestrm];
+function x86_pcmpestrm(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pcmpestrm_from_mem];
+function x86_pcmpistri(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pcmpistri];
+function x86_pcmpistri(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pcmpistri_from_mem];
+function x86_pcmpistrm(r0, r1: __m128; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pcmpistrm];
+function x86_pcmpistrm(r0: __m128; r1: pointer; imm: longint): __m128; [INTERNPROC: fpc_in_x86_pcmpistrm_from_mem];
+function x86_pcmpgtq(r0, r1: __m128): __m128; [INTERNPROC: fpc_in_x86_pcmpgtq]; // plain two-operand packed compare, same shape as x86_pcmpeqq
+function x86_pcmpgtq(r0: __m128; r1: pointer): __m128; [INTERNPROC: fpc_in_x86_pcmpgtq_from_mem];

Энэ ялгаанд хэт олон файл өөрчлөгдсөн тул зарим файлыг харуулаагүй болно