Browse Source

llvm: support for opaque pointers

Will be the default starting with LLVM 15, and required with LLVM 16.
Tested with LLVM 14 and '-mllvm -opaque-pointers'. See
https://releases.llvm.org/14.0.0/docs/OpaquePointers.html for more
information.
Jonas Maebe 3 years ago
parent
commit
25999ad8ff

+ 17 - 29
compiler/llvm/agllvm.pas

@@ -360,7 +360,7 @@ implementation
            if lcp_sret in para^.flags then
              owner.writer.AsmWrite(llvmparatypeattr(' sret',para^.def,true));
            if asmblock and
-              (llvmflag_opaque_ptr_transition in llvmversion_properties[current_settings.llvmversion]) and
+              (([llvmflag_opaque_ptr_transition,llvmflag_opaque_ptr]*llvmversion_properties[current_settings.llvmversion])<>[]) and
               (para^.def.typ=pointerdef) then
              owner.writer.AsmWrite(llvmparatypeattr(' elementtype',para^.def,true));
            { For byval, this means "alignment on the stack" and of the passed source data.
@@ -628,20 +628,7 @@ implementation
               owner.writer.AsmWrite(' (')
             else
               owner.writer.AsmWrite(' ');
-            { can't just dereference the type, because it may be an
-              implicit pointer type such as a class -> resort to string
-              manipulation... Not very clean :( }
-            tmpstr:=llvmencodetypename(taillvm(hp).spilling_get_reg_type(0));
-            if op=la_getelementptr then
-              begin
-                if tmpstr[length(tmpstr)]<>'*' then
-                  begin
-                    writeln(tmpstr);
-                    internalerror(2016071101);
-                  end
-                else
-                  setlength(tmpstr,length(tmpstr)-1);
-              end;
+            tmpstr:=llvmencodetypename(taillvm(hp).spilling_get_reg_type(0),op=la_getelementptr);
             owner.writer.AsmWrite(tmpstr);
             owner.writer.AsmWrite(',');
           end;
@@ -672,15 +659,8 @@ implementation
                 owner.writer.AsmWrite(tmpstr);
               end;
             opdone:=true;
-            tmpstr:=llvmencodetypename(taillvm(hp).oper[3]^.def);
-            if tmpstr[length(tmpstr)]<>'*' then
-              begin
-                writeln(tmpstr);
-                internalerror(2016071102);
-              end
-            else
-              setlength(tmpstr,length(tmpstr)-1);
-            owner.writer.AsmWrite(tmpstr);
+            owner.writer.AsmWrite(' ');
+            owner.writer.AsmWrite(llvmencodetypename(taillvm(hp).oper[taillvm.callpdopernr]^.def,true));
             opstart:=4;
           end;
         la_blockaddress:
@@ -1401,11 +1381,19 @@ implementation
                       WriteFunctionFlags(tprocdef(taillvmdecl(hp).def));
                       if assigned(tprocdef(taillvmdecl(hp).def).personality) then
                         begin
-                          writer.AsmWrite(' personality i8* bitcast (');
-                          writer.AsmWrite(llvmencodeproctype(tprocdef(taillvmdecl(hp).def).personality, '', lpd_procvar));
-                          writer.AsmWrite('* ');
-                          writer.AsmWrite(llvmmangledname(tprocdef(taillvmdecl(hp).def).personality.mangledname));
-                          writer.AsmWrite(' to i8*)');
+                          if not(llvmflag_opaque_ptr in llvmversion_properties[current_settings.llvmversion]) then
+                            begin
+                              writer.AsmWrite(' personality i8* bitcast (');
+                              writer.AsmWrite(llvmencodeproctype(tprocdef(taillvmdecl(hp).def).personality, '', lpd_procvar));
+                              writer.AsmWrite('* ');
+                              writer.AsmWrite(llvmmangledname(tprocdef(taillvmdecl(hp).def).personality.mangledname));
+                              writer.AsmWrite(' to i8*)');
+                            end
+                          else
+                            begin
+                              writer.AsmWrite(' personality ptr ');
+                              writer.AsmWrite(llvmmangledname(tprocdef(taillvmdecl(hp).def).personality.mangledname));
+                            end;
                         end;
                       InstrWriter.WriterInstructionMetadata(' ', taillvmdecl(hp).metadata);
                       writer.AsmWriteln(' {');

+ 12 - 1
compiler/llvm/hlcgllvm.pas

@@ -1610,7 +1610,8 @@ implementation
       hreg: tregister;
     begin
       { will insert a bitcast if necessary }
-      if fromdef<>todef then
+      if (fromdef<>todef) and
+         not(llvmflag_opaque_ptr in llvmversion_properties[current_settings.llvmversion]) then
         begin
           hreg:=getregisterfordef(list,todef);
           a_load_reg_reg(list,fromdef,todef,reg,hreg);
@@ -1623,6 +1624,16 @@ implementation
     var
       hreg: tregister;
     begin
+      { the reason for the array exception is that we sometimes generate
+          getelementptr array_element_ty, arrayref, 0, 0
+        to get a pointer to the first element of the array. That expression is
+        not valid if arrayref does not point to an array. Clang does the same.
+      }
+      if (llvmflag_opaque_ptr in llvmversion_properties[current_settings.llvmversion]) and
+         (((fromdef.typ=pointerdef) and (tpointerdef(fromdef).pointeddef.typ=arraydef)) <>
+          ((todef.typ=pointerdef) and (tpointerdef(todef).pointeddef.typ=arraydef))
+         ) then
+        exit;
       hreg:=getaddressregister(list,todef);
       a_loadaddr_ref_reg_intern(list,fromdef,todef,ref,hreg,false);
       reference_reset_base(ref,todef,hreg,0,ref.temppos,ref.alignment,ref.volatility);

+ 100 - 29
compiler/llvm/llvmdef.pas

@@ -57,7 +57,7 @@ interface
     function llvmencodetypedecl(def: tdef): TSymStr;
 
     { same as above, but use a type name if possible (for any use) }
-    function llvmencodetypename(def: tdef): TSymStr;
+    function llvmencodetypename(def: tdef; pointedtype: boolean = false): TSymStr;
 
     { encode a procdef/procvardef into the internal format used by LLVM }
     function llvmencodeproctype(def: tabstractprocdef; const customname: TSymStr; pddecltype: tllvmprocdefdecltype): TSymStr;
@@ -354,11 +354,34 @@ implementation
   procedure llvmaddencodedabstractrecordtype(def: tabstractrecorddef; var encodedstr: TSymStr); forward;
 
   type
-    tllvmencodeflag = (lef_inaggregate, lef_noimplicitderef, lef_typedecl);
+    tllvmencodeflag = (lef_inaggregate, lef_noimplicitderef, lef_typedecl, lef_removeouterpointer);
     tllvmencodeflags = set of tllvmencodeflag;
 
     procedure llvmaddencodedtype_intern(def: tdef; const flags: tllvmencodeflags; var encodedstr: TSymStr);
+      var
+        def_is_address: boolean;
       begin
+        def_is_address:=false;
+        if ((lef_removeouterpointer in flags) or
+            (llvmflag_opaque_ptr in llvmversion_properties[current_settings.llvmversion])) and
+           is_address(def) and
+           (def<>llvm_metadatatype) then
+          def_is_address:=true
+        else if lef_removeouterpointer in flags then
+          internalerror(2022060813);
+        if (llvmflag_opaque_ptr in llvmversion_properties[current_settings.llvmversion]) and
+           not(lef_removeouterpointer in flags) and
+           def_is_address then
+          begin
+            if not(([lef_typedecl,lef_noimplicitderef]*flags<>[]) and
+                   is_implicit_pointer_object_type(def)) and
+               not((def.typ=procdef) and
+                   not(lef_typedecl in flags)) then
+             begin
+               encodedstr:=encodedstr+'ptr';
+               exit;
+             end;
+          end;
         case def.typ of
           stringdef :
             begin
@@ -367,15 +390,23 @@ implementation
                 st_unicodestring:
                   { the variable does not point to the header, but to a
                     null-terminated string/array with undefined bounds }
-                  encodedstr:=encodedstr+'i16*';
+                  if not(lef_removeouterpointer in flags) then
+                    encodedstr:=encodedstr+'i16*'
+                  else
+                    encodedstr:=encodedstr+'i16';
                 st_ansistring:
-                  encodedstr:=encodedstr+'i8*';
-                st_shortstring:
-                  { length byte followed by string bytes }
-                  if tstringdef(def).len>0 then
-                    encodedstr:=encodedstr+'['+tostr(tstringdef(def).len+1)+' x i8]'
+                  if not(lef_removeouterpointer in flags) then
+                    encodedstr:=encodedstr+'i8*'
                   else
-                    encodedstr:=encodedstr+'[0 x i8]';
+                    encodedstr:=encodedstr+'i8';
+                st_shortstring:
+                  begin
+                    { length byte followed by string bytes }
+                    if tstringdef(def).len>0 then
+                      encodedstr:=encodedstr+'['+tostr(tstringdef(def).len+1)+' x i8]'
+                    else
+                      encodedstr:=encodedstr+'[0 x i8]';
+                  end
                 else
                   internalerror(2013100201);
               end;
@@ -402,11 +433,17 @@ implementation
           pointerdef :
             begin
               if is_voidpointer(def) then
-                encodedstr:=encodedstr+'i8*'
+                begin
+                  if not(lef_removeouterpointer in flags) then
+                    encodedstr:=encodedstr+'i8*'
+                  else
+                    encodedstr:=encodedstr+'i8';
+                end
               else
                 begin
                   llvmaddencodedtype_intern(tpointerdef(def).pointeddef,[],encodedstr);
-                  encodedstr:=encodedstr+'*';
+                  if not(lef_removeouterpointer in flags) then
+                    encodedstr:=encodedstr+'*';
                 end;
             end;
           floatdef :
@@ -478,13 +515,16 @@ implementation
             begin
               if is_class(tclassrefdef(def).pointeddef) then
                 begin
-                  llvmaddencodedtype_intern(tobjectdef(tclassrefdef(def).pointeddef).vmt_def,flags,encodedstr);
-                  encodedstr:=encodedstr+'*';
+                  llvmaddencodedtype_intern(tobjectdef(tclassrefdef(def).pointeddef).vmt_def,flags-[lef_removeouterpointer],encodedstr);
+                  if not(lef_removeouterpointer in flags) then
+                    encodedstr:=encodedstr+'*';
                 end
               else if is_objcclass(tclassrefdef(def).pointeddef) then
-                llvmaddencodedtype_intern(objc_idtype,flags,encodedstr)
-              else
+                llvmaddencodedtype_intern(objc_idtype,flags-[lef_removeouterpointer],encodedstr)
+              else if not(lef_removeouterpointer in flags) then
                 encodedstr:=encodedstr+'i8*'
+              else
+                encodedstr:=encodedstr+'i8'
             end;
           setdef :
             begin
@@ -525,7 +565,8 @@ implementation
               else if is_dynamic_array(def) then
                 begin
                   llvmaddencodedtype_intern(tarraydef(def).elementdef,[lef_inaggregate],encodedstr);
-                  encodedstr:=encodedstr+'*';
+                  if not(lef_removeouterpointer in flags) then
+                    encodedstr:=encodedstr+'*';
                 end
               else if is_packed_array(def) and
                       (tarraydef(def).elementdef.typ in [enumdef,orddef]) then
@@ -553,8 +594,11 @@ implementation
                  tprocvardef(def).is_addressonly then
                 begin
                   llvmaddencodedproctype(tabstractprocdef(def),'',lpd_procvar,encodedstr);
-                  if def.typ=procvardef then
-                    encodedstr:=encodedstr+'*';
+                  if not(lef_removeouterpointer in flags) then
+                    begin
+                      if def.typ=procvardef then
+                        encodedstr:=encodedstr+'*'
+                    end
                 end
               else if not(lef_typedecl in flags) then
                 begin
@@ -562,7 +606,8 @@ implementation
                     via a pointer }
                   encodedstr:=encodedstr+llvmtypeidentifier(def);
                   { blocks are implicit pointers }
-                  if is_block(def) then
+                  if not(lef_removeouterpointer in flags) and
+                     is_block(def) then
                     encodedstr:=encodedstr+'*'
                 end
               else if is_block(def) then
@@ -590,7 +635,7 @@ implementation
                     encodedstr:=encodedstr+llvmtypeidentifier(def)
                   else
                     llvmaddencodedabstractrecordtype(tabstractrecorddef(def),encodedstr);
-                  if ([lef_typedecl,lef_noimplicitderef]*flags=[]) and
+                  if ([lef_typedecl,lef_noimplicitderef,lef_removeouterpointer]*flags=[]) and
                      is_implicit_pointer_object_type(def) then
                     encodedstr:=encodedstr+'*'
                 end;
@@ -599,16 +644,28 @@ implementation
               odt_dispinterface:
                 begin
                   { type is a pointer to a pointer to the vmt }
-                  llvmaddencodedtype_intern(tobjectdef(def).vmt_def,flags,encodedstr);
-                  if ([lef_typedecl,lef_noimplicitderef]*flags=[]) then
-                    encodedstr:=encodedstr+'**';
+                  if ([lef_typedecl,lef_noimplicitderef]*flags=[]) and
+                     (llvmflag_opaque_ptr in llvmversion_properties[current_settings.llvmversion]) then
+                    encodedstr:=encodedstr+'ptr'
+                  else
+                    begin
+                      llvmaddencodedtype_intern(tobjectdef(def).vmt_def,flags,encodedstr);
+                      if ([lef_typedecl,lef_noimplicitderef]*flags=[]) then
+                        if not(lef_removeouterpointer in flags) then
+                          encodedstr:=encodedstr+'**'
+                        else
+                          encodedstr:=encodedstr+'*'
+                    end;
                 end;
               odt_interfacecom_function,
               odt_interfacecom_property,
               odt_objcprotocol:
                 begin
                   { opaque for now }
-                  encodedstr:=encodedstr+'i8*'
+                  if not(lef_removeouterpointer in flags) then
+                    encodedstr:=encodedstr+'i8*'
+                  else
+                    encodedstr:=encodedstr+'i8'
                 end;
               odt_helper:
                 llvmaddencodedtype_intern(tobjectdef(def).extendeddef,flags,encodedstr);
@@ -630,10 +687,16 @@ implementation
       end;
 
 
-    function llvmencodetypename(def: tdef): TSymStr;
+    function llvmencodetypename(def: tdef; pointedtype: boolean = false): TSymStr;
+      var
+        flags: tllvmencodeflags;
       begin
         result:='';
-        llvmaddencodedtype_intern(def,[],result);
+        if not pointedtype then
+          flags:=[]
+        else
+          flags:=[lef_removeouterpointer];
+        llvmaddencodedtype_intern(def,flags,result);
       end;
 
 
@@ -747,7 +810,14 @@ implementation
           { implicit zero/sign extension for ABI compliance? }
           if not first then
              encodedstr:=encodedstr+', ';
-          llvmaddencodedtype_intern(usedef,[],encodedstr);
+          if (hp.vardef=llvm_metadatatype) or
+             not((llvmflag_opaque_ptr in llvmversion_properties[current_settings.llvmversion]) and
+                 ((vo_is_funcret in hp.varoptions) or
+                  paramanager.push_addr_param(hp.varspez,hp.vardef,proccalloption) or
+                  llvmbyvalparaloc(paraloc))) then
+            llvmaddencodedtype_intern(usedef,[],encodedstr)
+          else
+            encodedstr:=encodedstr+'ptr';
           { in case signextstr<>'', there should be only one paraloc -> no need
             to clear (reason: it means that the paraloc is larger than the
             original parameter) }
@@ -756,7 +826,7 @@ implementation
           { sret: hidden pointer for structured function result }
           if vo_is_funcret in hp.varoptions then
             begin
-              { "sret" is only valid for the firstparameter, while in FPC this
+              { "sret" is only valid for the first parameter, while in FPC this
                 can sometimes be second one (self comes before). In general,
                 this is not a problem: we can just leave out sret, which means
                 the result will be a bit less well optimised), but it is for
@@ -783,7 +853,8 @@ implementation
           else if not paramanager.push_addr_param(hp.varspez,hp.vardef,proccalloption) and
              llvmbyvalparaloc(paraloc) then
             begin
-              encodedstr:=encodedstr+'*';
+              if not (llvmflag_opaque_ptr in llvmversion_properties[current_settings.llvmversion]) then
+                encodedstr:=encodedstr+'*';
               if withattributes then
                 begin
                   encodedstr:=encodedstr+llvmparatypeattr(' byval',hp.vardef,false);

+ 2 - 1
compiler/llvm/llvminfo.pas

@@ -66,7 +66,8 @@ type
      llvmflag_NoDISPFlags,                  { no DI sub program flags, but separate fields }
      llvmflag_NoDISPFlagMainSubprogram,     { MainSubprogram still in DIFlags instead of DISPFlags }
      llvmflag_para_attr_type,               { parameter attributes such as noalias and byval need to repeat the type }
-     llvmflag_opaque_ptr_transition         { initial opaque pointer introduction, needs to some elementtype attributes }
+     llvmflag_opaque_ptr_transition,        { initial opaque pointer introduction, needs to some elementtype attributes }
+     llvmflag_opaque_ptr                    { only opaque pointers }
    );
    tllvmversionflags = set of tllvmversionflag;
 

+ 10 - 2
compiler/llvm/llvmtype.pas

@@ -103,12 +103,12 @@ interface
 implementation
 
     uses
-      sysutils,cutils,cfileutl,constexp,
+      cutils,cfileutl,constexp,
       version,globals,verbose,systems,
       cpubase,cgbase,paramgr,
       fmodule,nobj,
       defutil,defcmp,symconst,symtable,
-      llvmbase,llvmdef
+      llvminfo,llvmbase,llvmdef
       ;
 
 {****************************************************************************
@@ -136,6 +136,14 @@ implementation
       begin
         if def1=def2 then
           exit(true);
+        { this function is only used to the pointees of pointer types, to know
+          whether the pointer types are equal. With opaque pointers, all
+          pointers are represented by "ptr" and hence by definition equal,
+          regardless of what they point to (there is one exception related to
+          arrays, but that is already handled during code generation in
+          thlcgllvm.g_ptrtypecast_ref) }
+        if (llvmflag_opaque_ptr in llvmversion_properties[current_settings.llvmversion]) then
+          exit(true);
         def1str:=llvmencodetypename(def1);
         def2str:=llvmencodetypename(def2);
         { normalise both type representations in case one is a procdef

+ 12 - 1
compiler/llvm/nllvmcnv.pas

@@ -77,6 +77,14 @@ uses
 
 class function tllvmtypeconvnode.target_specific_need_equal_typeconv(fromdef, todef: tdef): boolean;
   begin
+    if (llvmflag_opaque_ptr in llvmversion_properties[current_settings.llvmversion]) and
+       is_address(fromdef) and
+       is_address(todef) then
+      begin
+        result:=false;
+        exit;
+      end;
+
     result:=
       (fromdef<>todef) and
       { two procdefs that are structurally the same but semantically different
@@ -302,7 +310,10 @@ procedure tllvmtypeconvnode.second_nothing;
   begin
     { insert LLVM-level type conversions for same-sized entities that are
       nevertheless different types }
-    if left.resultdef<>resultdef then
+    if (left.resultdef<>resultdef) and
+       (not(llvmflag_opaque_ptr in llvmversion_properties[current_settings.llvmversion]) or
+        not(is_address(left.resultdef) and
+            is_address(resultdef))) then
       begin
            { handle sometype(voidptr^) and "absolute" }
         if not is_void(left.resultdef) and

+ 6 - 2
compiler/llvm/nllvmtcon.pas

@@ -129,10 +129,10 @@ interface
 implementation
 
   uses
-    verbose,systems,fmodule,
+    verbose,systems,fmodule,globals,
     aasmdata,
     procinfo,
-    cpubase,cpuinfo,llvmbase,
+    cpubase,cpuinfo,llvmbase,llvminfo,
     symtable,llvmdef,defutil,defcmp,
     ngenutil;
 
@@ -751,6 +751,10 @@ implementation
       secondop: tllvmop;
     begin
       inherited;
+      if (llvmflag_opaque_ptr in llvmversion_properties[current_settings.llvmversion]) and
+         is_address(fromdef) and
+         is_address(todef) then
+        exit;
       { special case: procdef -> procvardef/pointerdef: must take address of
         the procdef }
       if (fromdef.typ=procdef) and