Jelajahi Sumber

+ optimization that (re)orders instance fields of Delphi-style classes in
order to minimise memory losses due to alignment padding. Not yet enabled
by default at any optimization level, but can be (de)activated separately
via -Oo(no)orderfields
o added separate tdef.structalignment method that returns the alignment
of a type when it appears in a record/object/class (factors out
AIX-specific double alignment in structs)
o changed the handling of the offset of a delegate interface
implemented via a field, by taking the field offset on demand
rather than at declaration time (because the ordering optimization
causes the offsets of fields to be unknown until the entire
declaration has been parsed)

git-svn-id: trunk@21947 -

Jonas Maebe 13 tahun lalu
induk
melakukan
3798b79fd7

+ 1 - 1
compiler/arm/cpuinfo.pas

@@ -1026,7 +1026,7 @@ Const
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
                                  [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
-								  cs_opt_stackframe,cs_opt_nodecse];
+				  cs_opt_stackframe,cs_opt_nodecse,cs_opt_reorder_fields];
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +

+ 1 - 1
compiler/avr/cpuinfo.pas

@@ -188,7 +188,7 @@ Const
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
                                  [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
-								  cs_opt_stackframe,cs_opt_nodecse];
+				  cs_opt_stackframe,cs_opt_nodecse,cs_opt_reorder_fields];
    cpuflagsstr : array[tcpuflags] of string[20] =
       ('AVR_HAS_JMP_CALL',
        'AVR_HAS_MOVW',

+ 7 - 1
compiler/cclasses.pas

@@ -151,6 +151,7 @@ type
     function Last: TObject; {$ifdef CCLASSESINLINE}inline;{$endif}
     procedure Move(CurIndex, NewIndex: Integer); {$ifdef CCLASSESINLINE}inline;{$endif}
     procedure Assign(Obj:TFPObjectList);
+    procedure ConcatListCopy(Obj:TFPObjectList);
     procedure Pack; {$ifdef CCLASSESINLINE}inline;{$endif}
     procedure Sort(Compare: TListSortCompare); {$ifdef CCLASSESINLINE}inline;{$endif}
     procedure ForEachCall(proc2call:TObjectListCallback;arg:pointer); {$ifdef CCLASSESINLINE}inline;{$endif}
@@ -1088,10 +1089,15 @@ begin
 end;
 
 procedure TFPObjectList.Assign(Obj: TFPObjectList);
+begin
+  Clear;
+  ConcatListCopy(Obj);
+end;
+
+procedure TFPObjectList.ConcatListCopy(Obj: TFPObjectList);
 var
   i: Integer;
 begin
-  Clear;
   for I := 0 to Obj.Count - 1 do
     Add(Obj[i]);
 end;

+ 5 - 3
compiler/globtype.pas

@@ -243,7 +243,8 @@ interface
          cs_opt_level1,cs_opt_level2,cs_opt_level3,
          cs_opt_regvar,cs_opt_uncertain,cs_opt_size,cs_opt_stackframe,
          cs_opt_peephole,cs_opt_asmcse,cs_opt_loopunroll,cs_opt_tailrecursion,cs_opt_nodecse,
-         cs_opt_nodedfa,cs_opt_loopstrength,cs_opt_scheduler,cs_opt_autoinline,cs_useebp
+         cs_opt_nodedfa,cs_opt_loopstrength,cs_opt_scheduler,cs_opt_autoinline,cs_useebp,
+         cs_opt_reorder_fields
        );
        toptimizerswitches = set of toptimizerswitch;
 
@@ -263,11 +264,12 @@ interface
        end;
 
     const
-       OptimizerSwitchStr : array[toptimizerswitch] of string[10] = ('',
+       OptimizerSwitchStr : array[toptimizerswitch] of string[11] = ('',
          'LEVEL1','LEVEL2','LEVEL3',
          'REGVAR','UNCERTAIN','SIZE','STACKFRAME',
          'PEEPHOLE','ASMCSE','LOOPUNROLL','TAILREC','CSE',
-         'DFA','STRENGTH','SCHEDULE','AUTOINLINE','USEEBP'
+         'DFA','STRENGTH','SCHEDULE','AUTOINLINE','USEEBP',
+         'ORDERFIELDS'
        );
        WPOptimizerSwitchStr : array [twpoptimizerswitch] of string[14] = (
          'DEVIRTCALLS','OPTVMTS','SYMBOLLIVENESS'

+ 2 - 1
compiler/i386/cpuinfo.pas

@@ -102,7 +102,8 @@ Const
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
                                  [cs_opt_peephole,cs_opt_regvar,cs_opt_stackframe,
                                   cs_opt_asmcse,cs_opt_loopunroll,cs_opt_uncertain,
-                                  cs_opt_tailrecursion,cs_opt_nodecse,cs_useebp];
+                                  cs_opt_tailrecursion,cs_opt_nodecse,cs_useebp,
+				  cs_opt_reorder_fields];
 
    level1optimizerswitches = genericlevel1optimizerswitches + [cs_opt_peephole];
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +

+ 2 - 1
compiler/m68k/cpuinfo.pas

@@ -75,7 +75,8 @@ Const
                                  genericlevel3optimizerswitches-
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
-                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_nodecse];
+                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_nodecse,
+                                  cs_opt_reorder_fields];
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches + 

+ 2 - 1
compiler/mips/cpuinfo.pas

@@ -68,7 +68,8 @@ Const
    );
 
    { Supported optimizations, only used for information }
-   supported_optimizerswitches = [cs_opt_regvar,cs_opt_loopunroll,cs_opt_nodecse];
+   supported_optimizerswitches = [cs_opt_regvar,cs_opt_loopunroll,cs_opt_nodecse,
+                                  cs_opt_reorder_fields];
 
    level1optimizerswitches = [];
    level2optimizerswitches = level1optimizerswitches + [cs_opt_regvar,cs_opt_stackframe,cs_opt_nodecse];

+ 9 - 1
compiler/pdecobj.pas

@@ -966,6 +966,7 @@ implementation
         object_member_blocktype : tblock_type;
         fields_allowed, is_classdef, class_fields, is_final, final_fields: boolean;
         vdoptions: tvar_dec_options;
+        fieldlist: tfpobjectlist;
 
 
       procedure parse_const;
@@ -1059,6 +1060,7 @@ implementation
         is_final:=false;
         final_fields:=false;
         object_member_blocktype:=bt_general;
+        fieldlist:=tfpobjectlist.create(false);
         repeat
           case token of
             _TYPE :
@@ -1173,9 +1175,11 @@ implementation
                             vdoptions:=[vd_object];
                             if class_fields then
                               include(vdoptions,vd_class);
+                            if is_class(current_structdef) then
+                              include(vdoptions,vd_canreorder);
                             if final_fields then
                               include(vdoptions,vd_final);
-                            read_record_fields(vdoptions);
+                            read_record_fields(vdoptions,fieldlist);
                           end
                         else if object_member_blocktype=bt_type then
                           types_dec(true)
@@ -1226,6 +1230,10 @@ implementation
               consume(_ID); { Give a ident expected message, like tp7 }
           end;
         until false;
+
+        if is_class(current_structdef) then
+          tabstractrecordsymtable(current_structdef.symtable).addfieldlist(fieldlist,true);
+        fieldlist.free;
       end;
 
 

+ 22 - 15
compiler/pdecvar.pas

@@ -27,17 +27,18 @@ unit pdecvar;
 interface
 
     uses
+      cclasses,
       symtable,symsym,symdef;
 
     type
-      tvar_dec_option=(vd_record,vd_object,vd_threadvar,vd_class,vd_final);
+      tvar_dec_option=(vd_record,vd_object,vd_threadvar,vd_class,vd_final,vd_canreorder);
       tvar_dec_options=set of tvar_dec_option;
 
     function  read_property_dec(is_classproperty:boolean;astruct:tabstractrecorddef):tpropertysym;
 
     procedure read_var_decls(options:Tvar_dec_options);
 
-    procedure read_record_fields(options:Tvar_dec_options);
+    procedure read_record_fields(options:Tvar_dec_options; reorderlist: TFPObjectList);
 
     procedure read_public_and_external(vs: tabstractvarsym);
 
@@ -48,7 +49,7 @@ implementation
     uses
        SysUtils,
        { common }
-       cutils,cclasses,
+       cutils,
        { global }
        globtype,globals,tokens,verbose,constexp,
        systems,
@@ -938,8 +939,10 @@ implementation
                    fieldvarsym :
                      begin
                        ImplIntf.IType:=etFieldValue;
-                       { this must be done more sophisticated, here is also probably the wrong place }
-                       ImplIntf.IOffset:=tfieldvarsym(p.propaccesslist[palt_read].firstsym^.sym).fieldoffset;
+                       { this must be done in a more robust way. Can't read the
+                         fieldvarsym's fieldoffset yet, because it may not yet
+                         be set }
+                       ImplIntf.ImplementsField:=p.propaccesslist[palt_read].firstsym^.sym;
                      end
                    else
                      internalerror(200802161);
@@ -1577,7 +1580,7 @@ implementation
       end;
 
 
-    procedure read_record_fields(options:Tvar_dec_options);
+    procedure read_record_fields(options:Tvar_dec_options; reorderlist: TFPObjectList);
       var
          sc : TFPObjectList;
          i  : longint;
@@ -1637,6 +1640,11 @@ implementation
                if token=_ID then
                  begin
                    vs:=tfieldvarsym.create(sorg,vs_value,generrordef,[]);
+                   { normally the visibility is set via addfield, but sometimes
+                     we collect symbols so we can add them in a batch of
+                     potentially mixed visibility, and then the individual
+                     symbols need to have their visibility already set }
+                   vs.visibility:=visibility;
                    sc.add(vs);
                    recst.insert(vs);
                  end;
@@ -1796,14 +1804,13 @@ implementation
                    end;
                end;
 
-             { Generate field in the recordsymtable }
-             for i:=0 to sc.count-1 do
-               begin
-                 fieldvs:=tfieldvarsym(sc[i]);
-                 { static data fields are already inserted in the globalsymtable }
-                 if not(sp_static in fieldvs.symoptions) then
-                   recst.addfield(fieldvs,visibility);
-               end;
+             if not(vd_canreorder in options) then
+               { add field(s) to the recordsymtable }
+               recst.addfieldlist(sc,false)
+             else
+               { we may reorder the fields before adding them to the symbol
+                 table }
+               reorderlist.concatlistcopy(sc)
            end;
 
          if m_delphi in current_settings.modeswitches then
@@ -1875,7 +1882,7 @@ implementation
                 consume(_LKLAMMER);
                 inc(variantrecordlevel);
                 if token<>_RKLAMMER then
-                  read_record_fields([vd_record]);
+                  read_record_fields([vd_record],nil);
                 dec(variantrecordlevel);
                 consume(_RKLAMMER);
                 { calculates maximal variant size }

+ 2 - 1
compiler/powerpc/cpuinfo.pas

@@ -77,7 +77,8 @@ Const
                                  genericlevel3optimizerswitches-
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
-                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_nodecse,cs_opt_tailrecursion];
+                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_nodecse,
+                                  cs_opt_tailrecursion,cs_opt_reorder_fields];
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches + [cs_opt_regvar,cs_opt_nodecse,cs_opt_tailrecursion];

+ 2 - 1
compiler/powerpc64/cpuinfo.pas

@@ -69,7 +69,8 @@ const
                                  genericlevel3optimizerswitches-
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
-                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_nodecse,cs_opt_tailrecursion];
+                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_nodecse,
+                                  cs_opt_tailrecursion,cs_opt_reorder_fields];
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches + 

+ 2 - 2
compiler/ptype.pas

@@ -648,7 +648,7 @@ implementation
                             vdoptions:=[vd_record];
                             if classfields then
                               include(vdoptions,vd_class);
-                            read_record_fields(vdoptions);
+                            read_record_fields(vdoptions,nil);
                           end
                         else if member_blocktype=bt_type then
                           types_dec(true)
@@ -813,7 +813,7 @@ implementation
            end
          else
            begin
-             read_record_fields([vd_record]);
+             read_record_fields([vd_record],nil);
 {$ifdef jvm}
              { we need a constructor to create temps, a deep copy helper, ... }
              add_java_default_record_methods_intf(trecorddef(current_structdef));

+ 2 - 1
compiler/sparc/cpuinfo.pas

@@ -77,7 +77,8 @@ const
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
                                  [cs_opt_regvar,cs_opt_loopunroll,
-								  cs_opt_tailrecursion,cs_opt_nodecse];
+                                  cs_opt_tailrecursion,cs_opt_nodecse,
+                                  cs_opt_reorder_fields];
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches + 

+ 30 - 1
compiler/symdef.pas

@@ -231,14 +231,18 @@ interface
        { TImplementedInterface }
 
        TImplementedInterface = class
+        private
+         fIOffset      : longint;
+         function GetIOffset: longint;
+        public
          IntfDef      : tobjectdef;
          IntfDefDeref : tderef;
          IType        : tinterfaceentrytype;
-         IOffset      : longint;
          VtblImplIntf : TImplementedInterface;
          NameMappings : TFPHashList;
          ProcDefs     : TFPObjectList;
          ImplementsGetter :  tsym;
+         ImplementsField : tsym;
          constructor create(aintf: tobjectdef);
          constructor create_deref(d:tderef);
          destructor  destroy; override;
@@ -249,6 +253,7 @@ interface
          function  GetMapping(const origname: string):string;
          procedure AddImplProc(pd:tprocdef);
          function  IsImplMergePossible(MergingIntf:TImplementedInterface;out weight: longint): boolean;
+         property  IOffset: longint read GetIOffset write fIOffset;
        end;
 
        { tvmtentry }
@@ -422,6 +427,7 @@ interface
           function  GetTypeName:string;override;
           function  is_publishable : boolean;override;
           function alignment:shortint;override;
+          function structalignment: shortint;override;
           procedure setsize;
           function  getvardef:longint;override;
        end;
@@ -2286,6 +2292,19 @@ implementation
       end;
 
 
+    function tfloatdef.structalignment: shortint;
+      begin
+        { aix is really annoying: the recommended scalar alignment for both
+          int64 and double is 64 bits, but in structs int64 has to be aligned
+          to 8 bytes and double to 4 bytes }
+        if (target_info.system in systems_aix) and
+           (floattype=s64real) then
+          result:=4
+        else
+          result:=alignment;
+      end;
+
+
     procedure tfloatdef.setsize;
       begin
          case floattype of
@@ -6235,6 +6254,16 @@ implementation
                              TImplementedInterface
 ****************************************************************************}
 
+    function TImplementedInterface.GetIOffset: longint;
+      begin
+        if (fIOffset=-1) and
+           (IType in [etFieldValue,etFieldValueClass]) then
+          result:=tfieldvarsym(ImplementsField).fieldoffset
+        else
+          result:=fIOffset;
+      end;
+
+
     constructor TImplementedInterface.create(aintf: tobjectdef);
       begin
         inherited create;

+ 213 - 59
compiler/symtable.pas

@@ -86,6 +86,7 @@ interface
           procedure ppuwrite(ppufile:tcompilerppufile);override;
           procedure alignrecord(fieldoffset:asizeint;varalign:shortint);
           procedure addfield(sym:tfieldvarsym;vis:tvisibility);
+          procedure addfieldlist(list: tfpobjectlist; maybereorder: boolean);
           procedure addalignmentpadding;
           procedure insertdef(def:TDefEntry);override;
           function is_packed: boolean;
@@ -100,6 +101,7 @@ interface
           { size in bytes of padding }
           _paddingsize   : word;
           procedure setdatasize(val: asizeint);
+          function getfieldoffset(sym: tfieldvarsym; base: asizeint; var globalfieldalignment: shortint): asizeint;
         public
           function iscurrentunit: boolean; override;
           property datasize : asizeint read _datasize write setdatasize;
@@ -934,7 +936,6 @@ implementation
     procedure tabstractrecordsymtable.addfield(sym:tfieldvarsym;vis:tvisibility);
       var
         l      : asizeint;
-        varalignfield,
         varalign : shortint;
         vardef : tdef;
       begin
@@ -949,16 +950,7 @@ implementation
         { Calculate field offset }
         l:=sym.getsize;
         vardef:=sym.vardef;
-        varalign:=vardef.alignment;
-{$if defined(powerpc) or defined(powerpc64)}
-        { aix is really annoying: the recommended scalar alignment for both
-          int64 and double is 64 bits, but in structs int64 has to be aligned
-          to 8 bytes and double to 4 bytes }
-        if (target_info.system in systems_aix) and
-           is_double(vardef) then
-          varalign:=4;
-{$endif powerpc or powerpc64}
-
+        varalign:=vardef.structalignment;
         case usefieldalignment of
           bit_alignment:
             begin
@@ -997,61 +989,160 @@ implementation
               { rest is not applicable }
               exit;
             end;
-          { Calc the alignment size for C style records }
-          C_alignment:
+          else
             begin
-              if (varalign>4) and
-                ((varalign mod 4)<>0) and
-                (vardef.typ=arraydef) then
-                Message1(sym_w_wrong_C_pack,vardef.typename);
-              if varalign=0 then
-                varalign:=l;
-              if (fieldalignment<current_settings.alignment.maxCrecordalign) then
+              sym.fieldoffset:=getfieldoffset(sym,_datasize,fieldalignment);
+              if l>high(asizeint)-sym.fieldoffset then
                 begin
-                  if (varalign>16) and (fieldalignment<32) then
-                    fieldalignment:=32
-                  else if (varalign>12) and (fieldalignment<16) then
-                    fieldalignment:=16
-                  { 12 is needed for long double }
-                  else if (varalign>8) and (fieldalignment<12) then
-                    fieldalignment:=12
-                  else if (varalign>4) and (fieldalignment<8) then
-                    fieldalignment:=8
-                  else if (varalign>2) and (fieldalignment<4) then
-                    fieldalignment:=4
-                  else if (varalign>1) and (fieldalignment<2) then
-                    fieldalignment:=2;
-                end;
-              fieldalignment:=min(fieldalignment,current_settings.alignment.maxCrecordalign);
-            end;
-          mac68k_alignment:
-            begin
-              { mac68k alignment (C description):
-                 * char is aligned to 1 byte
-                 * everything else (except vector) is aligned to 2 bytes
-                 * vector is aligned to 16 bytes
-              }
-              if l>1 then
-                fieldalignment:=2
+                  Message(sym_e_segment_too_large);
+                  _datasize:=high(asizeint);
+                end
               else
-                fieldalignment:=1;
-              varalign:=2;
+                _datasize:=sym.fieldoffset+l;
+              { Calc alignment needed for this record }
+              alignrecord(sym.fieldoffset,varalign);
             end;
         end;
-        if varalign=0 then
-          varalign:=size_2_align(l);
-        varalignfield:=used_align(varalign,current_settings.alignment.recordalignmin,fieldalignment);
+      end;
+
+
+    function field_alignment_compare(item1, item2: pointer): integer;
+      var
+        field1: tfieldvarsym absolute item1;
+        field2: tfieldvarsym absolute item2;
+      begin
+        { we don't care about static fields, those become global variables }
+        if (sp_static in field1.symoptions) or
+           (sp_static in field2.symoptions) then
+          exit(0);
+        { sort from large to small alignment, and in case of the same alignment
+          in declaration order (items declared close together are possibly
+          also related and hence possibly used together -> putting them next
+          to each other can improve cache behaviour) }
+        result:=field2.vardef.alignment-field1.vardef.alignment;
+        if result=0 then
+          result:=field1.symid-field2.symid;
+      end;
+
 
-        sym.fieldoffset:=align(_datasize,varalignfield);
-        if l>high(asizeint)-sym.fieldoffset then
+    procedure tabstractrecordsymtable.addfieldlist(list: tfpobjectlist; maybereorder: boolean);
+      var
+        fieldvs, insertfieldvs, bestfieldvs: tfieldvarsym;
+        base, fieldoffset, space, insertfieldsize, insertfieldoffset, bestinsertfieldoffset, bestspaceleft: asizeint;
+        i, j, bestfieldindex: longint;
+        globalfieldalignment,
+        prevglobalfieldalignment,
+        newfieldalignment: shortint;
+        changed: boolean;
+      begin
+        if maybereorder and
+           (cs_opt_reorder_fields in current_settings.optimizerswitches) then
           begin
-            Message(sym_e_segment_too_large);
-            _datasize:=high(asizeint);
-          end
-        else
-          _datasize:=sym.fieldoffset+l;
-        { Calc alignment needed for this record }
-        alignrecord(sym.fieldoffset,varalign);
+            { sort the non-class fields to minimise losses due to alignment }
+            list.sort(@field_alignment_compare);
+            { now fill up gaps caused by alignment skips with smaller fields
+              where possible }
+            repeat
+              i:=0;
+              base:=_datasize;
+              globalfieldalignment:=fieldalignment;
+              changed:=false;
+              while i<list.count do
+                begin
+                  fieldvs:=tfieldvarsym(list[i]);
+                  if sp_static in fieldvs.symoptions then
+                    begin
+                      inc(i);
+                      continue;
+                    end;
+                  prevglobalfieldalignment:=globalfieldalignment;
+                  fieldoffset:=getfieldoffset(fieldvs,base,globalfieldalignment);
+                  newfieldalignment:=globalfieldalignment;
+
+                  { size of the gap between the end of the previous field and
+                    the start of the current one }
+                  space:=fieldoffset-base;
+                  bestspaceleft:=space;
+                  while space>0 do
+                    begin
+                      bestfieldindex:=-1;
+                      for j:=i+1 to list.count-1 do
+                        begin
+                          insertfieldvs:=tfieldvarsym(list[j]);
+                          if sp_static in insertfieldvs.symoptions then
+                            continue;
+                          insertfieldsize:=insertfieldvs.getsize;
+                          { can the new field fit possibly in the gap? }
+                          if insertfieldsize<=space then
+                            begin
+                             { restore globalfieldalignment to situation before
+                               the original field was inserted }
+                              globalfieldalignment:=prevglobalfieldalignment;
+                              { at what offset would it be inserted? (this new
+                                field has its own alignment requirements, which
+                                may make it impossible to fit after all) }
+                              insertfieldoffset:=getfieldoffset(insertfieldvs,base,globalfieldalignment);
+                              globalfieldalignment:=prevglobalfieldalignment;
+                              { taking into account the alignment, does it still
+                                fit and if so, does it fit better than the
+                                previously found best fit? }
+                              if (insertfieldoffset+insertfieldsize<=fieldoffset) and
+                                 (fieldoffset-insertfieldoffset-insertfieldsize<bestspaceleft) then
+                                begin
+                                  { new best fit }
+                                  bestfieldindex:=j;
+                                  bestinsertfieldoffset:=insertfieldoffset;
+                                  bestspaceleft:=fieldoffset-insertfieldoffset-insertfieldsize;
+                                  if bestspaceleft=0 then
+                                    break;
+                                end;
+                            end;
+                        end;
+                      { if we didn't find any field to fit, stop trying for this
+                        gap }
+                      if bestfieldindex=-1 then
+                        break;
+                      changed:=true;
+                      { we found a field to insert -> adjust the new base
+                        address }
+                      base:=bestinsertfieldoffset+tfieldvarsym(list[bestfieldindex]).getsize;
+                      { update globalfieldalignment for this newly inserted
+                        field }
+                      getfieldoffset(tfieldvarsym(list[bestfieldindex]),base,globalfieldalignment);
+                      { move the new field before the current one }
+                      list.move(bestfieldindex,i);
+                      { and skip the new field (which is now at position i) }
+                      inc(i);
+                      { there may be more space left -> continue }
+                      space:=bestspaceleft;
+                    end;
+                  if base>fieldoffset then
+                    internalerror(2012071302);
+                  { check the next field }
+                  base:=fieldoffset+fieldvs.getsize;
+                  { since the original field had the same or greater alignment
+                    than anything we inserted before it, the global field
+                    alignment is still the same now as it was originally after
+                    inserting that field }
+                  globalfieldalignment:=newfieldalignment;
+                  inc(i);
+                end;
+            { there may be small gaps left *before* inserted fields }
+          until not changed;
+        end;
+        { finally, set the actual field offsets }
+        for i:=0 to list.count-1 do
+          begin
+            fieldvs:=tfieldvarsym(list[i]);
+            { static data fields are already inserted in the globalsymtable }
+            if not(sp_static in fieldvs.symoptions) then
+              begin
+                { read_record_fields already set the visibility of the fields,
+                  because a single list can contain symbols with different
+                  visibility }
+                addfield(fieldvs,fieldvs.visibility);
+              end;
+          end;
       end;
 
 
@@ -1145,6 +1236,69 @@ implementation
           databitsize:=val*8;
       end;
 
+    function tabstractrecordsymtable.getfieldoffset(sym: tfieldvarsym; base: asizeint; var globalfieldalignment: shortint): asizeint;
+      var
+        l      : asizeint;
+        varalignfield,
+        varalign : shortint;
+        vardef : tdef;
+      begin
+        { Calculate field offset }
+        l:=sym.getsize;
+        vardef:=sym.vardef;
+        varalign:=vardef.structalignment;
+        case usefieldalignment of
+          bit_alignment:
+            { has to be handled separately }
+            internalerror(2012071301);
+          C_alignment:
+            begin
+              { Calc the alignment size for C style records }
+              if (varalign>4) and
+                ((varalign mod 4)<>0) and
+                (vardef.typ=arraydef) then
+                Message1(sym_w_wrong_C_pack,vardef.typename);
+              if varalign=0 then
+                varalign:=l;
+              if (globalfieldalignment<current_settings.alignment.maxCrecordalign) then
+                begin
+                  if (varalign>16) and (globalfieldalignment<32) then
+                    globalfieldalignment:=32
+                  else if (varalign>12) and (globalfieldalignment<16) then
+                    globalfieldalignment:=16
+                  { 12 is needed for long double }
+                  else if (varalign>8) and (globalfieldalignment<12) then
+                    globalfieldalignment:=12
+                  else if (varalign>4) and (globalfieldalignment<8) then
+                    globalfieldalignment:=8
+                  else if (varalign>2) and (globalfieldalignment<4) then
+                    globalfieldalignment:=4
+                  else if (varalign>1) and (globalfieldalignment<2) then
+                    globalfieldalignment:=2;
+                end;
+              globalfieldalignment:=min(globalfieldalignment,current_settings.alignment.maxCrecordalign);
+            end;
+          mac68k_alignment:
+            begin
+              { mac68k alignment (C description):
+                 * char is aligned to 1 byte
+                 * everything else (except vector) is aligned to 2 bytes
+                 * vector is aligned to 16 bytes
+              }
+              if l>1 then
+                globalfieldalignment:=2
+              else
+                globalfieldalignment:=1;
+              varalign:=2;
+            end;
+        end;
+        if varalign=0 then
+          varalign:=size_2_align(l);
+        varalignfield:=used_align(varalign,current_settings.alignment.recordalignmin,globalfieldalignment);
+
+        result:=align(base,varalignfield);
+      end;
+
     function tabstractrecordsymtable.iscurrentunit: boolean;
       begin
         Result := Assigned(current_module) and (current_module.moduleid=moduleid);

+ 8 - 0
compiler/symtype.pas

@@ -77,6 +77,8 @@ interface
          function  size:asizeint;virtual;abstract;
          function  packedbitsize:asizeint;virtual;
          function  alignment:shortint;virtual;abstract;
+         { alignment when this type appears in a record/class/... }
+         function  structalignment:shortint;virtual;
          function  getvardef:longint;virtual;abstract;
          function  getparentdef:tdef;virtual;
          function  geTSymtable(t:tgeTSymtable):TSymtable;virtual;
@@ -328,6 +330,12 @@ implementation
       end;
 
 
+    function tdef.structalignment: shortint;
+      begin
+        result:=alignment;
+      end;
+
+
     procedure tdef.ChangeOwner(st:TSymtable);
       begin
 //        if assigned(Owner) then

+ 1 - 1
compiler/x86_64/cpuinfo.pas

@@ -91,7 +91,7 @@ Const
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
                                  [cs_opt_regvar,cs_opt_loopunroll,cs_opt_stackframe,
-								  cs_opt_tailrecursion,cs_opt_nodecse];
+				  cs_opt_tailrecursion,cs_opt_nodecse,cs_opt_reorder_fields];
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +