Pārlūkot izejas kodu

more memory work

Nicolas Cannasse 8 gadi atpakaļ
vecāks
revīzija
fc45237634
1 mainītis faili ar 466 papildinājumiem un 151 dzēšanām
  1. 466 151
      other/memory/Memory.hx

+ 466 - 151
other/memory/Memory.hx

@@ -45,7 +45,7 @@ class Page {
 
 	public function isLiveBlock( bid : Int ) {
 		if( sizes != null && sizes.get(bid) == 0 ) return false;
-		return (bmp.get(bid >> 3) >>> (bid & 7)) != 0;
+		return (bmp.get(bid >> 3) & (1 << (bid & 7))) != 0;
 	}
 
 	public function getBlockSize( bid : Int ) {
@@ -69,41 +69,221 @@ class Stack {
 	}
 }
 
-class DataSeg {
-	public static inline var NO_OWNER = 0x80000000;
+class Block {
 	public var page : Page;
 	public var bid : Int;
-	public var ownerType : Int = NO_OWNER;
+	public var owner : Block;
+	public var type(default,set) : TType;
+
+	public var subs : Array<Block>; // can be null
+	public var parents : Array<Block>; // if multiple owners
+
 	public function new() {
 	}
+
 	public inline function getPointer() {
 		return page.getPointer(bid);
 	}
 	public inline function getMemSize() {
 		return page.getBlockSize(bid) * page.blockSize;
 	}
+
+	function set_type( t : TType ) {
+		if( t != null && t.t == HF64 && page.kind != PNoPtr )
+			throw "!";
+		return type = t;
+	}
+
+	public function addParent(b:Block) {
+
+//		if( b.type != null && b.type.t == HF64 )
+//			throw "!";
+
+		if( owner == null ) {
+			owner = b;
+		} else {
+			if( parents == null ) parents = [owner];
+			parents.push(b);
+		}
+		if( b.subs == null ) b.subs = [];
+		b.subs.push(this);
+	}
+
+	function removeParent( p : Block ) {
+		if( parents != null ) {
+			parents.remove(p);
+			if( parents.length == 0 ) parents = null;
+		}
+		if( owner == p )
+			owner = parents == null ? null : parents[0];
+	}
+
+	public function removeChildren() {
+		if( subs != null ) {
+			for( s in subs )
+				s.removeParent(this);
+			subs = null;
+		}
+	}
+
+}
+
+
+class TType {
+	public var tid : Int;
+	public var t : HLType;
+	public var closure : HLType;
+	public var bmp : hl.Bytes;
+	public var hasPtr : Bool;
+	public var isDyn : Bool;
+	public var fields : Array<TType>;
+	public var constructs : Array<Array<TType>>;
+	public var nullWrap : TType;
+
+	public var falsePositive = 0;
+	public var falsePositiveIndexes = [];
+
+	public function new(tid, t, ?cl) {
+		this.tid = tid;
+		this.t = t;
+		this.closure = cl;
+		isDyn = t.isDynamic();
+		switch( t ) {
+		case HFun(_):
+			hasPtr = cl != null && cl.isPtr();
+		default:
+			hasPtr = t.containsPointer();
+		}
+	}
+
+	public function buildTypes( m : Memory ) {
+		if( !hasPtr ) return;
+
+		// layout of data inside memory
+
+		switch( t ) {
+		case HObj(p):
+			var protos = [p];
+			while( p.tsuper != null )
+				switch( p.tsuper ) {
+				case HObj(p2):
+					protos.unshift(p2);
+					p = p2;
+				default:
+				}
+
+			fields = [];
+			var pos = 1 << m.ptrBits; // type
+			for( p in protos )
+				for( f in p.fields ) {
+					var size = m.typeSize(f.t);
+					pos = align(pos, size);
+					fields[pos>>m.ptrBits] = m.getType(f.t);
+					pos += size;
+				}
+		case HEnum(e):
+			constructs = [];
+			for( c in e.constructs ) {
+				var pos = 4; // index
+				var fields = [];
+				for( t in c.params ) {
+					var size = m.typeSize(t);
+					pos = align(pos, size);
+					fields[pos>>m.ptrBits] = m.getType(t);
+					pos += size;
+				}
+				constructs.push(fields);
+			}
+		case HVirtual(fl):
+			fields = [
+				null, // type
+				m.getType(HDyn), // obj
+				m.getType(HDyn), // next
+			];
+			var pos = (fl.length + 3) << m.ptrBits;
+			for( f in fl ) {
+				var size = m.typeSize(f.t);
+				pos = align(pos, size);
+				fields[pos >> m.ptrBits] = m.getType(f.t);
+			}
+		case HNull(t):
+			if( m.is64 )
+				fields = [null, m.getType(t)];
+			else
+				fields = [null, null, m.getType(t)];
+		case HFun(_):
+			fields = [null, null, null, m.getType(closure)];
+		default:
+		}
+	}
+
+	inline function align(pos, size) {
+		var d = pos & (size - 1);
+		if( d != 0 ) pos += size - d;
+		return pos;
+	}
+
+	public function toString() {
+		return t.toString();
+	}
+
 }
 
 class Memory {
 
+	static inline var PAGE_BITS = 16;
+
 	public var memoryDump : sys.io.FileInput;
+
+	public var is64 : Bool;
+	public var bool32 : Bool;
+	public var ptrBits : Int;
+
 	var code : format.hl.Data;
-	var is64 : Bool;
 	var pages : Array<Page>;
 	var roots : Array<Pointer>;
 	var stacks : Array<Stack>;
-	var types : Array<Pointer>;
-	var closures : Array<Pointer>;
-	var pointerType : Map<Pointer, HLType>;
-	var pointerTypeIndex : Map<Pointer, Int>;
-	var closuresType : Map<Pointer, HLType>;
-	var pointerPage : Map<Pointer, Page>;
-	var dataPointers : Array<DataSeg>;
-	var ptrBits : Int;
+	var typesPointers : Array<Pointer>;
+	var closuresPointers : Array<Pointer>;
+	var types : Array<TType>;
+	var blocks : Array<Block>;
+	var baseTypes : Array<{ t : HLType, p : Pointer }>;
+	var all : Block;
+
+	var toProcess : Array<Block>;
+	var dynObjCandidates : Array<Block>;
+	var tdynObj : TType;
+	var pointerBlock : Map<Pointer, Block>;
+	var pointerType : Map<Pointer, TType>;
 
 	function new() {
 	}
 
+	public function typeSize( t : HLType ) {
+		return switch( t ) {
+		case HVoid: 0;
+		case HUi8: 1;
+		case HUi16: 2;
+		case HI32, HF32: 4;
+		case HF64: 8;
+		case HBool:
+			return bool32 ? 4 : 1;
+		default:
+			return is64 ? 8 : 4;
+		}
+	}
+
+	public function getType( t : HLType, isNull = false ) : TType {
+		// this is quite slow, but we can't use a Map, maybe try a more per-type specific approach ?
+		for( t2 in types )
+			if( t == t2.t )
+				return t2;
+		if( isNull )
+			return null;
+		throw "Type not found " + t.toString();
+		return null;
+	}
+
 	function loadBytecode( arg : String ) {
 		if( code != null ) throw "Duplicate code";
 		code = new format.hl.Reader(false).read(new haxe.io.BytesInput(sys.io.File.getBytes(arg)));
@@ -118,12 +298,10 @@ class Memory {
 		return cast memoryDump.readInt32();
 	}
 
-	inline function readType() {
-		return pointerType.get(readPointer());
-	}
-
 	static function MB( v : Float ) {
-		if( v < 1024 * 1024 )
+		if( v < 1000 )
+			return Std.int(v) + "B";
+		if( v < 1024 * 1000 )
 			return (Math.round(v * 10 / 1024) / 10)+"KB";
 		return (Math.round(v * 10 / (1024 * 1024)) / 10)+"MB";
 	}
@@ -136,6 +314,10 @@ class Memory {
 		var version = memoryDump.readByte() - "0".code;
 		var flags = readInt();
 		is64 = (flags & 1) != 0;
+		bool32 = (flags & 2) != 0;
+
+		throw bool32;
+
 		ptrBits = is64 ? 3 : 2;
 		if( is64 ) throw "64 bit not supported";
 
@@ -166,6 +348,8 @@ class Memory {
 					p.sizes = memoryDump.read(p.maxBlocks); // 1 byte per block
 				pages.push(p);
 				totalSize += p.size;
+				if( p.bmp != null )
+					totalSize += p.bmp.length;
 			}
 		}
 		log(pages.length + " pages, " + MB(totalSize) + " memory");
@@ -185,36 +369,74 @@ class Memory {
 		log(stacks.length + " stacks");
 
 		// load types
-		types = [for( i in 0...readInt() ) readPointer()];
-		closures = [for( i in 0...readInt() ) readPointer()];
-		log(types.length + " types, " + closures.length + " closures");
+
+		baseTypes = [];
+		while( true ) {
+			var tid = readInt();
+			if( tid < 0 ) break;
+			var ptr = readPointer();
+			baseTypes.push({ t : Type.createEnumIndex(HLType, tid), p : ptr });
+		}
+
+		typesPointers = [for( i in 0...readInt() ) readPointer()];
+		closuresPointers = [for( i in 0...readInt() ) readPointer()];
+		log(typesPointers.length + " types, " + closuresPointers.length + " closures");
+	}
+
+	function getTypeNull( t : TType ) {
+		if( t.nullWrap != null )
+			return t.nullWrap;
+		var r = new TType(types.length, HNull(t.t));
+		t.nullWrap = r;
+		types.push(r);
+		return r;
+	}
+
+	function goto( b : Block ) {
+		b.page.goto(b.bid);
 	}
 
 	function check() {
 		if( code == null ) throw "Missing .hl file";
 		if( memoryDump == null ) throw "Missing .dump file";
-		if( code.types.length != this.types.length ) throw "Types count mismatch";
+		if( code.types.length != this.typesPointers.length ) throw "Types count mismatch";
 
 		pointerType = new Map();
-		pointerTypeIndex = new Map();
-		closuresType = new Map();
 		var cid = 0;
-		for( i in 0...types.length ) {
-			pointerType.set(types[i], code.types[i]);
-			pointerTypeIndex.set(types[i], i);
+		types = [for( i in 0...code.types.length ) new TType(i, code.types[i])];
+		for( i in 0...typesPointers.length ) {
+			pointerType.set(typesPointers[i], types[i]);
 			switch( code.types[i] ) {
 			case HFun(f):
-				var ptr = closures[cid++];
+				var tid = types.length;
 				var args = f.args.copy();
-				closuresType.set(ptr, args.shift());
-				pointerType.set(ptr, HFun({ args : args, ret : f.ret }));
-				pointerTypeIndex.set(ptr, -i);
+				var ct = new TType(tid, HFun({ args : args, ret : f.ret }), args.shift());
+				types.push(ct);
+				pointerType.set(closuresPointers[cid++], ct);
 			default:
 			}
 		}
 
-		var count = 0, used = 0, free = 0;
+		for( b in baseTypes ) {
+			var t = getType(b.t, true);
+			if( t == null ) {
+				t = new TType(types.length, b.t);
+				types.push(t);
+			}
+			pointerType.set(b.p, t);
+		}
+
+		blocks = [];
+		var pageMem = new Map();
+		var used = 0, free = 0, gc = 0;
+		var progress = 0;
+		pointerBlock = new Map();
+
 		for( p in pages ) {
+			progress++;
+			if( progress % 100 == 0 )
+				Sys.print((Std.int(progress * 1000 / pages.length) / 10) + "%  \r");
+
 			var bid = p.firstBlock;
 			while( bid < p.maxBlocks ) {
 				if( !p.isLiveBlock(bid) ) {
@@ -223,158 +445,251 @@ class Memory {
 					continue;
 				}
 				var sz = p.getBlockSize(bid);
+
+				var b = new Block();
+				p.goto(bid);
+				b.page = p;
+				b.bid = bid;
+				b.type = pointerType.get(readPointer());
+				if( b.type != null && !b.type.isDyn )
+					b.type = getTypeNull(b.type);
+				blocks.push(b);
+				pointerBlock.set(b.getPointer(), b);
+
 				bid += sz;
-				count++;
+
+
 				used += sz * p.blockSize;
 			}
+
+			gc += p.size - (p.maxBlocks - p.firstBlock) * p.blockSize;
+			gc += p.bmp.length;
+
+			for( i in 0...(p.size >> PAGE_BITS) )
+				pageMem.set(p.addr.offset(i << PAGE_BITS), p);
+
 		}
-		log(count + " live blocks " + MB(used) + " used, " + MB(free) + " free");
+		log(blocks.length + " live blocks " + MB(used) + " used, " + MB(free) + " free, "+MB(gc)+" gc");
 
-		pointerPage = new Map();
-		for( p in pages ) {
-			pointerPage.set(p.addr, p);
-			for( i in 0...p.size >> 16 )
-				pointerPage.set(p.addr.offset(i  << 16), p);
+		for( t in types )
+			t.buildTypes(this);
+
+		tdynObj = getType(HDynObj);
+		toProcess = blocks.copy();
+		dynObjCandidates = [];
+		while( toProcess.length > 0 ) {
+			buildHierarchy();
+			buildDynObjs();
 		}
-	}
 
-	inline function iterLiveBlocks( callb, withPtr = false ) {
-		for( p in pages ) {
-			if( withPtr && p.kind == PNoPtr ) continue;
-			var bid = p.firstBlock;
-			while( bid < p.maxBlocks ) {
-				if( !p.isLiveBlock(bid) ) {
-					bid++;
-					continue;
-				}
-				var sz = p.getBlockSize(bid);
-				callb(p, bid, sz);
-				bid += sz;
-			}
+		// look in stack & roots
+		all = new Block();
+
+		var broot = new Block();
+		broot.addParent(all);
+
+		var rid = 0;
+		for( g in code.globals ) {
+			if( !g.isPtr() ) continue;
+			var r = roots[rid++];
+			var b = pointerBlock.get(r);
+			if( b == null ) continue;
+			b.addParent(broot);
+			if( b.type == null )
+				b.type = getType(g);
 		}
-	}
 
-	function getStats(dataTID=0,doPrint=true) {
+		for( s in stacks ) {
+			var bstack = new Block();
+			bstack.addParent(all);
+			for( r in s.contents ) {
+				var b = pointerBlock.get(r);
+				if( b != null )
+					b.addParent(bstack);
+			}
+		}
 
+		var unk = 0, unkMem = 0;
 		var byT = new Map();
-		var allT = [];
+		for( b in blocks ) {
+			if( b.owner == null ) {
+				log("  "+b.getPointer().toString() + " is not referenced");
+				continue;
+			}
 
-		dataPointers = [];
+			if( b.type != null ) continue;
 
-		iterLiveBlocks(function(p,bid,size) {
-			p.goto(bid);
+			var o = b.owner;
+			while( o != null && o.type == null )
+				o = o.owner;
+			if( o != null )
+				switch( o.type.t ) {
+				case HAbstract(_):
+					b.type = o.type; // data inside this
+					continue;
+				default:
+				}
 
-			var tid : Int = pointerTypeIndex.get(readPointer());
-			if( tid == dataTID ) {
-				var s = new DataSeg();
-				s.bid = bid;
-				s.page = p;
-				dataPointers.push(s);
-			}
-			var inf = byT.get(tid);
+			unk++;
+			unkMem += b.getMemSize();
+			var t = b.owner.type;
+			if( t == null ) t = types[0];
+			var inf = byT.get(t.tid);
 			if( inf == null ) {
-				inf = { t : code.types[tid < 0 ? -tid : tid], count : 0, mem : 0 };
-				byT.set(tid, inf);
-				allT.push(inf);
+				inf = { t : t, count : 0, mem : 0 };
+				byT.set(t.tid, inf);
 			}
 			inf.count++;
-			inf.mem += size * p.blockSize;
-		});
+			inf.mem += b.getMemSize();
+		}
 
-		allT.sort(function(i1, i2) return i1.mem - i2.mem);
+		var falses = [for( t in types ) if( t.falsePositive > 0 ) t];
+		falses.sort(function(t1, t2) return t1.falsePositive - t2.falsePositive);
+		for( f in falses )
+			log("  False positive " + f + " " + f.falsePositive+" "+f.falsePositiveIndexes+" "+f.fields);
 
+		/*
+		var all = [for( k in byT ) k];
+		all.sort(function(i1, i2) return i1.count - i2.count);
+		for( a in all )
+			log("Unknown "+a.count + " count, " + MB(a.mem)+" "+a.t.toString());
+		*/
 
-		if( doPrint )
-			for( t in allT )
-				log('${t.count} count ${MB(t.mem)} ${t.t.toString()}');
+		log("Hierarchy built, "+unk+" unknown ("+MB(unkMem)+")");
 	}
 
-	function locate( type : String ) {
-		if( type == null ) {
-			log("Require <type> parameter");
-			return;
-		}
-		for( i in 0...code.types.length )
-			if( code.types[i].toString() == type ) {
-				getStats(i, false);
-				log(dataPointers.length + " values found");
-				lookupData();
-				return;
-			}
-		log("Unknown type");
-	}
+	function buildHierarchy() {
+		// build hierarchy
+		var progress = 0;
+		var blocks = toProcess;
+		toProcess = [];
 
-	function printRoots() {
-		var rid = 0;
-		for( g in code.globals ) {
-			if( !g.isPtr() ) continue;
-			var r = roots[rid++];
-			var ppage = r.pageAddress();
-			var p = pointerPage.get(ppage);
-			var bid = Std.int(r.sub(p.addr) / p.blockSize);
-			p.goto(bid);
-			var t = readPointer();
-			Sys.println(r.toString()+" "+pointerType.get(t).toString()+" "+g.toString());
-		}
-	}
+		for( b in blocks )
+			b.removeChildren();
 
-	function gotoAddress( val : Pointer ) {
-		var ppage = val.pageAddress();
-		var p = pointerPage.get(ppage);
-		if( p == null )
-			return false;
-		var bid = Std.int(val.sub(p.addr) / p.blockSize);
-		if( !p.isLiveBlock(bid) )
-			return false;
-		p.goto(bid);
-		return true;
-	}
+		for( b in blocks ) {
 
-	function lookupData() {
-		if( dataPointers == null ) {
-			Sys.println("No data pointers found, use <stats> first");
-			return;
-		}
-		var data = new Map();
-		for( d in dataPointers )
-			data.set(d.getPointer(), d);
-
-		var byT = [];
-
-		iterLiveBlocks(function(p, bid, sz) {
-			p.goto(bid);
-			var type = 0;
-			for( i in 0...Std.int((sz * p.blockSize) >> ptrBits) ) {
-				var p = readPointer();
-				if( i == 0 ) {
-					type = pointerTypeIndex.get(p);
-					if( type < 0 ) type = -type;
+			progress++;
+			if( progress % 10000 == 0 )
+				Sys.print((Std.int(progress * 1000.0 / blocks.length) / 10) + "%  \r");
+
+			if( b.page.kind == PNoPtr )
+				continue;
+
+			if( b.type != null && !b.type.hasPtr )
+				switch(b.type.t) {
+				case HFun(_):
+				default:
+					log("  Scanning "+b.type+" "+b.getPointer().toString());
+				}
+
+			goto(b);
+			var fields = null;
+			var start = 0;
+			if( b.type != null ) {
+				fields = b.type.fields;
+				// enum
+				if( b.type.constructs != null ) {
+					start++;
+					fields = b.type.constructs[readInt()];
+					if( is64 ) readInt(); // skip, not a pointer anyway
 				}
-				var d = data.get(p);
-				if( d != null && d.ownerType == DataSeg.NO_OWNER ) {
-					d.ownerType = type;
-					var inf = byT[type];
-					if( inf == null ) {
-						inf = { t : code.types[type], mem : 0, count : 0 };
-						byT[type] = inf;
+			}
+
+			for( i in start...(b.getMemSize() >> ptrBits) ) {
+				var r = readPointer();
+				var bs = pointerBlock.get(r);
+				if( bs == null ) continue;
+				var ft = fields != null ? fields[i] : null;
+				bs.addParent(b);
+				if( ft != null && !ft.t.isPtr() ) {
+					b.type.falsePositive++;
+					b.type.falsePositiveIndexes[i]++;
+					continue;
+				}
+				if( bs.type == null && ft != null ) {
+					if( ft.t == HDyn ) {
+						dynObjCandidates.push(bs);
+						bs.type = tdynObj;
+						continue;
 					}
-					inf.count++;
-					inf.mem += d.getMemSize();
+					bs.type = ft;
+					if( bs.subs != null )
+						toProcess.push(bs);
 				}
 			}
-		}, true);
+		}
+	}
+
+	function buildDynObjs() {
+		var blocks = dynObjCandidates;
+		dynObjCandidates = [];
+		for( b in blocks ) {
+			// most likely a DynObj (which have a dynamicaly allocated hl_type*)
+			// let's check
+			goto(b);
+			var btptr = readPointer();
+			var bdptr = readPointer();
+			var nfields = readInt();
+			var dataSize = readInt();
+
+			var bt = pointerBlock.get(btptr);
+			if( bt == null ) {
+				log("  Not a dynobj " + b.getPointer().toString());
+				continue;
+			}
+			goto(bt);
+			var tid = readInt();
+			if( tid != 15 ) {
+				log("  Not a dynobj " + b.getPointer().toString());
+				continue;
+			}
+
+			bt.type = b.type; // dynobj proto
 
-		var allT = [for( t in byT ) if( t != null ) t];
-		allT.sort(function(t1, t2) return t1.mem - t2.mem);
-		for( t in allT )
-			Sys.println(t.count + " count " + MB(t.mem) + " " + t.t.toString());
+			var bd = pointerBlock.get(bdptr);
+			if( bd == null )
+				continue;
+
+			if( bd.type != null )
+				log("  Overwriting dynobj data " + bd.getPointer().toString()+" "+bd.type);
+
+			bd.type = b.type; // dynobj data
+
+			// read remaining dynobj
+			if( is64 ) readInt();
+			readPointer();
+			readPointer();
+
+			var fields = [for( i in 0...nfields ) {
+				t : pointerType.get(readPointer()),
+				name : readInt(),
+				index : readInt(),
+			}];
+
+			goto(bd);
+
+			// assign types to dynobj fields
+			var pos = 0;
+			for( i in 0...(bd.getMemSize() >> ptrBits) ) {
+				var bv = pointerBlock.get(readPointer());
+				if( bv != null )
+					for( f in fields )
+						if( f.index == pos && bv.type == null ) {
+							bv.type = f.t;
+							if( bv.subs != null ) toProcess.push(bv);
+							break;
+						}
+				pos += 1 << ptrBits;
+			}
+		}
 	}
 
 	function printPartition() {
 		var part = sys.io.File.write("part.txt",false);
 		for( p in pages ) {
 			var bid = p.firstBlock;
-			part.writeString("[" + p.blockSize+"]");
+			part.writeString(p.addr.toString()+ " [" + p.blockSize+"]");
 			while( bid < p.maxBlocks ) {
 				if( !p.isLiveBlock(bid) ) {
 					part.writeByte(".".code);
@@ -398,7 +713,7 @@ class Memory {
 	static function main() {
 		var m = new Memory();
 
-		//hl.Gc.dumpMemory(); Sys.command("cp memory.hl memory_test.hl");
+		//hl.Gc.dumpMemory(); Sys.command("cp memory.hl test.hl");
 
 		var args = Sys.args();
 		while( args.length > 0 ) {
@@ -419,14 +734,14 @@ class Memory {
 			switch( cmd ) {
 			case "exit", "quit", "q":
 				break;
-			case "stats":
+			/*case "stats":
 				m.getStats();
 			case "roots":
 				m.printRoots();
 			case "data":
 				m.lookupData();
 			case "locate":
-				m.locate(args.shift());
+				m.locate(args.shift());*/
 			case "part":
 				m.printPartition();
 			default: