瀏覽代碼

another buffer refactor: remove inputnames and bufferoffset, use multiformat + buffer bindings
allow dynamic vbuf precision mapping

Nicolas Cannasse 2 年之前
父節點
當前提交
b78173ed1a
共有 13 個文件被更改，包括 439 次插入和 355 次删除
  1. 0 39
      h3d/Buffer.hx
  2. 3 5
      h3d/Engine.hx
  3. 84 66
      h3d/impl/DirectXDriver.hx
  4. 1 25
      h3d/impl/Driver.hx
  5. 37 40
      h3d/impl/GlDriver.hx
  6. 2 6
      h3d/impl/LogDriver.hx
  7. 0 8
      h3d/impl/NullDriver.hx
  8. 20 35
      h3d/prim/HMDModel.hx
  9. 17 25
      h3d/prim/Instanced.hx
  10. 47 66
      h3d/prim/MeshPrimitive.hx
  11. 8 14
      h3d/prim/Polygon.hx
  12. 8 6
      h3d/scene/MeshBatch.hx
  13. 212 20
      hxd/BufferFormat.hx

+ 0 - 39
h3d/Buffer.hx

@@ -92,42 +92,3 @@ class Buffer {
 	}
 
 }
-
-class BufferOffset {
-	#if flash
-	static var UID = 0;
-	public var id : Int;
-	#end
-
-	public var buffer : Buffer;
-	public var offset : Int;
-
-	/*
-		This is used to return a list of BufferOffset without allocating an array
-	*/
-	public var next : BufferOffset;
-
-	public function new(buffer, offset) {
-		#if flash
-		this.id = UID++;
-		#end
-		this.buffer = buffer;
-		this.offset = offset;
-	}
-
-	public inline function clone() {
-		var b = new BufferOffset(buffer,offset);
-		#if flash
-		b.id = id;
-		#end
-		return b;
-	}
-
-	public function dispose() {
-		if( buffer != null ) {
-			buffer.dispose();
-			buffer = null;
-		}
-		next = null;
-	}
-}

+ 3 - 5
h3d/Engine.hx

@@ -180,11 +180,11 @@ class Engine {
 		}
 	}
 
-	public function renderMultiBuffers( buffers : Buffer.BufferOffset, indexes : Indexes, startTri = 0, drawTri = -1 ) {
+	public function renderMultiBuffers( format : hxd.BufferFormat.MultiFormat, buffers : Array<Buffer>, indexes : Indexes, startTri = 0, drawTri = -1 ) {
 		var maxTri = Std.int(indexes.count / 3);
 		if( maxTri <= 0 ) return;
 		flushTarget();
-		driver.selectMultiBuffers(buffers);
+		driver.selectMultiBuffers(format, buffers);
 		if( indexes.isDisposed() )
 			return;
 		if( drawTri < 0 ) drawTri = maxTri - startTri;
@@ -196,9 +196,7 @@ class Engine {
 		}
 	}
 
-	public function renderInstanced( buffers : Buffer.BufferOffset, indexes : Indexes, commands : h3d.impl.InstanceBuffer ) {
-		flushTarget();
-		driver.selectMultiBuffers(buffers);
+	public function renderInstanced( indexes : Indexes, commands : h3d.impl.InstanceBuffer ) {
 		if( indexes.isDisposed() )
 			return;
 		if( commands.commandCount > 0 ) {

+ 84 - 66
h3d/impl/DirectXDriver.hx

@@ -38,10 +38,10 @@ private class ShaderContext {
 private class CompiledShader {
 	public var vertex : ShaderContext;
 	public var fragment : ShaderContext;
-	public var layout : Layout;
-	public var inputs : InputNames;
-	public var offsets : Array<Int>;
 	public var format : hxd.BufferFormat;
+	public var perInst : Array<Int>;
+	public var layouts : Map<Int, Layout>;
+	public var vertexBytes : haxe.io.Bytes;
 	public function new() {
 	}
 }
@@ -94,6 +94,7 @@ class DirectXDriver extends h3d.impl.Driver {
 	var currentShader : CompiledShader;
 	var currentIndex : IndexBuffer;
 	var currentDepth : DepthBuffer;
+	var currentLayout : Layout;
 	var currentTargets = new hl.NativeArray<RenderTargetView>(16);
 	var currentTargetResources = new hl.NativeArray<ShaderResourceView>(16);
 	var vertexShader : PipelineState;
@@ -160,7 +161,9 @@ class DirectXDriver extends h3d.impl.Driver {
 			for( s in shaders ) {
 				s.fragment.shader.release();
 				s.vertex.shader.release();
-				s.layout.release();
+				for( l in s.layouts )
+					l.release();
+				s.layouts = [];
 			}
 		}
 		if( depthStates != null ) for( s in depthStates ) { if( s.def != null ) s.def.release(); for( s in s.stencils ) if( s.state != null ) s.state.release(); }
@@ -1094,17 +1097,15 @@ class DirectXDriver extends h3d.impl.Driver {
 			s = new CompiledShader();
 			var vertex = compileShader(shader.vertex);
 			var fragment = compileShader(shader.fragment);
-			var inputs = [];
 			if( hasDeviceError ) return false;
 			s.vertex = vertex.s;
 			s.fragment = fragment.s;
-			s.offsets = [];
-
-			var layout = [], offset = 0, format : Array<hxd.BufferFormat.BufferInput> = [];
+			s.vertexBytes = vertex.bytes;
+			s.perInst = [];
+			s.layouts = new Map();
+			var format : Array<hxd.BufferFormat.BufferInput> = [];
 			for( v in shader.vertex.data.vars )
 				if( v.kind == Input ) {
-					var e = new LayoutElement();
-					var name = hxsl.HlslOut.semanticName(v.name);
 					var perInst = 0;
 					if( v.qualifiers != null )
 						for( q in v.qualifiers )
@@ -1112,38 +1113,11 @@ class DirectXDriver extends h3d.impl.Driver {
 							case PerInstance(k): perInst = k;
 							default:
 							}
-					e.semanticName = @:privateAccess name.toUtf8();
-					e.inputSlot = layout.length;
-					e.format = switch( v.type ) {
-					case TFloat: R32_FLOAT;
-					case TVec(2, VFloat): R32G32_FLOAT;
-					case TVec(3, VFloat): R32G32B32_FLOAT;
-					case TVec(4, VFloat): R32G32B32A32_FLOAT;
-					case TBytes(4): R8G8B8A8_UINT;
-					default:
-						throw "Unsupported input type " + hxsl.Ast.Tools.toString(v.type);
-					};
-					if( perInst > 0 ) {
-						e.inputSlotClass = PerInstanceData;
-						e.instanceDataStepRate = perInst;
-					} else
-						e.inputSlotClass = PerVertexData;
-					layout.push(e);
-					s.offsets.push(offset);
-					inputs.push(v.name);
+					s.perInst.push(perInst);
 					var t = hxd.BufferFormat.InputFormat.fromHXSL(v.type);
 					format.push({ name : v.name, type : t });
-					offset += t.getSize();
 				}
-
-			var n = new hl.NativeArray(layout.length);
-			for( i in 0...layout.length )
-				n[i] = layout[i];
 			s.format = hxd.BufferFormat.make(format);
-			s.inputs = InputNames.get(inputs);
-			s.layout = Driver.createInputLayout(n, vertex.bytes, vertex.bytes.length);
-			if( s.layout == null )
-				throw "Failed to create input layout";
 			shaders.set(shader.id, s);
 		}
 		if( s == currentShader )
@@ -1156,27 +1130,67 @@ class DirectXDriver extends h3d.impl.Driver {
 		currentShader = s;
 		dx.Driver.vsSetShader(s.vertex.shader);
 		dx.Driver.psSetShader(s.fragment.shader);
-		dx.Driver.iaSetInputLayout(s.layout);
-	}
-
-	override function getShaderInputNames() : InputNames {
-		return currentShader.inputs;
+		currentLayout = null;
+	}
+
+	function makeLayout( mapping : Array<hxd.BufferFormat.BufferMapping> ) { // builds the DirectX input layout that binds the current shader's inputs according to `mapping`; throws if an input/precision pair is unsupported or if layout creation fails
+		var layout = new hl.NativeArray(mapping.length);
+		for( index => input in @:privateAccess currentShader.format.inputs ) {
+			var inf = mapping[index]; // mapping entry at the same position as the shader input
+			var e = new LayoutElement();
+			var name = hxsl.HlslOut.semanticName(input.name);
+			e.semanticName = @:privateAccess name.toUtf8();
+			e.inputSlot = index; // one input slot per shader input, same index as the vertex buffer slot filled by selectBuffer/selectMultiBuffers
+			e.format = switch( [input.type, inf.precision] ) { // element format chosen from the shader input type and the precision given by the mapping
+			case [DFloat, F32]: R32_FLOAT;
+			case [DFloat, F16]: R16_FLOAT;
+			case [DVec2, F32]: R32G32_FLOAT;
+			case [DVec2, F16]: R16G16_FLOAT;
+			case [DVec3, F32]: R32G32B32_FLOAT;
+			case [DVec4, F32]: R32G32B32A32_FLOAT;
+			case [DVec3|DVec4, S8]: R8G8B8A8_SNORM; // 3-component inputs at reduced precision share the 4-component formats
+			case [DVec3|DVec4, U8]: R8G8B8A8_UNORM;
+			case [DVec3|DVec4, F16]: R16G16B16A16_FLOAT;
+			case [DBytes4, F32]: R8G8B8A8_UINT;
+			default:
+				throw "Unsupported input type " + input.type+"."+inf.precision;
+			};
+			var perInst = currentShader.perInst[index]; // step rate recorded when the shader was compiled; 0 means per-vertex data
+			if( perInst > 0 ) {
+				e.inputSlotClass = PerInstanceData;
+				e.instanceDataStepRate = perInst;
+			} else
+				e.inputSlotClass = PerVertexData;
+			layout[index] = e;
+		}
+		var l = Driver.createInputLayout(layout, currentShader.vertexBytes, currentShader.vertexBytes.length); // the layout is created against the compiled vertex shader bytes kept on the shader
+		if( l == null )
+			throw "Failed to create input layout";
+		return l;
 	}
 
 	override function selectBuffer(buffer:Buffer) {
 		if( hasDeviceError ) return;
+		// select layout
+		var layout = currentShader.layouts.get(buffer.format.uid);
+		if( layout == null ) {
+			layout = makeLayout(buffer.format.resolveMapping(currentShader.format));
+			currentShader.layouts.set(buffer.format.uid, layout);
+		}
+		if( layout != currentLayout ) {
+			dx.Driver.iaSetInputLayout(layout);
+			currentLayout = layout;
+		}
+		var map = buffer.format.resolveMapping(currentShader.format);
 		var vbuf = @:privateAccess buffer.vbuf;
-		var start = -1, max = -1, position = 0;
-		var bufOffsets;
-		if( buffer.format == currentShader.format || currentShader.format.isSubSet(buffer.format) )
-			bufOffsets = currentShader.offsets;
-		else
-			bufOffsets = buffer.format.getMatchingOffsets(currentShader.format);
-		for( i in 0...currentShader.inputs.names.length ) {
-			if( currentVBuffers[i] != vbuf.res || offsets[i] != bufOffsets[i] << 2 ) {
+		var start = -1, max = -1;
+		var stride = buffer.format.strideBytes;
+		for( i in 0...map.length ) {
+			var inf = map[i];
+			if( currentVBuffers[i] != vbuf.res || offsets[i] != inf.offset || strides[i] != stride ) {
 				currentVBuffers[i] = vbuf.res;
-				strides[i] = buffer.format.stride << 2;
-				offsets[i] = bufOffsets[i] << 2;
+				strides[i] = stride;
+				offsets[i] = inf.offset;
 				if( start < 0 ) start = i;
 				max = i;
 			}
@@ -1185,21 +1199,25 @@ class DirectXDriver extends h3d.impl.Driver {
 			Driver.iaSetVertexBuffers(start, max - start + 1, currentVBuffers.getRef().offset(start), hl.Bytes.getArray(strides).offset(start << 2), hl.Bytes.getArray(offsets).offset(start << 2));
 	}
 
-	override function selectMultiBuffers(bl:Buffer.BufferOffset) {
+	/**
+		Binds several vertex buffers for the current shader.
+		`formats` describes how the shader inputs are spread over `buffers`:
+		each resolved mapping entry gives the buffer index and offset of one input.
+		The matching input layout is created on first use, cached on the shader,
+		and made current before the vertex buffers are bound.
+	**/
+	override function selectMultiBuffers(formats:hxd.BufferFormat.MultiFormat,buffers:Array<Buffer>) {
 		if( hasDeviceError ) return;
-		var index = 0;
-		var start = -1, max = -1;
-		while( bl != null ) {
-			var vbuf = @:privateAccess bl.buffer.vbuf;
-			if( currentVBuffers[index] != vbuf.res || offsets[index] != bl.offset << 2 ) {
-				currentVBuffers[index] = vbuf.res;
-				offsets[index] = bl.offset << 2;
-				strides[index] = bl.buffer.format.stride << 2;
-				if( start < 0 ) start = index;
-				max = index;
+		var map = formats.resolveMapping(currentShader.format);
+		// multi-format layouts are cached under negative keys so they cannot
+		// collide with the BufferFormat uids used as keys by selectBuffer()
+		var layout = currentShader.layouts.get(-formats.uid-1);
+		if( layout == null ) {
+			layout = makeLayout(map);
+			currentShader.layouts.set(-formats.uid-1, layout);
+		}
+		// the layout must be bound, not only cached: selectShader() resets
+		// currentLayout and a previous selectBuffer() may have bound another one
+		if( layout != currentLayout ) {
+			dx.Driver.iaSetInputLayout(layout);
+			currentLayout = layout;
+		}
+		var start = -1, max = -1;
+		for( i in 0...map.length ) {
+			var inf = map[i];
+			var buf = buffers[inf.bufferIndex];
+			if( currentVBuffers[i] != @:privateAccess buf.vbuf.res || offsets[i] != inf.offset || strides[i] != buf.format.strideBytes ) {
+				currentVBuffers[i] = @:privateAccess buf.vbuf.res;
+				strides[i] = buf.format.strideBytes;
+				offsets[i] = inf.offset;
+				// track the range of slots that changed so a single call updates them
+				if( start < 0 ) start = i;
+				max = i;
 			}
-			index++;
-			bl = bl.next;
 		}
 		if( max >= 0 )
 			Driver.iaSetVertexBuffers(start, max - start + 1, currentVBuffers.getRef().offset(start), hl.Bytes.getArray(strides).offset(start << 2), hl.Bytes.getArray(offsets).offset(start << 2));

+ 1 - 25
h3d/impl/Driver.hx

@@ -117,26 +117,6 @@ enum RenderFlag {
 	CameraHandness;
 }
 
-class InputNames {
-	public var id(default,null) : Int;
-	public var names(default,null) : Array<String>;
-	function new(names) {
-		this.id = UID++;
-		this.names = names;
-	}
-	static var UID = 0;
-	static var CACHE = new Map<String,InputNames>();
-	public static function get( names : Array<String> ) {
-		var key = names.join("|");
-		var i = CACHE.get(key);
-		if( i == null ) {
-			i = new InputNames(names.copy());
-			CACHE.set(key,i);
-		}
-		return i;
-	}
-}
-
 class Driver {
 
 	public var logEnable : Bool;
@@ -210,14 +190,10 @@ class Driver {
 	public function uploadShaderBuffers( buffers : h3d.shader.Buffers, which : h3d.shader.Buffers.BufferKind ) {
 	}
 
-	public function getShaderInputNames() : InputNames {
-		return null;
-	}
-
 	public function selectBuffer( buffer : Buffer ) {
 	}
 
-	public function selectMultiBuffers( buffers : Buffer.BufferOffset ) {
+	public function selectMultiBuffers( format : hxd.BufferFormat.MultiFormat, buffers : Array<h3d.Buffer> ) {
 	}
 
 	public function draw( ibuf : IndexBuffer, startIndex : Int, ntriangles : Int ) {

+ 37 - 40
h3d/impl/GlDriver.hx

@@ -58,7 +58,6 @@ private class CompiledAttribute {
 	public var index : Int;
 	public var type : Int;
 	public var size : Int;
-	public var offset : Int;
 	public var divisor : Int;
 	public function new() {
 	}
@@ -69,9 +68,8 @@ private class CompiledProgram {
 	public var vertex : CompiledShader;
 	public var fragment : CompiledShader;
 	public var format : hxd.BufferFormat;
-	public var inputs : InputNames;
 	public var attribs : Array<CompiledAttribute>;
-	public var hasAttribIndex : Array<Bool>;
+	public var hasAttribIndex : Int;
 	public function new() {
 	}
 }
@@ -247,10 +245,6 @@ class GlDriver extends Driver {
 		curBuffer = null;
 	}
 
-	override function getShaderInputNames() {
-		return curShader.inputs;
-	}
-
 	function makeCompiler() {
 		var glout = new ShaderCompiler();
 		glout.glES = glES;
@@ -410,27 +404,22 @@ class GlDriver extends Driver {
 			firstShader = false;
 			initShader(p, p.vertex, shader.vertex, shader);
 			initShader(p, p.fragment, shader.fragment, shader);
-			var attribNames = [];
 			p.attribs = [];
-			p.hasAttribIndex = [];
+			p.hasAttribIndex = 0;
 			var format : Array<hxd.BufferFormat.BufferInput> = [];
-			var stride = 0;
 			for( v in shader.vertex.data.vars )
 				switch( v.kind ) {
 				case Input:
 					var t = hxd.BufferFormat.InputFormat.fromHXSL(v.type);
 					var index = gl.getAttribLocation(p.p, glout.varNames.exists(v.id) ? glout.varNames.get(v.id) : v.name);
-					if( index < 0 ) {
-						stride += t.getSize();
+					if( index < 0 )
 						continue;
-					}
+					if( index >= 32 )
+						throw "assert";
 					var a = new CompiledAttribute();
 					a.type = GL.FLOAT;
 					a.index = index;
 					a.size = t.getSize();
-					a.offset = stride;
-					stride += a.size;
-
 					switch( v.type ) {
 					case TBytes(n):
 						a.type = GL.BYTE;
@@ -447,13 +436,11 @@ class GlDriver extends Driver {
 							}
 					}
 					p.attribs.push(a);
-					p.hasAttribIndex[a.index] = true;
-					attribNames.push(v.name);
+					p.hasAttribIndex |= 1 << a.index;
 					format.push({ name : v.name, type : t });
 				default:
 				}
 			p.format = hxd.BufferFormat.make(format);
-			p.inputs = InputNames.get(attribNames);
 			programs.set(shader.id, p);
 		}
 		if( curShader == p ) return false;
@@ -475,7 +462,7 @@ class GlDriver extends Driver {
 
 		var lastIdxCurAttribTrue = 0;
 		for( i in 0...maxIdxCurAttribs+1 ) {
-			if( curAttribs[i] && !p.hasAttribIndex[i]) {
+			if( curAttribs[i] && p.hasAttribIndex & (1 << i) == 0) {
 				gl.disableVertexAttribArray(i);
 				curAttribs[i] = false;
 			} else if (curAttribs[i]) {
@@ -1278,37 +1265,47 @@ class GlDriver extends Driver {
 
 		if( curShader == null )
 			throw "No shader selected";
-		curBuffer = b;
-
 		var m = @:privateAccess b.vbuf;
 		#if multidriver
 		if( m.driver != this )
 			throw "Invalid buffer context";
 		#end
 		gl.bindBuffer(GL.ARRAY_BUFFER, m.b);
-		var strideBytes = m.stride * 4;
-		if( b.format == curShader.format || curShader.format.isSubSet(b.format) ) {
-			for( a in curShader.attribs ) {
-				gl.vertexAttribPointer(a.index, a.size, a.type, false, strideBytes, a.offset * 4);
-				updateDivisor(a);
-			}
-		} else {
-			var offsets = b.format.getMatchingOffsets(curShader.format);
-			for( i => a in curShader.attribs ) {
-				gl.vertexAttribPointer(a.index, a.size, a.type, false, strideBytes, offsets[i] * 4);
-				updateDivisor(a);
-			}
+		curBuffer = b;
+
+		var strideBytes = b.format.strideBytes;
+		var map = b.format.resolveMapping(curShader.format);
+		for( i => a in curShader.attribs ) {
+			var inf = map[i];
+			var norm = false;
+			gl.vertexAttribPointer(a.index, a.size, switch( inf.precision ) {
+				case F32: a.type;
+				case F16: GL.HALF_FLOAT;
+				case S8: norm = true; GL.BYTE;
+				case U8: norm = true; GL.UNSIGNED_BYTE;
+			}, norm, strideBytes, inf.offset);
+			updateDivisor(a);
 		}
 	}
 
-	override function selectMultiBuffers( buffers : Buffer.BufferOffset ) {
-		for( a in curShader.attribs ) {
-			gl.bindBuffer(GL.ARRAY_BUFFER, @:privateAccess buffers.buffer.vbuf.b);
-			gl.vertexAttribPointer(a.index, a.size, a.type, false, buffers.buffer.format.stride * 4, buffers.offset * 4);
+	override function selectMultiBuffers( format : hxd.BufferFormat.MultiFormat, buffers : Array<h3d.Buffer> ) { // points each attribute of the current shader at the buffer and offset given by the multi-format mapping
+		var map = format.resolveMapping(curShader.format);
+		for( i => a in curShader.attribs ) {
+			var inf = map[i]; // mapping entry at the same position as the attribute
+			var b = buffers[inf.bufferIndex];
+			if( curBuffer != b ) { // rebind ARRAY_BUFFER only when the source buffer changes between attributes
+				gl.bindBuffer(GL.ARRAY_BUFFER, @:privateAccess b.vbuf.b);
+				curBuffer = b; // NOTE(review): the removed code reset curBuffer to null after a multi-buffer bind; confirm selectBuffer() cannot skip its work for this buffer while the attribute pointers still hold multi-buffer offsets
+			}
+			var norm = false;
+			gl.vertexAttribPointer(a.index, a.size, switch( inf.precision ) { // the switch yields the GL component type and sets `norm` for the 8-bit precisions
+			case F32: a.type;
+			case F16: GL.HALF_FLOAT;
+			case S8: norm = true; GL.BYTE;
+			case U8: norm = true; GL.UNSIGNED_BYTE;
+			}, norm, b.format.strideBytes, inf.offset);
 			updateDivisor(a);
-			buffers = buffers.next;
 		}
-		curBuffer = null;
 	}
 
 	override function draw( ibuf : IndexBuffer, startIndex : Int, ntriangles : Int ) {

+ 2 - 6
h3d/impl/LogDriver.hx

@@ -247,18 +247,14 @@ class LogDriver extends Driver {
 		return inf;
 	}
 
-	override function getShaderInputNames() {
-		return d.getShaderInputNames();
-	}
-
 	override function selectBuffer( buffer : Buffer ) {
 		log('SelectBuffer');
 		d.selectBuffer(buffer);
 	}
 
-	override function selectMultiBuffers( buffers : Buffer.BufferOffset ) {
+	override function selectMultiBuffers( formats : hxd.BufferFormat.MultiFormat, buffers : Array<Buffer> ) {
 		log('SelectMultiBuffers');
-		d.selectMultiBuffers(buffers);
+		d.selectMultiBuffers(formats,buffers);
 	}
 
 	override function draw( ibuf : IndexBuffer, startIndex : Int, ntriangles : Int ) {

+ 0 - 8
h3d/impl/NullDriver.hx

@@ -42,14 +42,6 @@ class NullDriver extends Driver {
 		return true;
 	}
 
-	override function getShaderInputNames() : InputNames {
-		var names = [];
-		for( v in cur.vertex.data.vars )
-			if( v.kind == Input )
-				names.push(v.name);
-		return InputNames.get(names);
-	}
-
 	override function allocTexture( t : h3d.mat.Texture ) : Texture {
 		return cast {};
 	}

+ 20 - 35
h3d/prim/HMDModel.hx

@@ -10,7 +10,6 @@ class HMDModel extends MeshPrimitive {
 	var curMaterial : Int;
 	var collider : h3d.col.Collider;
 	var normalsRecomputed : String;
-	var bufferAliases : Map<String,{ realName : String, offset : Int }> = new Map();
 
 	public function new(data, dataPos, lib) {
 		this.data = data;
@@ -18,6 +17,10 @@ class HMDModel extends MeshPrimitive {
 		this.lib = lib;
 	}
 
+	override function hasInput( name : String ) { // true if the base implementation finds `name`, or if the model's vertex format (`data.vertexFormat`) declares it
+		return super.hasInput(name) || data.vertexFormat.hasInput(name);
+	}
+
 	override function triCount() {
 		return Std.int(data.indexCount / 3);
 	}
@@ -46,24 +49,13 @@ class HMDModel extends MeshPrimitive {
 		lib.loadSkin(data, skin);
 	}
 
-	public function addAlias( name : String, realName : String, offset = 0 ) {
-		var old = bufferAliases.get(name);
-		if( old != null ) {
-			if( old.realName != realName || old.offset != offset ) throw "Conflicting alias "+name;
-			return;
-		}
-		bufferAliases.set(name, {realName : realName, offset : offset });
-		// already allocated !
-		if( bufferCache != null ) allocAlias(name);
-	}
-
 	override function alloc(engine:h3d.Engine) {
 		dispose();
 		buffer = new h3d.Buffer(data.vertexCount, data.vertexFormat);
 
 		var entry = lib.resource.entry;
 
-		var size = data.vertexCount * data.vertexFormat.stride * 4;
+		var size = data.vertexCount * data.vertexFormat.strideBytes;
 		var bytes = entry.fetchBytes(dataPosition + data.vertexPosition, size);
 		buffer.uploadBytes(bytes, 0, data.vertexCount);
 
@@ -80,29 +72,17 @@ class HMDModel extends MeshPrimitive {
 		var bytes = entry.fetchBytes(dataPosition + data.indexPosition, size);
 		indexes.uploadBytes(bytes, 0, indexCount);
 
-		var pos = 0;
-		for( f in data.vertexFormat.getInputs() ) {
-			addBuffer(f.name, buffer, pos);
-			pos += f.type.getSize();
+		if( normalsRecomputed != null ) {
+			var name = normalsRecomputed;
+			normalsRecomputed = null;
+			recomputeNormals(name);
 		}
-
-		if( normalsRecomputed != null )
-			recomputeNormals(normalsRecomputed);
-
-		for( name in bufferAliases.keys() )
-			allocAlias(name);
-	}
-
-	function allocAlias( name : String ) {
-		var alias = bufferAliases.get(name);
-		var buffer = bufferCache.get(hxsl.Globals.allocID(alias.realName));
-		if( buffer == null ) throw "Buffer " + alias.realName+" not found for alias " + name;
-		if( buffer.offset + alias.offset > buffer.buffer.format.stride ) throw "Alias " + name+" for buffer " + alias.realName+" outside stride";
-		addBuffer(name, buffer.buffer, buffer.offset + alias.offset);
 	}
 
 	public function recomputeNormals( ?name : String ) {
 
+		if( normalsRecomputed != null )
+			return;
 		if( name != null && data.vertexFormat.hasInput(name) )
 			return;
 
@@ -157,12 +137,14 @@ class HMDModel extends MeshPrimitive {
 			v[k++] = n.y;
 			v[k++] = n.z;
 		}
-		var buf = h3d.Buffer.ofFloats(v, hxd.BufferFormat.make([{ name : "normal", type : DVec3 }]));
-		addBuffer(name, buf, 0);
+		var buf = h3d.Buffer.ofFloats(v, hxd.BufferFormat.make([{ name : name, type : DVec3 }]));
+		addBuffer(buf);
 		normalsRecomputed = name;
 	}
 
 	public function addTangents() {
+		if( hasInput("tangent") )
+			return;
 		var pos = lib.getBuffers(data, hxd.BufferFormat.POS3D);
 		var ids = new Array();
 		var pts : Array<h3d.col.Point> = [];
@@ -200,7 +182,7 @@ class HMDModel extends MeshPrimitive {
 			v[k++] = t.z;
 		}
 		var buf = h3d.Buffer.ofFloats(v, hxd.BufferFormat.make([{ name : "tangent", type : DVec3 }]));
-		addBuffer("tangent", buf, 0);
+		addBuffer(buf);
 	}
 
 	override function render( engine : h3d.Engine ) {
@@ -210,7 +192,10 @@ class HMDModel extends MeshPrimitive {
 		}
 		if( indexes == null || indexes.isDisposed() )
 			alloc(engine);
-		engine.renderMultiBuffers(getBuffers(engine), indexes, indexesTriPos[curMaterial], Std.int(data.indexCounts[curMaterial]/3));
+		if( buffers == null )
+			engine.renderIndexed(buffer, indexes, indexesTriPos[curMaterial], Std.int(data.indexCounts[curMaterial]/3));
+		else
+			engine.renderMultiBuffers(formats, buffers, indexes, indexesTriPos[curMaterial], Std.int(data.indexCounts[curMaterial]/3));
 		curMaterial = -1;
 	}
 

+ 17 - 25
h3d/prim/Instanced.hx

@@ -1,6 +1,6 @@
 package h3d.prim;
 
-class Instanced extends MeshPrimitive {
+class Instanced extends Primitive {
 
 	public var commands : h3d.impl.InstanceBuffer;
 	public var bounds : h3d.col.Bounds;
@@ -15,26 +15,15 @@ class Instanced extends MeshPrimitive {
 	}
 
 	public function setMesh( m : MeshPrimitive ) {
-		if(refCount > 0) {
-			if(primitive != null) {
+		if( refCount > 0 ) {
+			if( primitive != null )
 				primitive.decref();
-				bufferCache = null;
-			}
 			m.incref();
 		}
 		primitive = m;
-		var engine = h3d.Engine.getCurrent();
-		if( m.buffer == null || m.buffer.isDisposed() ) {
-			m.alloc(engine);
-		}
-		buffer = m.buffer;
-		indexes = m.indexes;
 		baseBounds = m.getBounds();
-		if( indexes == null ) indexes = engine.mem.getTriIndexes(buffer.vertices);
-		for( bid in m.bufferCache.keys() ) {
-			var b = m.bufferCache.get(bid);
-			addBuffer(hxsl.Globals.getIDName(bid), b.buffer, b.offset);
-		}
+		if( m.buffer == null )
+			m.alloc(h3d.Engine.getCurrent()); // make sure first alloc is done
 	}
 
 	public function initBounds() {
@@ -48,7 +37,7 @@ class Instanced extends MeshPrimitive {
 	}
 
 	override function dispose() {
-		// Not owning any resources
+		// Not owning any buffer
 	}
 
 	override function incref() {
@@ -67,15 +56,18 @@ class Instanced extends MeshPrimitive {
 		return bounds;
 	}
 
-	// make public
-	public override function addBuffer( name, buffer, offset = 0 ) {
-		super.addBuffer(name, buffer, offset);
-	}
-
 	override function render( engine : h3d.Engine ) {
-		if( buffer.isDisposed() )
-			setMesh(primitive);
-		engine.renderInstanced(getBuffers(engine),indexes,commands);
+		if( primitive.buffer == null || primitive.buffer.isDisposed() )
+			primitive.alloc(engine);
+		@:privateAccess engine.flushTarget();
+		@:privateAccess if( primitive.buffers == null )
+			engine.driver.selectBuffer(primitive.buffer);
+		else
+			engine.driver.selectMultiBuffers(primitive.formats,primitive.buffers);
+		var indexes = primitive.indexes;
+		if( indexes == null )
+			indexes = engine.mem.getTriIndexes(triCount() * 3);
+		engine.renderInstanced(indexes,commands);
 	}
 
 }

+ 47 - 66
h3d/prim/MeshPrimitive.hx

@@ -2,90 +2,71 @@ package h3d.prim;
 
 class MeshPrimitive extends Primitive {
 
-	var bufferCache : Map<Int,h3d.Buffer.BufferOffset>;
-	var layouts : Map<Int,h3d.Buffer.BufferOffset>;
+	var buffers : Array<h3d.Buffer>;
+	var formats : hxd.BufferFormat.MultiFormat;
 
-	function allocBuffer( engine : h3d.Engine, name : String ) {
-		return null;
+	public function hasInput( name : String ) {
+		return resolveBuffer(name) != null;
 	}
 
-	public function hasBuffer( name : String ) {
-		if( bufferCache == null )
-			return false;
-		return bufferCache.exists(hxsl.Globals.allocID(name));
+	public function resolveBuffer( name : String ) { // returns the attached buffer whose format declares the input `name`, or null if none does
+		if( buffers != null ) {
+			for( b in buffers ) // multi-buffer case: the first buffer in list order that declares the input wins
+				if( b.format.hasInput(name) )
+					return b;
+			return null;
+		}
+		if( buffer != null && buffer.format.hasInput(name) ) // single-buffer case
+			return buffer;
+		return null;
 	}
 
-	function getBuffer( name : String ) {
-		if( bufferCache == null )
-			return null;
-		var b = bufferCache.get(hxsl.Globals.allocID(name));
-		return b == null ? null : b.buffer;
+	/**
+		Detaches `buf` from this primitive. The buffer itself is not disposed here.
+		When a single buffer remains, the primitive goes back to single-buffer mode
+		(`buffers` and `formats` are cleared and only `buffer` is used).
+		When several buffers remain, `formats` is rebuilt so that the buffer indexes
+		of its mappings keep matching the positions in `buffers`.
+	**/
+	public function removeBuffer( buf : h3d.Buffer ) {
+		if( buffers != null ) {
+			// nothing to update if the buffer was not attached
+			if( !buffers.remove(buf) )
+				return;
+			if( buf == buffer )
+				buffer = buffers[buffers.length - 1];
+			if( buffers.length == 1 ) {
+				buffers = null;
+				formats = null;
+			} else {
+				// the previous MultiFormat still included the removed buffer's format
+				formats = hxd.BufferFormat.MultiFormat.make([for( b in buffers ) b.format]);
+			}
+		} else if( buffer == buf ) {
+			buffer = null;
+		}
+	}
 
-	function addBuffer( name : String, buf, offset = 0 ) {
-		if( bufferCache == null )
-			bufferCache = new Map();
-		var id = hxsl.Globals.allocID(name);
-		var old = bufferCache.get(id);
-		if( old != null ) {
-			// don't dispose the buffer if it's used by another attribute
-			var inUse = old.buffer == buf;
-			for( b in bufferCache )
-				if( b != old && b.buffer == old.buffer ) {
-					inUse = true;
-					break;
-				}
-			if( !inUse )
-				old.dispose();
+	public function addBuffer( buf : h3d.Buffer ) { // attaches a vertex buffer: the first one becomes `buffer`, any later one switches the primitive to the `buffers` list plus its `formats`; throws on a buffer that is already attached
+		if( buffer == null )
+			buffer = buf;
+		else {
+			if( buffers == null ) {
+				if( buf == buffer ) throw "Duplicate addBuffer()";
+				buffers = [buffer]; // start the list from the buffer that was already attached
+			} else if( buffers.indexOf(buf) >= 0 )
+				throw "Duplicate addBuffer()";
+			buffers.unshift(buf); // the newest buffer is placed first in the list
+			formats = hxd.BufferFormat.MultiFormat.make([for( b in buffers ) b.format]); // rebuilt from the buffers in list order
 		}
-		bufferCache.set(id, new h3d.Buffer.BufferOffset(buf, offset));
-		layouts = null;
 	}
 
+
 	override public function dispose() {
 		super.dispose();
-		if( bufferCache != null )
-			for( b in bufferCache )
+		if( buffers != null ) {
+			for( b in buffers )
 				b.dispose();
-		bufferCache = null;
-		layouts = null;
-	}
-
-	function getBuffers( engine : h3d.Engine ) {
-		if( bufferCache == null )
-			bufferCache = new Map();
-		if( layouts == null )
-			layouts = new Map();
-		var inputs = @:privateAccess engine.driver.getShaderInputNames();
-		var buffers = layouts.get(inputs.id);
-		if( buffers != null )
-			return buffers;
-		var prev = null;
-		for( name in inputs.names ) {
-			var id = hxsl.Globals.allocID(name);
-			var b = bufferCache.get(id);
-			if( b == null ) {
-				b = allocBuffer(engine, name);
-				if( b == null ) throw "Buffer " + name + " is not available";
-				bufferCache.set(id, b);
-			}
-			b = b.clone();
-			if( prev == null ) {
-				buffers = prev = b;
-			} else {
-				prev.next = b;
-				prev = b;
-			}
+			buffers = null;
+			formats = null;
 		}
-		layouts.set(inputs.id, buffers);
-		return buffers;
 	}
 
 	override function render( engine : h3d.Engine ) {
-		// the actual alloc() cache will be implemented by subclasses
-		if( indexes == null || indexes.isDisposed() )
+		if( indexes == null || indexes.isDisposed() || buffer == null || buffer.isDisposed() )
 			alloc(engine);
-		engine.renderMultiBuffers(getBuffers(engine), indexes);
+		if( buffers != null )
+			engine.renderMultiBuffers(formats, buffers, indexes);
+		else
+			engine.renderIndexed(buffer, indexes);
 	}
 
 }

+ 8 - 14
h3d/prim/Polygon.hx

@@ -70,13 +70,6 @@ class Polygon extends MeshPrimitive {
 			}
 		}
 		buffer = h3d.Buffer.ofFloats(buf, format);
-
-		var position = 0;
-		for( i in format.getInputs() ) {
-			addBuffer(i.name, buffer, position);
-			position += i.type.getSize();
-		}
-
 		if( idx != null )
 			indexes = h3d.Indexes.alloc(idx);
 	}
@@ -264,13 +257,14 @@ class Polygon extends MeshPrimitive {
 	override function render( engine : h3d.Engine ) {
 		if( buffer == null || buffer.isDisposed() )
 			alloc(engine);
-		var bufs = getBuffers(engine);
-		if( indexes != null )
-			engine.renderMultiBuffers(bufs, indexes);
-		else {
-			var count = triCount();
-			engine.renderMultiBuffers(bufs, engine.mem.getTriIndexes(count*3), 0, count);
-		}
+		var indexes = indexes;
+		var count = triCount();
+		if( indexes == null )
+			indexes = engine.mem.getTriIndexes(count*3);
+		if( buffers != null )
+			engine.renderMultiBuffers(formats, buffers, indexes, 0, count);
+		else
+			engine.renderIndexed(buffer, indexes, 0, count);
 	}
 
 }

+ 8 - 6
h3d/scene/MeshBatch.hx

@@ -328,7 +328,7 @@ class MeshBatch extends MultiMaterial {
 	}
 
 	static var VEC4_FMT = hxd.BufferFormat.make([{ name : "data", type : DVec4 }]);
-	static var SINGLE_FLOAT_FMT = hxd.BufferFormat.make([{ name : "data", type : DFloat }]);
+	static var BATCH_START_FMT = hxd.BufferFormat.make([{ name : "Batch_Start", type : DFloat }]);
 
 	override function sync(ctx:RenderContext) {
 		super.sync(ctx);
@@ -372,16 +372,18 @@ class MeshBatch extends MultiMaterial {
 			p = p.next;
 		}
 		if( psBytes != null ) {
-			var prim = cast(primitive,h3d.prim.MeshPrimitive);
-			var offsets = @:privateAccess prim.getBuffer("Batch_Start");
+			var offsets = @:privateAccess instanced.primitive.resolveBuffer("Batch_Start");
 			if( offsets == null || offsets.vertices < instanceCount || offsets.isDisposed() ) {
-				if( offsets != null ) offsets.dispose();
+				if( offsets != null ) {
+					offsets.dispose();
+					@:privateAccess instanced.primitive.removeBuffer(offsets);
+				}
 				var tmp = haxe.io.Bytes.alloc(4 * instanceCount);
 				for( i in 0...instanceCount )
 					tmp.setFloat(i<<2, i);
-				offsets = new h3d.Buffer(instanceCount, SINGLE_FLOAT_FMT);
+				offsets = new h3d.Buffer(instanceCount, BATCH_START_FMT);
 				offsets.uploadBytes(tmp,0,instanceCount);
-				@:privateAccess prim.addBuffer("Batch_Start", offsets);
+				@:privateAccess instanced.primitive.addBuffer(offsets);
 			}
 		}
 		needUpload = false;

+ 212 - 20
hxd/BufferFormat.hx

@@ -1,5 +1,34 @@
 package hxd;
 
+
+enum abstract Precision(Int) { // storage precision of a buffer input; the Int value doubles as an index into SIZES
+	var F32 = 0;
+	var F16 = 1;
+	var U8 = 2;
+	var S8 = 3;
+	inline function new(v) {
+		this = v;
+	}
+	public inline function getSize() { // size for this precision as listed in SIZES; BufferInput.getBytesSize multiplies it by the input type size
+		return SIZES[this];
+	}
+	public inline function toInt() {
+		return this;
+	}
+	static inline function fromInt( v : Int ) : Precision {
+		return new Precision(v);
+	}
+	public function toString() {
+		return switch( new Precision(this) ) {
+		case F32: "F32";
+		case F16: "F16";
+		case U8: "U8";
+		case S8: "S8";
+		}
+	}
+	static var SIZES = [4,2,1,1]; // indexed by precision value, in order: F32, F16, U8, S8
+}
+
 enum abstract InputFormat(Int) {
 
 	public var DFloat = 1;
@@ -51,26 +80,70 @@ enum abstract InputFormat(Int) {
+/**
+	One named input of a buffer format: a name, an input type and a storage precision.
+	All three fields are read-only from outside the class.
+**/
 class BufferInput {
 	public var name(default,null) : String;
 	public var type(default,null) : InputFormat;
-	public inline function new( name : String, type : InputFormat ) {
+	public var precision(default,null) : Precision;
+	// precision defaults to F32 when omitted
+	public inline function new( name : String, type : InputFormat, precision = F32 ) {
 		this.name = name;
 		this.type = type;
+		this.precision = precision;
+	}
+	// type.getSize() multiplied by precision.getSize()
+	public inline function getBytesSize() {
+		return type.getSize() * precision.getSize();
+	}
+	// true when type, name and precision are all equal to those of `b`
+	public inline function equals(b:BufferInput) {
+		return type == b.type && name == b.name && precision == b.precision;
 	}
 }
 
+/**
+	A buffer index, an offset and a precision packed into a single Int.
+	The precision is read from bits 0-2, the buffer index from bits 3-10
+	and the offset from bit 16 upward.
+**/
+abstract BufferMapping(Int) {
+	public var bufferIndex(get,never) : Int;
+	public var offset(get,never) : Int;
+	public var precision(get,never) : Precision;
+	public function new(index,offset,prec:Precision) {
+		var packed = prec.toInt();
+		packed |= index << 3;
+		packed |= offset << 16;
+		this = packed;
+	}
+	inline function get_bufferIndex() {
+		return (this >> 3) & 0xFF;
+	}
+	inline function get_precision() {
+		return @:privateAccess new Precision(this & 7);
+	}
+	inline function get_offset() {
+		return this >> 16;
+	}
+}
+
 class BufferFormat {
 
 	static var _UID = 0;
-	public var uid : Int;
+	public var uid(default,null) : Int;
 	public var stride(default,null) : Int;
+	public var strideBytes(default,null) : Int;
 	var inputs : Array<BufferInput>;
-	var offsets : Map<Int, Array<Int>>;
+	var mappings : Array<Array<BufferMapping>>;
 
 	function new( inputs : Array<BufferInput> ) {
 		uid = _UID++;
 		stride = 0;
 		this.inputs = inputs.copy();
-		for( i in inputs )
+		for( i in inputs ) {
 			stride += i.type.getSize();
+			strideBytes += i.getBytesSize();
+			// 4 bytes align
+			if( strideBytes & 3 != 0 )
+				strideBytes += 4 - (strideBytes & 3);
+		}
+	}
+
+	/**
+		Returns the first input whose name is `name`, or null when there is none.
+	**/
+	public function getInput( name : String ) {
+		var match : BufferInput = null;
+		for( input in inputs ) {
+			if( input.name == name ) {
+				match = input;
+				break;
+			}
+		}
+		return match;
+	}
+
+	/**
+		Returns the byte offset of the first input named `name`: the sum of the byte sizes
+		of the inputs before it, rounded up to a multiple of 4 after each of them.
+		Throws a String when no input has that name.
+	**/
+	public function calculateInputOffset( name : String ) {
+		var pos = 0;
+		for( input in inputs ) {
+			if( input.name == name )
+				return pos;
+			// skip this input, then round up to the next multiple of 4
+			pos = (pos + input.getBytesSize() + 3) & ~3;
+		}
+		throw "Input not found : "+name;
 	}
 
 	public function hasInput( name : String, ?type : InputFormat ) {
@@ -106,32 +179,35 @@ class BufferFormat {
 		return true;
 	}
 
-	public function getMatchingOffsets( target : BufferFormat ) {
-		var offs = offsets == null ? null : offsets.get(target.uid);
-		if( offs != null )
-			return offs;
-		offs = [];
+	/**
+		Returns one BufferMapping per input of `target`, in `target` order, each locating
+		the input of this format that has the same name and type (buffer index is always 0).
+		The result is cached in `mappings` under `target.uid`.
+		Throws a String when an input of `target` has no match in this format.
+	**/
+	public function resolveMapping( target : BufferFormat ) {
+		var m = mappings == null ? null : mappings[target.uid];
+		if( m != null )
+			return m;
+		m = [];
 		for( i in target.inputs ) {
-			var v = 0;
+			var found = false;
 			for( i2 in inputs ) {
-				if( i2.name == i.name ) {
-					offs.push(v);
-					v = -1;
+				if( i2.name == i.name && i2.type == i.type ) {
+					m.push(new BufferMapping(0,calculateInputOffset(i2.name),i2.precision));
+					found = true;
 					break;
 				}
-				v += i2.type.getSize();
 			}
-			if( v >= 0 ) throw "Missing buffer input '"+i.name+"'";
+			if( !found ) throw "Missing buffer input '"+i.name+"'";
 		}
-		if( offsets == null ) offsets = new Map();
-		offsets.set(target.uid, offs);
-		return offs;
+		// the cache array is only allocated on first use
+		if( mappings == null ) mappings = [];
+		mappings[target.uid] = m;
+		return m;
 	}
 
 	public inline function getInputs() {
 		return inputs.iterator();
 	}
 
+	/**
+		Lists the inputs as "name:type", followed by "." and the lower-cased precision
+		name when the precision is not F32.
+	**/
+	public function toString() {
+		var parts = [];
+		for( input in inputs ) {
+			var desc = input.name+":"+input.type.toString();
+			if( input.precision != F32 )
+				desc += "."+input.precision.toString().toLowerCase();
+			parts.push(desc);
+		}
+		return parts.toString();
+	}
+
 	/**
 		Alias for XY_UV_RGBA
 	**/
@@ -188,7 +264,7 @@ class BufferFormat {
 		for( fmt in arr ) {
 			var found = true;
 			for( i in 0...inputs.length )
-				if( inputs[i].type != fmt.inputs[i].type ) {
+				if( !inputs[i].equals(fmt.inputs[i]) ) {
 					found = false;
 					break;
 				}
@@ -200,4 +276,120 @@ class BufferFormat {
 		return fmt;
 	}
 
-}
+	/**
+		Converts a Float into a 16-bit half float bit pattern (1 sign, 5 exponent, 10 mantissa bits).
+		The mantissa of normalized values is truncated. Values whose float32 exponent field
+		is above 143 saturate to `sign | 0x7FFF`. Values too small for a half denormal, or any
+		denormal when `denormalsAreZero` is set, return 0.
+		NOTE(review): 0x7FFF is the largest value for the decoder in this file, but reads as NaN
+		under strict IEEE binary16 - confirm what the consumer expects.
+	**/
+	public static function float32to16( v : Float, denormalsAreZero : Bool = false ) : Int {
+		var i = haxe.io.FPHelper.floatToI32(v);
+		var sign = (i & 0x80000000) >>> 16;
+		var exp = (i & 0x7f800000) >>> 23;
+		var bits = i & 0x7FFFFF;
+		// must be tested before the normalized case, which would otherwise return first
+		// and let the exponent wrap around through the 0x7C00 mask
+		if( exp > 143 )
+			return sign | 0x7FFF;
+		if( exp > 112 )
+			return sign | (((exp - 112) << 10)&0x7C00) | (bits>>13);
+		if( exp < 113 && exp > 101 && !denormalsAreZero )
+			return sign | ((((0x7FF000+bits)>>(125-exp))+1)>>1);
+		return 0;
+	}
+
+	/**
+		Expands a 16-bit half float bit pattern into a Float.
+		Only the low 16 bits of `v` are read. Every non-zero exponent field (31 included)
+		is handled as a normalized number: no Inf/NaN pattern is special-cased.
+	**/
+	public static function float16to32( v : Int ) : Float {
+		var sign = (v & 0x8000) << 16; // sign bit moved to bit 31
+		var bits = (v & 0x3FF) << 13; // 10-bit mantissa shifted up to the top of the 23-bit mantissa
+		var exp = (v & 0x7C00) >> 10; // 5-bit exponent field
+		// normalized: add 112 to the exponent field and reuse the mantissa as is
+		if( exp != 0 )
+			return haxe.io.FPHelper.i32ToFloat(sign | ((exp + 112) << 23) | bits);
+		// zero exponent and zero mantissa: returns 0 whatever the sign bit is
+		if( bits == 0 )
+			return 0;
+		// denormal: `bits` is converted to a float so that its exponent field gives the
+		// position of the highest set bit, which is then used to renormalize the mantissa
+		var bitcount = haxe.io.FPHelper.floatToI32(bits) >> 23; // hack to get exp (number of leading zeros)
+		return haxe.io.FPHelper.i32ToFloat(sign | ((bitcount - 37) << 23) | ((bits<<(150-bitcount))&0x7FE000));
+	}
+
+	/**
+		Encodes a Float into 8 bits with bit 7 set for negative values.
+		`v >= 1` gives 0x7F and `v <= -1` gives 0x80. Otherwise `i = floor(v * 128)` and
+		non-negative values give `i` (0..127) while negative values give
+		`(0x7F - i) | 0x80` (0x80..0xFF).
+		NOTE(review): the `v <= -1` clamp returns 0x80, the same code as the smallest negative
+		step (i == -1), and floatS8to32 decodes 0x80 with a zero magnitude - confirm the
+		intended encoding before relying on the negative range.
+	**/
+	public static function float32toS8( v : Float ) : Int {
+		if( v >= 1 )
+			return 0x7F;
+		if( v <= -1 )
+			return 0x80;
+		var i = Math.floor(v * 128);
+		return v >= 0 ? i : (0x7F - i) | 0x80;
+	}
+
+	/**
+		Decodes 8 bits into a Float: the low 7 bits divided by 127 give the magnitude,
+		which is negated when bit 7 is set.
+	**/
+	public static function floatS8to32( v : Int ) {
+		var magnitude = (v & 0x7F) / 127;
+		var negative = (v & 0x80) != 0;
+		return negative ? -magnitude : magnitude;
+	}
+
+	/**
+		Encodes a Float into an unsigned byte: negative values give 0, values at or above 1
+		give 0xFF, anything else gives `floor(v * 256)`.
+	**/
+	public static function float32toU8( v : Float ) : Int {
+		return v < 0 ? 0 : (v >= 1 ? 0xFF : Math.floor(v * 256));
+	}
+
+	/**
+		Decodes an unsigned byte into a Float: the low 8 bits of `v` divided by 255.
+	**/
+	public inline static function floatU8to32( v : Int ) {
+		var byte = v & 0xFF;
+		return byte / 255;
+	}
+
+}
+
+// One level of the lookup tree used by MultiFormat.make: each key is a BufferFormat uid,
+// `found` is the MultiFormat for the sequence ending at that node (null until created),
+// `nexts` holds the nodes for longer sequences.
+typedef MultiFormatCache = Map<Int, { found : MultiFormat, nexts : MultiFormatCache }>;
+
+/**
+	An ordered list of BufferFormat, one per bound buffer.
+	Instances are obtained through `make`, which returns the same instance for the same
+	sequence of format uids, and can resolve where each input of a given BufferFormat is found.
+**/
+class MultiFormat {
+
+	// NOTE(review): not referenced anywhere in this class, `_UID` is the counter actually used
+	static var UID = 0;
+	static var CACHE = new MultiFormatCache();
+
+	static var _UID = 0;
+	public var uid(default,null) : Int;
+	var formats : Array<BufferFormat>;
+	// mappings already computed, indexed by the uid of the requested BufferFormat
+	var mappings : Array<Array<BufferMapping>> = [];
+
+	function new( formats : Array<BufferFormat> ) {
+		uid = _UID++;
+		// copied so that a later change of the caller's array cannot alter an instance
+		// that stays cached under the original sequence of uids
+		this.formats = formats.copy();
+	}
+
+	/**
+		Returns one BufferMapping per input of `format`, computing and caching the
+		mapping the first time this format is requested.
+	**/
+	public inline function resolveMapping( format : hxd.BufferFormat ) {
+		var m = mappings[format.uid];
+		if( m == null )
+			m = makeMapping(format);
+		return m;
+	}
+
+	/**
+		For each input of `format`, looks for the first of our formats having an input of
+		the same name and type, and records its index, the input offset and its precision.
+		Throws a String when an input is found in none of the formats.
+	**/
+	function makeMapping( format : hxd.BufferFormat ) {
+		var m = [];
+		for( input in format.getInputs() ) {
+			var found = false;
+			for( idx => f in formats ) {
+				var i = f.getInput(input.name);
+				if( i != null && i.type == input.type ) {
+					var offset = f.calculateInputOffset(i.name);
+					m.push(new BufferMapping(idx,offset,i.precision));
+					found = true;
+					break;
+				}
+			}
+			if( !found )
+				throw "Missing shader buffer "+input.name;
+		}
+		mappings[format.uid] = m;
+		return m;
+	}
+
+	public static var MAX_FORMATS = 16;
+
+	/**
+		Returns the MultiFormat for this sequence of formats, creating it on first request.
+		The cache is walked one format uid at a time, so the order of `formats` matters.
+		Throws a String when more than MAX_FORMATS formats are given.
+	**/
+	public static function make( formats : Array<BufferFormat> ) : MultiFormat {
+		if( formats.length > MAX_FORMATS )
+			throw "Too many formats (addBuffer leak?) "+[for( f in formats ) f.toString()];
+		var c = { found : null, nexts : CACHE };
+		for( f in formats ) {
+			var c2 = c.nexts.get(f.uid);
+			if( c2 == null ) {
+				c2 = { found : null, nexts : new Map() };
+				c.nexts.set(f.uid, c2);
+			}
+			c = c2;
+		}
+		if( c.found == null )
+			c.found = new MultiFormat(formats);
+		return c.found;
+	}
+
+}
+