5 tháng trước cách đây · 4312baccfb
--- a/h3d/scene/GPUMeshBatch.hx
+++ b/h3d/scene/GPUMeshBatch.hx
@@ -0,0 +1,352 @@
 
				+package h3d.scene;
			
 
				+
			
 
				+import h3d.scene.MeshBatch.BatchData;
			
 
				+import h3d.scene.MeshBatch.MeshBatchPart;
			
 
				+
			
 
				+class GPUMeshBatch extends MeshBatch {
			
 
				+
			
 
				+	static var INDIRECT_DRAW_ARGUMENTS_FMT = hxd.BufferFormat.make([{ name : "", type : DVec4 }, { name : "", type : DFloat }]);
			
 
				+	static var INSTANCE_OFFSETS_FMT = hxd.BufferFormat.make([{ name : "", type : DFloat }]);
			
 
				+
			
 
				+	var matInfos : h3d.Buffer;
			
 
				+	var emittedSubParts : Array<MeshBatch.MeshBatchPart>;
			
 
				+	var currentSubParts : Int;
			
 
				+	var currentMaterialOffset : Int;
			
 
				+	var instanceOffsetsCpu : haxe.io.Bytes;
			
 
				+	var instanceOffsetsGpu : h3d.Buffer;
			
 
				+	var subPartsInfos : h3d.Buffer;
			
 
				+	var countBytes : haxe.io.Bytes;
			
 
				+	var materialCount : Int;
			
 
				+
			
 
				+	var gpuLodEnabled : Bool;
			
 
				+	var gpuCullingEnabled : Bool;
			
 
				+
			
 
				+	/**
			
 
				+	* If set, clip all instances beyond this distance.
			
 
				+	*/
			
 
				+	public var maxDistance : Float = -1;
			
 
				+
			
 
				+	/**
				+	 * Builds the batch through the MeshBatch constructor, then switches gpu update on.
				+	 * Throws on js and on hldx builds without dx12, where this class is not available.
				+	 */
				+	public function new(primitive, ?material, ?parent) {
				+		super(primitive, material, parent);
				+		#if (js || (hldx && !dx12))
				+		throw "Not available on this platform";
				+		#end
				+		// Reached only on supported platforms.
				+		enableGpuUpdate();
				+	}
			
 
				+
			
 
				+	/**
				+	 * Request lod selection on the gpu (compute shader), evaluated each frame.
				+	 * The request is honoured only when a primitive sub part is set or when the
				+	 * primitive reports more than one lod; otherwise gpu lod is turned off.
				+	 * Returns the resulting state of the gpu lod flag.
				+	 */
				+	public function enableGpuLod() {
				+		var lodAvailable = primitiveSubPart != null || getPrimitive().lodCount() > 1;
				+		gpuLodEnabled = lodAvailable;
				+		return lodAvailable;
				+	}
			
 
				+
			
 
				+	/**
				+	 * Turn on per instance frustum culling, performed on the gpu by a compute shader.
				+	 * There is no matching disable call in this class.
				+	 */
				+	public function enableGpuCulling() {
				+		gpuCullingEnabled = true;
				+	}
			
 
				+
			
 
				+	// Number of lods handled by the compute pass: the primitive lod count when gpu lod is on, 1 otherwise.
				+	function getLodCount() {
				+		if ( !gpuLodEnabled )
				+			return 1;
				+		return getPrimitive().lodCount();
				+	}
			
 
				+	// A GPUMeshBatch always reports an offset (feeds Batch_HasOffset in MeshBatch.initShadersMapping).
				+	override function hasOffset() {
				+		return true;
				+	}
			
 
				+
			
 
				+	/**
				+	 * Starts a new batch.
				+	 * Throws when neither gpu lod nor gpu culling has been enabled, since a plain
				+	 * MeshBatch is enough in that case.
				+	 * When a primitive sub part is set, makes sure the cpu side buffer holding the
				+	 * per instance offsets exists (two 32-bit values per instance).
				+	 * Returns the emit count tip as returned by MeshBatch.begin().
				+	 */
				+	override function begin( emitCountTip = -1) {
				+		var anyGpuFeature = gpuLodEnabled || gpuCullingEnabled;
				+		if ( !anyGpuFeature )
				+			throw "No need to create a GPUMeshBatch without gpu lod nor gpu culling, create a regular MeshBatch instead";
				+
				+		emitCountTip = super.begin(emitCountTip);
				+
				+		var needsOffsets = primitiveSubPart != null && ( gpuCullingEnabled || gpuLodEnabled );
				+		if ( needsOffsets && instanceOffsetsCpu == null ) {
				+			// 2 ints of 4 bytes for each expected instance.
				+			instanceOffsetsCpu = haxe.io.Bytes.alloc(emitCountTip * 2 * 4);
				+		}
				+
				+		return emitCountTip;
				+	}
			
 
				+
			
 
				+	// Per pass data for this batch type: GPUBatchData adds the compute pass and the command/count buffers.
				+	override function createBatchData() {
				+		var data = new GPUBatchData();
				+		return data;
				+	}
			
 
				+
			
 
				+	/**
				+	 * Records, for the instance about to be emitted, which sub part it uses.
				+	 * Two 32-bit values are written at index instanceCount of instanceOffsetsCpu:
				+	 * the material offset (sum of lodIndexCount.length + 1 over the sub parts
				+	 * registered before the current one) and the sub part index in emittedSubParts.
				+	 * Sub parts are identified by their indexStart; unknown ones are cloned and appended.
				+	 */
				+	override function emitPrimitiveSubPart() {
				+		// The base implementation is intentionally not called (it fills the cpu command bytes),
				+		// so the bounds accumulation it performs has to be done here to keep calcBounds working.
				+		if ( calcBounds ) @:privateAccess {
				+			instanced.tmpBounds.load(primitiveSubPart.bounds);
				+			instanced.tmpBounds.transform(worldPosition == null ? absPos : worldPosition);
				+			instanced.bounds.add(instanced.tmpBounds);
				+		}
				+
				+		if (emittedSubParts == null) {
				+			currentSubParts = 0;
				+			currentMaterialOffset = 0;
				+			emittedSubParts = [ primitiveSubPart.clone() ];
				+		} else {
				+			var currentIndexStart = emittedSubParts[currentSubParts].indexStart;
				+			if ( currentIndexStart != primitiveSubPart.indexStart  ) {
				+				// Sub part changed since the previous instance: look it up, accumulating the
				+				// material offset of every sub part stored before it.
				+				currentSubParts = -1;
				+				currentIndexStart = primitiveSubPart.indexStart;
				+				currentMaterialOffset = 0;
				+				for ( i => part in emittedSubParts ) {
				+					if ( part.indexStart == currentIndexStart ) {
				+						currentSubParts = i;
				+						break;
				+					}
				+					currentMaterialOffset += part.lodIndexCount.length + 1;
				+				}
				+				if ( currentSubParts < 0 ) {
				+					currentSubParts = emittedSubParts.length;
				+					emittedSubParts.push( primitiveSubPart.clone() );
				+				}
				+			}
				+		}
				+
				+		// Bytes needed to hold the entries of instances 0..instanceCount (2 ints each).
				+		var requiredBytes = ( instanceCount + 1 ) * 2 * 4;
				+		if ( instanceOffsetsCpu == null ) {
				+			// begin() only allocates when a sub part was already set, and cleanPasses() drops the buffer.
				+			instanceOffsetsCpu = haxe.io.Bytes.alloc(requiredBytes);
				+		} else if ( instanceOffsetsCpu.length < requiredBytes ) {
				+			// Grow by 1.5x, but never less than what is needed: a single 1.5x step is not
				+			// enough when the buffer is empty or very small.
				+			var grownSize = Std.int(instanceOffsetsCpu.length*3/2);
				+			if ( grownSize < requiredBytes )
				+				grownSize = requiredBytes;
				+			var next = haxe.io.Bytes.alloc(grownSize);
				+			next.blit(0, instanceOffsetsCpu, 0, instanceOffsetsCpu.length);
				+			instanceOffsetsCpu = next;
				+		}
				+		instanceOffsetsCpu.setInt32((instanceCount * 2 + 0) * 4, currentMaterialOffset);
				+		instanceOffsetsCpu.setInt32((instanceCount * 2 + 1) * 4, currentSubParts);
				+	}
			
 
				+
			
 
				+	/**
				+	 * Builds the gpu tables consumed by the compute pass, then runs MeshBatch.flush().
				+	 *
				+	 * With emitted sub parts: uploads the per instance offsets, and (only while matInfos
				+	 * is null) builds subPartsInfos and matInfos from the emitted sub parts.
				+	 * Without sub parts: (only while matInfos is null) builds matInfos from the primitive,
				+	 * one vec4 per material and per lod when gpu lod is enabled, one vec4 per material otherwise.
				+	 *
				+	 * materialCount is valid during super.flush() and reset to 0 afterwards.
				+	 */
				+	override function flush() {
				+		var allocator = hxd.impl.Allocator.get();
				+		var lodCount = getLodCount();
				+		materialCount = materials.length;
				+		var prim = getPrimitive();
				+		var hmd = Std.downcast(prim, h3d.prim.HMDModel);
				+
				+		if ( emittedSubParts != null ) {
				+			// Two entries per emitted instance (material offset, sub part index).
				+			var offsetCount = instanceCount * 2;
				+			var mustUpload = needUpload;
				+			var needsNewBuffer = instanceOffsetsGpu == null || instanceOffsetsGpu.isDisposed() || offsetCount > instanceOffsetsGpu.vertices;
				+			if ( needsNewBuffer ) {
				+				if ( instanceOffsetsGpu != null )
				+					allocator.disposeBuffer( instanceOffsetsGpu );
				+				instanceOffsetsGpu = allocator.allocBuffer( offsetCount, INSTANCE_OFFSETS_FMT, UniformReadWrite );
				+				mustUpload = true;
				+			}
				+			if ( mustUpload )
				+				instanceOffsetsGpu.uploadBytes( instanceOffsetsCpu, 0, offsetCount );
				+
				+			// NOTE(review): the tables below are only built while matInfos is null, so sub parts
				+			// emitted after the first flush do not seem to be reflected until cleanPasses() - confirm.
				+			if ( matInfos == null ) {
				+				var partCount = emittedSubParts.length;
				+				materialCount = 0;
				+
				+				// Per sub part: lod count and half of the bounds dimension.
				+				// NOTE(review): 2 floats per sub part are handed over with a VEC4_DATA format;
				+				// verify how ofFloats treats a float count that is not a multiple of 4.
				+				var partFloats = allocator.allocFloats( 2 * partCount );
				+				var cursor = 0;
				+				for ( part in emittedSubParts ) {
				+					var partLodCount = part.lodIndexCount.length + 1;
				+					partFloats[cursor++] = partLodCount;
				+					partFloats[cursor++] = part.bounds.dimension() * 0.5;
				+					materialCount += partLodCount;
				+				}
				+				subPartsInfos = allocator.ofFloats( partFloats, hxd.BufferFormat.VEC4_DATA, Uniform );
				+				allocator.disposeFloats(partFloats);
				+
				+				// One vec4 for the base mesh of each sub part, followed by one vec4 per extra lod:
				+				// (indexCount, indexStart, lodConfig value, last float).
				+				var matFloats = allocator.allocFloats( 4 * ( materialCount + partCount ) );
				+				cursor = 0;
				+				for ( part in emittedSubParts ) {
				+					var extraLods = part.lodIndexCount.length;
				+					var config = part.lodConfig;
				+					matFloats[cursor++] = part.indexCount;
				+					matFloats[cursor++] = part.indexStart;
				+					matFloats[cursor++] = ( 0 < config.length ) ? config[0] : 0.0;
				+					matFloats[cursor++] = ( extraLods < config.length && extraLods > 0 ) ? config[config.length - 1] : 0.0;
				+					for ( i in 0...extraLods ) {
				+						matFloats[cursor++] = part.lodIndexCount[i];
				+						matFloats[cursor++] = part.lodIndexStart[i];
				+						matFloats[cursor++] = ( i + 1 < config.length ) ? config[i + 1] : 0.0;
				+						// Fourth float of the lod entries is left untouched.
				+						cursor++;
				+					}
				+				}
				+
				+				matInfos = allocator.ofFloats( matFloats, hxd.BufferFormat.VEC4_DATA, Uniform );
				+				allocator.disposeFloats(matFloats);
				+			}
				+		} else if ( matInfos == null ) {
				+			if ( gpuLodEnabled ) {
				+				var entryCount = materialCount * lodCount;
				+				var matFloats = allocator.allocFloats( 4 * entryCount );
				+				matInfos = allocator.allocBuffer( entryCount, hxd.BufferFormat.VEC4_DATA, Uniform );
				+				// NOTE(review): hmd is null when the primitive is not an HMDModel - confirm this
				+				// branch cannot be reached in that case.
				+				var lodConfig = hmd.getLodConfig();
				+				var runningStart : Int = 0;
				+				// An extra trailing lodConfig value is forwarded as the 4th float of every entry.
				+				var hasCullingRatio = lodConfig.length > lodCount - 1;
				+				var cullingRatio = hasCullingRatio ? lodConfig[lodConfig.length-1] : 0.0;
				+				for ( lodIdx => lod in @:privateAccess hmd.lods ) {
				+					var lodRatio = ( lodIdx < lodConfig.length ) ? lodConfig[lodIdx] : 0.0;
				+					for ( matIdx in 0...materialCount ) {
				+						var count = lod.indexCounts[matIdx];
				+						// Entries are grouped by material, lods being consecutive.
				+						var slot = ( lodIdx + matIdx * lodCount ) * 4;
				+						matFloats[slot + 0] = count;
				+						matFloats[slot + 1] = runningStart;
				+						matFloats[slot + 2] = lodRatio;
				+						matFloats[slot + 3] = cullingRatio;
				+						runningStart += count;
				+					}
				+				}
				+				matInfos.uploadFloats( matFloats, 0, entryCount );
				+				allocator.disposeFloats( matFloats );
				+			} else {
				+				var matFloats = allocator.allocFloats( 4 * materialCount );
				+				matInfos = allocator.allocBuffer( materialCount, hxd.BufferFormat.VEC4_DATA, Uniform );
				+				// Only the first two floats of each vec4 are written.
				+				for ( i in 0...materials.length ) {
				+					matFloats[i * 4 + 0] = prim.getMaterialIndexCount(i);
				+					matFloats[i * 4 + 1] = prim.getMaterialIndexStart(i);
				+				}
				+				matInfos.uploadFloats( matFloats, 0, materialCount );
				+				allocator.disposeFloats( matFloats );
				+			}
				+		}
				+
				+		super.flush();
				+
				+		materialCount = 0;
				+	}
			
 
				+
			
 
				+	/**
				+	 * Makes sure the command buffer and the count buffer exist for buffer `index` of pass data `p`.
				+	 * The command buffer holds min(nextPOT(count), p.maxInstance) entries; it is replaced
				+	 * when an existing one is too small. The count buffer is only created together with
				+	 * the first command buffer.
				+	 */
				+	override function onFlushBuffer(p : BatchData, index : Int, count : Int) {
				+		var gpuData = cast(p, GPUBatchData);
				+		var allocator = hxd.impl.Allocator.get();
				+		var wantedCommands = hxd.Math.imin( hxd.Math.nextPOT( count ), gpuData.maxInstance );
				+
				+		if ( gpuData.commandBuffers == null ) {
				+			gpuData.commandBuffers = [];
				+			gpuData.countBuffers = [];
				+		}
				+
				+		var existing = gpuData.commandBuffers[index];
				+		if ( existing == null ) {
				+			gpuData.commandBuffers[index] = allocator.allocBuffer( wantedCommands, INDIRECT_DRAW_ARGUMENTS_FMT, UniformReadWrite );
				+			gpuData.countBuffers[index] = allocator.allocBuffer( 1, hxd.BufferFormat.VEC4_DATA, UniformReadWrite );
				+			return;
				+		}
				+
				+		if ( existing.vertices < wantedCommands ) {
				+			// Too small for this flush: swap it for a bigger one, the count buffer is kept.
				+			allocator.disposeBuffer( existing );
				+			gpuData.commandBuffers[index] = allocator.allocBuffer( wantedCommands, INDIRECT_DRAW_ARGUMENTS_FMT, UniformReadWrite );
				+		}
				+	}
			
 
				+
			
 
				+	/**
				+	 * Creates (once per pass data) the "batchUpdate" compute pass with its InstanceIndirect
				+	 * shader, then refreshes the shader constants and parameters that do not depend on the
				+	 * buffer being drawn: feature toggles, radius, max distance, material/lod tables and,
				+	 * when sub parts were emitted, the sub part tables.
				+	 */
				+	override function onFlushPass(p : BatchData) {
				+		var gpuData = cast(p, GPUBatchData);
				+		var prim = getPrimitive();
				+		var lodCount = getLodCount();
				+
				+		var computeShader : h3d.shader.InstanceIndirect;
				+		if( gpuData.computePass != null ) {
				+			computeShader = gpuData.computePass.getShader(h3d.shader.InstanceIndirect);
				+		} else {
				+			computeShader = new h3d.shader.InstanceIndirect();
				+			var pass = new h3d.mat.Pass("batchUpdate");
				+			pass.addShader(computeShader);
				+			// Hook letting subclasses append their own compute shaders.
				+			addComputeShaders(pass);
				+			gpuData.computePass = pass;
				+		}
				+
				+		computeShader.ENABLE_LOD = gpuLodEnabled;
				+		computeShader.ENABLE_CULLING = gpuCullingEnabled;
				+		computeShader.ENABLE_DISTANCE_CLIPPING = maxDistance >= 0;
				+		computeShader.radius = prim.getBounds().dimension() * 0.5;
				+		computeShader.maxDistance = maxDistance;
				+		computeShader.matInfos = matInfos;
				+		computeShader.lodCount = lodCount;
				+		computeShader.materialCount = materialCount;
				+
				+		// Smallest multiple of 16 (at least 16) able to hold materialCount * lodCount entries.
				+		computeShader.MAX_MATERIAL_COUNT = 16;
				+		while ( materialCount * lodCount > computeShader.MAX_MATERIAL_COUNT )
				+			computeShader.MAX_MATERIAL_COUNT += 16;
				+
				+		if ( emittedSubParts == null )
				+			return;
				+
				+		computeShader.USING_SUB_PART = true;
				+		computeShader.subPartCount = emittedSubParts.length;
				+		computeShader.subPartInfos = subPartsInfos;
				+		computeShader.instanceOffsets = instanceOffsetsGpu;
				+
				+		// Same rounding for the sub part table, counted in elements holding two sub parts each.
				+		computeShader.MAX_SUB_PART_BUFFER_ELEMENT_COUNT = 16;
				+		var subPartElements = hxd.Math.ceil( emittedSubParts.length / 2 );
				+		while ( subPartElements > computeShader.MAX_SUB_PART_BUFFER_ELEMENT_COUNT )
				+			computeShader.MAX_SUB_PART_BUFFER_ELEMENT_COUNT += 16;
				+	}
			
 
				+
			
 
				+	/**
				+	 * Extension point called once when the compute pass is created (see onFlushPass).
				+	 * Does nothing here; subclasses can add their own shaders to `pass`.
				+	 */
				+	function addComputeShaders( pass : h3d.mat.Pass ) {
				+	}
			
 
				+
			
 
				+	/**
				+	 * Emits one render pass per instance buffer of `p` and, when command buffers exist,
				+	 * dispatches the compute pass filling the command buffer of that instance buffer.
				+	 * The count buffer is reset from countBytes before each dispatch.
				+	 */
				+	override function emitPass(ctx : RenderContext, p : BatchData) {
				+		var gpuData = cast(p, GPUBatchData);
				+		// Number of instances already handed to previous dispatches.
				+		var dispatched = 0;
				+		for( i => instanceBuf in gpuData.buffers ) {
				+			// Buffer index in the low 16 bits, material index above.
				+			ctx.emitPass(gpuData.pass, this).index = i | (gpuData.matIndex << 16);
				+
				+			if ( gpuData.commandBuffers == null || gpuData.commandBuffers.length <= 0 )
				+				continue;
				+
				+			// Instances held by this buffer: a full buffer, or the remainder for the last one.
				+			var count = hxd.Math.imin( instanceCount - gpuData.maxInstance * i, gpuData.maxInstance);
				+			var computeShader = gpuData.computePass.getShader(h3d.shader.InstanceIndirect);
				+			if ( gpuCullingEnabled )
				+				computeShader.frustum = ctx.getCameraFrustumBuffer();
				+			computeShader.instanceData = instanceBuf;
				+			computeShader.matIndex = gpuData.matIndex;
				+			computeShader.commandBuffer = gpuData.commandBuffers[i];
				+
				+			if ( countBytes == null ) {
				+				// 16 bytes (one vec4) whose first int is set to 0.
				+				countBytes = haxe.io.Bytes.alloc(4*4);
				+				countBytes.setInt32(0, 0);
				+			}
				+			var countBuffer = gpuData.countBuffers[i];
				+			countBuffer.uploadBytes(countBytes, 0, 1);
				+			computeShader.countBuffer = countBuffer;
				+			computeShader.startInstanceOffset = dispatched;
				+			computeShader.ENABLE_COUNT_BUFFER = isCountBufferAllowed();
				+
				+			ctx.computeList(@:privateAccess gpuData.computePass.shaders);
				+			ctx.computeDispatch(count);
				+			dispatched += count;
				+		}
				+	}
			
 
				+
			
 
				+	/**
				+	 * After the base setup, points the instanced commands at the gpu command buffer and
				+	 * count buffer of `bufferIndex`, when command buffers have been created.
				+	 */
				+	override function setPassCommand(p : BatchData, bufferIndex : Int) {
				+		super.setPassCommand(p, bufferIndex);
				+		var gpuData = cast(p, GPUBatchData);
				+		if ( gpuData.commandBuffers == null || gpuData.commandBuffers.length <= 0 )
				+			return;
				+		@:privateAccess {
				+			instanced.commands.data = gpuData.commandBuffers[bufferIndex].vbuf;
				+			instanced.commands.countBuffer = gpuData.countBuffers[bufferIndex].vbuf;
				+		}
				+	}
			
 
				+
			
 
				+	/**
				+	 * Whether the compute shader may use the count buffer (ENABLE_COUNT_BUFFER).
				+	 * On hlsdl this follows GlDriver.hasMultiIndirectCount; everywhere else it is always true.
				+	 */
				+	inline function isCountBufferAllowed() {
				+		#if hlsdl
				+		return h3d.impl.GlDriver.hasMultiIndirectCount;
				+		#else
				+		return true;
				+		#end
				+	}
			
 
				+
			
 
				+	/**
				+	 * Releases everything this class allocated on top of MeshBatch: the material and
				+	 * sub part tables, the per instance offsets (gpu and cpu side), the list of emitted
				+	 * sub parts and the count reset bytes.
				+	 * Every disposed buffer is also set to null, so that a later flush() or a second
				+	 * cleanPasses() does not dispose it again.
				+	 */
				+	override function cleanPasses() {
				+		super.cleanPasses();
				+
				+		var alloc = hxd.impl.Allocator.get();
				+		if ( matInfos != null ) {
				+			alloc.disposeBuffer(matInfos);
				+			matInfos = null;
				+		}
				+
				+		if ( subPartsInfos != null ) {
				+			alloc.disposeBuffer(subPartsInfos);
				+			subPartsInfos = null;
				+		}
				+
				+		if ( instanceOffsetsGpu != null ) {
				+			alloc.disposeBuffer(instanceOffsetsGpu);
				+			// flush() disposes any non-null buffer before reallocating it.
				+			instanceOffsetsGpu = null;
				+		}
				+		instanceOffsetsCpu = null;
				+
				+		emittedSubParts = null;
				+		countBytes = null;
				+	}
			
 
				+}
			
 
				+
			
 
				+/**
				+ * Per pass batch data used by GPUMeshBatch: the regular BatchData plus the compute
				+ * pass and, for each instance buffer, a command buffer and a count buffer.
				+ */
				+class GPUBatchData extends BatchData {
				+	// Compute pass filling the command buffers.
				+	public var computePass : h3d.mat.Pass;
				+	// One command buffer per instance buffer, same indices.
				+	public var commandBuffers : Array<h3d.Buffer>;
				+	// One count buffer per instance buffer, same indices.
				+	public var countBuffers : Array<h3d.Buffer>;
				+
				+	/**
				+	 * Disposes the command and count buffers through the allocator and empties both arrays.
				+	 */
				+	override function clean() {
				+		super.clean();
				+
				+		var allocator = hxd.impl.Allocator.get();
				+		if ( commandBuffers == null || commandBuffers.length <= 0 )
				+			return;
				+
				+		for ( commandBuf in commandBuffers )
				+			allocator.disposeBuffer(commandBuf);
				+		commandBuffers.resize(0);
				+
				+		for ( countBuf in countBuffers )
				+			allocator.disposeBuffer(countBuf);
				+		countBuffers.resize(0);
				+	}
				+}
			
--- a/h3d/scene/MeshBatch.hx
+++ b/h3d/scene/MeshBatch.hx
@@ -1,56 +1,6 @@
 
				 package h3d.scene;

			
 
				-class BatchData {

			
 
				-

			
 
				-	public var paramsCount : Int;

			
 
				-	public var maxInstance : Int;

			
 
				-	public var matIndex : Int;

			
 
				-	public var indexCount : Int;

			
 
				-	public var indexStart : Int;

			
 
				-	public var instanceBuffers : Array<h3d.impl.InstanceBuffer>;

			
 
				-	public var buffers : Array<h3d.Buffer> = [];

			
 
				-	public var bufferFormat : hxd.BufferFormat;

			
 
				-	public var data : hxd.FloatBuffer;

			
 
				-	public var params : hxsl.RuntimeShader.AllocParam;

			
 
				-	public var shader : hxsl.BatchShader;

			
 
				-	public var shaders : Array<hxsl.Shader>;

			
 
				-	public var pass : h3d.mat.Pass;

			
 
				-	public var computePass : h3d.mat.Pass;

			
 
				-	public var commandBuffers : Array<h3d.Buffer>;

			
 
				-	public var countBuffers : Array<h3d.Buffer>;

			
 
				-	public var next : BatchData;

			
 
				-

			
 
				-	public function new() {

			
 
				-	}

			
 
				-

			
 
				-}

			
 
				-

			
 
				-class MeshBatchPart {

			
 
				-	public var indexStart : Int;

			
 
				-	public var indexCount : Int;

			
 
				-	public var lodIndexStart : Array<Int>;

			
 
				-	public var lodIndexCount : Array<Int>;

			
 
				-	public var lodConfig : Array<Float>;

			
 
				-	public var baseVertex : Int;

			
 
				-	public var bounds : h3d.col.Bounds;

			
 
				-	public function new() {

			
 
				-	}

			
 
				-

			
 
				-	public function clone() {

			
 
				-		var cl = new MeshBatchPart();

			
 
				-		cl.indexStart = indexStart;

			
 
				-		cl.indexCount = indexCount;

			
 
				-		cl.lodIndexStart = lodIndexStart;

			
 
				-		cl.lodIndexCount = lodIndexCount;

			
 
				-		cl.lodConfig = lodConfig;

			
 
				-		cl.baseVertex = baseVertex;

			
 
				-		cl.bounds = bounds;

			
 
				-		return cl;

			
 
				-	}

			
 
				-}

			
 
				 

			
 
				 enum MeshBatchFlag {

			
 
				-	EnableGpuCulling;

			
 
				-	EnableLod;

			
 
				 	EnableResizeDown;

			
 
				 	EnableGpuUpdate;

			
 
				 	EnableStorageBuffer;

			
@@ -73,19 +23,7 @@ class MeshBatch extends MultiMaterial {
 
				 	var dataPasses : BatchData;

			
 
				 	var needUpload = false;

			
 
				 	var instancedParams : hxsl.Cache.BatchInstanceParams;

			
 
				-

			
 
				-	// used if gpu lod or gpu culling

			
 
				-	static var INDIRECT_DRAW_ARGUMENTS_FMT = hxd.BufferFormat.make([{ name : "", type : DVec4 }, { name : "", type : DFloat }]);

			
 
				-	static var INSTANCE_OFFSETS_FMT = hxd.BufferFormat.make([{ name : "", type : DFloat }]);

			
 
				-	

			
 
				-	var matInfos : h3d.Buffer;

			
 
				-	var emittedSubParts : Array<MeshBatchPart>;

			
 
				-	var currentSubParts : Int;

			
 
				-	var currentMaterialOffset : Int;

			
 
				-	var instanceOffsetsCpu : haxe.io.Bytes;

			
 
				-	var instanceOffsetsGpu : h3d.Buffer;

			
 
				-	var subPartsInfos : h3d.Buffer;

			
 
				-	var countBytes : haxe.io.Bytes;

			
 
				+	var meshBatchFlags(default, null) : haxe.EnumFlags<MeshBatchFlag>;

			
 
				 

			
 
				 	/**

			
 
				 		Set if shader list or shader constants has changed, before calling begin()

			
@@ -119,13 +57,6 @@ class MeshBatch extends MultiMaterial {
 
				 	 */

			
 
				 	public var lodDistance : Float;

			
 
				 

			
 
				-	/**

			
 
				-	 * If set, and gpu update is enabled, clip all instanced behind this distance.

			
 
				-	 */

			
 
				-	public var maxDistance : Float = -1;

			
 
				-

			
 
				-	public var meshBatchFlags(default, null) : haxe.EnumFlags<MeshBatchFlag>;

			
 
				-

			
 
				 	public function new( primitive, ?material, ?parent ) {

			
 
				 		instanced = new h3d.prim.Instanced();

			
 
				 		instanced.commands = new h3d.impl.InstanceBuffer();

			
@@ -135,28 +66,30 @@ class MeshBatch extends MultiMaterial {
 
				 			@:privateAccess p.batchMode = true;

			
 
				 	}

			
 
				 

			
 
				-	function gpuLodEnabled() return meshBatchFlags.has(EnableLod);

			
 
				-	function gpuCullingEnabled() return meshBatchFlags.has(EnableGpuCulling);

			
 
				+	/**
				+	 * Store the per instance params (such as position) in a storage buffer,
				+	 * which allows for a huge amount of instances.
				+	 * Sets the EnableStorageBuffer flag.
				+	 */
				+	public function enableStorageBuffer() {
				+		meshBatchFlags.set(EnableStorageBuffer);
				+	}

			
 
				+

			
 
				+	/**
				+	 * Create the buffer of per instance params (such as position) with its own format,
				+	 * so that compute shaders can update those parameters.
				+	 * Sets both the EnableGpuUpdate and the EnableStorageBuffer flags.
				+	 */
				+	public function enableGpuUpdate() {
				+		for ( flag in [EnableGpuUpdate, EnableStorageBuffer] )
				+			meshBatchFlags.set(flag);
				+	}

			
 
				+

			
 
				+	// True when a primitive sub part is set; used for Batch_HasOffset in initShadersMapping.
				+	function hasOffset() {
				+		return primitiveSubPart != null;
				+	}

			
 
				+	// The primitive wrapped by the instanced primitive (private field, hence the privateAccess).
				+	function getPrimitive() {
				+		return @:privateAccess instanced.primitive;
				+	}

			
 
				 	function storageBufferEnabled() return meshBatchFlags.has(EnableStorageBuffer);

			
 
				-	function mustCalcBufferFormat() return meshBatchFlags.has(EnableGpuUpdate) || gpuCullingEnabled() || gpuLodEnabled();

			
 
				-

			
 
				-	public function begin( emitCountTip = -1, ?flags : haxe.EnumFlags<MeshBatchFlag> ) {

			
 
				-		if ( flags != null ) {

			
 
				-			#if (!js && !(hldx && !dx12))

			
 
				-			var allowedLOD = flags.has(EnableLod) && ( primitiveSubPart != null || @:privateAccess instanced.primitive.lodCount() > 1 );

			
 
				-			flags.setTo(EnableLod, allowedLOD);

			
 
				-			#else

			
 
				-			flags.setTo(EnableLod, false);

			
 
				-			flags.setTo(EnableGpuCulling, false);

			
 
				-			#end

			
 
				-			// Set flags non-related to shaders

			
 
				-			meshBatchFlags.setTo( EnableResizeDown, flags.has(EnableResizeDown) );

			
 
				-			if ( meshBatchFlags != flags )

			
 
				-				shadersChanged = true;

			
 
				-			meshBatchFlags = flags;

			
 
				-			meshBatchFlags.setTo( EnableStorageBuffer, mustCalcBufferFormat() || storageBufferEnabled() );

			
 
				-		}

			
 
				+	// Whether the EnableGpuUpdate flag is set.
				+	function gpuUpdateEnabled() {
				+		return meshBatchFlags.has(EnableGpuUpdate);
				+	}

			
 
				+	// Element budget of one instance buffer, depending on whether a storage buffer is used.
				+	function getMaxElements() {
				+		if ( storageBufferEnabled() )
				+			return MAX_STORAGE_BUFFER_ELEMENTS;
				+		return MAX_BUFFER_ELEMENTS;
				+	}

			
 
				 

			
 
				+	public function begin( emitCountTip = -1 ) : Int {

			
 
				 		instanceCount = 0;

			
 
				 		instanced.initBounds();

			
 
				 		if( shadersChanged ) {

			
@@ -176,10 +109,8 @@ class MeshBatch extends MultiMaterial {
 
				 			}

			
 
				 			p = p.next;

			
 
				 		}

			
 
				-		if ( primitiveSubPart != null && ( gpuCullingEnabled() || gpuLodEnabled() ) && instanceOffsetsCpu == null ) {

			
 
				-			var size = emitCountTip * 2 * 4;

			
 
				-			instanceOffsetsCpu = haxe.io.Bytes.alloc(size);

			
 
				-		}

			
 
				+

			
 
				+		return emitCountTip;

			
 
				 	}

			
 
				 

			
 
				 	function initShadersMapping() {

			
@@ -189,8 +120,9 @@ class MeshBatch extends MultiMaterial {
 
				 		for( index in 0...materials.length ) {

			
 
				 			var mat = materials[index];

			
 
				 			if( mat == null ) continue;

			
 
				-			var matCount = @:privateAccess instanced.primitive.getMaterialIndexCount(index);

			
 
				-			var matStart = @:privateAccess instanced.primitive.getMaterialIndexStart(index);

			
 
				+			var prim = getPrimitive();

			
 
				+			var matCount = prim.getMaterialIndexCount(index);

			
 
				+			var matStart = prim.getMaterialIndexStart(index);

			
 
				 			for( p in mat.getPasses() ) @:privateAccess {

			
 
				 				var ctx = scene.renderer.getPassByName(p.name);

			
 
				 				if( ctx == null ) throw "Could't find renderer pass "+p.name;

			
@@ -200,11 +132,11 @@ class MeshBatch extends MultiMaterial {
 
				 				var rt = output.compileShaders(scene.ctx.globals, shaders, Default);

			
 
				 				var shader = output.shaderCache.makeBatchShader(rt, shaders, instancedParams);

			
 
				 

			
 
				-				var b = new BatchData();

			
 
				+				var b = createBatchData();

			
 
				 				b.indexCount = matCount;

			
 
				 				b.indexStart = matStart;

			
 
				 				b.paramsCount = shader.paramsSize;

			
 
				-				b.maxInstance = Std.int( ( storageBufferEnabled() ? MAX_STORAGE_BUFFER_ELEMENTS : MAX_BUFFER_ELEMENTS ) / b.paramsCount);

			
 
				+				b.maxInstance = Std.int( getMaxElements() / b.paramsCount);

			
 
				 				b.bufferFormat = hxd.BufferFormat.VEC4_DATA;

			
 
				 				if( b.maxInstance <= 0 )

			
 
				 					throw "Mesh batch shaders needs at least one perInstance parameter";

			
@@ -216,56 +148,8 @@ class MeshBatch extends MultiMaterial {
 
				 				p.dynamicParameters = true;

			
 
				 				p.batchMode = true;

			
 
				 

			
 
				-				if( mustCalcBufferFormat() ) {

			
 
				-					var pl = [];

			
 
				-					var p = b.params;

			
 
				-					while( p != null ) {

			
 
				-						pl.push(p);

			
 
				-						p = p.next;

			
 
				-					}

			
 
				-					pl.sort(function(p1,p2) return p1.pos - p2.pos);

			
 
				-					var fmt : Array<hxd.BufferFormat.BufferInput> = [];

			
 
				-					var curPos = 0;

			
 
				-					var paddingIndex = 0;

			
 
				-					for( p in pl ) {

			
 
				-						var paddingSize = p.pos - curPos;

			
 
				-						if ( paddingSize > 0 ) {

			
 
				-							var paddingType : hxsl.Ast.Type = switch ( paddingSize ) {

			
 
				-							case 0:

			
 
				-								TFloat;

			
 
				-							case 1,2,3:

			
 
				-								TVec(paddingSize, VFloat);

			
 
				-							default:

			
 
				-								throw "Buffer has padding";

			
 
				-							}

			
 
				-							var t = hxd.BufferFormat.InputFormat.fromHXSL(paddingType);

			
 
				-							fmt.push(new hxd.BufferFormat.BufferInput("padding_"+paddingIndex,t));

			
 
				-							paddingIndex++;

			
 
				-							curPos = p.pos;

			
 
				-						}

			
 
				-						var name = p.name;

			
 
				-						var prev = fmt.length;

			
 
				-						switch( p.type ) {

			
 
				-						case TMat3:

			
 
				-							for( i in 0...3 )

			
 
				-								fmt.push(new hxd.BufferFormat.BufferInput(name+"__m"+i,DVec3));

			
 
				-						case TMat3x4:

			
 
				-							for( i in 0...3 )

			
 
				-								fmt.push(new hxd.BufferFormat.BufferInput(name+"__m"+i,DVec4));

			
 
				-						case TMat4:

			
 
				-							for( i in 0...4 )

			
 
				-								fmt.push(new hxd.BufferFormat.BufferInput(name+"__m"+i,DVec4));

			
 
				-						default:

			
 
				-							var t = hxd.BufferFormat.InputFormat.fromHXSL(p.type);

			
 
				-							fmt.push(new hxd.BufferFormat.BufferInput(p.name,t));

			
 
				-						}

			
 
				-						for( i in prev...fmt.length )

			
 
				-							curPos += fmt[i].getBytesSize() >> 2;

			
 
				-					}

			
 
				-					if ( curPos & 3 != 0)

			
 
				-						throw "Buffer has padding";

			
 
				-					b.bufferFormat = hxd.BufferFormat.make(fmt);

			
 
				-				}

			
 
				+				if ( gpuUpdateEnabled() )

			
 
				+					calcBufferFormat(b);

			
 
				 

			
 
				 				b.next = dataPasses;

			
 
				 				dataPasses = b;

			
@@ -277,7 +161,7 @@ class MeshBatch extends MultiMaterial {
 
				 				}

			
 
				 				shader.Batch_UseStorage = storageBufferEnabled();

			
 
				 				shader.Batch_Count = storageBufferEnabled() ? 0 : b.maxInstance * b.paramsCount;

			
 
				-				shader.Batch_HasOffset = primitiveSubPart != null || gpuLodEnabled() || gpuCullingEnabled();

			
 
				+				shader.Batch_HasOffset = hasOffset();

			
 
				 				shader.constBits = (shader.Batch_Count << 2) | (shader.Batch_UseStorage ? ( 1 << 1 ) : 0) | (shader.Batch_HasOffset ? 1 : 0);

			
 
				 				shader.updateConstants(null);

			
 
				 			}

			
@@ -291,66 +175,68 @@ class MeshBatch extends MultiMaterial {
 
				 		}

			
 
				 	}

			
 
				 

			
 
				-	public function emitInstance() {

			
 
				-		if( worldPosition == null ) syncPos();

			
 
				-		var ps = primitiveSubPart;

			
 
				-		if( ps != null ) @:privateAccess {

			
 
				-			if(calcBounds) {

			
 
				-				instanced.tmpBounds.load(primitiveSubPart.bounds);

			
 
				-				instanced.tmpBounds.transform(worldPosition == null ? absPos : worldPosition);

			
 
				-				instanced.bounds.add(instanced.tmpBounds);

			
 
				-			}

			
 
				-			if ( gpuLodEnabled() || gpuCullingEnabled() ) {

			
 
				-				if (emittedSubParts == null) {

			
 
				-					currentSubParts = 0;

			
 
				-					currentMaterialOffset = 0;

			
 
				-					emittedSubParts = [ primitiveSubPart.clone() ];

			
 
				-				} else {

			
 
				-					var currentIndexStart = emittedSubParts[currentSubParts].indexStart;

			
 
				-					if ( currentIndexStart != primitiveSubPart.indexStart  ) {

			
 
				-						currentSubParts = -1;

			
 
				-						currentIndexStart = primitiveSubPart.indexStart;

			
 
				-						currentMaterialOffset = 0;

			
 
				-						for ( i => part in emittedSubParts ) {

			
 
				-							if ( part.indexStart == currentIndexStart ) {

			
 
				-								currentSubParts = i;

			
 
				-								break;

			
 
				-							}

			
 
				-							currentMaterialOffset += part.lodIndexCount.length + 1;

			
 
				-						}

			
 
				-						if ( currentSubParts < 0 ) {

			
 
				-							currentSubParts = emittedSubParts.length;

			
 
				-							emittedSubParts.push( primitiveSubPart.clone() );

			
 
				-						}

			
 
				-					}

			
 
				-				}

			
 
				-				var maxInstanceID = ( instanceCount + 1 ) * 2;

			
 
				-				if ( instanceOffsetsCpu.length < maxInstanceID * 4 ) {

			
 
				-					var next = haxe.io.Bytes.alloc(Std.int(instanceOffsetsCpu.length*3/2));

			
 
				-					next.blit(0, instanceOffsetsCpu, 0, instanceOffsetsCpu.length);

			
 
				-					instanceOffsetsCpu = next;

			
 
				-				}

			
 
				-				instanceOffsetsCpu.setInt32((instanceCount * 2 + 0) * 4, currentMaterialOffset);

			
 
				-				instanceOffsetsCpu.setInt32((instanceCount * 2 + 1) * 4, currentSubParts);

			
 
				-			} else {

			
 
				-				if( primitiveSubBytes == null ) {

			
 
				-					primitiveSubBytes = haxe.io.Bytes.alloc(128);

			
 
				-					instanced.commands = null;

			
 
				-				}

			
 
				-				if( primitiveSubBytes.length < (instanceCount+1) * 20 ) {

			
 
				-					var next = haxe.io.Bytes.alloc(Std.int(primitiveSubBytes.length*3/2));

			
 
				-					next.blit(0, primitiveSubBytes, 0, instanceCount * 20);

			
 
				-					primitiveSubBytes = next;

			
 
				+	// Factory for the per pass data; subclasses override it to return their own BatchData type.
				+	function createBatchData() {
				+		var data = new BatchData();
				+		return data;
				+	}

			
 
				+

			
 
				+	function calcBufferFormat(b : BatchData) {

			
 
				+		var pl = [];

			
 
				+		var p = b.params;

			
 
				+		while( p != null ) {

			
 
				+			pl.push(p);

			
 
				+			p = p.next;

			
 
				+		}

			
 
				+		pl.sort(function(p1,p2) return p1.pos - p2.pos);

			
 
				+		var fmt : Array<hxd.BufferFormat.BufferInput> = [];

			
 
				+		var curPos = 0;

			
 
				+		var paddingIndex = 0;

			
 
				+		for( p in pl ) {

			
 
				+			var paddingSize = p.pos - curPos;

			
 
				+			if ( paddingSize > 0 ) {

			
 
				+				var paddingType : hxsl.Ast.Type = switch ( paddingSize ) {

			
 
				+				case 0:

			
 
				+					TFloat;

			
 
				+				case 1,2,3:

			
 
				+					TVec(paddingSize, VFloat);

			
 
				+				default:

			
 
				+					throw "Buffer has padding";

			
 
				 				}

			
 
				-				var p = instanceCount * 20;

			
 
				-				primitiveSubBytes.setInt32(p, ps.indexCount);

			
 
				-				primitiveSubBytes.setInt32(p + 4, 1);

			
 
				-				primitiveSubBytes.setInt32(p + 8, ps.indexStart);

			
 
				-				primitiveSubBytes.setInt32(p + 12, ps.baseVertex);

			
 
				-				primitiveSubBytes.setInt32(p + 16, 0);

			
 
				+				var t = hxd.BufferFormat.InputFormat.fromHXSL(paddingType);

			
 
				+				fmt.push(new hxd.BufferFormat.BufferInput("padding_"+paddingIndex,t));

			
 
				+				paddingIndex++;

			
 
				+				curPos = p.pos;

			
 
				+			}

			
 
				+			var name = p.name;

			
 
				+			var prev = fmt.length;

			
 
				+			switch( p.type ) {

			
 
				+			case TMat3:

			
 
				+				for( i in 0...3 )

			
 
				+					fmt.push(new hxd.BufferFormat.BufferInput(name+"__m"+i,DVec3));

			
 
				+			case TMat3x4:

			
 
				+				for( i in 0...3 )

			
 
				+					fmt.push(new hxd.BufferFormat.BufferInput(name+"__m"+i,DVec4));

			
 
				+			case TMat4:

			
 
				+				for( i in 0...4 )

			
 
				+					fmt.push(new hxd.BufferFormat.BufferInput(name+"__m"+i,DVec4));

			
 
				+			default:

			
 
				+				var t = hxd.BufferFormat.InputFormat.fromHXSL(p.type);

			
 
				+				fmt.push(new hxd.BufferFormat.BufferInput(p.name,t));

			
 
				 			}

			
 
				-		} else if (calcBounds)

			
 
				+			for( i in prev...fmt.length )

			
 
				+				curPos += fmt[i].getBytesSize() >> 2;

			
 
				+		}

			
 
				+		if ( curPos & 3 != 0)

			
 
				+			throw "Buffer has padding";

			
 
				+		b.bufferFormat = hxd.BufferFormat.make(fmt);

			
 
				+	}

			
 
				+

			
 
				+	public function emitInstance() {

			
 
				+		if( worldPosition == null ) syncPos();

			
 
				+		if( primitiveSubPart != null )

			
 
				+			emitPrimitiveSubPart();

			
 
				+		else if (calcBounds)

			
 
				 			instanced.addInstanceBounds(worldPosition == null ? absPos : worldPosition);

			
 
				+

			
 
				 		var p = dataPasses;

			
 
				 		while( p != null ) {

			
 
				 			syncData(p);

			
@@ -359,6 +245,30 @@ class MeshBatch extends MultiMaterial {
 
				 		instanceCount++;

			
 
				 	}

			
 
				 

			
 
				+	/**
				+	 * Records the draw range of `primitiveSubPart` for the instance currently being emitted.
				+	 *
				+	 * When `calcBounds` is set, the sub part bounds are transformed by the instance matrix
				+	 * (`worldPosition`, or `absPos` when it is null) and merged into `instanced.bounds`.
				+	 * A 20 byte record made of five Int32 values is then written into `primitiveSubBytes`
				+	 * at slot `instanceCount`. `instanceCount` itself is not modified here.
				+	 */
				+	function emitPrimitiveSubPart() {

			
 
				+		if(calcBounds) @:privateAccess {

			
 
				+			instanced.tmpBounds.load(primitiveSubPart.bounds);

			
 
				+			instanced.tmpBounds.transform(worldPosition == null ? absPos : worldPosition);

			
 
				+			instanced.bounds.add(instanced.tmpBounds);

			
 
				+		}

			
 
				+

			
 
				+		// Lazily create the record storage (128 bytes to start with).
				+		// NOTE(review): commands is reset here presumably because draw() assigns the
				+		// per pass instance buffer to instanced.commands in this mode - confirm.
				+		if( primitiveSubBytes == null ) {

			
 
				+			primitiveSubBytes = haxe.io.Bytes.alloc(128);

			
 
				+			instanced.commands = null;

			
 
				+		}

			
 
				+		// Grow by 1.5x when one more 20 byte record would not fit, keeping existing records.
				+		// NOTE(review): only one growth step is done. If the buffer is first allocated while
				+		// instanceCount is already large, the new size can still be too small and the blit
				+		// would read past the source buffer - confirm that instances with and without a
				+		// sub part are never mixed in the same batch.
				+		if( primitiveSubBytes.length < (instanceCount+1) * 20 ) {

			
 
				+			var next = haxe.io.Bytes.alloc(Std.int(primitiveSubBytes.length*3/2));

			
 
				+			next.blit(0, primitiveSubBytes, 0, instanceCount * 20);

			
 
				+			primitiveSubBytes = next;

			
 
				+		}

			
 
				+		// Byte offset of the record for this instance.
				+		// Presumably the layout matches indexed indirect draw arguments
				+		// (index count, instance count, start index, base vertex, start instance) - TODO confirm.
				+		var p = instanceCount * 20;

			
 
				+		primitiveSubBytes.setInt32(p, primitiveSubPart.indexCount);

			
 
				+		primitiveSubBytes.setInt32(p + 4, 1);

			
 
				+		primitiveSubBytes.setInt32(p + 8, primitiveSubPart.indexStart);

			
 
				+		primitiveSubBytes.setInt32(p + 12, primitiveSubPart.baseVertex);

			
 
				+		// Placeholder: flush() overwrites this field with the index of the record inside its sub buffer.
				+		primitiveSubBytes.setInt32(p + 16, 0);

			
 
				+	}

			
 
				+

			
 
				 	override function sync(ctx:RenderContext) {

			
 
				 		super.sync(ctx);

			
 
				 		if( instanceCount == 0 ) return;

			
@@ -368,94 +278,8 @@ class MeshBatch extends MultiMaterial {
 
				 	public function flush() {

			
 
				 		var p = dataPasses;

			
 
				 		var alloc = hxd.impl.Allocator.get();

			
 
				-		var psBytes = primitiveSubBytes;

			
 
				-

			
 
				-		var prim = @:privateAccess instanced.primitive;

			
 
				-		var hmd = Std.downcast(prim, h3d.prim.HMDModel);

			
 
				-		var materialCount = materials.length;

			
 
				-		var lodCount = ( gpuLodEnabled() ) ? prim.lodCount() : 1;

			
 
				-

			
 
				-		if ( gpuLodEnabled() || gpuCullingEnabled() ) {

			
 
				-			if ( emittedSubParts != null ) {

			
 
				-				var upload = needUpload;

			
 
				-				var vertex = instanceCount * 2;

			
 
				-				if ( instanceOffsetsGpu == null || instanceOffsetsGpu.isDisposed() || vertex > instanceOffsetsGpu.vertices ) {

			
 
				-					if ( instanceOffsetsGpu != null)

			
 
				-						alloc.disposeBuffer( instanceOffsetsGpu );

			
 
				-					instanceOffsetsGpu = alloc.allocBuffer( vertex, INSTANCE_OFFSETS_FMT, UniformReadWrite );

			
 
				-					upload = true;

			
 
				-				}

			
 
				-				if ( upload )

			
 
				-					instanceOffsetsGpu.uploadBytes( instanceOffsetsCpu, 0, vertex );

			
 
				-

			
 
				-				if ( matInfos == null ) {

			
 
				-					materialCount = 0;

			
 
				-					var tmpSubPartInfos = alloc.allocFloats( 2 * emittedSubParts.length );

			
 
				-					var pos = 0;

			
 
				-					for ( subPart in emittedSubParts ) {

			
 
				-						var lodCount = subPart.lodIndexCount.length + 1;

			
 
				-						tmpSubPartInfos[pos++] = lodCount;

			
 
				-						tmpSubPartInfos[pos++] = subPart.bounds.dimension() * 0.5;

			
 
				-						materialCount += lodCount;

			
 
				-					}

			
 
				-					subPartsInfos = alloc.ofFloats( tmpSubPartInfos, hxd.BufferFormat.VEC4_DATA, Uniform );

			
 
				-					alloc.disposeFloats(tmpSubPartInfos);

			
 
				-

			
 
				-					var tmpMatInfos = alloc.allocFloats( 4 * ( materialCount + emittedSubParts.length ) );

			
 
				-					pos = 0;

			
 
				-					for ( subPart in emittedSubParts ) {

			
 
				-						var maxLod = subPart.lodIndexCount.length;

			
 
				-						var lodConfig = subPart.lodConfig;

			
 
				-						tmpMatInfos[pos++] = subPart.indexCount;

			
 
				-						tmpMatInfos[pos++] = subPart.indexStart;

			
 
				-						tmpMatInfos[pos++] = ( 0 < lodConfig.length ) ? lodConfig[0] : 0.0;

			
 
				-						tmpMatInfos[pos++] = ( maxLod < lodConfig.length && maxLod > 0 ) ? lodConfig[lodConfig.length - 1] : 0.0;

			
 
				-						for ( i in 0...maxLod ) {

			
 
				-							tmpMatInfos[pos++] = subPart.lodIndexCount[i];

			
 
				-							tmpMatInfos[pos++] = subPart.lodIndexStart[i];

			
 
				-							tmpMatInfos[pos++] = ( i + 1 < lodConfig.length ) ? lodConfig[i + 1] : 0.0;

			
 
				-							pos++;

			
 
				-						}

			
 
				-					}

			
 
				 

			
 
				-					matInfos = alloc.ofFloats( tmpMatInfos, hxd.BufferFormat.VEC4_DATA, Uniform );

			
 
				-					alloc.disposeFloats(tmpMatInfos);

			
 
				-				}

			
 
				-			} else if ( matInfos == null ) {

			
 
				-				if ( gpuLodEnabled() ) {

			
 
				-					var tmpMatInfos = alloc.allocFloats( 4 * materialCount * lodCount );

			
 
				-					matInfos = alloc.allocBuffer( materialCount * lodCount, hxd.BufferFormat.VEC4_DATA, Uniform );

			
 
				-					var lodConfig = hmd.getLodConfig();

			
 
				-					var startIndex : Int = 0;

			
 
				-					var lodConfigHasCulling = lodConfig.length > lodCount - 1;

			
 
				-					var minScreenRatioCulling = lodConfigHasCulling ? lodConfig[lodConfig.length-1] : 0.0;

			
 
				-					for ( i => lod in @:privateAccess hmd.lods ) {

			
 
				-						for ( j in 0...materialCount ) {

			
 
				-							var indexCount = lod.indexCounts[j];

			
 
				-							var matIndex = i + j * lodCount;

			
 
				-							tmpMatInfos[matIndex * 4 + 0] = indexCount;

			
 
				-							tmpMatInfos[matIndex * 4 + 1] = startIndex;

			
 
				-							tmpMatInfos[matIndex * 4 + 2] = ( i < lodConfig.length ) ? lodConfig[i] : 0.0;

			
 
				-							tmpMatInfos[matIndex * 4 + 3] = minScreenRatioCulling;

			
 
				-							startIndex += indexCount;

			
 
				-						}

			
 
				-					}

			
 
				-					matInfos.uploadFloats( tmpMatInfos, 0, materialCount * lodCount );

			
 
				-					alloc.disposeFloats( tmpMatInfos );

			
 
				-				} else {

			
 
				-					var tmpMatInfos = alloc.allocFloats( 4 * materialCount );

			
 
				-					matInfos = alloc.allocBuffer( materialCount, hxd.BufferFormat.VEC4_DATA, Uniform );

			
 
				-					var pos : Int = 0;

			
 
				-					for ( i in 0...materials.length ) {

			
 
				-						tmpMatInfos[pos++] = prim.getMaterialIndexCount(i);

			
 
				-						tmpMatInfos[pos++] = prim.getMaterialIndexStart(i);

			
 
				-						pos += 2;

			
 
				-					}

			
 
				-					matInfos.uploadFloats( tmpMatInfos, 0, materialCount );

			
 
				-					alloc.disposeFloats( tmpMatInfos );

			
 
				-				}

			
 
				-			}

			
 
				-		}

			
 
				+		var prim = getPrimitive();

			
 
				 

			
 
				 		while( p != null ) {

			
 
				 			var index = 0;

			
@@ -467,7 +291,7 @@ class MeshBatch extends MultiMaterial {
 
				 				if( count > p.maxInstance )

			
 
				 					count = p.maxInstance;

			
 
				 

			
 
				-				var maxVertexCount = ( mustCalcBufferFormat() ) ? p.maxInstance : ( storageBufferEnabled() ? MAX_STORAGE_BUFFER_ELEMENTS : MAX_BUFFER_ELEMENTS );

			
 
				+				var maxVertexCount = gpuUpdateEnabled() ? p.maxInstance : getMaxElements();

			
 
				 				var vertexCount = Std.int( count * (( 4 * p.paramsCount ) / p.bufferFormat.stride) );

			
 
				 				var vertexCountAllocated = #if js Std.int( MAX_BUFFER_ELEMENTS * 4 / p.bufferFormat.stride ) #else hxd.Math.imin( hxd.Math.nextPOT( vertexCount ), maxVertexCount ) #end;

			
 
				 

			
@@ -481,7 +305,7 @@ class MeshBatch extends MultiMaterial {
 
				 				}

			
 
				 				if( upload )

			
 
				 					buf.uploadFloats(p.data, start * p.paramsCount * 4, vertexCount);

			
 
				-				if( psBytes != null ) {

			
 
				+				if( primitiveSubBytes != null ) {

			
 
				 					if( p.instanceBuffers == null )

			
 
				 						p.instanceBuffers = [];

			
 
				 					var buf = p.instanceBuffers[index];

			
@@ -491,7 +315,7 @@ class MeshBatch extends MultiMaterial {
 
				 					}

			
 
				 					if( buf == null ) {

			
 
				 						buf = new h3d.impl.InstanceBuffer();

			
 
				-						var sub = psBytes.sub(start*20,count*20);

			
 
				+						var sub = primitiveSubBytes.sub(start*20,count*20);

			
 
				 						for( i in 0...count )

			
 
				 							sub.setInt32(i*20+16, i);

			
 
				 						buf.setBuffer(count, sub);

			
@@ -499,87 +323,40 @@ class MeshBatch extends MultiMaterial {
 
				 					}

			
 
				 				}

			
 
				 

			
 
				-				var commandCountAllocated = hxd.Math.imin( hxd.Math.nextPOT( count ), p.maxInstance );

			
 
				+				onFlushBuffer(p, index, count);

			
 
				 

			
 
				-				if ( gpuLodEnabled() || gpuCullingEnabled() ) {

			
 
				-					if ( p.commandBuffers == null) {

			
 
				-						p.commandBuffers = [];

			
 
				-						p.countBuffers = [];

			
 
				-					}

			
 
				-					var buf = p.commandBuffers[index];

			
 
				-					var cbuf = p.countBuffers[index];

			
 
				-					if ( buf == null ) {

			
 
				-						buf = alloc.allocBuffer( commandCountAllocated, INDIRECT_DRAW_ARGUMENTS_FMT, UniformReadWrite );

			
 
				-						cbuf = alloc.allocBuffer( 1, hxd.BufferFormat.VEC4_DATA, UniformReadWrite );

			
 
				-						p.commandBuffers[index] = buf;

			
 
				-						p.countBuffers[index] = cbuf;

			
 
				-					}

			
 
				-					else if ( buf.vertices < commandCountAllocated ) {

			
 
				-						alloc.disposeBuffer( buf );

			
 
				-						buf = alloc.allocBuffer( commandCountAllocated, INDIRECT_DRAW_ARGUMENTS_FMT, UniformReadWrite );

			
 
				-						p.commandBuffers[index] = buf;

			
 
				-					}

			
 
				-				}

			
 
				 				start += count;

			
 
				 				index++;

			
 
				 			}

			
 
				-			if ( ( gpuLodEnabled() || gpuCullingEnabled() ) ) {

			
 
				-				var computeShader;

			
 
				-				if( p.computePass == null ) {

			
 
				-					computeShader = new h3d.shader.InstanceIndirect();

			
 
				-					var computePass = new h3d.mat.Pass("batchUpdate");

			
 
				-					computePass.addShader(computeShader);

			
 
				-					addComputeShaders(computePass);

			
 
				-					p.computePass = computePass;

			
 
				-				} else {

			
 
				-					computeShader = p.computePass.getShader(h3d.shader.InstanceIndirect);

			
 
				-				}

			
 
				 

			
 
				-				computeShader.ENABLE_LOD = gpuLodEnabled();

			
 
				-				computeShader.ENABLE_CULLING = gpuCullingEnabled();

			
 
				-				computeShader.ENABLE_DISTANCE_CLIPPING = maxDistance >= 0;

			
 
				-				computeShader.radius = prim.getBounds().dimension() * 0.5;

			
 
				-				computeShader.maxDistance = maxDistance;

			
 
				-				computeShader.matInfos = matInfos;

			
 
				-				computeShader.lodCount = lodCount;

			
 
				-				computeShader.materialCount = materialCount;

			
 
				-				computeShader.MAX_MATERIAL_COUNT = 16;

			
 
				-				while ( materialCount * lodCount > computeShader.MAX_MATERIAL_COUNT )

			
 
				-					computeShader.MAX_MATERIAL_COUNT = computeShader.MAX_MATERIAL_COUNT + 16;

			
 
				-

			
 
				-				if ( emittedSubParts != null ) {

			
 
				-					computeShader.USING_SUB_PART = true;

			
 
				-					computeShader.subPartCount = emittedSubParts.length;

			
 
				-					computeShader.subPartInfos = subPartsInfos;

			
 
				-					computeShader.instanceOffsets = instanceOffsetsGpu;

			
 
				-					computeShader.MAX_SUB_PART_BUFFER_ELEMENT_COUNT = 16;

			
 
				-					var maxSubPartsElement = hxd.Math.ceil( emittedSubParts.length / 2 );

			
 
				-					while ( maxSubPartsElement > computeShader.MAX_SUB_PART_BUFFER_ELEMENT_COUNT )

			
 
				-						computeShader.MAX_SUB_PART_BUFFER_ELEMENT_COUNT = computeShader.MAX_SUB_PART_BUFFER_ELEMENT_COUNT + 16;

			
 
				-				}

			
 
				-			}

			
 
				+			onFlushPass(p);

			
 
				+

			
 
				 			while( p.buffers.length > index )

			
 
				 				alloc.disposeBuffer( p.buffers.pop() );

			
 
				 			p = p.next;

			
 
				 		}

			
 
				-		if( psBytes != null || gpuLodEnabled() || gpuCullingEnabled() ) {

			
 
				-			var offsets = @:privateAccess instanced.primitive.resolveBuffer("Batch_Start");

			
 
				+		if( hasOffset() ) {

			
 
				+			var offsets = prim.resolveBuffer("Batch_Start");

			
 
				 			if( offsets == null || offsets.vertices < instanceCount || offsets.isDisposed() ) {

			
 
				 				if( offsets != null ) {

			
 
				 					offsets.dispose();

			
 
				-					@:privateAccess instanced.primitive.removeBuffer(offsets);

			
 
				+					prim.removeBuffer(offsets);

			
 
				 				}

			
 
				 				var tmp = haxe.io.Bytes.alloc(4 * instanceCount);

			
 
				 				for( i in 0...instanceCount )

			
 
				 					tmp.setFloat(i<<2, i);

			
 
				 				offsets = new h3d.Buffer(instanceCount, BATCH_START_FMT);

			
 
				 				offsets.uploadBytes(tmp,0,instanceCount);

			
 
				-				@:privateAccess instanced.primitive.addBuffer(offsets);

			
 
				+				prim.addBuffer(offsets);

			
 
				 			}

			
 
				 		}

			
 
				 		needUpload = false;

			
 
				 	}

			
 
				 

			
 
				+	/**
				+	 * Hook called by flush() once per instance data buffer of a pass, after the buffer
				+	 * (and its instance buffer, if any) has been set up.
				+	 * `index` is the buffer index inside `p.buffers` and `count` the number of instances it holds.
				+	 * Does nothing here; presumably meant to be overridden by subclasses.
				+	 */
				+	function onFlushBuffer(p : BatchData, index : Int, count : Int) {}

			
 
				+

			
 
				+	/**
				+	 * Hook called by flush() once per batch pass, after all of its buffers have been processed
				+	 * and before the unused trailing buffers are disposed.
				+	 * Does nothing here; presumably meant to be overridden by subclasses.
				+	 */
				+	function onFlushPass(p : BatchData) {}

			
 
				+

			
 
				 	function syncData( batch : BatchData ) {

			
 
				 

			
 
				 		var startPos = batch.paramsCount * instanceCount << 2;

			
@@ -675,54 +452,33 @@ class MeshBatch extends MultiMaterial {
 
				 

			
 
				 			// check that the pass is still enable

			
 
				 			var material = materials[p.matIndex];

			
 
				-			if( material != null && material.getPass(pass.name) != null ) {

			
 
				-				var emittedCount = 0;

			
 
				-				for( i => buf in p.buffers ) {

			
 
				-					ctx.emitPass(pass, this).index = i | (p.matIndex << 16);

			
 
				-					if ( p.commandBuffers != null && p.commandBuffers.length > 0 ) {

			
 
				-						var count = hxd.Math.imin( instanceCount - p.maxInstance * i, p.maxInstance);

			
 
				-						var computeShader = p.computePass.getShader(h3d.shader.InstanceIndirect);

			
 
				-						if ( gpuCullingEnabled() )

			
 
				-							computeShader.frustum = ctx.getCameraFrustumBuffer();

			
 
				-						computeShader.instanceData = buf;

			
 
				-						computeShader.matIndex = p.matIndex;

			
 
				-						computeShader.commandBuffer = p.commandBuffers[i];

			
 
				-						if ( countBytes == null ) {

			
 
				-							countBytes = haxe.io.Bytes.alloc(4*4);

			
 
				-							countBytes.setInt32(0, 0);

			
 
				-						}

			
 
				-						p.countBuffers[i].uploadBytes(countBytes, 0, 1);

			
 
				-						computeShader.countBuffer = p.countBuffers[i];

			
 
				-						computeShader.startInstanceOffset = emittedCount;

			
 
				-						computeShader.ENABLE_COUNT_BUFFER = isCountBufferAllowed();

			
 
				-						ctx.computeList(@:privateAccess p.computePass.shaders);

			
 
				-						ctx.computeDispatch(count);

			
 
				-						emittedCount += count;

			
 
				-					}

			
 
				-				}

			
 
				-			}

			
 
				+			if( material != null && material.getPass(pass.name) != null )

			
 
				+				emitPass(ctx, p);

			
 
				 			p = p.next;

			
 
				 		}

			
 
				 	}

			
 
				 

			
 
				+	/**
				+	 * Emits `p.pass` into the render context once for every buffer of `p`.
				+	 * The `index` of each emitted entry packs the buffer index in its low bits and
				+	 * `p.matIndex` shifted left by 16; draw() reads the buffer index back with `& 0xFFFF`.
				+	 */
				+	function emitPass(ctx : RenderContext, p : BatchData) {

			
 
				+		for( i => buf in p.buffers )

			
 
				+			ctx.emitPass(p.pass, this).index = i | (p.matIndex << 16);

			
 
				+	}

			
 
				+

			
 
				 	override function draw(ctx:RenderContext) {

			
 
				 		var p = dataPasses;

			
 
				 		while( true ) {

			
 
				 			if( p.pass == ctx.drawPass.pass ) {

			
 
				 				var bufferIndex = ctx.drawPass.index & 0xFFFF;

			
 
				+

			
 
				 				if ( storageBufferEnabled() )

			
 
				 					p.shader.Batch_StorageBuffer = p.buffers[bufferIndex];

			
 
				 				else

			
 
				 					p.shader.Batch_Buffer = p.buffers[bufferIndex];

			
 
				-				if( p.instanceBuffers == null ) {

			
 
				-					var count = hxd.Math.imin( instanceCount - p.maxInstance * bufferIndex, p.maxInstance );

			
 
				-					instanced.setCommand(p.matIndex, instanced.screenRatioToLod(curScreenRatio), count);

			
 
				-					if ( p.commandBuffers != null && p.commandBuffers.length > 0 ) {

			
 
				-						@:privateAccess instanced.commands.data = p.commandBuffers[bufferIndex].vbuf;

			
 
				-						@:privateAccess instanced.commands.countBuffer = p.countBuffers[bufferIndex].vbuf;

			
 
				-					}

			
 
				-				} else

			
 
				+

			
 
				+				if( p.instanceBuffers == null )

			
 
				+					setPassCommand(p, bufferIndex);

			
 
				+				else

			
 
				 					instanced.commands = p.instanceBuffers[bufferIndex];

			
 
				+

			
 
				 				break;

			
 
				 			}

			
 
				 			p = p.next;

			
@@ -734,8 +490,13 @@ class MeshBatch extends MultiMaterial {
 
				 		ctx.drawPass.index = prev;

			
 
				 	}

			
 
				 

			
 
				+	/**
				+	 * Sets the draw command on `instanced` for one buffer of a pass.
				+	 * Called by draw() when the pass has no instance buffers.
				+	 * `bufferIndex` selects the buffer inside `p.buffers`.
				+	 */
				+	function setPassCommand(p : BatchData, bufferIndex : Int) {

			
 
				+		// Each buffer holds up to p.maxInstance instances; the last one holds the remainder.
				+		var count = hxd.Math.imin( instanceCount - p.maxInstance * bufferIndex, p.maxInstance );

			
 
				+		instanced.setCommand(p.matIndex, instanced.screenRatioToLod(curScreenRatio), count);

			
 
				+	}

			
 
				+

			
 
				 	override function calcScreenRatio(ctx:RenderContext) {

			
 
				-		curScreenRatio = @:privateAccess instanced.primitive.getBounds().dimension() / ( 2.0 * hxd.Math.max(lodDistance, 0.0001) );

			
 
				+		curScreenRatio = getPrimitive().getBounds().dimension() / ( 2.0 * hxd.Math.max(lodDistance, 0.0001) );

			
 
				 	}

			
 
				 

			
 
				 	override function addBoundsRec( b : h3d.col.Bounds, relativeTo: h3d.Matrix ) {

			
@@ -753,16 +514,6 @@ class MeshBatch extends MultiMaterial {
 
				 			b.addTransform(bounds, relativeTo);

			
 
				 	}

			
 
				 

			
 
				-	function addComputeShaders( pass : h3d.mat.Pass ) {}

			
 
				-

			
 
				-	inline function isCountBufferAllowed() {

			
 
				-		#if hlsdl

			
 
				-		return h3d.impl.GlDriver.hasMultiIndirectCount;

			
 
				-		#else

			
 
				-		return true;

			
 
				-		#end

			
 
				-	}

			
 
				-

			
 
				 	override function onRemove() {

			
 
				 		super.onRemove();

			
 
				 		cleanPasses();

			
@@ -783,45 +534,76 @@ class MeshBatch extends MultiMaterial {
 
				 	function cleanPasses() {

			
 
				 		var alloc = hxd.impl.Allocator.get();

			
 
				 		while( dataPasses != null ) {

			
 
				-			dataPasses.pass.removeShader(dataPasses.shader);

			
 
				-			for( b in dataPasses.buffers )

			
 
				-				alloc.disposeBuffer(b);

			
 
				-

			
 
				-			if ( dataPasses.commandBuffers != null && dataPasses.commandBuffers.length > 0 ) {

			
 
				-				@:privateAccess instanced.commands.data = null;

			
 
				-				for ( buf in dataPasses.commandBuffers )

			
 
				-					alloc.disposeBuffer(buf);

			
 
				-				dataPasses.commandBuffers.resize(0);

			
 
				-				for ( buf in dataPasses.countBuffers )

			
 
				-					alloc.disposeBuffer(buf);

			
 
				-				dataPasses.countBuffers.resize(0);

			
 
				-				dataPasses.computePass = null;

			
 
				-			}

			
 
				-

			
 
				-			if( dataPasses.instanceBuffers != null ) {

			
 
				-				for( b in dataPasses.instanceBuffers )

			
 
				-					b.dispose();

			
 
				-			}

			
 
				-			alloc.disposeFloats(dataPasses.data);

			
 
				+			dataPasses.clean();

			
 
				 			dataPasses = dataPasses.next;

			
 
				 		}

			
 
				-		if ( matInfos != null ) {

			
 
				-			alloc.disposeBuffer(matInfos);

			
 
				-			matInfos = null;

			
 
				-		}

			
 
				-		if( instanced.commands != null )

			
 
				-			instanced.commands.dispose();

			
 
				-

			
 
				-		if ( subPartsInfos != null )

			
 
				-			alloc.disposeBuffer(subPartsInfos);

			
 
				 

			
 
				-		if ( instanceOffsetsGpu != null )

			
 
				-			alloc.disposeBuffer(instanceOffsetsGpu);

			
 
				-		instanceOffsetsCpu = null;

			
 
				+		if( instanced.commands != null ) {

			
 
				+			instanced.commands.dispose();

			
 
				+			@:privateAccess instanced.commands.data = null;

			
 
				+		}

			
 
				 

			
 
				 		primitiveSubBytes = null;

			
 
				-		emittedSubParts = null;

			
 
				-		countBytes = null;

			
 
				 		shadersChanged = true;

			
 
				 	}

			
 
				+}

			
 
				+

			
 
				+/**
				+ * Per pass instance data kept by MeshBatch.
				+ * Entries are chained through `next`; MeshBatch walks the chain starting at `dataPasses`.
				+ */
				+class BatchData {

			
 
				+

			
 
				+	// Size of the data of one instance; flush() and syncData() multiply it by 4 to get a float count.
				+	public var paramsCount : Int;

			
 
				+	// Maximum number of instances stored in a single entry of `buffers`.
				+	public var maxInstance : Int;

			
 
				+	// Index of the material (in the batch `materials` array) this pass belongs to.
				+	public var matIndex : Int;

			
 
				+	public var indexCount : Int;

			
 
				+	public var indexStart : Int;

			
 
				+	// Left null unless flush() runs with sub part records; then one entry per entry of `buffers`.
				+	public var instanceBuffers : Array<h3d.impl.InstanceBuffer>;

			
 
				+	// GPU buffers holding the instance data, each covering up to `maxInstance` instances.
				+	public var buffers : Array<h3d.Buffer> = [];

			
 
				+	public var bufferFormat : hxd.BufferFormat;

			
 
				+	// CPU side instance data that flush() uploads into `buffers`.
				+	public var data : hxd.FloatBuffer;

			
 
				+	public var params : hxsl.RuntimeShader.AllocParam;

			
 
				+	// Batch shader attached to `pass`; draw() binds the current buffer to it.
				+	public var shader : hxsl.BatchShader;

			
 
				+	public var shaders : Array<hxsl.Shader>;

			
 
				+	public var pass : h3d.mat.Pass;

			
 
				+	// Next entry of the chain, or null for the last one.
				+	public var next : BatchData;

			
 
				+

			
 
				+	public function new() {

			
 
				+	}

			
 
				+

			
 
				+	/**
				+	 * Releases what this entry owns: removes `shader` from `pass`, hands every buffer and
				+	 * the float data back to the allocator and disposes the instance buffers if there are any.
				+	 * The fields themselves are left untouched.
				+	 */
				+	public function clean() {

			
 
				+		var alloc = hxd.impl.Allocator.get();

			
 
				+

			
 
				+		pass.removeShader(shader);

			
 
				+		for( b in buffers )

			
 
				+			alloc.disposeBuffer(b);

			
 
				+

			
 
				+		if( instanceBuffers != null ) {

			
 
				+			for( b in instanceBuffers )

			
 
				+				b.dispose();

			
 
				+		}

			
 
				+		alloc.disposeFloats(data);

			
 
				+	}

			
 
				+}

			
 
				+

			
 
				+/**
				+ * Describes a sub range of a primitive that a MeshBatch instance can draw
				+ * instead of the whole primitive (see `primitiveSubPart` in MeshBatch).
				+ */
				+class MeshBatchPart {

			
 
				+	// Start index and index count of the range; copied into the per instance draw record.
				+	public var indexStart : Int;

			
 
				+	public var indexCount : Int;

			
 
				+	// TODO : remove lod here

			
 
				+	public var lodIndexStart : Array<Int>;

			
 
				+	public var lodIndexCount : Array<Int>;

			
 
				+	public var lodConfig : Array<Float>;

			
 
				+	// Base vertex value copied into the per instance draw record.
				+	public var baseVertex : Int;

			
 
				+	// Bounds of the part; MeshBatch transforms them per instance to grow the batch bounds.
				+	public var bounds : h3d.col.Bounds;

			
 
				+	public function new() {

			
 
				+	}

			
 
				+

			
 
				+	/**
				+	 * Returns a new MeshBatchPart with every field copied.
				+	 * The copy is shallow: the lod arrays and `bounds` are shared with the original.
				+	 */
				+	public function clone() {

			
 
				+		var cl = new MeshBatchPart();

			
 
				+		cl.indexStart = indexStart;

			
 
				+		cl.indexCount = indexCount;

			
 
				+		cl.lodIndexStart = lodIndexStart;

			
 
				+		cl.lodIndexCount = lodIndexCount;

			
 
				+		cl.lodConfig = lodConfig;

			
 
				+		cl.baseVertex = baseVertex;

			
 
				+		cl.bounds = bounds;

			
 
				+		return cl;

			
 
				+	}

			
 
				 }