浏览代码

Split MeshBatch gpu culling and lod selection into its own child class.

clementlandrin 5 月之前
父节点
当前提交
4312baccfb
共有 2 个文件被更改,包括 567 次插入433 次删除
  1. 352 0
      h3d/scene/GPUMeshBatch.hx
  2. 215 433
      h3d/scene/MeshBatch.hx

+ 352 - 0
h3d/scene/GPUMeshBatch.hx

@@ -0,0 +1,352 @@
+package h3d.scene;
+
+import h3d.scene.MeshBatch.BatchData;
+import h3d.scene.MeshBatch.MeshBatchPart;
+
+class GPUMeshBatch extends MeshBatch {
+
+	// Format of the per buffer command buffers allocated in onFlushBuffer:
+	// one DVec4 followed by one DFloat, i.e. five components per command.
+	// NOTE(review): presumably mirrors the five int32 indirect draw arguments - confirm against the compute shader.
+	static var INDIRECT_DRAW_ARGUMENTS_FMT = hxd.BufferFormat.make([{ name : "", type : DVec4 }, { name : "", type : DFloat }]);
+	// Format of instanceOffsetsGpu: a single component per element.
+	static var INSTANCE_OFFSETS_FMT = hxd.BufferFormat.make([{ name : "", type : DFloat }]);
+
+	// Per material / lod infos built in flush() and handed to the compute shader.
+	var matInfos : h3d.Buffer;
+	// Clones of the distinct sub parts emitted so far (distinguished by their indexStart).
+	var emittedSubParts : Array<MeshBatch.MeshBatchPart>;
+	// Index in emittedSubParts of the sub part used by the last emitted instance.
+	var currentSubParts : Int;
+	// Sum of ( lodIndexCount.length + 1 ) over the sub parts stored before the current one.
+	var currentMaterialOffset : Int;
+	// Two int32 per instance: the material offset, then the sub part index.
+	var instanceOffsetsCpu : haxe.io.Bytes;
+	// Gpu copy of instanceOffsetsCpu, uploaded in flush().
+	var instanceOffsetsGpu : h3d.Buffer;
+	// Per sub part infos ( lod count and half of bounds.dimension() ) built in flush().
+	var subPartsInfos : h3d.Buffer;
+	// Zero filled bytes uploaded to the count buffers before each compute dispatch.
+	var countBytes : haxe.io.Bytes;
+	// Set in flush() for the duration of super.flush(), read by onFlushPass(), then reset to 0.
+	var materialCount : Int;
+
+	// Set by enableGpuLod() / enableGpuCulling(); begin() throws if neither is set.
+	var gpuLodEnabled : Bool;
+	var gpuCullingEnabled : Bool;
+
+	/**
+	* If set to a value >= 0, clip all instances beyond this distance.
+	* A negative value (the default) disables distance clipping.
+	*/
+	public var maxDistance : Float = -1;
+
+	/**
+	 * Create a mesh batch whose per instance work is performed on the gpu.
+	 * Throws on js and on hldx without dx12; on every other target the
+	 * gpu update mode of the base MeshBatch is enabled right away.
+	 */
+	public function new(primitive, ?material, ?parent) {
+		super(primitive, material, parent);
+
+		#if (js || (hldx && !dx12))
+		throw "Not available on this platform";
+		#else
+		enableGpuUpdate();
+		#end
+	}
+
+	/**
+	 * Request lod selection at each frame on the gpu using a compute shader.
+	 * The request only takes effect when a primitive sub part is set or when
+	 * the primitive exposes more than one lod.
+	 * Returns true when gpu lod selection ends up enabled.
+	 */
+	public function enableGpuLod() {
+		var lodAvailable = primitiveSubPart != null;
+		if ( !lodAvailable )
+			lodAvailable = getPrimitive().lodCount() > 1;
+		gpuLodEnabled = lodAvailable;
+		return lodAvailable;
+	}
+
+	/**
+	 * Enable per instance frustum culling on the gpu using a compute shader.
+	 * Unlike enableGpuLod(), this is unconditional: the flag is always set.
+	 */
+	public function enableGpuCulling() {
+		gpuCullingEnabled = true;
+	}
+
+	// Number of lods handled by the compute shader: the primitive lod count
+	// when gpu lod selection is enabled, a single lod otherwise.
+	function getLodCount() {
+		if ( !gpuLodEnabled )
+			return 1;
+		return getPrimitive().lodCount();
+	}
+	// Always true for a gpu driven batch; the base class forwards this value
+	// to the batch shader as Batch_HasOffset.
+	override function hasOffset() return true;
+
+	/**
+	 * Start emitting instances. Throws when neither gpu lod nor gpu culling
+	 * has been enabled, since a regular MeshBatch should be used in that case.
+	 * When a primitive sub part is set, the cpu side per instance offsets
+	 * storage is created on first use (two int32 per expected instance).
+	 * Returns the emit count tip as returned by the base implementation.
+	 */
+	override function begin( emitCountTip = -1) {
+		if ( !( gpuLodEnabled || gpuCullingEnabled ) )
+			throw "No need to create a GPUMeshBatch without gpu lod nor gpu culling, create a regular MeshBatch instead";
+
+		var tip = super.begin(emitCountTip);
+
+		// At this point at least one of the gpu features is enabled, so only
+		// the sub part and the missing storage remain to be checked.
+		var needsOffsets = primitiveSubPart != null && instanceOffsetsCpu == null;
+		if ( needsOffsets )
+			instanceOffsetsCpu = haxe.io.Bytes.alloc(tip * 2 * 4);
+
+		return tip;
+	}
+
+	// Factory used by MeshBatch.initShadersMapping: each pass gets a GPUBatchData
+	// so that it can hold the compute pass and the command / count buffers.
+	override function createBatchData() {
+		return new GPUBatchData();
+	}
+
+	/**
+	 * Register the current primitive sub part for the instance being emitted.
+	 *
+	 * - Accumulates the transformed sub part bounds into the batch bounds when
+	 *   calcBounds is set, exactly like the base MeshBatch implementation
+	 *   (which is not called from here).
+	 * - Finds the sub part in emittedSubParts by its indexStart, appending a
+	 *   clone when it was never emitted before, and updates currentSubParts
+	 *   and currentMaterialOffset accordingly.
+	 * - Stores two int32 for the instance in instanceOffsetsCpu: the material
+	 *   offset, then the sub part index.
+	 */
+	override function emitPrimitiveSubPart() {
+		if ( calcBounds ) @:privateAccess {
+			instanced.tmpBounds.load(primitiveSubPart.bounds);
+			instanced.tmpBounds.transform(worldPosition == null ? absPos : worldPosition);
+			instanced.bounds.add(instanced.tmpBounds);
+		}
+
+		if ( emittedSubParts == null ) {
+			currentSubParts = 0;
+			currentMaterialOffset = 0;
+			emittedSubParts = [ primitiveSubPart.clone() ];
+		} else {
+			var currentIndexStart = emittedSubParts[currentSubParts].indexStart;
+			// Only search again when the sub part differs from the previous instance's one.
+			if ( currentIndexStart != primitiveSubPart.indexStart ) {
+				currentSubParts = -1;
+				currentIndexStart = primitiveSubPart.indexStart;
+				currentMaterialOffset = 0;
+				for ( i => part in emittedSubParts ) {
+					if ( part.indexStart == currentIndexStart ) {
+						currentSubParts = i;
+						break;
+					}
+					// Each sub part stored before the match takes one slot per lod plus one.
+					currentMaterialOffset += part.lodIndexCount.length + 1;
+				}
+				if ( currentSubParts < 0 ) {
+					currentSubParts = emittedSubParts.length;
+					emittedSubParts.push( primitiveSubPart.clone() );
+				}
+			}
+		}
+
+		// Two int32 per instance, including the one being emitted.
+		var requiredBytes = ( instanceCount + 1 ) * 2 * 4;
+		if ( instanceOffsetsCpu == null ) {
+			// The sub part was set after begin(): create the storage lazily.
+			instanceOffsetsCpu = haxe.io.Bytes.alloc(requiredBytes);
+		} else if ( instanceOffsetsCpu.length < requiredBytes ) {
+			// Grow by 1.5x but never below what is needed: a single 1.5x step
+			// is not enough for small (or empty) initial allocations.
+			var grownBytes = Std.int(instanceOffsetsCpu.length * 3 / 2);
+			if ( grownBytes < requiredBytes )
+				grownBytes = requiredBytes;
+			var next = haxe.io.Bytes.alloc(grownBytes);
+			next.blit(0, instanceOffsetsCpu, 0, instanceOffsetsCpu.length);
+			instanceOffsetsCpu = next;
+		}
+		instanceOffsetsCpu.setInt32((instanceCount * 2 + 0) * 4, currentMaterialOffset);
+		instanceOffsetsCpu.setInt32((instanceCount * 2 + 1) * 4, currentSubParts);
+	}
+
+	/**
+	 * Build or refresh the gpu side data needed by the compute shader, then
+	 * run the base flush.
+	 *
+	 * Three mutually exclusive setups are handled:
+	 * - sub parts were emitted: upload the per instance offsets and, if
+	 *   matInfos does not exist yet, build subPartsInfos and matInfos from
+	 *   emittedSubParts;
+	 * - no sub part and gpu lod enabled: build matInfos from the lods of the
+	 *   primitive (one entry per material and lod);
+	 * - no sub part and no gpu lod: build matInfos with one entry per material.
+	 *
+	 * materialCount is only meaningful during super.flush() and is reset to 0
+	 * afterwards.
+	 */
+	override function flush() {
+		var alloc = hxd.impl.Allocator.get();
+		var lodCount = getLodCount();
+		materialCount = materials.length;
+		var prim = getPrimitive();
+		// Null when the primitive is not an HMDModel.
+		var hmd = Std.downcast(prim, h3d.prim.HMDModel);
+
+		if ( emittedSubParts != null ) {
+			var upload = needUpload;
+			// Two elements per instance (material offset and sub part index).
+			var vertex = instanceCount * 2;
+			if ( instanceOffsetsGpu == null || instanceOffsetsGpu.isDisposed() || vertex > instanceOffsetsGpu.vertices ) {
+				if ( instanceOffsetsGpu != null)
+					alloc.disposeBuffer( instanceOffsetsGpu );
+				instanceOffsetsGpu = alloc.allocBuffer( vertex, INSTANCE_OFFSETS_FMT, UniformReadWrite );
+				// A freshly allocated buffer must always be filled.
+				upload = true;
+			}
+			if ( upload )
+				instanceOffsetsGpu.uploadBytes( instanceOffsetsCpu, 0, vertex );
+
+			// NOTE(review): matInfos / subPartsInfos are only built while matInfos is null, so a sub part
+			// emitted for the first time after they were built does not seem to be taken into account
+			// until cleanPasses() is called - confirm. On those later flushes materialCount also stays
+			// at materials.length instead of the sum computed below.
+			if ( matInfos == null ) {
+				materialCount = 0;
+				// Two floats per sub part: its lod count and half of bounds.dimension().
+				// NOTE(review): the data is uploaded with a four float stride; verify that an odd
+				// number of sub parts does not lose the trailing pair.
+				var tmpSubPartInfos = alloc.allocFloats( 2 * emittedSubParts.length );
+				var pos = 0;
+				for ( subPart in emittedSubParts ) {
+					var lodCount = subPart.lodIndexCount.length + 1;
+					tmpSubPartInfos[pos++] = lodCount;
+					tmpSubPartInfos[pos++] = subPart.bounds.dimension() * 0.5;
+					materialCount += lodCount;
+				}
+				subPartsInfos = alloc.ofFloats( tmpSubPartInfos, hxd.BufferFormat.VEC4_DATA, Uniform );
+				alloc.disposeFloats(tmpSubPartInfos);
+
+				var tmpMatInfos = alloc.allocFloats( 4 * ( materialCount + emittedSubParts.length ) );
+				pos = 0;
+				for ( subPart in emittedSubParts ) {
+					var maxLod = subPart.lodIndexCount.length;
+					var lodConfig = subPart.lodConfig;
+					// First entry of a sub part: index count, index start, lodConfig[0] and,
+					// when lodConfig has more values than there are extra lods, its last value.
+					tmpMatInfos[pos++] = subPart.indexCount;
+					tmpMatInfos[pos++] = subPart.indexStart;
+					tmpMatInfos[pos++] = ( 0 < lodConfig.length ) ? lodConfig[0] : 0.0;
+					tmpMatInfos[pos++] = ( maxLod < lodConfig.length && maxLod > 0 ) ? lodConfig[lodConfig.length - 1] : 0.0;
+					// Then one entry per extra lod; the fourth float of each entry is skipped.
+					for ( i in 0...maxLod ) {
+						tmpMatInfos[pos++] = subPart.lodIndexCount[i];
+						tmpMatInfos[pos++] = subPart.lodIndexStart[i];
+						tmpMatInfos[pos++] = ( i + 1 < lodConfig.length ) ? lodConfig[i + 1] : 0.0;
+						pos++;
+					}
+				}
+
+				matInfos = alloc.ofFloats( tmpMatInfos, hxd.BufferFormat.VEC4_DATA, Uniform );
+				alloc.disposeFloats(tmpMatInfos);
+			}
+		} else if ( matInfos == null ) {
+			if ( gpuLodEnabled ) {
+				// NOTE(review): hmd is dereferenced without a null check; this assumes that a
+				// primitive reporting more than one lod is always an HMDModel - confirm.
+				var tmpMatInfos = alloc.allocFloats( 4 * materialCount * lodCount );
+				matInfos = alloc.allocBuffer( materialCount * lodCount, hxd.BufferFormat.VEC4_DATA, Uniform );
+				var lodConfig = hmd.getLodConfig();
+				var startIndex : Int = 0;
+				// A lodConfig with at least as many values as there are lods carries an extra
+				// trailing value, stored in the fourth float of every entry.
+				var lodConfigHasCulling = lodConfig.length > lodCount - 1;
+				var minScreenRatioCulling = lodConfigHasCulling ? lodConfig[lodConfig.length-1] : 0.0;
+				for ( i => lod in @:privateAccess hmd.lods ) {
+					for ( j in 0...materialCount ) {
+						var indexCount = lod.indexCounts[j];
+						// Entries are grouped by material: all the lods of material j are contiguous.
+						var matIndex = i + j * lodCount;
+						tmpMatInfos[matIndex * 4 + 0] = indexCount;
+						tmpMatInfos[matIndex * 4 + 1] = startIndex;
+						tmpMatInfos[matIndex * 4 + 2] = ( i < lodConfig.length ) ? lodConfig[i] : 0.0;
+						tmpMatInfos[matIndex * 4 + 3] = minScreenRatioCulling;
+						// Index ranges are laid out in iteration order: lod by lod, material by material.
+						startIndex += indexCount;
+					}
+				}
+				matInfos.uploadFloats( tmpMatInfos, 0, materialCount * lodCount );
+				alloc.disposeFloats( tmpMatInfos );
+			} else {
+				var tmpMatInfos = alloc.allocFloats( 4 * materialCount );
+				matInfos = alloc.allocBuffer( materialCount, hxd.BufferFormat.VEC4_DATA, Uniform );
+				var pos : Int = 0;
+				for ( i in 0...materials.length ) {
+					// Only index count and index start are written; the last two floats are skipped.
+					tmpMatInfos[pos++] = prim.getMaterialIndexCount(i);
+					tmpMatInfos[pos++] = prim.getMaterialIndexStart(i);
+					pos += 2;
+				}
+				matInfos.uploadFloats( tmpMatInfos, 0, materialCount );
+				alloc.disposeFloats( tmpMatInfos );
+			}
+		}
+
+		super.flush();
+
+		materialCount = 0;
+	}
+
+	/**
+	 * Make sure the command buffer (and its count buffer) of the given buffer
+	 * index exists and can hold enough commands for `count` instances.
+	 * The capacity is the next power of two of `count`, capped to the pass
+	 * maxInstance. An existing command buffer that is too small is replaced;
+	 * the count buffer is created once and kept.
+	 */
+	override function onFlushBuffer(p : BatchData, index : Int, count : Int) {
+		var data = cast(p, GPUBatchData);
+		var allocator = hxd.impl.Allocator.get();
+		var capacity = hxd.Math.imin( hxd.Math.nextPOT( count ), data.maxInstance );
+
+		if ( data.commandBuffers == null ) {
+			data.commandBuffers = [];
+			data.countBuffers = [];
+		}
+
+		var commands = data.commandBuffers[index];
+		if ( commands == null ) {
+			// First flush for this buffer index: create both buffers.
+			var created = allocator.allocBuffer( capacity, INDIRECT_DRAW_ARGUMENTS_FMT, UniformReadWrite );
+			var counter = allocator.allocBuffer( 1, hxd.BufferFormat.VEC4_DATA, UniformReadWrite );
+			data.commandBuffers[index] = created;
+			data.countBuffers[index] = counter;
+			return;
+		}
+
+		if ( commands.vertices >= capacity )
+			return;
+
+		// Too small: replace the command buffer only.
+		allocator.disposeBuffer( commands );
+		data.commandBuffers[index] = allocator.allocBuffer( capacity, INDIRECT_DRAW_ARGUMENTS_FMT, UniformReadWrite );
+	}
+
+	/**
+	 * Create the compute pass of the given batch data on first use, then
+	 * refresh every parameter of its InstanceIndirect shader that does not
+	 * depend on the buffer being dispatched (those are set in emitPass).
+	 * The MAX_* constants are rounded up to the next multiple of 16, with a
+	 * minimum of 16.
+	 */
+	override function onFlushPass(p : BatchData) {
+		var data = cast(p, GPUBatchData);
+		var primitive = getPrimitive();
+		var lods = getLodCount();
+
+		var indirect : h3d.shader.InstanceIndirect;
+		if ( data.computePass != null )
+			indirect = data.computePass.getShader(h3d.shader.InstanceIndirect);
+		else {
+			indirect = new h3d.shader.InstanceIndirect();
+			var pass = new h3d.mat.Pass("batchUpdate");
+			pass.addShader(indirect);
+			// Let subclasses append their own compute shaders.
+			addComputeShaders(pass);
+			data.computePass = pass;
+		}
+
+		indirect.ENABLE_LOD = gpuLodEnabled;
+		indirect.ENABLE_CULLING = gpuCullingEnabled;
+		indirect.ENABLE_DISTANCE_CLIPPING = maxDistance >= 0;
+		indirect.radius = primitive.getBounds().dimension() * 0.5;
+		indirect.maxDistance = maxDistance;
+		indirect.matInfos = matInfos;
+		indirect.lodCount = lods;
+		indirect.materialCount = materialCount;
+
+		var maxMaterials = 16;
+		while ( materialCount * lods > maxMaterials )
+			maxMaterials += 16;
+		indirect.MAX_MATERIAL_COUNT = maxMaterials;
+
+		if ( emittedSubParts == null )
+			return;
+
+		indirect.USING_SUB_PART = true;
+		indirect.subPartCount = emittedSubParts.length;
+		indirect.subPartInfos = subPartsInfos;
+		indirect.instanceOffsets = instanceOffsetsGpu;
+
+		// Two sub parts are accounted for per buffer element.
+		var neededElements = hxd.Math.ceil( emittedSubParts.length / 2 );
+		var maxElements = 16;
+		while ( neededElements > maxElements )
+			maxElements += 16;
+		indirect.MAX_SUB_PART_BUFFER_ELEMENT_COUNT = maxElements;
+	}
+
+	// Extension point, empty by default: called once by onFlushPass right after
+	// the "batchUpdate" compute pass has been created and given its
+	// InstanceIndirect shader, so that subclasses can add their own shaders to it.
+	function addComputeShaders( pass : h3d.mat.Pass ) {}
+
+	/**
+	 * Emit one render pass per instance buffer and, when command buffers
+	 * exist, dispatch the compute shader that fills the matching command
+	 * buffer. The count buffer of each dispatched buffer is reset with zeroed
+	 * bytes beforehand.
+	 */
+	override function emitPass(ctx : RenderContext, p : BatchData) {
+		var data = cast(p, GPUBatchData);
+		var dispatched = 0;
+		for ( bufferIndex in 0...data.buffers.length ) {
+			var instanceData = data.buffers[bufferIndex];
+			// Low 16 bits: buffer index, upper bits: material index.
+			ctx.emitPass(data.pass, this).index = bufferIndex | (data.matIndex << 16);
+
+			if ( data.commandBuffers == null || data.commandBuffers.length == 0 )
+				continue;
+
+			// Instances left for this buffer, capped to what a buffer can hold.
+			var remaining = instanceCount - data.maxInstance * bufferIndex;
+			var dispatchCount = hxd.Math.imin( remaining, data.maxInstance );
+
+			var indirect = data.computePass.getShader(h3d.shader.InstanceIndirect);
+			if ( gpuCullingEnabled )
+				indirect.frustum = ctx.getCameraFrustumBuffer();
+			indirect.instanceData = instanceData;
+			indirect.matIndex = data.matIndex;
+			indirect.commandBuffer = data.commandBuffers[bufferIndex];
+
+			if ( countBytes == null ) {
+				countBytes = haxe.io.Bytes.alloc(4*4);
+				countBytes.setInt32(0, 0);
+			}
+			var counter = data.countBuffers[bufferIndex];
+			counter.uploadBytes(countBytes, 0, 1);
+			indirect.countBuffer = counter;
+
+			indirect.startInstanceOffset = dispatched;
+			indirect.ENABLE_COUNT_BUFFER = isCountBufferAllowed();
+			ctx.computeList(@:privateAccess data.computePass.shaders);
+			ctx.computeDispatch(dispatchCount);
+			dispatched += dispatchCount;
+		}
+	}
+
+	/**
+	 * After the base setup, point the instanced commands at the command and
+	 * count buffers of the given buffer index, when command buffers exist.
+	 */
+	override function setPassCommand(p : BatchData, bufferIndex : Int) {
+		super.setPassCommand(p, bufferIndex);
+
+		var data = cast(p, GPUBatchData);
+		var commands = data.commandBuffers;
+		if ( commands == null || commands.length == 0 )
+			return;
+
+		@:privateAccess {
+			instanced.commands.data = commands[bufferIndex].vbuf;
+			instanced.commands.countBuffer = data.countBuffers[bufferIndex].vbuf;
+		}
+	}
+
+	// Tells whether the compute shader may use the count buffer (value given to
+	// ENABLE_COUNT_BUFFER in emitPass). On hlsdl this is what the GL driver
+	// reports through hasMultiIndirectCount; on every other target it is
+	// always allowed.
+	inline function isCountBufferAllowed() {
+		#if hlsdl
+		return h3d.impl.GlDriver.hasMultiIndirectCount;
+		#else
+		return true;
+		#end
+	}
+
+	/**
+	 * Release everything owned by this batch on top of what the base class
+	 * cleans: the material infos, sub part infos and instance offsets gpu
+	 * buffers, plus the cpu side offsets, the emitted sub parts list and the
+	 * count reset bytes.
+	 *
+	 * Every disposed buffer reference is cleared so that calling this method
+	 * again, or flushing afterwards, never disposes the same buffer twice.
+	 */
+	override function cleanPasses() {
+		super.cleanPasses();
+
+		var alloc = hxd.impl.Allocator.get();
+		if ( matInfos != null ) {
+			alloc.disposeBuffer(matInfos);
+			matInfos = null;
+		}
+
+		if ( subPartsInfos != null ) {
+			alloc.disposeBuffer(subPartsInfos);
+			subPartsInfos = null;
+		}
+
+		if ( instanceOffsetsGpu != null ) {
+			alloc.disposeBuffer(instanceOffsetsGpu);
+			instanceOffsetsGpu = null;
+		}
+		instanceOffsetsCpu = null;
+
+		emittedSubParts = null;
+		countBytes = null;
+	}
+}
+
+/**
+ * Per pass batch data of a GPUMeshBatch: on top of the base BatchData it
+ * holds the compute pass and, for each instance buffer, the command buffer
+ * and the count buffer written by that compute pass.
+ */
+class GPUBatchData extends BatchData {
+	public var computePass : h3d.mat.Pass;
+	public var commandBuffers : Array<h3d.Buffer>;
+	public var countBuffers : Array<h3d.Buffer>;
+
+	/**
+	 * Run the base clean up, then dispose every command and count buffer and
+	 * empty both arrays (the arrays themselves are kept).
+	 */
+	override function clean() {
+		super.clean();
+
+		var allocator = hxd.impl.Allocator.get();
+		if ( commandBuffers == null || commandBuffers.length == 0 )
+			return;
+
+		for ( commands in commandBuffers )
+			allocator.disposeBuffer(commands);
+		commandBuffers.resize(0);
+
+		for ( counter in countBuffers )
+			allocator.disposeBuffer(counter);
+		countBuffers.resize(0);
+	}
+}

+ 215 - 433
h3d/scene/MeshBatch.hx

@@ -1,56 +1,6 @@
 package h3d.scene;
-class BatchData {
-
-	public var paramsCount : Int;
-	public var maxInstance : Int;
-	public var matIndex : Int;
-	public var indexCount : Int;
-	public var indexStart : Int;
-	public var instanceBuffers : Array<h3d.impl.InstanceBuffer>;
-	public var buffers : Array<h3d.Buffer> = [];
-	public var bufferFormat : hxd.BufferFormat;
-	public var data : hxd.FloatBuffer;
-	public var params : hxsl.RuntimeShader.AllocParam;
-	public var shader : hxsl.BatchShader;
-	public var shaders : Array<hxsl.Shader>;
-	public var pass : h3d.mat.Pass;
-	public var computePass : h3d.mat.Pass;
-	public var commandBuffers : Array<h3d.Buffer>;
-	public var countBuffers : Array<h3d.Buffer>;
-	public var next : BatchData;
-
-	public function new() {
-	}
-
-}
-
-class MeshBatchPart {
-	public var indexStart : Int;
-	public var indexCount : Int;
-	public var lodIndexStart : Array<Int>;
-	public var lodIndexCount : Array<Int>;
-	public var lodConfig : Array<Float>;
-	public var baseVertex : Int;
-	public var bounds : h3d.col.Bounds;
-	public function new() {
-	}
-
-	public function clone() {
-		var cl = new MeshBatchPart();
-		cl.indexStart = indexStart;
-		cl.indexCount = indexCount;
-		cl.lodIndexStart = lodIndexStart;
-		cl.lodIndexCount = lodIndexCount;
-		cl.lodConfig = lodConfig;
-		cl.baseVertex = baseVertex;
-		cl.bounds = bounds;
-		return cl;
-	}
-}
 
 enum MeshBatchFlag {
-	EnableGpuCulling;
-	EnableLod;
 	EnableResizeDown;
 	EnableGpuUpdate;
 	EnableStorageBuffer;
@@ -73,19 +23,7 @@ class MeshBatch extends MultiMaterial {
 	var dataPasses : BatchData;
 	var needUpload = false;
 	var instancedParams : hxsl.Cache.BatchInstanceParams;
-
-	// used if gpu lod or gpu culling
-	static var INDIRECT_DRAW_ARGUMENTS_FMT = hxd.BufferFormat.make([{ name : "", type : DVec4 }, { name : "", type : DFloat }]);
-	static var INSTANCE_OFFSETS_FMT = hxd.BufferFormat.make([{ name : "", type : DFloat }]);
-	
-	var matInfos : h3d.Buffer;
-	var emittedSubParts : Array<MeshBatchPart>;
-	var currentSubParts : Int;
-	var currentMaterialOffset : Int;
-	var instanceOffsetsCpu : haxe.io.Bytes;
-	var instanceOffsetsGpu : h3d.Buffer;
-	var subPartsInfos : h3d.Buffer;
-	var countBytes : haxe.io.Bytes;
+	var meshBatchFlags(default, null) : haxe.EnumFlags<MeshBatchFlag>;
 
 	/**
 		Set if shader list or shader constants has changed, before calling begin()
@@ -119,13 +57,6 @@ class MeshBatch extends MultiMaterial {
 	 */
 	public var lodDistance : Float;
 
-	/**
-	 * If set, and gpu update is enabled, clip all instanced behind this distance.
-	 */
-	public var maxDistance : Float = -1;
-
-	public var meshBatchFlags(default, null) : haxe.EnumFlags<MeshBatchFlag>;
-
 	public function new( primitive, ?material, ?parent ) {
 		instanced = new h3d.prim.Instanced();
 		instanced.commands = new h3d.impl.InstanceBuffer();
@@ -135,28 +66,30 @@ class MeshBatch extends MultiMaterial {
 			@:privateAccess p.batchMode = true;
 	}
 
-	function gpuLodEnabled() return meshBatchFlags.has(EnableLod);
-	function gpuCullingEnabled() return meshBatchFlags.has(EnableGpuCulling);
+	/**
+	 * Buffer of per instance params such as position is created as a storage buffer
+	 * allowing for huge amount of instances.
+	 */
+	public function enableStorageBuffer() {
+		meshBatchFlags.set(EnableStorageBuffer);
+	}
+
+	/**
+	 * Buffer of per instance params such as position is created with its own format
+	 * allowing compute shaders to update those parameters.
+	 */
+	public function enableGpuUpdate() {
+		meshBatchFlags.set(EnableGpuUpdate);
+		meshBatchFlags.set(EnableStorageBuffer);
+	}
+
+	function hasOffset() return primitiveSubPart != null;
+	function getPrimitive() return @:privateAccess instanced.primitive;
 	function storageBufferEnabled() return meshBatchFlags.has(EnableStorageBuffer);
-	function mustCalcBufferFormat() return meshBatchFlags.has(EnableGpuUpdate) || gpuCullingEnabled() || gpuLodEnabled();
-
-	public function begin( emitCountTip = -1, ?flags : haxe.EnumFlags<MeshBatchFlag> ) {
-		if ( flags != null ) {
-			#if (!js && !(hldx && !dx12))
-			var allowedLOD = flags.has(EnableLod) && ( primitiveSubPart != null || @:privateAccess instanced.primitive.lodCount() > 1 );
-			flags.setTo(EnableLod, allowedLOD);
-			#else
-			flags.setTo(EnableLod, false);
-			flags.setTo(EnableGpuCulling, false);
-			#end
-			// Set flags non-related to shaders
-			meshBatchFlags.setTo( EnableResizeDown, flags.has(EnableResizeDown) );
-			if ( meshBatchFlags != flags )
-				shadersChanged = true;
-			meshBatchFlags = flags;
-			meshBatchFlags.setTo( EnableStorageBuffer, mustCalcBufferFormat() || storageBufferEnabled() );
-		}
+	function gpuUpdateEnabled() return meshBatchFlags.has(EnableGpuUpdate);
+	function getMaxElements() return storageBufferEnabled() ? MAX_STORAGE_BUFFER_ELEMENTS : MAX_BUFFER_ELEMENTS;
 
+	public function begin( emitCountTip = -1 ) : Int {
 		instanceCount = 0;
 		instanced.initBounds();
 		if( shadersChanged ) {
@@ -176,10 +109,8 @@ class MeshBatch extends MultiMaterial {
 			}
 			p = p.next;
 		}
-		if ( primitiveSubPart != null && ( gpuCullingEnabled() || gpuLodEnabled() ) && instanceOffsetsCpu == null ) {
-			var size = emitCountTip * 2 * 4;
-			instanceOffsetsCpu = haxe.io.Bytes.alloc(size);
-		}
+
+		return emitCountTip;
 	}
 
 	function initShadersMapping() {
@@ -189,8 +120,9 @@ class MeshBatch extends MultiMaterial {
 		for( index in 0...materials.length ) {
 			var mat = materials[index];
 			if( mat == null ) continue;
-			var matCount = @:privateAccess instanced.primitive.getMaterialIndexCount(index);
-			var matStart = @:privateAccess instanced.primitive.getMaterialIndexStart(index);
+			var prim = getPrimitive();
+			var matCount = prim.getMaterialIndexCount(index);
+			var matStart = prim.getMaterialIndexStart(index);
 			for( p in mat.getPasses() ) @:privateAccess {
 				var ctx = scene.renderer.getPassByName(p.name);
 				if( ctx == null ) throw "Could't find renderer pass "+p.name;
@@ -200,11 +132,11 @@ class MeshBatch extends MultiMaterial {
 				var rt = output.compileShaders(scene.ctx.globals, shaders, Default);
 				var shader = output.shaderCache.makeBatchShader(rt, shaders, instancedParams);
 
-				var b = new BatchData();
+				var b = createBatchData();
 				b.indexCount = matCount;
 				b.indexStart = matStart;
 				b.paramsCount = shader.paramsSize;
-				b.maxInstance = Std.int( ( storageBufferEnabled() ? MAX_STORAGE_BUFFER_ELEMENTS : MAX_BUFFER_ELEMENTS ) / b.paramsCount);
+				b.maxInstance = Std.int( getMaxElements() / b.paramsCount);
 				b.bufferFormat = hxd.BufferFormat.VEC4_DATA;
 				if( b.maxInstance <= 0 )
 					throw "Mesh batch shaders needs at least one perInstance parameter";
@@ -216,56 +148,8 @@ class MeshBatch extends MultiMaterial {
 				p.dynamicParameters = true;
 				p.batchMode = true;
 
-				if( mustCalcBufferFormat() ) {
-					var pl = [];
-					var p = b.params;
-					while( p != null ) {
-						pl.push(p);
-						p = p.next;
-					}
-					pl.sort(function(p1,p2) return p1.pos - p2.pos);
-					var fmt : Array<hxd.BufferFormat.BufferInput> = [];
-					var curPos = 0;
-					var paddingIndex = 0;
-					for( p in pl ) {
-						var paddingSize = p.pos - curPos;
-						if ( paddingSize > 0 ) {
-							var paddingType : hxsl.Ast.Type = switch ( paddingSize ) {
-							case 0:
-								TFloat;
-							case 1,2,3:
-								TVec(paddingSize, VFloat);
-							default:
-								throw "Buffer has padding";
-							}
-							var t = hxd.BufferFormat.InputFormat.fromHXSL(paddingType);
-							fmt.push(new hxd.BufferFormat.BufferInput("padding_"+paddingIndex,t));
-							paddingIndex++;
-							curPos = p.pos;
-						}
-						var name = p.name;
-						var prev = fmt.length;
-						switch( p.type ) {
-						case TMat3:
-							for( i in 0...3 )
-								fmt.push(new hxd.BufferFormat.BufferInput(name+"__m"+i,DVec3));
-						case TMat3x4:
-							for( i in 0...3 )
-								fmt.push(new hxd.BufferFormat.BufferInput(name+"__m"+i,DVec4));
-						case TMat4:
-							for( i in 0...4 )
-								fmt.push(new hxd.BufferFormat.BufferInput(name+"__m"+i,DVec4));
-						default:
-							var t = hxd.BufferFormat.InputFormat.fromHXSL(p.type);
-							fmt.push(new hxd.BufferFormat.BufferInput(p.name,t));
-						}
-						for( i in prev...fmt.length )
-							curPos += fmt[i].getBytesSize() >> 2;
-					}
-					if ( curPos & 3 != 0)
-						throw "Buffer has padding";
-					b.bufferFormat = hxd.BufferFormat.make(fmt);
-				}
+				if ( gpuUpdateEnabled() )
+					calcBufferFormat(b);
 
 				b.next = dataPasses;
 				dataPasses = b;
@@ -277,7 +161,7 @@ class MeshBatch extends MultiMaterial {
 				}
 				shader.Batch_UseStorage = storageBufferEnabled();
 				shader.Batch_Count = storageBufferEnabled() ? 0 : b.maxInstance * b.paramsCount;
-				shader.Batch_HasOffset = primitiveSubPart != null || gpuLodEnabled() || gpuCullingEnabled();
+				shader.Batch_HasOffset = hasOffset();
 				shader.constBits = (shader.Batch_Count << 2) | (shader.Batch_UseStorage ? ( 1 << 1 ) : 0) | (shader.Batch_HasOffset ? 1 : 0);
 				shader.updateConstants(null);
 			}
@@ -291,66 +175,68 @@ class MeshBatch extends MultiMaterial {
 		}
 	}
 
-	public function emitInstance() {
-		if( worldPosition == null ) syncPos();
-		var ps = primitiveSubPart;
-		if( ps != null ) @:privateAccess {
-			if(calcBounds) {
-				instanced.tmpBounds.load(primitiveSubPart.bounds);
-				instanced.tmpBounds.transform(worldPosition == null ? absPos : worldPosition);
-				instanced.bounds.add(instanced.tmpBounds);
-			}
-			if ( gpuLodEnabled() || gpuCullingEnabled() ) {
-				if (emittedSubParts == null) {
-					currentSubParts = 0;
-					currentMaterialOffset = 0;
-					emittedSubParts = [ primitiveSubPart.clone() ];
-				} else {
-					var currentIndexStart = emittedSubParts[currentSubParts].indexStart;
-					if ( currentIndexStart != primitiveSubPart.indexStart  ) {
-						currentSubParts = -1;
-						currentIndexStart = primitiveSubPart.indexStart;
-						currentMaterialOffset = 0;
-						for ( i => part in emittedSubParts ) {
-							if ( part.indexStart == currentIndexStart ) {
-								currentSubParts = i;
-								break;
-							}
-							currentMaterialOffset += part.lodIndexCount.length + 1;
-						}
-						if ( currentSubParts < 0 ) {
-							currentSubParts = emittedSubParts.length;
-							emittedSubParts.push( primitiveSubPart.clone() );
-						}
-					}
-				}
-				var maxInstanceID = ( instanceCount + 1 ) * 2;
-				if ( instanceOffsetsCpu.length < maxInstanceID * 4 ) {
-					var next = haxe.io.Bytes.alloc(Std.int(instanceOffsetsCpu.length*3/2));
-					next.blit(0, instanceOffsetsCpu, 0, instanceOffsetsCpu.length);
-					instanceOffsetsCpu = next;
-				}
-				instanceOffsetsCpu.setInt32((instanceCount * 2 + 0) * 4, currentMaterialOffset);
-				instanceOffsetsCpu.setInt32((instanceCount * 2 + 1) * 4, currentSubParts);
-			} else {
-				if( primitiveSubBytes == null ) {
-					primitiveSubBytes = haxe.io.Bytes.alloc(128);
-					instanced.commands = null;
-				}
-				if( primitiveSubBytes.length < (instanceCount+1) * 20 ) {
-					var next = haxe.io.Bytes.alloc(Std.int(primitiveSubBytes.length*3/2));
-					next.blit(0, primitiveSubBytes, 0, instanceCount * 20);
-					primitiveSubBytes = next;
+	function createBatchData() {
+		return new BatchData();
+	}
+
+	function calcBufferFormat(b : BatchData) {
+		var pl = [];
+		var p = b.params;
+		while( p != null ) {
+			pl.push(p);
+			p = p.next;
+		}
+		pl.sort(function(p1,p2) return p1.pos - p2.pos);
+		var fmt : Array<hxd.BufferFormat.BufferInput> = [];
+		var curPos = 0;
+		var paddingIndex = 0;
+		for( p in pl ) {
+			var paddingSize = p.pos - curPos;
+			if ( paddingSize > 0 ) {
+				var paddingType : hxsl.Ast.Type = switch ( paddingSize ) {
+				case 0:
+					TFloat;
+				case 1,2,3:
+					TVec(paddingSize, VFloat);
+				default:
+					throw "Buffer has padding";
 				}
-				var p = instanceCount * 20;
-				primitiveSubBytes.setInt32(p, ps.indexCount);
-				primitiveSubBytes.setInt32(p + 4, 1);
-				primitiveSubBytes.setInt32(p + 8, ps.indexStart);
-				primitiveSubBytes.setInt32(p + 12, ps.baseVertex);
-				primitiveSubBytes.setInt32(p + 16, 0);
+				var t = hxd.BufferFormat.InputFormat.fromHXSL(paddingType);
+				fmt.push(new hxd.BufferFormat.BufferInput("padding_"+paddingIndex,t));
+				paddingIndex++;
+				curPos = p.pos;
+			}
+			var name = p.name;
+			var prev = fmt.length;
+			switch( p.type ) {
+			case TMat3:
+				for( i in 0...3 )
+					fmt.push(new hxd.BufferFormat.BufferInput(name+"__m"+i,DVec3));
+			case TMat3x4:
+				for( i in 0...3 )
+					fmt.push(new hxd.BufferFormat.BufferInput(name+"__m"+i,DVec4));
+			case TMat4:
+				for( i in 0...4 )
+					fmt.push(new hxd.BufferFormat.BufferInput(name+"__m"+i,DVec4));
+			default:
+				var t = hxd.BufferFormat.InputFormat.fromHXSL(p.type);
+				fmt.push(new hxd.BufferFormat.BufferInput(p.name,t));
 			}
-		} else if (calcBounds)
+			for( i in prev...fmt.length )
+				curPos += fmt[i].getBytesSize() >> 2;
+		}
+		if ( curPos & 3 != 0)
+			throw "Buffer has padding";
+		b.bufferFormat = hxd.BufferFormat.make(fmt);
+	}
+
+	public function emitInstance() {
+		if( worldPosition == null ) syncPos();
+		if( primitiveSubPart != null )
+			emitPrimitiveSubPart();
+		else if (calcBounds)
 			instanced.addInstanceBounds(worldPosition == null ? absPos : worldPosition);
+
 		var p = dataPasses;
 		while( p != null ) {
 			syncData(p);
@@ -359,6 +245,30 @@ class MeshBatch extends MultiMaterial {
 		instanceCount++;
 	}
 
+	function emitPrimitiveSubPart() {
+		if(calcBounds) @:privateAccess {
+			instanced.tmpBounds.load(primitiveSubPart.bounds);
+			instanced.tmpBounds.transform(worldPosition == null ? absPos : worldPosition);
+			instanced.bounds.add(instanced.tmpBounds);
+		}
+
+		if( primitiveSubBytes == null ) {
+			primitiveSubBytes = haxe.io.Bytes.alloc(128);
+			instanced.commands = null;
+		}
+		if( primitiveSubBytes.length < (instanceCount+1) * 20 ) {
+			var next = haxe.io.Bytes.alloc(Std.int(primitiveSubBytes.length*3/2));
+			next.blit(0, primitiveSubBytes, 0, instanceCount * 20);
+			primitiveSubBytes = next;
+		}
+		var p = instanceCount * 20;
+		primitiveSubBytes.setInt32(p, primitiveSubPart.indexCount);
+		primitiveSubBytes.setInt32(p + 4, 1);
+		primitiveSubBytes.setInt32(p + 8, primitiveSubPart.indexStart);
+		primitiveSubBytes.setInt32(p + 12, primitiveSubPart.baseVertex);
+		primitiveSubBytes.setInt32(p + 16, 0);
+	}
+
 	override function sync(ctx:RenderContext) {
 		super.sync(ctx);
 		if( instanceCount == 0 ) return;
@@ -368,94 +278,8 @@ class MeshBatch extends MultiMaterial {
 	public function flush() {
 		var p = dataPasses;
 		var alloc = hxd.impl.Allocator.get();
-		var psBytes = primitiveSubBytes;
-
-		var prim = @:privateAccess instanced.primitive;
-		var hmd = Std.downcast(prim, h3d.prim.HMDModel);
-		var materialCount = materials.length;
-		var lodCount = ( gpuLodEnabled() ) ? prim.lodCount() : 1;
-
-		if ( gpuLodEnabled() || gpuCullingEnabled() ) {
-			if ( emittedSubParts != null ) {
-				var upload = needUpload;
-				var vertex = instanceCount * 2;
-				if ( instanceOffsetsGpu == null || instanceOffsetsGpu.isDisposed() || vertex > instanceOffsetsGpu.vertices ) {
-					if ( instanceOffsetsGpu != null)
-						alloc.disposeBuffer( instanceOffsetsGpu );
-					instanceOffsetsGpu = alloc.allocBuffer( vertex, INSTANCE_OFFSETS_FMT, UniformReadWrite );
-					upload = true;
-				}
-				if ( upload )
-					instanceOffsetsGpu.uploadBytes( instanceOffsetsCpu, 0, vertex );
-
-				if ( matInfos == null ) {
-					materialCount = 0;
-					var tmpSubPartInfos = alloc.allocFloats( 2 * emittedSubParts.length );
-					var pos = 0;
-					for ( subPart in emittedSubParts ) {
-						var lodCount = subPart.lodIndexCount.length + 1;
-						tmpSubPartInfos[pos++] = lodCount;
-						tmpSubPartInfos[pos++] = subPart.bounds.dimension() * 0.5;
-						materialCount += lodCount;
-					}
-					subPartsInfos = alloc.ofFloats( tmpSubPartInfos, hxd.BufferFormat.VEC4_DATA, Uniform );
-					alloc.disposeFloats(tmpSubPartInfos);
-
-					var tmpMatInfos = alloc.allocFloats( 4 * ( materialCount + emittedSubParts.length ) );
-					pos = 0;
-					for ( subPart in emittedSubParts ) {
-						var maxLod = subPart.lodIndexCount.length;
-						var lodConfig = subPart.lodConfig;
-						tmpMatInfos[pos++] = subPart.indexCount;
-						tmpMatInfos[pos++] = subPart.indexStart;
-						tmpMatInfos[pos++] = ( 0 < lodConfig.length ) ? lodConfig[0] : 0.0;
-						tmpMatInfos[pos++] = ( maxLod < lodConfig.length && maxLod > 0 ) ? lodConfig[lodConfig.length - 1] : 0.0;
-						for ( i in 0...maxLod ) {
-							tmpMatInfos[pos++] = subPart.lodIndexCount[i];
-							tmpMatInfos[pos++] = subPart.lodIndexStart[i];
-							tmpMatInfos[pos++] = ( i + 1 < lodConfig.length ) ? lodConfig[i + 1] : 0.0;
-							pos++;
-						}
-					}
 
-					matInfos = alloc.ofFloats( tmpMatInfos, hxd.BufferFormat.VEC4_DATA, Uniform );
-					alloc.disposeFloats(tmpMatInfos);
-				}
-			} else if ( matInfos == null ) {
-				if ( gpuLodEnabled() ) {
-					var tmpMatInfos = alloc.allocFloats( 4 * materialCount * lodCount );
-					matInfos = alloc.allocBuffer( materialCount * lodCount, hxd.BufferFormat.VEC4_DATA, Uniform );
-					var lodConfig = hmd.getLodConfig();
-					var startIndex : Int = 0;
-					var lodConfigHasCulling = lodConfig.length > lodCount - 1;
-					var minScreenRatioCulling = lodConfigHasCulling ? lodConfig[lodConfig.length-1] : 0.0;
-					for ( i => lod in @:privateAccess hmd.lods ) {
-						for ( j in 0...materialCount ) {
-							var indexCount = lod.indexCounts[j];
-							var matIndex = i + j * lodCount;
-							tmpMatInfos[matIndex * 4 + 0] = indexCount;
-							tmpMatInfos[matIndex * 4 + 1] = startIndex;
-							tmpMatInfos[matIndex * 4 + 2] = ( i < lodConfig.length ) ? lodConfig[i] : 0.0;
-							tmpMatInfos[matIndex * 4 + 3] = minScreenRatioCulling;
-							startIndex += indexCount;
-						}
-					}
-					matInfos.uploadFloats( tmpMatInfos, 0, materialCount * lodCount );
-					alloc.disposeFloats( tmpMatInfos );
-				} else {
-					var tmpMatInfos = alloc.allocFloats( 4 * materialCount );
-					matInfos = alloc.allocBuffer( materialCount, hxd.BufferFormat.VEC4_DATA, Uniform );
-					var pos : Int = 0;
-					for ( i in 0...materials.length ) {
-						tmpMatInfos[pos++] = prim.getMaterialIndexCount(i);
-						tmpMatInfos[pos++] = prim.getMaterialIndexStart(i);
-						pos += 2;
-					}
-					matInfos.uploadFloats( tmpMatInfos, 0, materialCount );
-					alloc.disposeFloats( tmpMatInfos );
-				}
-			}
-		}
+		var prim = getPrimitive();
 
 		while( p != null ) {
 			var index = 0;
@@ -467,7 +291,7 @@ class MeshBatch extends MultiMaterial {
 				if( count > p.maxInstance )
 					count = p.maxInstance;
 
-				var maxVertexCount = ( mustCalcBufferFormat() ) ? p.maxInstance : ( storageBufferEnabled() ? MAX_STORAGE_BUFFER_ELEMENTS : MAX_BUFFER_ELEMENTS );
+				var maxVertexCount = gpuUpdateEnabled() ? p.maxInstance : getMaxElements();
 				var vertexCount = Std.int( count * (( 4 * p.paramsCount ) / p.bufferFormat.stride) );
 				var vertexCountAllocated = #if js Std.int( MAX_BUFFER_ELEMENTS * 4 / p.bufferFormat.stride ) #else hxd.Math.imin( hxd.Math.nextPOT( vertexCount ), maxVertexCount ) #end;
 
@@ -481,7 +305,7 @@ class MeshBatch extends MultiMaterial {
 				}
 				if( upload )
 					buf.uploadFloats(p.data, start * p.paramsCount * 4, vertexCount);
-				if( psBytes != null ) {
+				if( primitiveSubBytes != null ) {
 					if( p.instanceBuffers == null )
 						p.instanceBuffers = [];
 					var buf = p.instanceBuffers[index];
@@ -491,7 +315,7 @@ class MeshBatch extends MultiMaterial {
 					}
 					if( buf == null ) {
 						buf = new h3d.impl.InstanceBuffer();
-						var sub = psBytes.sub(start*20,count*20);
+						var sub = primitiveSubBytes.sub(start*20,count*20);
 						for( i in 0...count )
 							sub.setInt32(i*20+16, i);
 						buf.setBuffer(count, sub);
@@ -499,87 +323,40 @@ class MeshBatch extends MultiMaterial {
 					}
 				}
 
-				var commandCountAllocated = hxd.Math.imin( hxd.Math.nextPOT( count ), p.maxInstance );
+				onFlushBuffer(p, index, count);
 
-				if ( gpuLodEnabled() || gpuCullingEnabled() ) {
-					if ( p.commandBuffers == null) {
-						p.commandBuffers = [];
-						p.countBuffers = [];
-					}
-					var buf = p.commandBuffers[index];
-					var cbuf = p.countBuffers[index];
-					if ( buf == null ) {
-						buf = alloc.allocBuffer( commandCountAllocated, INDIRECT_DRAW_ARGUMENTS_FMT, UniformReadWrite );
-						cbuf = alloc.allocBuffer( 1, hxd.BufferFormat.VEC4_DATA, UniformReadWrite );
-						p.commandBuffers[index] = buf;
-						p.countBuffers[index] = cbuf;
-					}
-					else if ( buf.vertices < commandCountAllocated ) {
-						alloc.disposeBuffer( buf );
-						buf = alloc.allocBuffer( commandCountAllocated, INDIRECT_DRAW_ARGUMENTS_FMT, UniformReadWrite );
-						p.commandBuffers[index] = buf;
-					}
-				}
 				start += count;
 				index++;
 			}
-			if ( ( gpuLodEnabled() || gpuCullingEnabled() ) ) {
-				var computeShader;
-				if( p.computePass == null ) {
-					computeShader = new h3d.shader.InstanceIndirect();
-					var computePass = new h3d.mat.Pass("batchUpdate");
-					computePass.addShader(computeShader);
-					addComputeShaders(computePass);
-					p.computePass = computePass;
-				} else {
-					computeShader = p.computePass.getShader(h3d.shader.InstanceIndirect);
-				}
 
-				computeShader.ENABLE_LOD = gpuLodEnabled();
-				computeShader.ENABLE_CULLING = gpuCullingEnabled();
-				computeShader.ENABLE_DISTANCE_CLIPPING = maxDistance >= 0;
-				computeShader.radius = prim.getBounds().dimension() * 0.5;
-				computeShader.maxDistance = maxDistance;
-				computeShader.matInfos = matInfos;
-				computeShader.lodCount = lodCount;
-				computeShader.materialCount = materialCount;
-				computeShader.MAX_MATERIAL_COUNT = 16;
-				while ( materialCount * lodCount > computeShader.MAX_MATERIAL_COUNT )
-					computeShader.MAX_MATERIAL_COUNT = computeShader.MAX_MATERIAL_COUNT + 16;
-
-				if ( emittedSubParts != null ) {
-					computeShader.USING_SUB_PART = true;
-					computeShader.subPartCount = emittedSubParts.length;
-					computeShader.subPartInfos = subPartsInfos;
-					computeShader.instanceOffsets = instanceOffsetsGpu;
-					computeShader.MAX_SUB_PART_BUFFER_ELEMENT_COUNT = 16;
-					var maxSubPartsElement = hxd.Math.ceil( emittedSubParts.length / 2 );
-					while ( maxSubPartsElement > computeShader.MAX_SUB_PART_BUFFER_ELEMENT_COUNT )
-						computeShader.MAX_SUB_PART_BUFFER_ELEMENT_COUNT = computeShader.MAX_SUB_PART_BUFFER_ELEMENT_COUNT + 16;
-				}
-			}
+			onFlushPass(p);
+
 			while( p.buffers.length > index )
 				alloc.disposeBuffer( p.buffers.pop() );
 			p = p.next;
 		}
-		if( psBytes != null || gpuLodEnabled() || gpuCullingEnabled() ) {
-			var offsets = @:privateAccess instanced.primitive.resolveBuffer("Batch_Start");
+		if( hasOffset() ) {
+			var offsets = prim.resolveBuffer("Batch_Start");
 			if( offsets == null || offsets.vertices < instanceCount || offsets.isDisposed() ) {
 				if( offsets != null ) {
 					offsets.dispose();
-					@:privateAccess instanced.primitive.removeBuffer(offsets);
+					prim.removeBuffer(offsets);
 				}
 				var tmp = haxe.io.Bytes.alloc(4 * instanceCount);
 				for( i in 0...instanceCount )
 					tmp.setFloat(i<<2, i);
 				offsets = new h3d.Buffer(instanceCount, BATCH_START_FMT);
 				offsets.uploadBytes(tmp,0,instanceCount);
-				@:privateAccess instanced.primitive.addBuffer(offsets);
+				prim.addBuffer(offsets);
 			}
 		}
 		needUpload = false;
 	}
 
+	function onFlushBuffer(p : BatchData, index : Int, count : Int) {} // Hook with an empty default body; called once per buffer inside the upload loop with the pass data, the buffer index and the instance count of that buffer. Presumably meant to be overridden by subclasses - confirm.
+
+	function onFlushPass(p : BatchData) {} // Hook with an empty default body; called once per pass data entry after its buffers were processed and before surplus buffers are disposed. Presumably meant to be overridden by subclasses - confirm.
+
 	function syncData( batch : BatchData ) {
 
 		var startPos = batch.paramsCount * instanceCount << 2;
@@ -675,54 +452,33 @@ class MeshBatch extends MultiMaterial {
 
 			// check that the pass is still enable
 			var material = materials[p.matIndex];
-			if( material != null && material.getPass(pass.name) != null ) {
-				var emittedCount = 0;
-				for( i => buf in p.buffers ) {
-					ctx.emitPass(pass, this).index = i | (p.matIndex << 16);
-					if ( p.commandBuffers != null && p.commandBuffers.length > 0 ) {
-						var count = hxd.Math.imin( instanceCount - p.maxInstance * i, p.maxInstance);
-						var computeShader = p.computePass.getShader(h3d.shader.InstanceIndirect);
-						if ( gpuCullingEnabled() )
-							computeShader.frustum = ctx.getCameraFrustumBuffer();
-						computeShader.instanceData = buf;
-						computeShader.matIndex = p.matIndex;
-						computeShader.commandBuffer = p.commandBuffers[i];
-						if ( countBytes == null ) {
-							countBytes = haxe.io.Bytes.alloc(4*4);
-							countBytes.setInt32(0, 0);
-						}
-						p.countBuffers[i].uploadBytes(countBytes, 0, 1);
-						computeShader.countBuffer = p.countBuffers[i];
-						computeShader.startInstanceOffset = emittedCount;
-						computeShader.ENABLE_COUNT_BUFFER = isCountBufferAllowed();
-						ctx.computeList(@:privateAccess p.computePass.shaders);
-						ctx.computeDispatch(count);
-						emittedCount += count;
-					}
-				}
-			}
+			if( material != null && material.getPass(pass.name) != null )
+				emitPass(ctx, p);
 			p = p.next;
 		}
 	}
 
+	function emitPass(ctx : RenderContext, p : BatchData) { // Emits p.pass once for every entry of p.buffers.
+		for( i => buf in p.buffers ) // Only the index i is used below; buf itself is not read.
+			ctx.emitPass(p.pass, this).index = i | (p.matIndex << 16); // Packs the buffer index in the low 16 bits and the material index above them; draw() recovers the buffer index with & 0xFFFF.
+	}
+
 	override function draw(ctx:RenderContext) {
 		var p = dataPasses;
 		while( true ) {
 			if( p.pass == ctx.drawPass.pass ) {
 				var bufferIndex = ctx.drawPass.index & 0xFFFF;
+
 				if ( storageBufferEnabled() )
 					p.shader.Batch_StorageBuffer = p.buffers[bufferIndex];
 				else
 					p.shader.Batch_Buffer = p.buffers[bufferIndex];
-				if( p.instanceBuffers == null ) {
-					var count = hxd.Math.imin( instanceCount - p.maxInstance * bufferIndex, p.maxInstance );
-					instanced.setCommand(p.matIndex, instanced.screenRatioToLod(curScreenRatio), count);
-					if ( p.commandBuffers != null && p.commandBuffers.length > 0 ) {
-						@:privateAccess instanced.commands.data = p.commandBuffers[bufferIndex].vbuf;
-						@:privateAccess instanced.commands.countBuffer = p.countBuffers[bufferIndex].vbuf;
-					}
-				} else
+
+				if( p.instanceBuffers == null )
+					setPassCommand(p, bufferIndex);
+				else
 					instanced.commands = p.instanceBuffers[bufferIndex];
+
 				break;
 			}
 			p = p.next;
@@ -734,8 +490,13 @@ class MeshBatch extends MultiMaterial {
 		ctx.drawPass.index = prev;
 	}
 
+	function setPassCommand(p : BatchData, bufferIndex : Int) { // Called from draw() when p.instanceBuffers is null; forwards the command parameters for one buffer to `instanced`.
+		var count = hxd.Math.imin( instanceCount - p.maxInstance * bufferIndex, p.maxInstance ); // Instances left starting at this buffer's first instance, capped at p.maxInstance.
+		instanced.setCommand(p.matIndex, instanced.screenRatioToLod(curScreenRatio), count); // Passes the material index, the lod derived from curScreenRatio, and the instance count.
+	}
+
 	override function calcScreenRatio(ctx:RenderContext) { // Updates curScreenRatio; ctx is not read in this body.
-		curScreenRatio = @:privateAccess instanced.primitive.getBounds().dimension() / ( 2.0 * hxd.Math.max(lodDistance, 0.0001) );
+		curScreenRatio = getPrimitive().getBounds().dimension() / ( 2.0 * hxd.Math.max(lodDistance, 0.0001) ); // Primitive bounds dimension divided by twice lodDistance; lodDistance is clamped to at least 0.0001 so the divisor is never zero.
 	}
 
 	override function addBoundsRec( b : h3d.col.Bounds, relativeTo: h3d.Matrix ) {
@@ -753,16 +514,6 @@ class MeshBatch extends MultiMaterial {
 			b.addTransform(bounds, relativeTo);
 	}
 
-	function addComputeShaders( pass : h3d.mat.Pass ) {}
-
-	inline function isCountBufferAllowed() {
-		#if hlsdl
-		return h3d.impl.GlDriver.hasMultiIndirectCount;
-		#else
-		return true;
-		#end
-	}
-
 	override function onRemove() {
 		super.onRemove();
 		cleanPasses();
@@ -783,45 +534,76 @@ class MeshBatch extends MultiMaterial {
 	function cleanPasses() { // Releases every BatchData in the dataPasses list, disposes the instanced commands and flags shaders as changed.
 		var alloc = hxd.impl.Allocator.get(); // NOTE(review): every remaining use of alloc in this function is on a removed line, so this local looks unused after the change.
 		while( dataPasses != null ) {
-			dataPasses.pass.removeShader(dataPasses.shader);
-			for( b in dataPasses.buffers )
-				alloc.disposeBuffer(b);
-
-			if ( dataPasses.commandBuffers != null && dataPasses.commandBuffers.length > 0 ) {
-				@:privateAccess instanced.commands.data = null;
-				for ( buf in dataPasses.commandBuffers )
-					alloc.disposeBuffer(buf);
-				dataPasses.commandBuffers.resize(0);
-				for ( buf in dataPasses.countBuffers )
-					alloc.disposeBuffer(buf);
-				dataPasses.countBuffers.resize(0);
-				dataPasses.computePass = null;
-			}
-
-			if( dataPasses.instanceBuffers != null ) {
-				for( b in dataPasses.instanceBuffers )
-					b.dispose();
-			}
-			alloc.disposeFloats(dataPasses.data);
+			dataPasses.clean(); // Per-entry cleanup now lives in BatchData.clean().
 			dataPasses = dataPasses.next; // Advances the list head, so dataPasses is null once the loop ends.
 		}
-		if ( matInfos != null ) {
-			alloc.disposeBuffer(matInfos);
-			matInfos = null;
-		}
-		if( instanced.commands != null )
-			instanced.commands.dispose();
-
-		if ( subPartsInfos != null )
-			alloc.disposeBuffer(subPartsInfos);
 
-		if ( instanceOffsetsGpu != null )
-			alloc.disposeBuffer(instanceOffsetsGpu);
-		instanceOffsetsCpu = null;
+		if( instanced.commands != null ) {
+			instanced.commands.dispose();
+			@:privateAccess instanced.commands.data = null; // Clears the private data reference after disposing the commands object.
+		}
 
 		primitiveSubBytes = null;
-		emittedSubParts = null;
-		countBytes = null;
 		shadersChanged = true;
 	}
+}
+
+class BatchData { // Per-pass batching state; entries are chained through `next` and walked by MeshBatch.
+
+	public var paramsCount : Int; // Per-instance parameter count; the upload code uses 4 * paramsCount floats per instance.
+	public var maxInstance : Int; // Upper bound on the number of instances placed in a single buffer.
+	public var matIndex : Int; // Index into the batch's materials array.
+	public var indexCount : Int;
+	public var indexStart : Int;
+	public var instanceBuffers : Array<h3d.impl.InstanceBuffer>; // Stays null unless sub-part bytes are in use; then holds one InstanceBuffer per buffer index.
+	public var buffers : Array<h3d.Buffer> = []; // Instance data buffers, addressed by buffer index.
+	public var bufferFormat : hxd.BufferFormat; // Its stride is used to convert instance counts into vertex counts.
+	public var data : hxd.FloatBuffer; // CPU-side floats that get uploaded into `buffers`.
+	public var params : hxsl.RuntimeShader.AllocParam;
+	public var shader : hxsl.BatchShader; // Receives the current buffer in draw(); removed from `pass` by clean().
+	public var shaders : Array<hxsl.Shader>;
+	public var pass : h3d.mat.Pass; // Pass this entry is matched against when drawing.
+	public var next : BatchData; // Next entry of the linked list, or null at the end.
+
+	public function new() {
+	}
+
+	public function clean() { // Detaches the shader from the pass and releases the buffers, instance buffers and float data owned by this entry.
+		var alloc = hxd.impl.Allocator.get();
+
+		pass.removeShader(shader);
+		for( b in buffers ) // The array itself is not emptied here, only its buffers are handed to the allocator.
+			alloc.disposeBuffer(b);
+
+		if( instanceBuffers != null ) {
+			for( b in instanceBuffers )
+				b.dispose();
+		}
+		alloc.disposeFloats(data);
+	}
+}
+
+class MeshBatchPart { // Plain data holder describing an index range, optional per-lod ranges, a base vertex and bounds.
+	public var indexStart : Int;
+	public var indexCount : Int;
+	// TODO : remove lod here
+	public var lodIndexStart : Array<Int>;
+	public var lodIndexCount : Array<Int>;
+	public var lodConfig : Array<Float>;
+	public var baseVertex : Int;
+	public var bounds : h3d.col.Bounds;
+	public function new() {
+	}
+
+	public function clone() { // Returns a new MeshBatchPart with every field assigned from this one.
+		var cl = new MeshBatchPart();
+		cl.indexStart = indexStart;
+		cl.indexCount = indexCount;
+		cl.lodIndexStart = lodIndexStart; // Array reference is shared with the original, not duplicated.
+		cl.lodIndexCount = lodIndexCount; // Shared reference as well.
+		cl.lodConfig = lodConfig; // Shared reference as well.
+		cl.baseVertex = baseVertex;
+		cl.bounds = bounds; // Same Bounds object as the original.
+		return cl;
+	}
 }