Jelajahi Sumber

Updated meshoptimizer.

Бранимир Караџић 6 tahun lalu
induk
melakukan
52e1fb9d9f

+ 57 - 5
3rdparty/meshoptimizer/CMakeLists.txt

@@ -1,8 +1,10 @@
-project(meshoptimizer)
 cmake_minimum_required(VERSION 3.0)
 
+project(meshoptimizer VERSION 0.12)
+
 option(BUILD_DEMO "Build demo" OFF)
 option(BUILD_TOOLS "Build tools" OFF)
+option(BUILD_SHARED_LIBS "Build shared libraries" OFF)
 
 set(SOURCES
     src/meshoptimizer.h
@@ -13,6 +15,7 @@ set(SOURCES
     src/overdrawanalyzer.cpp
     src/overdrawoptimizer.cpp
     src/simplifier.cpp
+    src/spatialorder.cpp
     src/stripifier.cpp
     src/vcacheanalyzer.cpp
     src/vcacheoptimizer.cpp
@@ -21,15 +24,29 @@ set(SOURCES
     src/vfetchoptimizer.cpp
 )
 
-add_library(meshoptimizer STATIC ${SOURCES})
-target_include_directories(meshoptimizer INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/src")
+add_library(meshoptimizer ${SOURCES})
+target_include_directories(meshoptimizer INTERFACE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src>")
 
 if(MSVC)
-	target_compile_options(meshoptimizer PRIVATE /W4 /WX)
+    target_compile_options(meshoptimizer PRIVATE /W4 /WX)
 else()
-	target_compile_options(meshoptimizer PRIVATE -Wall -Wextra -Werror)
+    target_compile_options(meshoptimizer PRIVATE -Wall -Wextra -Werror)
+endif()
+
+if(BUILD_SHARED_LIBS)
+    # Shared builds hide symbols by default; MESHOPTIMIZER_API carries the export/import annotation.
+    set_target_properties(meshoptimizer PROPERTIES
+        CXX_VISIBILITY_PRESET hidden
+        VISIBILITY_INLINES_HIDDEN ON)
+
+    if(NOT WIN32)
+        # The same visibility attribute is used when building the library and when consuming it.
+        target_compile_definitions(meshoptimizer PUBLIC "MESHOPTIMIZER_API=__attribute__((visibility(\"default\")))")
+    else()
+        # The DLL itself exports the symbols, while targets linking to it import them.
+        target_compile_definitions(meshoptimizer
+            PRIVATE "MESHOPTIMIZER_API=__declspec(dllexport)"
+            INTERFACE "MESHOPTIMIZER_API=__declspec(dllimport)")
+    endif()
 endif()
 
+set(TARGETS meshoptimizer)
+
 if(BUILD_DEMO)
     add_executable(demo demo/main.cpp demo/miniz.cpp demo/tests.cpp tools/meshloader.cpp)
     target_link_libraries(demo meshoptimizer)
@@ -38,4 +55,39 @@ endif()
 if(BUILD_TOOLS)
     add_executable(gltfpack tools/gltfpack.cpp tools/meshloader.cpp)
     target_link_libraries(gltfpack meshoptimizer)
+    list(APPEND TARGETS gltfpack)
+
+    set_target_properties(gltfpack PROPERTIES INSTALL_RPATH "$ORIGIN/../lib")
+endif()
+
+# Install every target collected in TARGETS (the library, plus gltfpack when BUILD_TOOLS is ON)
+# and record them in the meshoptimizerTargets export set.
+install(TARGETS ${TARGETS} EXPORT meshoptimizerTargets
+    RUNTIME DESTINATION bin
+    LIBRARY DESTINATION lib
+    ARCHIVE DESTINATION lib
+    INCLUDES DESTINATION include)
+
+# Install the public header and the export file that defines the meshoptimizer:: imported targets.
+install(FILES src/meshoptimizer.h DESTINATION include)
+install(EXPORT meshoptimizerTargets DESTINATION lib/cmake/meshoptimizer NAMESPACE meshoptimizer::)
+
+# TARGET_PDB_FILE is available since 3.1
+if(MSVC AND NOT (CMAKE_VERSION VERSION_LESS "3.1"))
+    # Install the linker-generated PDB next to each installed executable / DLL.
+    # Static libraries are skipped because they have no linker-generated PDB file.
+    foreach(TARGET ${TARGETS})
+        get_target_property(TARGET_TYPE ${TARGET} TYPE)
+        # Quote the expansion so the comparison stays well-formed for any property value.
+        if(NOT "${TARGET_TYPE}" STREQUAL "STATIC_LIBRARY")
+            install(FILES $<TARGET_PDB_FILE:${TARGET}> DESTINATION bin OPTIONAL)
+        endif()
+    endforeach()
 endif()
+
+# Provides configure_package_config_file() and write_basic_package_version_file().
+include(CMakePackageConfigHelpers)
+
+# Expand config.cmake.in into the package config file that find_package(meshoptimizer) loads.
+configure_package_config_file(config.cmake.in
+    ${CMAKE_CURRENT_BINARY_DIR}/meshoptimizerConfig.cmake
+    INSTALL_DESTINATION lib/cmake/meshoptimizer NO_SET_AND_CHECK_MACRO)
+
+# No VERSION argument is passed, so the version set by project() is used; ExactVersion means
+# find_package() only accepts requests for that same version.
+write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/meshoptimizerConfigVersion.cmake COMPATIBILITY ExactVersion)
+
+# Install both generated files into the same directory as the exported targets file.
+install(FILES
+    ${CMAKE_CURRENT_BINARY_DIR}/meshoptimizerConfig.cmake
+    ${CMAKE_CURRENT_BINARY_DIR}/meshoptimizerConfigVersion.cmake
+    DESTINATION lib/cmake/meshoptimizer)

+ 4 - 0
3rdparty/meshoptimizer/config.cmake.in

@@ -0,0 +1,4 @@
+# Package config template; @PACKAGE_INIT@ is replaced by configure_package_config_file().
+@PACKAGE_INIT@
+
+# Load the exported targets installed next to this file, then validate requested components.
+include("${CMAKE_CURRENT_LIST_DIR}/meshoptimizerTargets.cmake")
+check_required_components(meshoptimizer)

+ 74 - 3
3rdparty/meshoptimizer/demo/main.cpp

@@ -771,6 +771,68 @@ void meshlets(const Mesh& mesh)
 	       (endc - startc) * 1000);
 }
 
+// Measures how spatially sorting vertices affects vertex buffer compression: prints the
+// encoded and post-deflate size per vertex together with the time spent building the remap.
+void spatialSort(const Mesh& mesh)
+{
+	typedef PackedVertexOct PV;
+
+	const size_t vertex_count = mesh.vertices.size();
+
+	std::vector<PV> packed(vertex_count);
+	packMesh(packed, mesh.vertices);
+
+	// only the remap table generation (including its allocation) is timed
+	double sort_begin = timestamp();
+
+	std::vector<unsigned int> order(vertex_count);
+	meshopt_spatialSortRemap(&order[0], &mesh.vertices[0].px, vertex_count, sizeof(Vertex));
+
+	double sort_end = timestamp();
+
+	// reorder the packed vertices in place according to the spatial remap
+	meshopt_remapVertexBuffer(&packed[0], &packed[0], vertex_count, sizeof(PV), &order[0]);
+
+	std::vector<unsigned char> encoded(meshopt_encodeVertexBufferBound(vertex_count, sizeof(PV)));
+	size_t encoded_size = meshopt_encodeVertexBuffer(&encoded[0], encoded.size(), &packed[0], vertex_count, sizeof(PV));
+	encoded.resize(encoded_size);
+
+	size_t deflated_size = compress(encoded);
+
+	printf("Spatial  : %.1f bits/vertex (post-deflate %.1f bits/vertex); sort %.2f msec\n",
+	       double(encoded.size() * 8) / double(vertex_count),
+	       double(deflated_size * 8) / double(vertex_count),
+	       (sort_end - sort_begin) * 1000);
+}
+
+// Measures how spatially sorting triangles (followed by vertex cache and vertex fetch
+// optimization) affects index/vertex buffer compression; prints sizes and the sort time.
+void spatialSortTriangles(const Mesh& mesh)
+{
+	typedef PackedVertexOct PV;
+
+	const size_t index_count = mesh.indices.size();
+	const size_t vertex_count = mesh.vertices.size();
+	const size_t triangle_count = index_count / 3;
+
+	// work on a private copy so that the caller's mesh is left untouched
+	Mesh sorted = mesh;
+
+	double sort_begin = timestamp();
+
+	meshopt_spatialSortTriangles(&sorted.indices[0], &sorted.indices[0], index_count, &sorted.vertices[0].px, vertex_count, sizeof(Vertex));
+
+	double sort_end = timestamp();
+
+	meshopt_optimizeVertexCache(&sorted.indices[0], &sorted.indices[0], index_count, vertex_count);
+	meshopt_optimizeVertexFetch(&sorted.vertices[0], &sorted.indices[0], index_count, &sorted.vertices[0], vertex_count, sizeof(Vertex));
+
+	std::vector<PV> packed(vertex_count);
+	packMesh(packed, sorted.vertices);
+
+	std::vector<unsigned char> encoded_vertices(meshopt_encodeVertexBufferBound(vertex_count, sizeof(PV)));
+	size_t encoded_vertices_size = meshopt_encodeVertexBuffer(&encoded_vertices[0], encoded_vertices.size(), &packed[0], vertex_count, sizeof(PV));
+	encoded_vertices.resize(encoded_vertices_size);
+
+	std::vector<unsigned char> encoded_indices(meshopt_encodeIndexBufferBound(index_count, vertex_count));
+	size_t encoded_indices_size = meshopt_encodeIndexBuffer(&encoded_indices[0], encoded_indices.size(), &sorted.indices[0], index_count);
+	encoded_indices.resize(encoded_indices_size);
+
+	size_t deflated_vertices = compress(encoded_vertices);
+	size_t deflated_indices = compress(encoded_indices);
+
+	printf("SpatialT : %.1f bits/vertex (post-deflate %.1f bits/vertex); %.1f bits/triangle (post-deflate %.1f bits/triangle); sort %.2f msec\n",
+	       double(encoded_vertices.size() * 8) / double(vertex_count),
+	       double(deflated_vertices * 8) / double(vertex_count),
+	       double(encoded_indices.size() * 8) / double(triangle_count),
+	       double(deflated_indices * 8) / double(triangle_count),
+	       (sort_end - sort_begin) * 1000);
+}
+
 bool loadMesh(Mesh& mesh, const char* path)
 {
 	double start = timestamp();
@@ -924,6 +986,9 @@ void process(const char* path)
 	simplifySloppy(mesh);
 	simplifyComplete(mesh);
 
+	spatialSort(mesh);
+	spatialSortTriangles(mesh);
+
 	if (path)
 		processDeinterleaved(path);
 }
@@ -934,9 +999,15 @@ void processDev(const char* path)
 	if (!loadMesh(mesh, path))
 		return;
 
-	simplifySloppy(mesh, 0.5f);
-	simplifySloppy(mesh, 0.1f);
-	simplifySloppy(mesh, 0.01f);
+	Mesh copy = mesh;
+	meshopt_optimizeVertexCache(&copy.indices[0], &copy.indices[0], copy.indices.size(), copy.vertices.size());
+	meshopt_optimizeVertexFetch(&copy.vertices[0], &copy.indices[0], copy.indices.size(), &copy.vertices[0], copy.vertices.size(), sizeof(Vertex));
+
+	encodeIndex(copy);
+	encodeVertex<PackedVertexOct>(copy, "O");
+
+	spatialSort(mesh);
+	spatialSortTriangles(mesh);
 }
 
 int main(int argc, char** argv)

+ 28 - 0
3rdparty/meshoptimizer/src/meshoptimizer.h

@@ -342,6 +342,25 @@ struct meshopt_Bounds
 MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
 MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeMeshletBounds(const struct meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
 
+/**
+ * Experimental: Spatial sorter
+ * Generates a remap table that can be used to reorder points for spatial locality.
+ * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer.
+ *
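+ * destination must contain enough space for the resulting remap table (vertex_count elements)
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer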
+ */
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+
+/**
+ * Experimental: Spatial sorter
+ * Reorders triangles for spatial locality, and generates a new index buffer. The resulting index buffer can be used with other functions like optimizeVertexCache.
+ *
+ * destination must contain enough space for the resulting index buffer (index_count elements)
+ * indices must contain triangle list index data (index_count must be divisible by 3); it does not need to be optimized beforehand
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+ */
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+
 /**
  * Set allocation callbacks
  * These callbacks will be used instead of the default operator new/operator delete for all temporary allocations in the library.
@@ -620,6 +639,15 @@ inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t inde
 
 	return meshopt_computeClusterBounds(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
 }
+
+// Overload templated on the index type T: routes both buffers through meshopt_IndexAdapter
+// and forwards to the unsigned int implementation of meshopt_spatialSortTriangles.
+template <typename T>
+inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+	// the input adapter is declared first so that the two adapters are created (and destroyed) in a fixed order
+	meshopt_IndexAdapter<T> source_adapter(0, indices, index_count);
+	meshopt_IndexAdapter<T> result_adapter(destination, 0, index_count);
+
+	meshopt_spatialSortTriangles(result_adapter.data, source_adapter.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
+}
 #endif
 
 /* Inline implementation */

+ 194 - 0
3rdparty/meshoptimizer/src/spatialorder.cpp

@@ -0,0 +1,194 @@
+// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
+#include "meshoptimizer.h"
+
+#include <assert.h>
+#include <float.h>
+#include <string.h>
+
+// This work is based on:
+// Fabian Giesen. Decoding Morton codes. 2009
+namespace meshopt
+{
+
+// "Insert" two 0 bits after each of the 10 low bits of x
+inline unsigned int part1By2(unsigned int x)
+{
+	// mask applied after each shift-and-merge step; the shift halves on every step (16, 8, 4, 2)
+	static const unsigned int masks[4] = {
+	    0xff0000ff, // ---- --98 ---- ---- ---- ---- 7654 3210
+	    0x0300f00f, // ---- --98 ---- ---- 7654 ---- ---- 3210
+	    0x030c30c3, // ---- --98 ---- 76-- --54 ---- 32-- --10
+	    0x09249249, // ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
+	};
+
+	// only the 10 low bits take part; everything above is discarded
+	unsigned int bits = x & 0x000003ff; // ---- ---- ---- ---- ---- --98 7654 3210
+
+	for (int step = 0; step < 4; ++step)
+		bits = (bits ^ (bits << (16 >> step))) & masks[step];
+
+	return bits;
+}
+
+// Writes one 30-bit Morton code per vertex into result: positions are normalized by the
+// bounding box minimum and the largest box dimension, quantized to 10 bits per axis and interleaved.
+static void computeOrder(unsigned int* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride)
+{
+	const size_t stride_floats = vertex_positions_stride / sizeof(float);
+
+	float lo[3] = {FLT_MAX, FLT_MAX, FLT_MAX};
+	float hi[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
+
+	// first pass: bounding box of all positions
+	for (size_t vi = 0; vi < vertex_count; ++vi)
+	{
+		const float* pos = vertex_positions_data + vi * stride_floats;
+
+		for (int axis = 0; axis < 3; ++axis)
+		{
+			float value = pos[axis];
+
+			if (lo[axis] > value)
+				lo[axis] = value;
+
+			if (hi[axis] < value)
+				hi[axis] = value;
+		}
+	}
+
+	// a single scale derived from the largest dimension keeps the quantization grid uniform
+	float extent = 0.f;
+
+	for (int axis = 0; axis < 3; ++axis)
+	{
+		float span = hi[axis] - lo[axis];
+
+		extent = span < extent ? extent : span;
+	}
+
+	// degenerate input (all points coincide, or no points) maps everything to cell 0
+	float scale = 0.f;
+
+	if (extent != 0)
+		scale = 1.f / extent;
+
+	// second pass: generate Morton order based on the position inside a unit cube
+	for (size_t vi = 0; vi < vertex_count; ++vi)
+	{
+		const float* pos = vertex_positions_data + vi * stride_floats;
+
+		unsigned int code = 0;
+
+		for (int axis = 0; axis < 3; ++axis)
+		{
+			int cell = int((pos[axis] - lo[axis]) * scale * 1023.f + 0.5f);
+
+			code |= part1By2(cell) << axis;
+		}
+
+		result[vi] = code;
+	}
+}
+
+// Builds three 10-bit digit histograms of the keys in data (bits 0-9, 10-19 and 20-29) and
+// converts each into exclusive prefix sums, i.e. the first output slot of every digit value.
+static void computeHistogram(unsigned int (&hist)[1024][3], const unsigned int* data, size_t count)
+{
+	memset(hist, 0, sizeof(hist));
+
+	// count the occurrences of every digit value, one histogram column per digit
+	for (size_t i = 0; i < count; ++i)
+	{
+		unsigned int key = data[i];
+
+		for (int digit = 0; digit < 3; ++digit)
+			hist[(key >> (digit * 10)) & 1023][digit]++;
+	}
+
+	unsigned int running[3] = {0, 0, 0};
+
+	// replace the counts with the running totals of all preceding buckets, in place
+	for (int bucket = 0; bucket < 1024; ++bucket)
+	{
+		for (int digit = 0; digit < 3; ++digit)
+		{
+			unsigned int bucket_count = hist[bucket][digit];
+
+			hist[bucket][digit] = running[digit];
+			running[digit] += bucket_count;
+		}
+	}
+
+	assert(running[0] == count && running[1] == count && running[2] == count);
+}
+
+// One stable counting-sort pass: scatters the element ids in source into destination, ordered by
+// the 10-bit digit number `pass` of their key. hist[..][pass] holds the next free slot per digit
+// value and is advanced as elements are placed.
+static void radixPass(unsigned int* destination, const unsigned int* source, const unsigned int* keys, size_t count, unsigned int (&hist)[1024][3], int pass)
+{
+	const int shift = pass * 10;
+	const unsigned int* source_end = source + count;
+
+	for (const unsigned int* it = source; it != source_end; ++it)
+	{
+		unsigned int element = *it;
+		unsigned int digit = (keys[element] >> shift) & 1023;
+
+		unsigned int slot = hist[digit][pass];
+		hist[digit][pass] = slot + 1;
+
+		destination[slot] = element;
+	}
+}
+
+} // namespace meshopt
+
+// Generates a remap table (old vertex index => new vertex index) that orders vertices by the
+// Morton code of their quantized position.
+// destination receives vertex_count entries; vertex_positions_stride is the distance in bytes
+// between consecutive positions, each of which must start with three floats.
+void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+	using namespace meshopt;
+
+	// three floats are read per vertex, so a stride below 12 bytes would read past the last vertex
+	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
+	assert(vertex_positions_stride % sizeof(float) == 0);
+
+	meshopt_Allocator allocator;
+
+	unsigned int* keys = allocator.allocate<unsigned int>(vertex_count);
+	computeOrder(keys, vertex_positions, vertex_count, vertex_positions_stride);
+
+	unsigned int hist[1024][3];
+	computeHistogram(hist, keys, vertex_count);
+
+	unsigned int* scratch = allocator.allocate<unsigned int>(vertex_count);
+
+	// start from the identity order; destination doubles as the second ping-pong buffer
+	for (size_t i = 0; i < vertex_count; ++i)
+		destination[i] = unsigned(i);
+
+	// 3-pass radix sort (10 bits per pass, lowest bits first) computes the resulting order into scratch
+	radixPass(scratch, destination, keys, vertex_count, hist, 0);
+	radixPass(destination, scratch, keys, vertex_count, hist, 1);
+	radixPass(scratch, destination, keys, vertex_count, hist, 2);
+
+	// since our remap table is mapping old=>new, we need to reverse it
+	for (size_t i = 0; i < vertex_count; ++i)
+		destination[scratch[i]] = unsigned(i);
+}
+
+// Reorders the triangles of an index buffer for spatial locality by sorting them by centroid.
+// destination receives index_count indices and may alias indices (in-place operation);
+// vertex_positions_stride is the distance in bytes between consecutive positions, each of
+// which must start with three floats.
+void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+	using namespace meshopt;
+
+	assert(index_count % 3 == 0);
+	// three floats are read per vertex, so a stride below 12 bytes would read past the last vertex
+	assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
+	assert(vertex_positions_stride % sizeof(float) == 0);
+
+	// vertex_count is only used by asserts; this avoids an unused parameter warning when they are compiled out
+	(void)vertex_count;
+
+	size_t face_count = index_count / 3;
+	size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
+
+	meshopt_Allocator allocator;
+
+	// each triangle is represented by its centroid, stored as a tightly packed float3
+	float* centroids = allocator.allocate<float>(face_count * 3);
+
+	for (size_t i = 0; i < face_count; ++i)
+	{
+		unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
+		assert(a < vertex_count && b < vertex_count && c < vertex_count);
+
+		const float* va = vertex_positions + a * vertex_stride_float;
+		const float* vb = vertex_positions + b * vertex_stride_float;
+		const float* vc = vertex_positions + c * vertex_stride_float;
+
+		centroids[i * 3 + 0] = (va[0] + vb[0] + vc[0]) / 3.f;
+		centroids[i * 3 + 1] = (va[1] + vb[1] + vc[1]) / 3.f;
+		centroids[i * 3 + 2] = (va[2] + vb[2] + vc[2]) / 3.f;
+	}
+
+	// remap[i] is the position of triangle i in the sorted output
+	unsigned int* remap = allocator.allocate<unsigned int>(face_count);
+
+	meshopt_spatialSortRemap(remap, centroids, face_count, sizeof(float) * 3);
+
+	// support in-place remap: the scatter below must not overwrite triangles it has yet to read
+	if (destination == indices)
+	{
+		unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
+		memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
+		indices = indices_copy;
+	}
+
+	for (size_t i = 0; i < face_count; ++i)
+	{
+		unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
+		unsigned int r = remap[i];
+
+		destination[r * 3 + 0] = a;
+		destination[r * 3 + 1] = b;
+		destination[r * 3 + 2] = c;
+	}
+}

+ 117 - 36
3rdparty/meshoptimizer/tools/gltfpack.cpp

@@ -58,6 +58,8 @@ struct Mesh
 	cgltf_material* material;
 	cgltf_skin* skin;
 
+	cgltf_primitive_type type;
+
 	std::vector<Stream> streams;
 	std::vector<unsigned int> indices;
 
@@ -237,12 +239,18 @@ void parseMeshesGltf(cgltf_data* data, std::vector<Mesh>& meshes)
 		{
 			const cgltf_primitive& primitive = mesh.primitives[pi];
 
-			if (primitive.type != cgltf_primitive_type_triangles)
+			if (primitive.type != cgltf_primitive_type_triangles && primitive.type != cgltf_primitive_type_points)
 			{
 				fprintf(stderr, "Warning: ignoring primitive %d of mesh %d because type %d is not supported\n", int(pi), mesh_id, primitive.type);
 				continue;
 			}
 
+			if (primitive.type == cgltf_primitive_type_points && primitive.indices)
+			{
+				fprintf(stderr, "Warning: ignoring primitive %d of mesh %d because indexed points are not supported\n", int(pi), mesh_id);
+				continue;
+			}
+
 			Mesh result;
 
 			result.node = &node;
@@ -250,13 +258,15 @@ void parseMeshesGltf(cgltf_data* data, std::vector<Mesh>& meshes)
 			result.material = primitive.material;
 			result.skin = node.skin;
 
+			result.type = primitive.type;
+
 			if (primitive.indices)
 			{
 				result.indices.resize(primitive.indices->count);
 				for (size_t i = 0; i < primitive.indices->count; ++i)
 					result.indices[i] = unsigned(cgltf_accessor_read_index(primitive.indices, i));
 			}
-			else
+			else if (primitive.type != cgltf_primitive_type_points)
 			{
 				size_t count = primitive.attributes ? primitive.attributes[0].data->count : 0;
 
@@ -431,6 +441,8 @@ void parseMeshesObj(fastObjMesh* obj, cgltf_data* data, std::vector<Mesh>& meshe
 			mesh.material = &data->materials[mi];
 		}
 
+		mesh.type = cgltf_primitive_type_triangles;
+
 		mesh.streams.resize(3);
 		mesh.streams[0].type = cgltf_attribute_type_position;
 		mesh.streams[0].data.resize(vertex_count[mi]);
@@ -601,9 +613,6 @@ void mergeMeshMaterials(cgltf_data* data, std::vector<Mesh>& meshes)
 	{
 		Mesh& mesh = meshes[i];
 
-		if (mesh.indices.empty())
-			continue;
-
 		if (!mesh.material)
 			continue;
 
@@ -653,6 +662,9 @@ bool canMergeMeshes(const Mesh& lhs, const Mesh& rhs, const Settings& settings)
 	if (lhs.skin != rhs.skin)
 		return false;
 
+	if (lhs.type != rhs.type)
+		return false;
+
 	if (lhs.targets != rhs.targets)
 		return false;
 
@@ -663,6 +675,9 @@ bool canMergeMeshes(const Mesh& lhs, const Mesh& rhs, const Settings& settings)
 		if (lhs.weights[i] != rhs.weights[i])
 			return false;
 
+	if (lhs.indices.empty() != rhs.indices.empty())
+		return false;
+
 	if (lhs.streams.size() != rhs.streams.size())
 		return false;
 
@@ -693,25 +708,65 @@ void mergeMeshes(Mesh& target, const Mesh& mesh)
 
 void mergeMeshes(std::vector<Mesh>& meshes, const Settings& settings)
 {
+	size_t write = 0;
+
 	for (size_t i = 0; i < meshes.size(); ++i)
 	{
-		Mesh& mesh = meshes[i];
+		if (meshes[i].streams.empty())
+			continue;
+
+		Mesh& target = meshes[write];
 
-		for (size_t j = 0; j < i; ++j)
+		if (i != write)
 		{
-			Mesh& target = meshes[j];
+			Mesh& mesh = meshes[i];
 
-			if (target.indices.size() && canMergeMeshes(mesh, target, settings))
+			// note: this copy is expensive; we could use move in C++11 or swap manually which is a bit painful...
+			target = mesh;
+
+			mesh.streams.clear();
+			mesh.indices.clear();
+		}
+
+		size_t target_vertices = target.streams[0].data.size();
+		size_t target_indices = target.indices.size();
+
+		for (size_t j = i + 1; j < meshes.size(); ++j)
+		{
+			Mesh& mesh = meshes[j];
+
+			if (!mesh.streams.empty() && canMergeMeshes(target, mesh, settings))
+			{
+				target_vertices += mesh.streams[0].data.size();
+				target_indices += mesh.indices.size();
+			}
+		}
+
+		for (size_t j = 0; j < target.streams.size(); ++j)
+			target.streams[j].data.reserve(target_vertices);
+
+		target.indices.reserve(target_indices);
+
+		for (size_t j = i + 1; j < meshes.size(); ++j)
+		{
+			Mesh& mesh = meshes[j];
+
+			if (!mesh.streams.empty() && canMergeMeshes(target, mesh, settings))
 			{
 				mergeMeshes(target, mesh);
 
 				mesh.streams.clear();
 				mesh.indices.clear();
-
-				break;
 			}
 		}
+
+		assert(target.streams[0].data.size() == target_vertices);
+		assert(target.indices.size() == target_indices);
+
+		write++;
 	}
+
+	meshes.resize(write);
 }
 
 void reindexMesh(Mesh& mesh)
@@ -764,6 +819,33 @@ void optimizeMesh(Mesh& mesh)
 		meshopt_remapVertexBuffer(&mesh.streams[i].data[0], &mesh.streams[i].data[0], vertex_count, sizeof(Attr), &remap[0]);
 }
 
+// Reorders the vertices of a non-indexed point mesh for spatial locality: builds a remap table
+// from the position stream with meshopt_spatialSortRemap and applies it to every stream in place.
+void sortPointMesh(Mesh& mesh)
+{
+	size_t positions = 0;
+
+	// locate the position stream; falls back to stream 0, which the assert below validates
+	for (size_t i = 0; i < mesh.streams.size(); ++i)
+		if (mesh.streams[i].type == cgltf_attribute_type_position)
+		{
+			positions = i;
+			break;
+		}
+
+	assert(mesh.streams[positions].type == cgltf_attribute_type_position);
+	assert(mesh.indices.empty());
+
+	size_t total_vertices = mesh.streams[positions].data.size();
+
+	// nothing to sort; also avoids taking the address of element 0 of empty vectors below
+	if (total_vertices == 0)
+		return;
+
+	std::vector<unsigned int> remap(total_vertices);
+	meshopt_spatialSortRemap(&remap[0], mesh.streams[positions].data[0].f, total_vertices, sizeof(Attr));
+
+	for (size_t i = 0; i < mesh.streams.size(); ++i)
+	{
+		// all streams must describe the same set of vertices for the shared remap to be valid
+		assert(mesh.streams[i].data.size() == total_vertices);
+
+		meshopt_remapVertexBuffer(&mesh.streams[i].data[0], &mesh.streams[i].data[0], total_vertices, sizeof(Attr), &remap[0]);
+	}
+}
+
 bool getAttributeBounds(const std::vector<Mesh>& meshes, cgltf_attribute_type type, Attr& min, Attr& max)
 {
 	min.f[0] = min.f[1] = min.f[2] = min.f[3] = +FLT_MAX;
@@ -2070,9 +2152,6 @@ void markNeededNodes(cgltf_data* data, std::vector<NodeInfo>& nodes, const std::
 	{
 		const Mesh& mesh = meshes[i];
 
-		if (mesh.indices.empty())
-			continue;
-
 		if (mesh.node)
 		{
 			NodeInfo& ni = nodes[mesh.node - data->nodes];
@@ -2114,9 +2193,6 @@ void markNeededMaterials(cgltf_data* data, std::vector<MaterialInfo>& materials,
 	{
 		const Mesh& mesh = meshes[i];
 
-		if (mesh.indices.empty())
-			continue;
-
 		if (mesh.material)
 		{
 			MaterialInfo& mi = materials[mesh.material - data->materials];
@@ -2781,11 +2857,20 @@ void process(cgltf_data* data, std::vector<Mesh>& meshes, const Settings& settin
 	{
 		Mesh& mesh = meshes[i];
 
-		if (mesh.indices.empty())
-			continue;
+		switch (mesh.type)
+		{
+		case cgltf_primitive_type_points:
+			sortPointMesh(mesh);
+			break;
 
-		reindexMesh(mesh);
-		optimizeMesh(mesh);
+		case cgltf_primitive_type_triangles:
+			reindexMesh(mesh);
+			optimizeMesh(mesh);
+			break;
+
+		default:
+			assert(!"Unknown primitive type");
+		}
 	}
 
 	if (settings.verbose)
@@ -2904,9 +2989,6 @@ void process(cgltf_data* data, std::vector<Mesh>& meshes, const Settings& settin
 	{
 		const Mesh& mesh = meshes[i];
 
-		if (mesh.indices.empty())
-			continue;
-
 		comma(json_meshes);
 		append(json_meshes, "{\"primitives\":[");
 
@@ -2915,9 +2997,6 @@ void process(cgltf_data* data, std::vector<Mesh>& meshes, const Settings& settin
 		{
 			const Mesh& prim = meshes[pi];
 
-			if (prim.indices.empty())
-				continue;
-
 			if (prim.node != mesh.node || prim.skin != mesh.skin || prim.targets != mesh.targets)
 				break;
 
@@ -2928,6 +3007,8 @@ void process(cgltf_data* data, std::vector<Mesh>& meshes, const Settings& settin
 			append(json_meshes, "{\"attributes\":{");
 			writeMeshAttributes(json_meshes, views, json_accessors, accr_offset, prim, 0, qp, settings);
 			append(json_meshes, "}");
+			append(json_meshes, ",\"mode\":");
+			append(json_meshes, size_t(prim.type));
 
 			if (mesh.targets)
 			{
@@ -2942,10 +3023,14 @@ void process(cgltf_data* data, std::vector<Mesh>& meshes, const Settings& settin
 				append(json_meshes, "]");
 			}
 
-			size_t index_accr = writeMeshIndices(views, json_accessors, accr_offset, prim, settings);
+			if (!prim.indices.empty())
+			{
+				size_t index_accr = writeMeshIndices(views, json_accessors, accr_offset, prim, settings);
+
+				append(json_meshes, ",\"indices\":");
+				append(json_meshes, index_accr);
+			}
 
-			append(json_meshes, ",\"indices\":");
-			append(json_meshes, index_accr);
 			if (prim.material)
 			{
 				MaterialInfo& mi = materials[prim.material - data->materials];
@@ -2954,6 +3039,7 @@ void process(cgltf_data* data, std::vector<Mesh>& meshes, const Settings& settin
 				append(json_meshes, ",\"material\":");
 				append(json_meshes, size_t(mi.remap));
 			}
+
 			append(json_meshes, "}");
 		}
 
@@ -3226,12 +3312,7 @@ void process(cgltf_data* data, std::vector<Mesh>& meshes, const Settings& settin
 
 	if (settings.verbose)
 	{
-		size_t primitives = 0;
-
-		for (size_t i = 0; i < meshes.size(); ++i)
-			primitives += !meshes[i].indices.empty();
-
-		printf("output: %d nodes, %d meshes (%d primitives), %d materials\n", int(node_offset), int(mesh_offset), int(primitives), int(material_offset));
+		printf("output: %d nodes, %d meshes (%d primitives), %d materials\n", int(node_offset), int(mesh_offset), int(meshes.size()), int(material_offset));
 		printf("output: JSON %d bytes, buffers %d bytes\n", int(json.size()), int(bin.size()));
 		printf("output: buffers: vertex %d bytes, index %d bytes, skin %d bytes, time %d bytes, keyframe %d bytes, image %d bytes\n",
 		       int(bytes[BufferView::Kind_Vertex]), int(bytes[BufferView::Kind_Index]), int(bytes[BufferView::Kind_Skin]),