Browse Source

Implement Particle Trails

-Enable the trails and set the length in seconds
-Provide a mesh with a skeleton and a skin
-Or, alternatively use one of the built-in TubeTrailMesh/RibbonTrailMesh
-Works deterministically
-Fixed particle collisions (were broken)
-Not working in 2D yet (that will happen next)
reduz 4 years ago
parent
commit
90056460ad
33 changed files with 1572 additions and 176 deletions
  1. 7 0
      core/io/resource.cpp
  2. 2 0
      core/io/resource.h
  3. 13 0
      editor/scene_tree_dock.cpp
  4. 3 0
      editor/scene_tree_dock.h
  5. 4 1
      editor/scene_tree_editor.cpp
  6. 2 0
      editor/scene_tree_editor.h
  7. 165 5
      scene/3d/gpu_particles_3d.cpp
  8. 31 0
      scene/3d/gpu_particles_3d.h
  9. 2 0
      scene/register_scene_types.cpp
  10. 2 2
      scene/resources/curve.cpp
  11. 1 1
      scene/resources/curve.h
  12. 10 0
      scene/resources/material.cpp
  13. 5 5
      scene/resources/material.h
  14. 7 0
      scene/resources/mesh.cpp
  15. 3 0
      scene/resources/mesh.h
  16. 551 0
      scene/resources/primitive_meshes.cpp
  17. 94 0
      scene/resources/primitive_meshes.h
  18. 13 21
      servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp
  19. 5 3
      servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h
  20. 3 0
      servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp
  21. 1 0
      servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h
  22. 331 71
      servers/rendering/renderer_rd/renderer_storage_rd.cpp
  23. 50 6
      servers/rendering/renderer_rd/renderer_storage_rd.h
  24. 64 19
      servers/rendering/renderer_rd/shaders/particles.glsl
  25. 96 5
      servers/rendering/renderer_rd/shaders/particles_copy.glsl
  26. 67 30
      servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl
  27. 3 4
      servers/rendering/renderer_rd/shaders/scene_forward_clustered_inc.glsl
  28. 4 1
      servers/rendering/renderer_scene_cull.cpp
  29. 2 0
      servers/rendering/renderer_scene_cull.h
  30. 9 1
      servers/rendering/renderer_storage.h
  31. 6 0
      servers/rendering/rendering_server_default.h
  32. 2 1
      servers/rendering/shader_types.cpp
  33. 14 0
      servers/rendering_server.h

+ 7 - 0
core/io/resource.cpp

@@ -110,6 +110,12 @@ String Resource::get_name() const {
 	return name;
 }
 
+void Resource::update_configuration_warning() { // Invokes the static _update_configuration_warning hook when one is installed.
+	if (_update_configuration_warning) { // The hook is a function pointer initialised to nullptr, so it may be unset.
+		_update_configuration_warning();
+	}
+}
+
 bool Resource::editor_can_reload_from_file() { // Virtual (see resource.h); this base implementation always answers yes.
 	return true; // By default, yes.
 }
@@ -320,6 +326,7 @@ void Resource::setup_local_to_scene() {
 }
 
 Node *(*Resource::_get_local_scene_func)() = nullptr;
+void (*Resource::_update_configuration_warning)() = nullptr;
 
 void Resource::set_as_translation_remapped(bool p_remapped) {
 	if (remapped_list.in_list() == p_remapped) {

+ 2 - 0
core/io/resource.h

@@ -88,7 +88,9 @@ protected:
 
 public:
 	static Node *(*_get_local_scene_func)(); //used by editor
+	static void (*_update_configuration_warning)(); //used by editor
 
+	void update_configuration_warning();
 	virtual bool editor_can_reload_from_file();
 	virtual void reset_state(); //for resources that use variable amount of properties, either via _validate_property or _get_property_list, this function needs to be implemented to correctly clear state
 	virtual Error copy_from(const Ref<Resource> &p_resource);

+ 13 - 0
editor/scene_tree_dock.cpp

@@ -33,6 +33,7 @@
 #include "core/config/project_settings.h"
 #include "core/input/input.h"
 #include "core/io/resource_saver.h"
+#include "core/object/message_queue.h"
 #include "core/os/keyboard.h"
 #include "editor/debugger/editor_debugger_node.h"
 #include "editor/editor_feature_profile.h"
@@ -3016,7 +3017,16 @@ void SceneTreeDock::_bind_methods() {
 	ADD_SIGNAL(MethodInfo("node_created", PropertyInfo(Variant::OBJECT, "node", PROPERTY_HINT_RESOURCE_TYPE, "Node")));
 }
 
+SceneTreeDock *SceneTreeDock::singleton = nullptr;
+
+void SceneTreeDock::_update_configuration_warning() { // Static hook assigned to Resource::_update_configuration_warning in the constructor.
+	if (singleton) { // singleton is set in the constructor and cleared in the destructor; do nothing when no dock exists.
+		MessageQueue::get_singleton()->push_callable(callable_mp(singleton->scene_tree, &SceneTreeEditor::update_warning)); // Queued on the MessageQueue rather than called directly.
+	}
+}
+
 SceneTreeDock::SceneTreeDock(EditorNode *p_editor, Node *p_scene_root, EditorSelection *p_editor_selection, EditorData &p_editor_data) {
+	singleton = this;
 	set_name("Scene");
 	editor = p_editor;
 	edited_scene = nullptr;
@@ -3207,9 +3217,12 @@ SceneTreeDock::SceneTreeDock(EditorNode *p_editor, Node *p_scene_root, EditorSel
 	EDITOR_DEF("interface/editors/show_scene_tree_root_selection", true);
 	EDITOR_DEF("interface/editors/derive_script_globals_by_name", true);
 	EDITOR_DEF("_use_favorites_root_selection", false);
+
+	Resource::_update_configuration_warning = _update_configuration_warning;
 }
 
 SceneTreeDock::~SceneTreeDock() {
+	singleton = nullptr;
 	if (!node_clipboard.is_empty()) {
 		_clear_clipboard();
 	}

+ 3 - 0
editor/scene_tree_dock.h

@@ -244,6 +244,9 @@ class SceneTreeDock : public VBoxContainer {
 	bool profile_allow_editing;
 	bool profile_allow_script_editing;
 
+	static SceneTreeDock *singleton;
+	static void _update_configuration_warning();
+
 protected:
 	void _notification(int p_what);
 	static void _bind_methods();

+ 4 - 1
editor/scene_tree_editor.cpp

@@ -665,7 +665,7 @@ void SceneTreeEditor::_notification(int p_what) {
 			get_tree()->connect("tree_process_mode_changed", callable_mp(this, &SceneTreeEditor::_tree_process_mode_changed));
 			get_tree()->connect("node_removed", callable_mp(this, &SceneTreeEditor::_node_removed));
 			get_tree()->connect("node_renamed", callable_mp(this, &SceneTreeEditor::_node_renamed));
-			get_tree()->connect("node_configuration_warning_changed", callable_mp(this, &SceneTreeEditor::_warning_changed));
+			get_tree()->connect("node_configuration_warning_changed", callable_mp(this, &SceneTreeEditor::_warning_changed), varray(), CONNECT_DEFERRED);
 
 			tree->connect("item_collapsed", callable_mp(this, &SceneTreeEditor::_cell_collapsed));
 
@@ -1102,6 +1102,9 @@ void SceneTreeEditor::_rmb_select(const Vector2 &p_pos) {
 	emit_signal("rmb_pressed", tree->get_screen_transform().xform(p_pos));
 }
 
+void SceneTreeEditor::update_warning() { // Public entry point that triggers the same refresh as a warning-changed signal.
+	_warning_changed(nullptr); // No specific node is passed; _warning_changed() only (re)starts update_timer.
+}
 void SceneTreeEditor::_warning_changed(Node *p_for_node) {
 	//should use a timer
 	update_timer->start();

+ 2 - 0
editor/scene_tree_editor.h

@@ -157,6 +157,8 @@ public:
 
 	Tree *get_scene_tree() { return tree; }
 
+	void update_warning();
+
 	SceneTreeEditor(bool p_label = true, bool p_can_rename = false, bool p_can_open_instance = false);
 	~SceneTreeEditor();
 };

+ 165 - 5
scene/3d/gpu_particles_3d.cpp

@@ -181,12 +181,33 @@ void GPUParticles3D::set_draw_order(DrawOrder p_order) {
 	RS::get_singleton()->particles_set_draw_order(particles, RS::ParticlesDrawOrder(p_order));
 }
 
+void GPUParticles3D::set_enable_trail(bool p_enabled) { // Turns particle trails on or off.
+	trail_enabled = p_enabled;
+	RS::get_singleton()->particles_set_trails(particles, trail_enabled, trail_length); // The enabled flag and the length are always sent together.
+	update_configuration_warnings(); // get_configuration_warnings() has trail-specific checks that depend on this flag.
+}
+void GPUParticles3D::set_trail_length(float p_seconds) { // Sets the trail length, expressed in seconds.
+	ERR_FAIL_COND(p_seconds < 0.001); // Lengths below 0.001 s are rejected and the stored value is left untouched.
+	trail_length = p_seconds;
+	RS::get_singleton()->particles_set_trails(particles, trail_enabled, trail_length); // The enabled flag and the length are always sent together.
+}
+
+bool GPUParticles3D::is_trail_enabled() const { // Getter paired with set_enable_trail().
+	return trail_enabled;
+}
+float GPUParticles3D::get_trail_length() const { // Returns the trail length in seconds as last accepted by set_trail_length().
+	return trail_length;
+}
+
 GPUParticles3D::DrawOrder GPUParticles3D::get_draw_order() const { // Returns the stored draw_order member.
 	return draw_order;
 }
 
 void GPUParticles3D::set_draw_passes(int p_count) {
 	ERR_FAIL_COND(p_count < 1);
+	for (int i = p_count; i < draw_passes.size(); i++) {
+		set_draw_pass_mesh(i, Ref<Mesh>());
+	}
 	draw_passes.resize(p_count);
 	RS::get_singleton()->particles_set_draw_passes(particles, p_count);
 	notify_property_list_changed();
@@ -199,8 +220,16 @@ int GPUParticles3D::get_draw_passes() const {
 void GPUParticles3D::set_draw_pass_mesh(int p_pass, const Ref<Mesh> &p_mesh) {
 	ERR_FAIL_INDEX(p_pass, draw_passes.size());
 
+	if (Engine::get_singleton()->is_editor_hint() && draw_passes.write[p_pass].is_valid()) {
+		draw_passes.write[p_pass]->disconnect("changed", callable_mp((Node *)this, &Node::update_configuration_warnings));
+	}
+
 	draw_passes.write[p_pass] = p_mesh;
 
+	if (Engine::get_singleton()->is_editor_hint() && draw_passes.write[p_pass].is_valid()) {
+		draw_passes.write[p_pass]->connect("changed", callable_mp((Node *)this, &Node::update_configuration_warnings), varray(), CONNECT_DEFERRED);
+	}
+
 	RID mesh_rid;
 	if (p_mesh.is_valid()) {
 		mesh_rid = p_mesh->get_rid();
@@ -208,6 +237,7 @@ void GPUParticles3D::set_draw_pass_mesh(int p_pass, const Ref<Mesh> &p_mesh) {
 
 	RS::get_singleton()->particles_set_draw_pass_mesh(particles, p_pass, mesh_rid);
 
+	_skinning_changed();
 	update_configuration_warnings();
 }
 
@@ -235,6 +265,15 @@ bool GPUParticles3D::get_fractional_delta() const {
 	return fractional_delta;
 }
 
+void GPUParticles3D::set_interpolate(bool p_enable) { // Stores the interpolate flag and forwards it to the RenderingServer.
+	interpolate = p_enable;
+	RS::get_singleton()->particles_set_interpolate(particles, p_enable);
+}
+
+bool GPUParticles3D::get_interpolate() const { // Getter paired with set_interpolate().
+	return interpolate;
+}
+
 TypedArray<String> GPUParticles3D::get_configuration_warnings() const {
 	TypedArray<String> warnings = Node::get_configuration_warnings();
 
@@ -250,7 +289,7 @@ TypedArray<String> GPUParticles3D::get_configuration_warnings() const {
 			meshes_found = true;
 			for (int j = 0; j < draw_passes[i]->get_surface_count(); j++) {
 				anim_material_found = Object::cast_to<ShaderMaterial>(draw_passes[i]->surface_get_material(j).ptr()) != nullptr;
-				StandardMaterial3D *spat = Object::cast_to<StandardMaterial3D>(draw_passes[i]->surface_get_material(j).ptr());
+				BaseMaterial3D *spat = Object::cast_to<BaseMaterial3D>(draw_passes[i]->surface_get_material(j).ptr());
 				anim_material_found = anim_material_found || (spat && spat->get_billboard_mode() == StandardMaterial3D::BILLBOARD_PARTICLES);
 			}
 			if (anim_material_found) {
@@ -260,8 +299,10 @@ TypedArray<String> GPUParticles3D::get_configuration_warnings() const {
 	}
 
 	anim_material_found = anim_material_found || Object::cast_to<ShaderMaterial>(get_material_override().ptr()) != nullptr;
-	StandardMaterial3D *spat = Object::cast_to<StandardMaterial3D>(get_material_override().ptr());
-	anim_material_found = anim_material_found || (spat && spat->get_billboard_mode() == StandardMaterial3D::BILLBOARD_PARTICLES);
+	{
+		BaseMaterial3D *spat = Object::cast_to<BaseMaterial3D>(get_material_override().ptr());
+		anim_material_found = anim_material_found || (spat && spat->get_billboard_mode() == BaseMaterial3D::BILLBOARD_PARTICLES);
+	}
 
 	if (!meshes_found) {
 		warnings.push_back(TTR("Nothing is visible because meshes have not been assigned to draw passes."));
@@ -274,7 +315,57 @@ TypedArray<String> GPUParticles3D::get_configuration_warnings() const {
 		if (!anim_material_found && process &&
 				(process->get_param(ParticlesMaterial::PARAM_ANIM_SPEED) != 0.0 || process->get_param(ParticlesMaterial::PARAM_ANIM_OFFSET) != 0.0 ||
 						process->get_param_texture(ParticlesMaterial::PARAM_ANIM_SPEED).is_valid() || process->get_param_texture(ParticlesMaterial::PARAM_ANIM_OFFSET).is_valid())) {
-			warnings.push_back(TTR("Particles animation requires the usage of a StandardMaterial3D whose Billboard Mode is set to \"Particle Billboard\"."));
+			warnings.push_back(TTR("Particles animation requires the usage of a BaseMaterial3D whose Billboard Mode is set to \"Particle Billboard\"."));
+		}
+	}
+
+	if (trail_enabled) {
+		int dp_count = 0;
+		bool missing_trails = false;
+		bool no_materials = false;
+
+		for (int i = 0; i < draw_passes.size(); i++) {
+			Ref<Mesh> draw_pass = draw_passes[i];
+			if (draw_pass.is_valid() && draw_pass->get_builtin_bind_pose_count() > 0) {
+				dp_count++;
+			}
+
+			if (draw_pass.is_valid()) {
+				int mats_found = 0;
+				for (int j = 0; j < draw_passes[i]->get_surface_count(); j++) {
+					BaseMaterial3D *spat = Object::cast_to<BaseMaterial3D>(draw_passes[i]->surface_get_material(j).ptr());
+					if (spat) {
+						mats_found++;
+					}
+					if (spat && !spat->get_flag(BaseMaterial3D::FLAG_PARTICLE_TRAILS_MODE)) {
+						missing_trails = true;
+					}
+				}
+
+				if (mats_found != draw_passes[i]->get_surface_count()) {
+					no_materials = true;
+				}
+			}
+		}
+
+		BaseMaterial3D *spat = Object::cast_to<BaseMaterial3D>(get_material_override().ptr());
+		if (spat) {
+			no_materials = false;
+		}
+		if (spat && !spat->get_flag(BaseMaterial3D::FLAG_PARTICLE_TRAILS_MODE)) {
+			missing_trails = true;
+		}
+
+		if (dp_count && skin.is_valid()) {
+			warnings.push_back(TTR("Using Trail meshes with a skin causes Skin to override Trail poses. Suggest removing the Skin."));
+		} else if (dp_count == 0 && skin.is_null()) {
+			warnings.push_back(TTR("Trails active, but neither Trail meshes or a Skin were found."));
+		} else if (dp_count > 1) {
+			warnings.push_back(TTR("Only one Trail mesh is supported. If you want to use more than a single mesh, a Skin is needed (see documentation)."));
+		}
+
+		if ((dp_count || !skin.is_null()) && (missing_trails || no_materials)) {
+			warnings.push_back(TTR("Trails enabled, but one or more mesh materials are either missing or not set for trails rendering."));
 		}
 	}
 
@@ -366,6 +457,47 @@ void GPUParticles3D::_notification(int p_what) {
 	}
 }
 
+// Rebuilds the list of trail bind poses and sends it to the RenderingServer.
+// An assigned Skin takes priority; otherwise the poses are taken from the
+// first draw pass mesh that reports built-in bind poses. When neither source
+// exists an empty list is sent. Configuration warnings are refreshed afterwards.
+void GPUParticles3D::_skinning_changed() {
+	Vector<Transform> xforms;
+	if (skin.is_valid()) {
+		xforms.resize(skin->get_bind_count());
+		for (int i = 0; i < skin->get_bind_count(); i++) {
+			xforms.write[i] = skin->get_bind_pose(i);
+		}
+	} else {
+		for (int i = 0; i < draw_passes.size(); i++) {
+			Ref<Mesh> draw_pass = draw_passes[i];
+			if (draw_pass.is_valid() && draw_pass->get_builtin_bind_pose_count() > 0) {
+				xforms.resize(draw_pass->get_builtin_bind_pose_count());
+				for (int j = 0; j < draw_pass->get_builtin_bind_pose_count(); j++) {
+					// Index by pose (j), not by draw pass (i): using i filled a single slot
+					// and could write past the end of xforms.
+					xforms.write[j] = draw_pass->get_builtin_bind_pose(j);
+				}
+				// Only the first mesh with built-in bind poses is used.
+				break;
+			}
+		}
+	}
+
+	RS::get_singleton()->particles_set_trail_bind_poses(particles, xforms);
+	update_configuration_warnings();
+}
+
+void GPUParticles3D::set_skin(const Ref<Skin> &p_skin) { // Assigns the Skin used as the source of trail bind poses.
+	skin = p_skin;
+	_skinning_changed(); // Re-sends the bind poses and refreshes the configuration warnings.
+}
+Ref<Skin> GPUParticles3D::get_skin() const { // Getter paired with set_skin(); the reference may be null.
+	return skin;
+}
+
+void GPUParticles3D::set_transform_align(TransformAlign p_align) { // Stores the alignment mode and forwards it to the RenderingServer.
+	ERR_FAIL_INDEX(uint32_t(p_align), 4); // NOTE(review): 4 matches the number of TransformAlign entries; presumably rejects anything else - confirm against ERR_FAIL_INDEX.
+	transform_align = p_align;
+	RS::get_singleton()->particles_set_transform_align(particles, RS::ParticlesTransformAlign(transform_align)); // Cast directly to the RS enum type.
+}
+GPUParticles3D::TransformAlign GPUParticles3D::get_transform_align() const { // Getter paired with set_transform_align().
+	return transform_align;
+}
+
 void GPUParticles3D::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("set_emitting", "emitting"), &GPUParticles3D::set_emitting);
 	ClassDB::bind_method(D_METHOD("set_amount", "amount"), &GPUParticles3D::set_amount);
@@ -378,6 +510,7 @@ void GPUParticles3D::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("set_use_local_coordinates", "enable"), &GPUParticles3D::set_use_local_coordinates);
 	ClassDB::bind_method(D_METHOD("set_fixed_fps", "fps"), &GPUParticles3D::set_fixed_fps);
 	ClassDB::bind_method(D_METHOD("set_fractional_delta", "enable"), &GPUParticles3D::set_fractional_delta);
+	ClassDB::bind_method(D_METHOD("set_interpolate", "enable"), &GPUParticles3D::set_interpolate);
 	ClassDB::bind_method(D_METHOD("set_process_material", "material"), &GPUParticles3D::set_process_material);
 	ClassDB::bind_method(D_METHOD("set_speed_scale", "scale"), &GPUParticles3D::set_speed_scale);
 	ClassDB::bind_method(D_METHOD("set_collision_base_size", "size"), &GPUParticles3D::set_collision_base_size);
@@ -393,6 +526,7 @@ void GPUParticles3D::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("get_use_local_coordinates"), &GPUParticles3D::get_use_local_coordinates);
 	ClassDB::bind_method(D_METHOD("get_fixed_fps"), &GPUParticles3D::get_fixed_fps);
 	ClassDB::bind_method(D_METHOD("get_fractional_delta"), &GPUParticles3D::get_fractional_delta);
+	ClassDB::bind_method(D_METHOD("get_interpolate"), &GPUParticles3D::get_interpolate);
 	ClassDB::bind_method(D_METHOD("get_process_material"), &GPUParticles3D::get_process_material);
 	ClassDB::bind_method(D_METHOD("get_speed_scale"), &GPUParticles3D::get_speed_scale);
 	ClassDB::bind_method(D_METHOD("get_collision_base_size"), &GPUParticles3D::get_collision_base_size);
@@ -407,6 +541,9 @@ void GPUParticles3D::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("get_draw_passes"), &GPUParticles3D::get_draw_passes);
 	ClassDB::bind_method(D_METHOD("get_draw_pass_mesh", "pass"), &GPUParticles3D::get_draw_pass_mesh);
 
+	ClassDB::bind_method(D_METHOD("set_skin", "skin"), &GPUParticles3D::set_skin);
+	ClassDB::bind_method(D_METHOD("get_skin"), &GPUParticles3D::get_skin);
+
 	ClassDB::bind_method(D_METHOD("restart"), &GPUParticles3D::restart);
 	ClassDB::bind_method(D_METHOD("capture_aabb"), &GPUParticles3D::capture_aabb);
 
@@ -415,6 +552,15 @@ void GPUParticles3D::_bind_methods() {
 
 	ClassDB::bind_method(D_METHOD("emit_particle", "xform", "velocity", "color", "custom", "flags"), &GPUParticles3D::emit_particle);
 
+	ClassDB::bind_method(D_METHOD("set_enable_trail", "enabled"), &GPUParticles3D::set_enable_trail);
+	ClassDB::bind_method(D_METHOD("set_trail_length", "secs"), &GPUParticles3D::set_trail_length);
+
+	ClassDB::bind_method(D_METHOD("is_trail_enabled"), &GPUParticles3D::is_trail_enabled);
+	ClassDB::bind_method(D_METHOD("get_trail_length"), &GPUParticles3D::get_trail_length);
+
+	ClassDB::bind_method(D_METHOD("set_transform_align", "align"), &GPUParticles3D::set_transform_align);
+	ClassDB::bind_method(D_METHOD("get_transform_align"), &GPUParticles3D::get_transform_align);
+
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "emitting"), "set_emitting", "is_emitting");
 	ADD_PROPERTY(PropertyInfo(Variant::INT, "amount", PROPERTY_HINT_EXP_RANGE, "1,1000000,1"), "set_amount", "get_amount");
 	ADD_PROPERTY(PropertyInfo(Variant::NODE_PATH, "sub_emitter", PROPERTY_HINT_NODE_PATH_VALID_TYPES, "GPUParticles3D"), "set_sub_emitter", "get_sub_emitter");
@@ -426,6 +572,7 @@ void GPUParticles3D::_bind_methods() {
 	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "explosiveness", PROPERTY_HINT_RANGE, "0,1,0.01"), "set_explosiveness_ratio", "get_explosiveness_ratio");
 	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "randomness", PROPERTY_HINT_RANGE, "0,1,0.01"), "set_randomness_ratio", "get_randomness_ratio");
 	ADD_PROPERTY(PropertyInfo(Variant::INT, "fixed_fps", PROPERTY_HINT_RANGE, "0,1000,1"), "set_fixed_fps", "get_fixed_fps");
+	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "interpolate"), "set_interpolate", "get_interpolate");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "fract_delta"), "set_fractional_delta", "get_fractional_delta");
 	ADD_GROUP("Collision", "collision_");
 	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "collision_base_size", PROPERTY_HINT_RANGE, "0,128,0.01,or_greater"), "set_collision_base_size", "get_collision_base_size");
@@ -433,6 +580,10 @@ void GPUParticles3D::_bind_methods() {
 	ADD_PROPERTY(PropertyInfo(Variant::AABB, "visibility_aabb"), "set_visibility_aabb", "get_visibility_aabb");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "local_coords"), "set_use_local_coordinates", "get_use_local_coordinates");
 	ADD_PROPERTY(PropertyInfo(Variant::INT, "draw_order", PROPERTY_HINT_ENUM, "Index,Lifetime,View Depth"), "set_draw_order", "get_draw_order");
+	ADD_PROPERTY(PropertyInfo(Variant::INT, "transform_align", PROPERTY_HINT_ENUM, "Disabled,ZBillboard,YToVelocity,ZBillboardYToVelocity"), "set_transform_align", "get_transform_align");
+	ADD_GROUP("Trails", "trail_");
+	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "trail_enabled"), "set_enable_trail", "is_trail_enabled");
+	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "trail_length_secs", PROPERTY_HINT_RANGE, "0.01,4,0.01"), "set_trail_length", "get_trail_length");
 	ADD_GROUP("Process Material", "");
 	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "process_material", PROPERTY_HINT_RESOURCE_TYPE, "ShaderMaterial,ParticlesMaterial"), "set_process_material", "get_process_material");
 	ADD_GROUP("Draw Passes", "draw_");
@@ -440,6 +591,7 @@ void GPUParticles3D::_bind_methods() {
 	for (int i = 0; i < MAX_DRAW_PASSES; i++) {
 		ADD_PROPERTYI(PropertyInfo(Variant::OBJECT, "draw_pass_" + itos(i + 1), PROPERTY_HINT_RESOURCE_TYPE, "Mesh"), "set_draw_pass_mesh", "get_draw_pass_mesh", i);
 	}
+	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "draw_skin", PROPERTY_HINT_RESOURCE_TYPE, "Skin"), "set_skin", "get_skin");
 
 	BIND_ENUM_CONSTANT(DRAW_ORDER_INDEX);
 	BIND_ENUM_CONSTANT(DRAW_ORDER_LIFETIME);
@@ -452,6 +604,11 @@ void GPUParticles3D::_bind_methods() {
 	BIND_ENUM_CONSTANT(EMIT_FLAG_CUSTOM);
 
 	BIND_CONSTANT(MAX_DRAW_PASSES);
+
+	BIND_CONSTANT(TRANSFORM_ALIGN_DISABLED);
+	BIND_CONSTANT(TRANSFORM_ALIGN_Z_BILLBOARD);
+	BIND_CONSTANT(TRANSFORM_ALIGN_Y_TO_VELOCITY);
+	BIND_CONSTANT(TRANSFORM_ALIGN_Z_BILLBOARD_Y_TO_VELOCITY);
 }
 
 GPUParticles3D::GPUParticles3D() {
@@ -462,17 +619,20 @@ GPUParticles3D::GPUParticles3D() {
 	set_one_shot(false);
 	set_amount(8);
 	set_lifetime(1);
-	set_fixed_fps(0);
+	set_fixed_fps(30);
 	set_fractional_delta(true);
+	set_interpolate(true);
 	set_pre_process_time(0);
 	set_explosiveness_ratio(0);
 	set_randomness_ratio(0);
+	set_trail_length(0.3);
 	set_visibility_aabb(AABB(Vector3(-4, -4, -4), Vector3(8, 8, 8)));
 	set_use_local_coordinates(true);
 	set_draw_passes(1);
 	set_draw_order(DRAW_ORDER_INDEX);
 	set_speed_scale(1);
 	set_collision_base_size(0.01);
+	set_transform_align(TRANSFORM_ALIGN_DISABLED);
 }
 
 GPUParticles3D::~GPUParticles3D() {

+ 31 - 0
scene/3d/gpu_particles_3d.h

@@ -34,6 +34,7 @@
 #include "core/templates/rid.h"
 #include "scene/3d/visual_instance_3d.h"
 #include "scene/resources/material.h"
+#include "scene/resources/skin.h"
 
 class GPUParticles3D : public GeometryInstance3D {
 private:
@@ -46,6 +47,13 @@ public:
 		DRAW_ORDER_VIEW_DEPTH,
 	};
 
+	enum TransformAlign { // Alignment mode; set_transform_align() casts it to RS::ParticlesTransformAlign.
+		TRANSFORM_ALIGN_DISABLED, // Default value of the transform_align member.
+		TRANSFORM_ALIGN_Z_BILLBOARD,
+		TRANSFORM_ALIGN_Y_TO_VELOCITY,
+		TRANSFORM_ALIGN_Z_BILLBOARD_Y_TO_VELOCITY // Last entry; set_transform_align() checks values against a count of 4.
+	};
+
 	enum {
 		MAX_DRAW_PASSES = 4
 	};
@@ -64,17 +72,26 @@ private:
 	bool local_coords;
 	int fixed_fps;
 	bool fractional_delta;
+	bool interpolate = true;
 	NodePath sub_emitter;
 	float collision_base_size;
 
+	bool trail_enabled = false;
+	float trail_length = 0.3;
+
+	TransformAlign transform_align = TRANSFORM_ALIGN_DISABLED;
+
 	Ref<Material> process_material;
 
 	DrawOrder draw_order;
 
 	Vector<Ref<Mesh>> draw_passes;
+	Ref<Skin> skin;
 
 	void _attach_sub_emitter();
 
+	void _skinning_changed();
+
 protected:
 	static void _bind_methods();
 	void _notification(int p_what);
@@ -96,6 +113,8 @@ public:
 	void set_process_material(const Ref<Material> &p_material);
 	void set_speed_scale(float p_scale);
 	void set_collision_base_size(float p_ratio);
+	void set_enable_trail(bool p_enabled);
+	void set_trail_length(float p_seconds);
 
 	bool is_emitting() const;
 	int get_amount() const;
@@ -109,6 +128,8 @@ public:
 	Ref<Material> get_process_material() const;
 	float get_speed_scale() const;
 	float get_collision_base_size() const;
+	bool is_trail_enabled() const;
+	float get_trail_length() const;
 
 	void set_fixed_fps(int p_count);
 	int get_fixed_fps() const;
@@ -116,6 +137,9 @@ public:
 	void set_fractional_delta(bool p_enable);
 	bool get_fractional_delta() const;
 
+	void set_interpolate(bool p_enable);
+	bool get_interpolate() const;
+
 	void set_draw_order(DrawOrder p_order);
 	DrawOrder get_draw_order() const;
 
@@ -130,6 +154,12 @@ public:
 	void set_sub_emitter(const NodePath &p_path);
 	NodePath get_sub_emitter() const;
 
+	void set_skin(const Ref<Skin> &p_skin);
+	Ref<Skin> get_skin() const;
+
+	void set_transform_align(TransformAlign p_align);
+	TransformAlign get_transform_align() const;
+
 	void restart();
 
 	enum EmitFlags {
@@ -148,6 +178,7 @@ public:
 };
 
 VARIANT_ENUM_CAST(GPUParticles3D::DrawOrder)
+VARIANT_ENUM_CAST(GPUParticles3D::TransformAlign)
 VARIANT_ENUM_CAST(GPUParticles3D::EmitFlags)
 
 #endif // PARTICLES_H

+ 2 - 0
scene/register_scene_types.cpp

@@ -689,6 +689,8 @@ void register_scene_types() {
 	ClassDB::register_class<PrismMesh>();
 	ClassDB::register_class<QuadMesh>();
 	ClassDB::register_class<SphereMesh>();
+	ClassDB::register_class<TubeTrailMesh>();
+	ClassDB::register_class<RibbonTrailMesh>();
 	ClassDB::register_class<PointMesh>();
 	ClassDB::register_virtual_class<Material>();
 	ClassDB::register_virtual_class<BaseMaterial3D>();

+ 2 - 2
scene/resources/curve.cpp

@@ -445,10 +445,10 @@ void Curve::set_bake_resolution(int p_resolution) {
 	_baked_cache_dirty = true;
 }
 
-real_t Curve::interpolate_baked(real_t offset) {
+real_t Curve::interpolate_baked(real_t offset) const {
 	if (_baked_cache_dirty) {
 		// Last-second bake if not done already
-		bake();
+		const_cast<Curve *>(this)->bake();
 	}
 
 	// Special cases if the cache is too small

+ 1 - 1
scene/resources/curve.h

@@ -122,7 +122,7 @@ public:
 	void bake();
 	int get_bake_resolution() const { return _bake_resolution; }
 	void set_bake_resolution(int p_resolution);
-	real_t interpolate_baked(real_t offset);
+	real_t interpolate_baked(real_t offset) const;
 
 	void ensure_default_setup(float p_min, float p_max);
 

+ 10 - 0
scene/resources/material.cpp

@@ -543,6 +543,9 @@ void BaseMaterial3D::_update_shader() {
 	if (flags[FLAG_DISABLE_DEPTH_TEST]) {
 		code += ",depth_test_disabled";
 	}
+	if (flags[FLAG_PARTICLE_TRAILS_MODE]) {
+		code += ",particle_trails";
+	}
 	if (shading_mode == SHADING_MODE_PER_VERTEX) {
 		code += ",vertex_lighting";
 	}
@@ -1597,6 +1600,9 @@ void BaseMaterial3D::set_flag(Flags p_flag, bool p_enabled) {
 	if (p_flag == FLAG_USE_SHADOW_TO_OPACITY || p_flag == FLAG_USE_TEXTURE_REPEAT || p_flag == FLAG_SUBSURFACE_MODE_SKIN || p_flag == FLAG_USE_POINT_SIZE) {
 		notify_property_list_changed();
 	}
+	if (p_flag == FLAG_PARTICLE_TRAILS_MODE) {
+		update_configuration_warning();
+	}
 	_queue_shader_change();
 }
 
@@ -2177,6 +2183,8 @@ Shader::Mode BaseMaterial3D::get_shader_mode() const {
 }
 
 void BaseMaterial3D::_bind_methods() {
+	static_assert(sizeof(MaterialKey) == 16, "MaterialKey should be 16 bytes");
+
 	ClassDB::bind_method(D_METHOD("set_albedo", "albedo"), &BaseMaterial3D::set_albedo);
 	ClassDB::bind_method(D_METHOD("get_albedo"), &BaseMaterial3D::get_albedo);
 
@@ -2534,6 +2542,7 @@ void BaseMaterial3D::_bind_methods() {
 	ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "fixed_size"), "set_flag", "get_flag", FLAG_FIXED_SIZE);
 	ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "use_point_size"), "set_flag", "get_flag", FLAG_USE_POINT_SIZE);
 	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "point_size", PROPERTY_HINT_RANGE, "0.1,128,0.1"), "set_point_size", "get_point_size");
+	ADD_PROPERTYI(PropertyInfo(Variant::BOOL, "use_particle_trails"), "set_flag", "get_flag", FLAG_PARTICLE_TRAILS_MODE);
 	ADD_GROUP("Proximity Fade", "proximity_fade_");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "proximity_fade_enable"), "set_proximity_fade", "is_proximity_fade_enabled");
 	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "proximity_fade_distance", PROPERTY_HINT_RANGE, "0,4096,0.01"), "set_proximity_fade_distance", "get_proximity_fade_distance");
@@ -2635,6 +2644,7 @@ void BaseMaterial3D::_bind_methods() {
 	BIND_ENUM_CONSTANT(FLAG_USE_TEXTURE_REPEAT);
 	BIND_ENUM_CONSTANT(FLAG_INVERT_HEIGHTMAP);
 	BIND_ENUM_CONSTANT(FLAG_SUBSURFACE_MODE_SKIN);
+	BIND_ENUM_CONSTANT(FLAG_PARTICLE_TRAILS_MODE);
 	BIND_ENUM_CONSTANT(FLAG_MAX);
 
 	BIND_ENUM_CONSTANT(DIFFUSE_BURLEY);

+ 5 - 5
scene/resources/material.h

@@ -235,6 +235,7 @@ public:
 		FLAG_USE_TEXTURE_REPEAT,
 		FLAG_INVERT_HEIGHTMAP,
 		FLAG_SUBSURFACE_MODE_SKIN,
+		FLAG_PARTICLE_TRAILS_MODE,
 		FLAG_MAX
 	};
 
@@ -305,16 +306,15 @@ private:
 		uint64_t roughness_channel : get_num_bits(TEXTURE_CHANNEL_MAX - 1);
 		uint64_t emission_op : get_num_bits(EMISSION_OP_MAX - 1);
 		uint64_t distance_fade : get_num_bits(DISTANCE_FADE_MAX - 1);
-
-		// flag bitfield
-		uint64_t feature_mask : FEATURE_MAX - 1;
-		uint64_t flags : FLAG_MAX - 1;
-
 		// booleans
 		uint64_t deep_parallax : 1;
 		uint64_t grow : 1;
 		uint64_t proximity_fade : 1;
 
+		// flag bitfield
+		uint32_t feature_mask;
+		uint32_t flags;
+
 		MaterialKey() {
 			memset(this, 0, sizeof(MaterialKey));
 		}

+ 7 - 0
scene/resources/mesh.cpp

@@ -579,6 +579,13 @@ Vector<Ref<Shape3D>> Mesh::convex_decompose() const {
 	return ret;
 }
 
+int Mesh::get_builtin_bind_pose_count() const { // Declared virtual in mesh.h; this base implementation reports no built-in bind poses.
+	return 0;
+}
+Transform Mesh::get_builtin_bind_pose(int p_index) const { // Declared virtual in mesh.h; this base implementation ignores p_index.
+	return Transform(); // Default-constructed Transform.
+}
+
 Mesh::Mesh() { // Empty constructor body.
 }
 

+ 3 - 0
scene/resources/mesh.h

@@ -165,6 +165,9 @@ public:
 
 	Vector<Ref<Shape3D>> convex_decompose() const;
 
+	virtual int get_builtin_bind_pose_count() const;
+	virtual Transform get_builtin_bind_pose(int p_index) const;
+
 	Mesh();
 };
 

+ 551 - 0
scene/resources/primitive_meshes.cpp

@@ -1538,3 +1538,554 @@ void PointMesh::_create_mesh_array(Array &p_arr) const {
 PointMesh::PointMesh() {
 	primitive_type = PRIMITIVE_POINTS;
 }
+// TUBE TRAIL
+
+void TubeTrailMesh::set_radius(const float p_radius) { // Stores the tube radius as given; no range check is applied here.
+	radius = p_radius;
+	_request_update(); // Defined outside this view; presumably schedules regeneration of the mesh arrays.
+}
+float TubeTrailMesh::get_radius() const { // Returns the radius last stored by set_radius().
+	return radius;
+}
+
+void TubeTrailMesh::set_radial_steps(const int p_radial_steps) { // Sets how many steps each ring is divided into around the tube.
+	ERR_FAIL_COND(p_radial_steps < 3 || p_radial_steps > 128); // Values outside 3..128 are rejected and the stored value is left untouched.
+	radial_steps = p_radial_steps;
+	_request_update();
+}
+int TubeTrailMesh::get_radial_steps() const { // Returns the stored radial step count.
+	return radial_steps;
+}
+
+void TubeTrailMesh::set_sections(const int p_sections) { // Sets the number of sections; the mesh exposes sections + 1 bind poses.
+	ERR_FAIL_COND(p_sections < 2 || p_sections > 128); // Values outside 2..128 are rejected and the stored value is left untouched.
+	sections = p_sections;
+	_request_update();
+}
+int TubeTrailMesh::get_sections() const { // Returns the stored section count.
+	return sections;
+}
+
+void TubeTrailMesh::set_section_length(float p_section_length) { // Stores the length of one section along Y; no range check is applied here.
+	section_length = p_section_length;
+	_request_update();
+}
+float TubeTrailMesh::get_section_length() const { // Returns the stored per-section length.
+	return section_length;
+}
+
+void TubeTrailMesh::set_section_rings(const int p_section_rings) { // Sets how many vertex rings subdivide each section.
+	ERR_FAIL_COND(p_section_rings < 1 || p_section_rings > 1024); // Values outside 1..1024 are rejected and the stored value is left untouched.
+	section_rings = p_section_rings;
+	_request_update();
+}
+int TubeTrailMesh::get_section_rings() const { // Returns the stored rings-per-section count.
+	return section_rings;
+}
+
+void TubeTrailMesh::set_curve(const Ref<Curve> &p_curve) {
+	// Assigning the curve that is already in use is a no-op: the signal
+	// connection is left as it is and no update is requested.
+	const bool same_curve = (curve == p_curve);
+	if (!same_curve) {
+		// Stop listening to the outgoing curve before dropping the reference.
+		if (curve.is_valid()) {
+			curve->disconnect("changed", callable_mp(this, &TubeTrailMesh::_curve_changed));
+		}
+
+		curve = p_curve;
+
+		// Follow edits made to the new curve so _curve_changed() runs on them.
+		if (curve.is_valid()) {
+			curve->connect("changed", callable_mp(this, &TubeTrailMesh::_curve_changed));
+		}
+
+		_request_update();
+	}
+}
+Ref<Curve> TubeTrailMesh::get_curve() const { // Returns the curve reference set by set_curve(); may be an invalid (empty) Ref.
+	return curve;
+}
+
+void TubeTrailMesh::_curve_changed() { // Connected to the curve's "changed" signal by set_curve().
+	_request_update();
+}
+int TubeTrailMesh::get_builtin_bind_pose_count() const { // One bind pose per section boundary, both ends included.
+	return sections + 1;
+}
+
+Transform TubeTrailMesh::get_builtin_bind_pose(int p_index) const {
+	// Full extent of the tube along Y; the generated mesh is centered on the origin.
+	const float total_length = section_length * sections;
+
+	// Bone p_index rests at (total_length / 2 - section_length * p_index) on
+	// the Y axis. A bind pose is the inverse of that rest transform, which for
+	// a pure translation is simply the negated offset.
+	Transform bind_pose;
+	bind_pose.origin.y = -(total_length / 2.0 - section_length * float(p_index));
+
+	return bind_pose;
+}
+
+void TubeTrailMesh::_create_mesh_array(Array &p_arr) const {
+	// Fills p_arr with the surface arrays of a tube that runs along the Y axis
+	// and is centered on the origin. The side wall is made of
+	// (section_rings * sections + 1) rings of (radial_steps + 1) vertices;
+	// a top and a bottom cap are appended when the tube is not closed there.
+	// Every vertex carries four bone slots, of which at most two are used.
+	PackedVector3Array points;
+	PackedVector3Array normals;
+	PackedFloat32Array tangents;
+	PackedVector2Array uvs;
+	PackedInt32Array bone_indices;
+	PackedFloat32Array bone_weights;
+	PackedInt32Array indices;
+
+	// Running count of vertices emitted so far.
+	int point = 0;
+
+#define ADD_TANGENT(m_x, m_y, m_z, m_d) \
+	tangents.push_back(m_x);            \
+	tangents.push_back(m_y);            \
+	tangents.push_back(m_z);            \
+	tangents.push_back(m_d);
+
+	int thisrow = 0;
+	int prevrow = 0;
+
+	int total_rings = section_rings * sections;
+	float depth = section_length * sections;
+
+	// The curve only scales the radius when it exists and has at least one point.
+	const bool use_curve = curve.is_valid() && curve->get_point_count() > 0;
+
+	for (int j = 0; j <= total_rings; j++) {
+		// v runs from 0 at the top ring to 1 at the bottom ring.
+		float v = j;
+		v /= total_rings;
+
+		float y = depth * v;
+		y = (depth * 0.5) - y;
+
+		// Each ring is skinned to the bone at the start of its section and to
+		// the following bone; blend is the weight of the former.
+		int bone = j / section_rings;
+		float blend = 1.0 - float(j % section_rings) / float(section_rings);
+
+		// The radius depends on the ring only, so the curve is sampled once
+		// per ring instead of once per vertex.
+		float r = radius;
+		if (use_curve) {
+			r *= curve->interpolate_baked(v);
+		}
+
+		for (int i = 0; i <= radial_steps; i++) {
+			float u = i;
+			u /= radial_steps;
+
+			float x = sin(u * Math_TAU);
+			float z = cos(u * Math_TAU);
+
+			Vector3 p = Vector3(x * r, y, z * r);
+			points.push_back(p);
+			normals.push_back(Vector3(x, 0, z));
+			ADD_TANGENT(z, 0.0, -x, 1.0)
+			// The side wall uses the upper half of the UV space; the caps use the lower half.
+			uvs.push_back(Vector2(u, v * 0.5));
+			point++;
+			{
+				bone_indices.push_back(bone);
+				// Clamp so the last ring does not reference a bone past the final one.
+				bone_indices.push_back(MIN(sections, bone + 1));
+				bone_indices.push_back(0);
+				bone_indices.push_back(0);
+
+				bone_weights.push_back(blend);
+				bone_weights.push_back(1.0 - blend);
+				bone_weights.push_back(0);
+				bone_weights.push_back(0);
+			}
+
+			// Two triangles per quad between the previous ring and this one.
+			if (i > 0 && j > 0) {
+				indices.push_back(prevrow + i - 1);
+				indices.push_back(prevrow + i);
+				indices.push_back(thisrow + i - 1);
+
+				indices.push_back(prevrow + i);
+				indices.push_back(thisrow + i);
+				indices.push_back(thisrow + i - 1);
+			}
+		}
+
+		prevrow = thisrow;
+		thisrow = point;
+	}
+
+	// add top
+	float scale_pos = 1.0;
+	if (use_curve) {
+		scale_pos = curve->interpolate_baked(0);
+	}
+
+	// A cap is only needed when the tube does not shrink to (almost) nothing at this end.
+	if (scale_pos > CMP_EPSILON) {
+		float y = depth * 0.5;
+
+		// Center vertex of the cap fan; thisrow holds its index.
+		thisrow = point;
+		points.push_back(Vector3(0.0, y, 0));
+		normals.push_back(Vector3(0.0, 1.0, 0.0));
+		ADD_TANGENT(1.0, 0.0, 0.0, 1.0)
+		uvs.push_back(Vector2(0.25, 0.75));
+		point++;
+
+		// The whole top cap is bound to bone 0.
+		bone_indices.push_back(0);
+		bone_indices.push_back(0);
+		bone_indices.push_back(0);
+		bone_indices.push_back(0);
+
+		bone_weights.push_back(1.0);
+		bone_weights.push_back(0);
+		bone_weights.push_back(0);
+		bone_weights.push_back(0);
+
+		float rm = radius * scale_pos;
+
+		for (int i = 0; i <= radial_steps; i++) {
+			float r = i;
+			r /= radial_steps;
+
+			float x = sin(r * Math_TAU);
+			float z = cos(r * Math_TAU);
+
+			float u = ((x + 1.0) * 0.25);
+			float v = 0.5 + ((z + 1.0) * 0.25);
+
+			Vector3 p = Vector3(x * rm, y, z * rm);
+			points.push_back(p);
+			normals.push_back(Vector3(0.0, 1.0, 0.0));
+			ADD_TANGENT(1.0, 0.0, 0.0, 1.0)
+			uvs.push_back(Vector2(u, v));
+			point++;
+
+			bone_indices.push_back(0);
+			bone_indices.push_back(0);
+			bone_indices.push_back(0);
+			bone_indices.push_back(0);
+
+			bone_weights.push_back(1.0);
+			bone_weights.push_back(0);
+			bone_weights.push_back(0);
+			bone_weights.push_back(0);
+
+			if (i > 0) {
+				indices.push_back(thisrow);
+				indices.push_back(point - 1);
+				indices.push_back(point - 2);
+			}
+		}
+	}
+
+	float scale_neg = 1.0;
+	if (use_curve) {
+		scale_neg = curve->interpolate_baked(1.0);
+	}
+
+	// add bottom
+	if (scale_neg > CMP_EPSILON) {
+		float y = depth * -0.5;
+
+		// Center vertex of the cap fan; thisrow holds its index.
+		thisrow = point;
+		points.push_back(Vector3(0.0, y, 0.0));
+		normals.push_back(Vector3(0.0, -1.0, 0.0));
+		ADD_TANGENT(1.0, 0.0, 0.0, 1.0)
+		uvs.push_back(Vector2(0.75, 0.75));
+		point++;
+
+		// The whole bottom cap is bound to the last bone.
+		bone_indices.push_back(sections);
+		bone_indices.push_back(0);
+		bone_indices.push_back(0);
+		bone_indices.push_back(0);
+
+		bone_weights.push_back(1.0);
+		bone_weights.push_back(0);
+		bone_weights.push_back(0);
+		bone_weights.push_back(0);
+
+		float rm = radius * scale_neg;
+
+		for (int i = 0; i <= radial_steps; i++) {
+			float r = i;
+			r /= radial_steps;
+
+			float x = sin(r * Math_TAU);
+			float z = cos(r * Math_TAU);
+
+			float u = 0.5 + ((x + 1.0) * 0.25);
+			float v = 1.0 - ((z + 1.0) * 0.25);
+
+			Vector3 p = Vector3(x * rm, y, z * rm);
+			points.push_back(p);
+			normals.push_back(Vector3(0.0, -1.0, 0.0));
+			ADD_TANGENT(1.0, 0.0, 0.0, 1.0)
+			uvs.push_back(Vector2(u, v));
+			point++;
+
+			bone_indices.push_back(sections);
+			bone_indices.push_back(0);
+			bone_indices.push_back(0);
+			bone_indices.push_back(0);
+
+			bone_weights.push_back(1.0);
+			bone_weights.push_back(0);
+			bone_weights.push_back(0);
+			bone_weights.push_back(0);
+
+			// Index order is the reverse of the top cap's fan.
+			if (i > 0) {
+				indices.push_back(thisrow);
+				indices.push_back(point - 2);
+				indices.push_back(point - 1);
+			}
+		}
+	}
+
+	// The helper macro is local to this function; do not let it leak further.
+#undef ADD_TANGENT
+
+	p_arr[RS::ARRAY_VERTEX] = points;
+	p_arr[RS::ARRAY_NORMAL] = normals;
+	p_arr[RS::ARRAY_TANGENT] = tangents;
+	p_arr[RS::ARRAY_TEX_UV] = uvs;
+	p_arr[RS::ARRAY_BONES] = bone_indices;
+	p_arr[RS::ARRAY_WEIGHTS] = bone_weights;
+	p_arr[RS::ARRAY_INDEX] = indices;
+}
+
+void TubeTrailMesh::_bind_methods() { // Registers every accessor with ClassDB, then exposes each setter/getter pair as a property.
+	ClassDB::bind_method(D_METHOD("set_radius", "radius"), &TubeTrailMesh::set_radius);
+	ClassDB::bind_method(D_METHOD("get_radius"), &TubeTrailMesh::get_radius);
+
+	ClassDB::bind_method(D_METHOD("set_radial_steps", "radial_steps"), &TubeTrailMesh::set_radial_steps);
+	ClassDB::bind_method(D_METHOD("get_radial_steps"), &TubeTrailMesh::get_radial_steps);
+
+	ClassDB::bind_method(D_METHOD("set_sections", "sections"), &TubeTrailMesh::set_sections);
+	ClassDB::bind_method(D_METHOD("get_sections"), &TubeTrailMesh::get_sections);
+
+	ClassDB::bind_method(D_METHOD("set_section_length", "section_length"), &TubeTrailMesh::set_section_length);
+	ClassDB::bind_method(D_METHOD("get_section_length"), &TubeTrailMesh::get_section_length);
+
+	ClassDB::bind_method(D_METHOD("set_section_rings", "section_rings"), &TubeTrailMesh::set_section_rings);
+	ClassDB::bind_method(D_METHOD("get_section_rings"), &TubeTrailMesh::get_section_rings);
+
+	ClassDB::bind_method(D_METHOD("set_curve", "curve"), &TubeTrailMesh::set_curve);
+	ClassDB::bind_method(D_METHOD("get_curve"), &TubeTrailMesh::get_curve);
+
+	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "radius", PROPERTY_HINT_RANGE, "0.001,100.0,0.001,or_greater"), "set_radius", "get_radius");
+
+	ADD_PROPERTY(PropertyInfo(Variant::INT, "radial_steps", PROPERTY_HINT_RANGE, "3,128,1"), "set_radial_steps", "get_radial_steps"); // Hint range matches the 3..128 check in set_radial_steps().
+	ADD_PROPERTY(PropertyInfo(Variant::INT, "sections", PROPERTY_HINT_RANGE, "2,128,1"), "set_sections", "get_sections"); // Hint range matches the 2..128 check in set_sections().
+
+	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "section_length", PROPERTY_HINT_RANGE, "0.001,1024.0,0.001,or_greater"), "set_section_length", "get_section_length");
+
+	ADD_PROPERTY(PropertyInfo(Variant::INT, "section_rings", PROPERTY_HINT_RANGE, "1,128,1"), "set_section_rings", "get_section_rings"); // NOTE(review): hint stops at 128 while set_section_rings() accepts up to 1024 - confirm this is intended.
+
+	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "curve", PROPERTY_HINT_RESOURCE_TYPE, "Curve"), "set_curve", "get_curve");
+}
+
+TubeTrailMesh::TubeTrailMesh() { // Intentionally empty: the class declaration gives every member an in-class default.
+}
+
+// RIBBON TRAIL
+
+void RibbonTrailMesh::set_shape(Shape p_shape) { // Selects between the flat strip and the cross (two perpendicular strips) layout.
+	shape = p_shape;
+	_request_update(); // Defined outside this view; presumably schedules regeneration of the mesh arrays.
+}
+RibbonTrailMesh::Shape RibbonTrailMesh::get_shape() const { // Returns the shape last stored by set_shape().
+	return shape;
+}
+
+void RibbonTrailMesh::set_size(const float p_size) { // Stores the ribbon width as given; no range check is applied here.
+	size = p_size;
+	_request_update();
+}
+float RibbonTrailMesh::get_size() const { // Returns the width last stored by set_size().
+	return size;
+}
+
+void RibbonTrailMesh::set_sections(const int p_sections) { // Sets the number of sections; the mesh exposes sections + 1 bind poses.
+	ERR_FAIL_COND(p_sections < 2 || p_sections > 128); // Values outside 2..128 are rejected and the stored value is left untouched.
+	sections = p_sections;
+	_request_update();
+}
+int RibbonTrailMesh::get_sections() const { // Returns the stored section count.
+	return sections;
+}
+
+void RibbonTrailMesh::set_section_length(float p_section_length) { // Stores the length of one section along Y; no range check is applied here.
+	section_length = p_section_length;
+	_request_update();
+}
+float RibbonTrailMesh::get_section_length() const { // Returns the stored per-section length.
+	return section_length;
+}
+
+void RibbonTrailMesh::set_section_segments(const int p_section_segments) { // Sets how many segments subdivide each section.
+	ERR_FAIL_COND(p_section_segments < 1 || p_section_segments > 1024); // Values outside 1..1024 are rejected and the stored value is left untouched.
+	section_segments = p_section_segments;
+	_request_update();
+}
+int RibbonTrailMesh::get_section_segments() const { // Returns the stored segments-per-section count.
+	return section_segments;
+}
+
+void RibbonTrailMesh::set_curve(const Ref<Curve> &p_curve) {
+	// Assigning the curve that is already in use is a no-op: the signal
+	// connection is left as it is and no update is requested.
+	const bool same_curve = (curve == p_curve);
+	if (!same_curve) {
+		// Stop listening to the outgoing curve before dropping the reference.
+		if (curve.is_valid()) {
+			curve->disconnect("changed", callable_mp(this, &RibbonTrailMesh::_curve_changed));
+		}
+
+		curve = p_curve;
+
+		// Follow edits made to the new curve so _curve_changed() runs on them.
+		if (curve.is_valid()) {
+			curve->connect("changed", callable_mp(this, &RibbonTrailMesh::_curve_changed));
+		}
+
+		_request_update();
+	}
+}
+Ref<Curve> RibbonTrailMesh::get_curve() const { // Returns the curve reference set by set_curve(); may be an invalid (empty) Ref.
+	return curve;
+}
+
+void RibbonTrailMesh::_curve_changed() { // Connected to the curve's "changed" signal by set_curve().
+	_request_update();
+}
+int RibbonTrailMesh::get_builtin_bind_pose_count() const { // One bind pose per section boundary, both ends included.
+	return sections + 1;
+}
+
+Transform RibbonTrailMesh::get_builtin_bind_pose(int p_index) const {
+	// Full extent of the ribbon along Y; the generated mesh is centered on the origin.
+	const float total_length = section_length * sections;
+
+	// Bone p_index rests at (total_length / 2 - section_length * p_index) on
+	// the Y axis. A bind pose is the inverse of that rest transform, which for
+	// a pure translation is simply the negated offset.
+	Transform bind_pose;
+	bind_pose.origin.y = -(total_length / 2.0 - section_length * float(p_index));
+
+	return bind_pose;
+}
+
+void RibbonTrailMesh::_create_mesh_array(Array &p_arr) const {
+	// Fills p_arr with the surface arrays of a ribbon that runs along the Y
+	// axis and is centered on the origin. Each of the
+	// (section_segments * sections + 1) rows holds two vertices (flat strip in
+	// the XY plane) or four (an extra strip in the YZ plane for SHAPE_CROSS).
+	// Every vertex carries four bone slots, of which at most two are used.
+	PackedVector3Array points;
+	PackedVector3Array normals;
+	PackedFloat32Array tangents;
+	PackedVector2Array uvs;
+	PackedInt32Array bone_indices;
+	PackedFloat32Array bone_weights;
+	PackedInt32Array indices;
+
+#define ADD_TANGENT(m_x, m_y, m_z, m_d) \
+	tangents.push_back(m_x);            \
+	tangents.push_back(m_y);            \
+	tangents.push_back(m_z);            \
+	tangents.push_back(m_d);
+
+	// The shape cannot change while the arrays are built, so test it once.
+	const bool is_cross = (shape == SHAPE_CROSS);
+	const int row_vertices = is_cross ? 4 : 2;
+
+	int total_segments = section_segments * sections;
+	float depth = section_length * sections;
+
+	for (int j = 0; j <= total_segments; j++) {
+		// v runs from 0 at the first row to 1 at the last row.
+		float v = j;
+		v /= total_segments;
+
+		float y = depth * v;
+		y = (depth * 0.5) - y;
+
+		// Each row is skinned to the bone at the start of its section and to
+		// the following bone; blend is the weight of the former.
+		int bone = j / section_segments;
+		float blend = 1.0 - float(j % section_segments) / float(section_segments);
+
+		float s = size;
+
+		// An optional curve scales the width along the ribbon.
+		if (curve.is_valid() && curve->get_point_count() > 0) {
+			s *= curve->interpolate_baked(v);
+		}
+
+		points.push_back(Vector3(-s * 0.5, y, 0));
+		points.push_back(Vector3(+s * 0.5, y, 0));
+		if (is_cross) {
+			points.push_back(Vector3(0, y, -s * 0.5));
+			points.push_back(Vector3(0, y, +s * 0.5));
+		}
+
+		normals.push_back(Vector3(0, 0, 1));
+		normals.push_back(Vector3(0, 0, 1));
+		if (is_cross) {
+			normals.push_back(Vector3(1, 0, 0));
+			normals.push_back(Vector3(1, 0, 0));
+		}
+
+		uvs.push_back(Vector2(0, v));
+		uvs.push_back(Vector2(1, v));
+		if (is_cross) {
+			uvs.push_back(Vector2(0, v));
+			uvs.push_back(Vector2(1, v));
+		}
+
+		ADD_TANGENT(0.0, 1.0, 0.0, 1.0)
+		ADD_TANGENT(0.0, 1.0, 0.0, 1.0)
+		if (is_cross) {
+			ADD_TANGENT(0.0, 1.0, 0.0, 1.0)
+			ADD_TANGENT(0.0, 1.0, 0.0, 1.0)
+		}
+
+		// All vertices of a row share the same bone binding.
+		for (int i = 0; i < row_vertices; i++) {
+			bone_indices.push_back(bone);
+			// Clamp so the last row does not reference a bone past the final one.
+			bone_indices.push_back(MIN(sections, bone + 1));
+			bone_indices.push_back(0);
+			bone_indices.push_back(0);
+
+			bone_weights.push_back(blend);
+			bone_weights.push_back(1.0 - blend);
+			bone_weights.push_back(0);
+			bone_weights.push_back(0);
+		}
+
+		// Connect this row to the previous one; base is the index of the
+		// previous row's first vertex.
+		if (j > 0) {
+			if (is_cross) {
+				int base = j * 4 - 4;
+				indices.push_back(base + 0);
+				indices.push_back(base + 1);
+				indices.push_back(base + 4);
+
+				indices.push_back(base + 1);
+				indices.push_back(base + 5);
+				indices.push_back(base + 4);
+
+				indices.push_back(base + 2);
+				indices.push_back(base + 3);
+				indices.push_back(base + 6);
+
+				indices.push_back(base + 3);
+				indices.push_back(base + 7);
+				indices.push_back(base + 6);
+			} else {
+				int base = j * 2 - 2;
+				indices.push_back(base + 0);
+				indices.push_back(base + 1);
+				indices.push_back(base + 2);
+
+				indices.push_back(base + 1);
+				indices.push_back(base + 3);
+				indices.push_back(base + 2);
+			}
+		}
+	}
+
+	// The helper macro is local to this function; do not let it leak further.
+#undef ADD_TANGENT
+
+	p_arr[RS::ARRAY_VERTEX] = points;
+	p_arr[RS::ARRAY_NORMAL] = normals;
+	p_arr[RS::ARRAY_TANGENT] = tangents;
+	p_arr[RS::ARRAY_TEX_UV] = uvs;
+	p_arr[RS::ARRAY_BONES] = bone_indices;
+	p_arr[RS::ARRAY_WEIGHTS] = bone_weights;
+	p_arr[RS::ARRAY_INDEX] = indices;
+}
+
+void RibbonTrailMesh::_bind_methods() { // Registers every accessor with ClassDB, then exposes each setter/getter pair as a property.
+	ClassDB::bind_method(D_METHOD("set_size", "size"), &RibbonTrailMesh::set_size);
+	ClassDB::bind_method(D_METHOD("get_size"), &RibbonTrailMesh::get_size);
+
+	ClassDB::bind_method(D_METHOD("set_sections", "sections"), &RibbonTrailMesh::set_sections);
+	ClassDB::bind_method(D_METHOD("get_sections"), &RibbonTrailMesh::get_sections);
+
+	ClassDB::bind_method(D_METHOD("set_section_length", "section_length"), &RibbonTrailMesh::set_section_length);
+	ClassDB::bind_method(D_METHOD("get_section_length"), &RibbonTrailMesh::get_section_length);
+
+	ClassDB::bind_method(D_METHOD("set_section_segments", "section_segments"), &RibbonTrailMesh::set_section_segments);
+	ClassDB::bind_method(D_METHOD("get_section_segments"), &RibbonTrailMesh::get_section_segments);
+
+	ClassDB::bind_method(D_METHOD("set_curve", "curve"), &RibbonTrailMesh::set_curve);
+	ClassDB::bind_method(D_METHOD("get_curve"), &RibbonTrailMesh::get_curve);
+
+	ClassDB::bind_method(D_METHOD("set_shape", "shape"), &RibbonTrailMesh::set_shape);
+	ClassDB::bind_method(D_METHOD("get_shape"), &RibbonTrailMesh::get_shape);
+
+	ADD_PROPERTY(PropertyInfo(Variant::INT, "shape", PROPERTY_HINT_ENUM, "Flat,Cross"), "set_shape", "get_shape"); // Enum names are listed in the declaration order of RibbonTrailMesh::Shape.
+
+	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "size", PROPERTY_HINT_RANGE, "0.001,100.0,0.001,or_greater"), "set_size", "get_size");
+
+	ADD_PROPERTY(PropertyInfo(Variant::INT, "sections", PROPERTY_HINT_RANGE, "2,128,1"), "set_sections", "get_sections"); // Hint range matches the 2..128 check in set_sections().
+
+	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "section_length", PROPERTY_HINT_RANGE, "0.001,1024.0,0.001,or_greater"), "set_section_length", "get_section_length");
+
+	ADD_PROPERTY(PropertyInfo(Variant::INT, "section_segments", PROPERTY_HINT_RANGE, "1,128,1"), "set_section_segments", "get_section_segments"); // NOTE(review): hint stops at 128 while set_section_segments() accepts up to 1024 - confirm this is intended.
+
+	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "curve", PROPERTY_HINT_RESOURCE_TYPE, "Curve"), "set_curve", "get_curve");
+}
+
+RibbonTrailMesh::RibbonTrailMesh() { // Intentionally empty: the class declaration gives every member an in-class default.
+}

+ 94 - 0
scene/resources/primitive_meshes.h

@@ -336,4 +336,98 @@ public:
 	PointMesh();
 };
 
+// Primitive mesh shaped as a tube along the Y axis. It is split into
+// `sections` equal parts and reports `sections + 1` built-in bind poses.
+class TubeTrailMesh : public PrimitiveMesh {
+	GDCLASS(TubeTrailMesh, PrimitiveMesh);
+
+private:
+	// Tube radius before the optional curve is applied.
+	float radius = 1.0;
+	// Subdivisions around the tube; the setter accepts 3..128.
+	int radial_steps = 8;
+	// Number of sections along the tube; the setter accepts 2..128.
+	int sections = 5;
+	// Length of a single section along Y.
+	float section_length = 0.2;
+	// Vertex rings per section; the setter accepts 1..1024.
+	int section_rings = 3;
+
+	// Optional radius multiplier sampled along the tube (0 = top, 1 = bottom).
+	Ref<Curve> curve;
+
+	// Requests a mesh update whenever the assigned curve emits "changed".
+	void _curve_changed();
+
+protected:
+	static void _bind_methods();
+	virtual void _create_mesh_array(Array &p_arr) const override;
+
+public:
+	void set_radius(const float p_radius);
+	float get_radius() const;
+
+	void set_radial_steps(const int p_radial_steps);
+	int get_radial_steps() const;
+
+	void set_sections(const int p_sections);
+	int get_sections() const;
+
+	// Parameter name kept in sync with the definition in primitive_meshes.cpp.
+	void set_section_length(float p_section_length);
+	float get_section_length() const;
+
+	void set_section_rings(const int p_section_rings);
+	int get_section_rings() const;
+
+	void set_curve(const Ref<Curve> &p_curve);
+	Ref<Curve> get_curve() const;
+
+	// Overrides of the Mesh defaults: sections + 1 poses spaced one
+	// section_length apart along Y.
+	virtual int get_builtin_bind_pose_count() const override;
+	virtual Transform get_builtin_bind_pose(int p_index) const override;
+
+	TubeTrailMesh();
+};
+
+// Primitive mesh shaped as a ribbon along the Y axis. It is split into
+// `sections` equal parts and reports `sections + 1` built-in bind poses.
+class RibbonTrailMesh : public PrimitiveMesh {
+	GDCLASS(RibbonTrailMesh, PrimitiveMesh);
+
+public:
+	enum Shape {
+		// A single strip in the XY plane.
+		SHAPE_FLAT,
+		// The flat strip plus a second, perpendicular strip in the YZ plane.
+		SHAPE_CROSS
+	};
+
+private:
+	// Ribbon width before the optional curve is applied.
+	float size = 1.0;
+	// Number of sections along the ribbon; the setter accepts 2..128.
+	int sections = 5;
+	// Length of a single section along Y.
+	float section_length = 0.2;
+	// Segments per section; the setter accepts 1..1024.
+	int section_segments = 3;
+
+	Shape shape = SHAPE_CROSS;
+
+	// Optional width multiplier sampled along the ribbon (0 = first row, 1 = last row).
+	Ref<Curve> curve;
+
+	// Requests a mesh update whenever the assigned curve emits "changed".
+	void _curve_changed();
+
+protected:
+	static void _bind_methods();
+	virtual void _create_mesh_array(Array &p_arr) const override;
+
+public:
+	void set_shape(Shape p_shape);
+	Shape get_shape() const;
+
+	void set_size(const float p_size);
+	float get_size() const;
+
+	void set_sections(const int p_sections);
+	int get_sections() const;
+
+	// Parameter name kept in sync with the definition in primitive_meshes.cpp.
+	void set_section_length(float p_section_length);
+	float get_section_length() const;
+
+	void set_section_segments(const int p_section_segments);
+	int get_section_segments() const;
+
+	void set_curve(const Ref<Curve> &p_curve);
+	Ref<Curve> get_curve() const;
+
+	// Overrides of the Mesh defaults: sections + 1 poses spaced one
+	// section_length apart along Y.
+	virtual int get_builtin_bind_pose_count() const override;
+	virtual Transform get_builtin_bind_pose(int p_index) const override;
+
+	RibbonTrailMesh();
+};
+
+VARIANT_ENUM_CAST(RibbonTrailMesh::Shape)
 #endif

+ 13 - 21
servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp

@@ -466,6 +466,10 @@ void RenderForwardClustered::_render_list_template(RenderingDevice::DrawListID p
 		RD::get_singleton()->draw_list_set_push_constant(draw_list, &push_constant, sizeof(SceneState::PushConstant));
 
 		uint32_t instance_count = surf->owner->instance_count > 1 ? surf->owner->instance_count : element_info.repeat;
+		if (surf->flags & GeometryInstanceSurfaceDataCache::FLAG_USES_PARTICLE_TRAILS) {
+			instance_count /= surf->owner->trail_steps;
+		}
+
 		RD::get_singleton()->draw_list_draw(draw_list, index_array_rd.is_valid(), instance_count);
 		i += element_info.repeat - 1; //skip equal elements
 	}
@@ -2379,9 +2383,13 @@ void RenderForwardClustered::_geometry_instance_add_surface_with_material(Geomet
 		flags |= GeometryInstanceSurfaceDataCache::FLAG_PASS_SHADOW;
 	}
 
+	if (p_material->shader_data->uses_particle_trails) {
+		flags |= GeometryInstanceSurfaceDataCache::FLAG_USES_PARTICLE_TRAILS;
+	}
+
 	SceneShaderForwardClustered::MaterialData *material_shadow = nullptr;
 	void *surface_shadow = nullptr;
-	if (!p_material->shader_data->writes_modelview_or_projection && !p_material->shader_data->uses_vertex && !p_material->shader_data->uses_discard && !p_material->shader_data->uses_depth_pre_pass) {
+	if (!p_material->shader_data->uses_particle_trails && !p_material->shader_data->writes_modelview_or_projection && !p_material->shader_data->uses_vertex && !p_material->shader_data->uses_discard && !p_material->shader_data->uses_depth_pre_pass) {
 		flags |= GeometryInstanceSurfaceDataCache::FLAG_USES_SHARED_SHADOW_MATERIAL;
 		material_shadow = (SceneShaderForwardClustered::MaterialData *)storage->material_get_data(scene_shader.default_material, RendererStorageRD::SHADER_TYPE_3D);
 
@@ -2550,7 +2558,7 @@ void RenderForwardClustered::_geometry_instance_update(GeometryInstance *p_geome
 				}
 			}
 
-			ginstance->instance_count = storage->particles_get_amount(ginstance->data->base);
+			ginstance->instance_count = storage->particles_get_amount(ginstance->data->base, ginstance->trail_steps);
 
 		} break;
 
@@ -2564,42 +2572,26 @@ void RenderForwardClustered::_geometry_instance_update(GeometryInstance *p_geome
 
 	if (ginstance->data->base_type == RS::INSTANCE_MULTIMESH) {
 		ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH;
-		uint32_t stride;
 		if (storage->multimesh_get_transform_format(ginstance->data->base) == RS::MULTIMESH_TRANSFORM_2D) {
 			ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH_FORMAT_2D;
-			stride = 2;
-		} else {
-			stride = 3;
 		}
 		if (storage->multimesh_uses_colors(ginstance->data->base)) {
 			ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH_HAS_COLOR;
-			stride += 1;
 		}
 		if (storage->multimesh_uses_custom_data(ginstance->data->base)) {
 			ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH_HAS_CUSTOM_DATA;
-			stride += 1;
 		}
 
-		ginstance->base_flags |= (stride << INSTANCE_DATA_FLAGS_MULTIMESH_STRIDE_SHIFT);
 		ginstance->transforms_uniform_set = storage->multimesh_get_3d_uniform_set(ginstance->data->base, scene_shader.default_shader_rd, TRANSFORMS_UNIFORM_SET);
 
 	} else if (ginstance->data->base_type == RS::INSTANCE_PARTICLES) {
 		ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH;
-		uint32_t stride;
-		if (false) { // 2D particles
-			ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH_FORMAT_2D;
-			stride = 2;
-		} else {
-			stride = 3;
-		}
 
 		ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH_HAS_COLOR;
-		stride += 1;
-
 		ginstance->base_flags |= INSTANCE_DATA_FLAG_MULTIMESH_HAS_CUSTOM_DATA;
-		stride += 1;
 
-		ginstance->base_flags |= (stride << INSTANCE_DATA_FLAGS_MULTIMESH_STRIDE_SHIFT);
+		// For particles, the flag bits that used to hold the multimesh stride now carry the trail step count.
+		ginstance->base_flags |= (ginstance->trail_steps << INSTANCE_DATA_FLAGS_PARTICLE_TRAIL_SHIFT);
 
 		if (!storage->particles_is_using_local_coords(ginstance->data->base)) {
 			store_transform = false;
@@ -2608,7 +2600,6 @@ void RenderForwardClustered::_geometry_instance_update(GeometryInstance *p_geome
 
 	} else if (ginstance->data->base_type == RS::INSTANCE_MESH) {
 		if (storage->skeleton_is_valid(ginstance->data->skeleton)) {
-			ginstance->base_flags |= INSTANCE_DATA_FLAG_SKELETON;
 			ginstance->transforms_uniform_set = storage->skeleton_get_3d_uniform_set(ginstance->data->skeleton, scene_shader.default_shader_rd, TRANSFORMS_UNIFORM_SET);
 			if (ginstance->data->dirty_dependencies) {
 				storage->skeleton_update_dependency(ginstance->data->skeleton, &ginstance->data->dependency_tracker);
@@ -2643,6 +2634,7 @@ void RenderForwardClustered::_geometry_instance_dependency_changed(RendererStora
 	switch (p_notification) {
 		case RendererStorage::DEPENDENCY_CHANGED_MATERIAL:
 		case RendererStorage::DEPENDENCY_CHANGED_MESH:
+		case RendererStorage::DEPENDENCY_CHANGED_PARTICLES:
 		case RendererStorage::DEPENDENCY_CHANGED_MULTIMESH:
 		case RendererStorage::DEPENDENCY_CHANGED_SKELETON_DATA: {
 			static_cast<RenderForwardClustered *>(singleton)->_geometry_instance_mark_dirty(static_cast<GeometryInstance *>(p_tracker->userdata));

+ 5 - 3
servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h

@@ -196,9 +196,9 @@ class RenderForwardClustered : public RendererSceneRenderRD {
 		INSTANCE_DATA_FLAG_MULTIMESH_FORMAT_2D = 1 << 13,
 		INSTANCE_DATA_FLAG_MULTIMESH_HAS_COLOR = 1 << 14,
 		INSTANCE_DATA_FLAG_MULTIMESH_HAS_CUSTOM_DATA = 1 << 15,
-		INSTANCE_DATA_FLAGS_MULTIMESH_STRIDE_SHIFT = 16,
-		INSTANCE_DATA_FLAGS_MULTIMESH_STRIDE_MASK = 0x7,
-		INSTANCE_DATA_FLAG_SKELETON = 1 << 19,
+		INSTANCE_DATA_FLAGS_PARTICLE_TRAIL_SHIFT = 16,
+		INSTANCE_DATA_FLAGS_PARTICLE_TRAIL_MASK = 0xFF,
+		INSTANCE_DATA_FLAGS_NON_UNIFORM_SCALE = 1 << 24,
 	};
 
 	struct SceneState {
@@ -398,6 +398,7 @@ class RenderForwardClustered : public RendererSceneRenderRD {
 			FLAG_USES_DEPTH_TEXTURE = 8192,
 			FLAG_USES_NORMAL_TEXTURE = 16384,
 			FLAG_USES_DOUBLE_SIDED_SHADOWS = 32768,
+			FLAG_USES_PARTICLE_TRAILS = 65536,
 		};
 
 		union {
@@ -453,6 +454,7 @@ class RenderForwardClustered : public RendererSceneRenderRD {
 		uint32_t layer_mask = 1;
 		RID transforms_uniform_set;
 		uint32_t instance_count = 0;
+		uint32_t trail_steps = 1;
 		RID mesh_instance;
 		bool can_sdfgi = false;
 		//used during setup

+ 3 - 0
servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp

@@ -73,6 +73,7 @@ void SceneShaderForwardClustered::ShaderData::set_code(const String &p_code) {
 	uses_time = false;
 	writes_modelview_or_projection = false;
 	uses_world_coordinates = false;
+	uses_particle_trails = false;
 
 	int depth_drawi = DEPTH_DRAW_OPAQUE;
 
@@ -101,6 +102,7 @@ void SceneShaderForwardClustered::ShaderData::set_code(const String &p_code) {
 
 	actions.render_mode_flags["unshaded"] = &unshaded;
 	actions.render_mode_flags["wireframe"] = &wireframe;
+	actions.render_mode_flags["particle_trails"] = &uses_particle_trails;
 
 	actions.usage_flag_pointers["ALPHA"] = &uses_alpha;
 	actions.render_mode_flags["depth_prepass_alpha"] = &uses_depth_pre_pass;
@@ -714,6 +716,7 @@ void SceneShaderForwardClustered::init(RendererStorageRD *p_storage, const Strin
 		actions.render_mode_defines["ensure_correct_normals"] = "#define ENSURE_CORRECT_NORMALS\n";
 		actions.render_mode_defines["cull_front"] = "#define DO_SIDE_CHECK\n";
 		actions.render_mode_defines["cull_disabled"] = "#define DO_SIDE_CHECK\n";
+		actions.render_mode_defines["particle_trails"] = "#define USE_PARTICLE_TRAILS\n";
 
 		bool force_lambert = GLOBAL_GET("rendering/shading/overrides/force_lambert_over_burley");
 

+ 1 - 0
servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h

@@ -126,6 +126,7 @@ public:
 		bool uses_discard;
 		bool uses_roughness;
 		bool uses_normal;
+		bool uses_particle_trails;
 
 		bool unshaded;
 		bool uses_vertex;

+ 331 - 71
servers/rendering/renderer_rd/renderer_storage_rd.cpp

@@ -3888,22 +3888,37 @@ bool RendererStorageRD::particles_get_emitting(RID p_particles) {
 }
 
 void RendererStorageRD::_particles_free_data(Particles *particles) {
-	if (!particles->particle_buffer.is_valid()) {
-		return;
+	if (particles->particle_buffer.is_valid()) {
+		RD::get_singleton()->free(particles->particle_buffer);
+		particles->particle_buffer = RID();
+		RD::get_singleton()->free(particles->particle_instance_buffer);
+		particles->particle_instance_buffer = RID();
+	}
+
+	if (particles->frame_params_buffer.is_valid()) {
+		RD::get_singleton()->free(particles->frame_params_buffer);
+		particles->frame_params_buffer = RID();
 	}
-	RD::get_singleton()->free(particles->particle_buffer);
-	RD::get_singleton()->free(particles->frame_params_buffer);
-	RD::get_singleton()->free(particles->particle_instance_buffer);
 	particles->particles_transforms_buffer_uniform_set = RID();
-	particles->particle_buffer = RID();
 
+	if (RD::get_singleton()->uniform_set_is_valid(particles->trail_bind_pose_uniform_set)) {
+		RD::get_singleton()->free(particles->trail_bind_pose_uniform_set);
+	}
+	particles->trail_bind_pose_uniform_set = RID();
+
+	if (particles->trail_bind_pose_buffer.is_valid()) {
+		RD::get_singleton()->free(particles->trail_bind_pose_buffer);
+		particles->trail_bind_pose_buffer = RID();
+	}
 	if (RD::get_singleton()->uniform_set_is_valid(particles->collision_textures_uniform_set)) {
 		RD::get_singleton()->free(particles->collision_textures_uniform_set);
 	}
+	particles->collision_textures_uniform_set = RID();
 
 	if (particles->particles_sort_buffer.is_valid()) {
 		RD::get_singleton()->free(particles->particles_sort_buffer);
 		particles->particles_sort_buffer = RID();
+		particles->particles_sort_uniform_set = RID();
 	}
 
 	if (particles->emission_buffer != nullptr) {
@@ -3912,6 +3927,12 @@ void RendererStorageRD::_particles_free_data(Particles *particles) {
 		RD::get_singleton()->free(particles->emission_storage_buffer);
 		particles->emission_storage_buffer = RID();
 	}
+
+	if (RD::get_singleton()->uniform_set_is_valid(particles->particles_material_uniform_set)) {
+		//will need to be re-created
+		RD::get_singleton()->free(particles->particles_material_uniform_set);
+	}
+	particles->particles_material_uniform_set = RID();
 }
 
 void RendererStorageRD::particles_set_amount(RID p_particles, int p_amount) {
@@ -3926,38 +3947,12 @@ void RendererStorageRD::particles_set_amount(RID p_particles, int p_amount) {
 
 	particles->amount = p_amount;
 
-	if (particles->amount > 0) {
-		particles->particle_buffer = RD::get_singleton()->storage_buffer_create(sizeof(ParticleData) * p_amount);
-		particles->frame_params_buffer = RD::get_singleton()->storage_buffer_create(sizeof(ParticlesFrameParams) * 1);
-		particles->particle_instance_buffer = RD::get_singleton()->storage_buffer_create(sizeof(float) * 4 * (3 + 1 + 1) * p_amount);
-		//needs to clear it
-
-		{
-			Vector<RD::Uniform> uniforms;
-
-			{
-				RD::Uniform u;
-				u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
-				u.binding = 1;
-				u.ids.push_back(particles->particle_buffer);
-				uniforms.push_back(u);
-			}
-			{
-				RD::Uniform u;
-				u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
-				u.binding = 2;
-				u.ids.push_back(particles->particle_instance_buffer);
-				uniforms.push_back(u);
-			}
-
-			particles->particles_copy_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, particles_shader.copy_shader.version_get_shader(particles_shader.copy_shader_version, 0), 0);
-		}
-	}
-
 	particles->prev_ticks = 0;
 	particles->phase = 0;
 	particles->prev_phase = 0;
 	particles->clear = true;
+
+	particles->dependency.changed_notify(DEPENDENCY_CHANGED_PARTICLES);
 }
 
 void RendererStorageRD::particles_set_lifetime(RID p_particles, float p_lifetime) {
@@ -4013,6 +4008,22 @@ void RendererStorageRD::particles_set_fixed_fps(RID p_particles, int p_fps) {
 	ERR_FAIL_COND(!particles);
 
 	particles->fixed_fps = p_fps;
+
+	_particles_free_data(particles);
+
+	particles->prev_ticks = 0;
+	particles->phase = 0;
+	particles->prev_phase = 0;
+	particles->clear = true;
+
+	particles->dependency.changed_notify(DEPENDENCY_CHANGED_PARTICLES);
+}
+
+void RendererStorageRD::particles_set_interpolate(RID p_particles, bool p_enable) { // Stores the interpolate flag on the particle system identified by p_particles.
+	Particles *particles = particles_owner.getornull(p_particles);
+	ERR_FAIL_COND(!particles); // Bail out when the RID does not resolve to a particle system.
+
+	particles->interpolate = p_enable; // Only the flag is stored; unlike particles_set_trails(), no data is freed or reset here.
 }
 
 void RendererStorageRD::particles_set_fractional_delta(RID p_particles, bool p_enable) {
@@ -4022,6 +4033,42 @@ void RendererStorageRD::particles_set_fractional_delta(RID p_particles, bool p_e
 	particles->fractional_delta = p_enable;
 }
 
+void RendererStorageRD::particles_set_trails(RID p_particles, bool p_enable, float p_length) {
+	// Turns trails on or off and stores the trail length, then discards the
+	// existing particle data and restarts the simulation state.
+	Particles *particles = particles_owner.getornull(p_particles);
+	ERR_FAIL_COND(!particles);
+	// Lengths below 0.1 are rejected, regardless of p_enable.
+	ERR_FAIL_COND(p_length < 0.1);
+
+	particles->trails_enabled = p_enable;
+	// Lengths above 10.0 are clamped rather than rejected.
+	particles->trail_length = MIN(10.0, p_length);
+
+	_particles_free_data(particles);
+
+	// Restart timing and flag the system to be cleared.
+	particles->clear = true;
+	particles->prev_phase = 0;
+	particles->phase = 0;
+	particles->prev_ticks = 0;
+
+	particles->dependency.changed_notify(DEPENDENCY_CHANGED_PARTICLES);
+}
+
+void RendererStorageRD::particles_set_trail_bind_poses(RID p_particles, const Vector<Transform> &p_bind_poses) {
+	// Replaces the trail bind poses and marks them dirty. The particle data is
+	// only thrown away when a bind pose buffer already exists and the number
+	// of poses changes.
+	Particles *particles = particles_owner.getornull(p_particles);
+	ERR_FAIL_COND(!particles);
+
+	const bool has_pose_buffer = particles->trail_bind_pose_buffer.is_valid();
+	if (has_pose_buffer && particles->trail_bind_poses.size() != p_bind_poses.size()) {
+		_particles_free_data(particles);
+
+		// Restart timing and flag the system to be cleared.
+		particles->clear = true;
+		particles->prev_phase = 0;
+		particles->phase = 0;
+		particles->prev_ticks = 0;
+	}
+
+	particles->trail_bind_poses_dirty = true;
+	particles->trail_bind_poses = p_bind_poses;
+
+	particles->dependency.changed_notify(DEPENDENCY_CHANGED_PARTICLES);
+}
+
 void RendererStorageRD::particles_set_collision_base_size(RID p_particles, float p_size) {
 	Particles *particles = particles_owner.getornull(p_particles);
 	ERR_FAIL_COND(!particles);
@@ -4029,6 +4076,13 @@ void RendererStorageRD::particles_set_collision_base_size(RID p_particles, float
 	particles->collision_base_size = p_size;
 }
 
+void RendererStorageRD::particles_set_transform_align(RID p_particles, RS::ParticlesTransformAlign p_transform_align) { //stores the transform alignment mode of a particle system
+	Particles *particles = particles_owner.getornull(p_particles);
+	ERR_FAIL_COND(!particles); //bail out when the RID does not resolve to a particle system
+
+	particles->transform_align = p_transform_align; //sent to the copy shader as align_mode; the Z_BILLBOARD modes defer the copy to particles_set_view_axis()
+}
+
 void RendererStorageRD::particles_set_process_material(RID p_particles, RID p_material) {
 	Particles *particles = particles_owner.getornull(p_particles);
 	ERR_FAIL_COND(!particles);
@@ -4152,8 +4206,13 @@ AABB RendererStorageRD::particles_get_current_aabb(RID p_particles) {
 	const Particles *particles = particles_owner.getornull(p_particles);
 	ERR_FAIL_COND_V(!particles, AABB());
 
+	int total_amount = particles->amount;
+	if (particles->trails_enabled && particles->trail_bind_poses.size() > 1) {
+		total_amount *= particles->trail_bind_poses.size();
+	}
+
 	Vector<ParticleData> data;
-	data.resize(particles->amount);
+	data.resize(total_amount);
 
 	Vector<uint8_t> buffer = RD::get_singleton()->buffer_get_data(particles->particle_buffer);
 
@@ -4162,8 +4221,9 @@ AABB RendererStorageRD::particles_get_current_aabb(RID p_particles) {
 	AABB aabb;
 	if (buffer.size()) {
 		bool first = true;
+
 		const ParticleData *particle_data = (const ParticleData *)data.ptr();
-		for (int i = 0; i < particles->amount; i++) {
+		for (int i = 0; i < total_amount; i++) {
 			if (particle_data[i].active) {
 				Vector3 pos = Vector3(particle_data[i].xform[12], particle_data[i].xform[13], particle_data[i].xform[14]);
 				if (!particles->use_local_coords) {
@@ -4224,14 +4284,12 @@ RID RendererStorageRD::particles_get_draw_pass_mesh(RID p_particles, int p_pass)
 void RendererStorageRD::particles_add_collision(RID p_particles, RID p_particles_collision_instance) { //registers a collision instance with a particle system
 	Particles *particles = particles_owner.getornull(p_particles);
 	ERR_FAIL_COND(!particles); //bail out when the RID does not resolve to a particle system
-
 	particles->collisions.insert(p_particles_collision_instance); //added to the system's `collisions` container
 }
 
 void RendererStorageRD::particles_remove_collision(RID p_particles, RID p_particles_collision_instance) { //unregisters a collision instance from a particle system
 	Particles *particles = particles_owner.getornull(p_particles);
 	ERR_FAIL_COND(!particles); //bail out when the RID does not resolve to a particle system
-
 	particles->collisions.erase(p_particles_collision_instance); //removed from the system's `collisions` container
 }
 
@@ -4286,7 +4344,12 @@ void RendererStorageRD::_particles_process(Particles *p_particles, float p_delta
 
 	float new_phase = Math::fmod((float)p_particles->phase + (p_delta / p_particles->lifetime) * p_particles->speed_scale, (float)1.0);
 
-	ParticlesFrameParams &frame_params = p_particles->frame_params;
+	//move back history (if there is any)
+	for (uint32_t i = p_particles->frame_history.size() - 1; i > 0; i--) {
+		p_particles->frame_history[i] = p_particles->frame_history[i - 1];
+	}
+	//update current frame
+	ParticlesFrameParams &frame_params = p_particles->frame_history[0];
 
 	if (p_particles->clear) {
 		p_particles->cycle_number = 0;
@@ -4317,6 +4380,10 @@ void RendererStorageRD::_particles_process(Particles *p_particles, float p_delta
 	}
 
 	frame_params.cycle = p_particles->cycle_number;
+	frame_params.frame = p_particles->frame_counter++;
+	frame_params.pad0 = 0;
+	frame_params.pad1 = 0;
+	frame_params.pad2 = 0;
 
 	{ //collision and attractors
 
@@ -4515,12 +4582,18 @@ void RendererStorageRD::_particles_process(Particles *p_particles, float p_delta
 
 	ParticlesShader::PushConstant push_constant;
 
+	int process_amount = p_particles->amount;
+
+	if (p_particles->trails_enabled && p_particles->trail_bind_poses.size() > 1) {
+		process_amount *= p_particles->trail_bind_poses.size();
+	}
 	push_constant.clear = p_particles->clear;
 	push_constant.total_particles = p_particles->amount;
 	push_constant.lifetime = p_particles->lifetime;
-	push_constant.trail_size = 1;
+	push_constant.trail_size = p_particles->trail_params.size();
 	push_constant.use_fractional_delta = p_particles->fractional_delta;
 	push_constant.sub_emitter_mode = !p_particles->emitting && p_particles->emission_buffer && (p_particles->emission_buffer->particle_count > 0 || p_particles->force_sub_emit);
+	push_constant.trail_pass = false;
 
 	p_particles->force_sub_emit = false; //reset
 
@@ -4553,7 +4626,17 @@ void RendererStorageRD::_particles_process(Particles *p_particles, float p_delta
 
 	p_particles->clear = false;
 
-	RD::get_singleton()->buffer_update(p_particles->frame_params_buffer, 0, sizeof(ParticlesFrameParams), &frame_params);
+	if (p_particles->trail_params.size() > 1) {
+		//fill the trail params
+		for (uint32_t i = 0; i < p_particles->trail_params.size(); i++) {
+			uint32_t src_idx = i * p_particles->frame_history.size() / p_particles->trail_params.size();
+			p_particles->trail_params[i] = p_particles->frame_history[src_idx];
+		}
+	} else {
+		p_particles->trail_params[0] = p_particles->frame_history[0];
+	}
+
+	RD::get_singleton()->buffer_update(p_particles->frame_params_buffer, 0, sizeof(ParticlesFrameParams) * p_particles->trail_params.size(), p_particles->trail_params.ptr());
 
 	ParticlesMaterialData *m = (ParticlesMaterialData *)material_get_data(p_particles->process_material, SHADER_TYPE_PARTICLES);
 	if (!m) {
@@ -4575,27 +4658,45 @@ void RendererStorageRD::_particles_process(Particles *p_particles, float p_delta
 
 	RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ParticlesShader::PushConstant));
 
-	RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_particles->amount, 1, 1);
+	if (p_particles->trails_enabled && p_particles->trail_bind_poses.size() > 1) {
+		//trails requires two passes in order to catch particle starts
+		RD::get_singleton()->compute_list_dispatch_threads(compute_list, process_amount / p_particles->trail_bind_poses.size(), 1, 1);
+
+		RD::get_singleton()->compute_list_add_barrier(compute_list);
+
+		push_constant.trail_pass = true;
+		RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ParticlesShader::PushConstant));
+		RD::get_singleton()->compute_list_dispatch_threads(compute_list, process_amount - p_particles->amount, 1, 1);
+	} else {
+		RD::get_singleton()->compute_list_dispatch_threads(compute_list, process_amount, 1, 1);
+	}
 
 	RD::get_singleton()->compute_list_end();
 }
 
-void RendererStorageRD::particles_set_view_axis(RID p_particles, const Vector3 &p_axis) {
+// View-dependent part of the particle copy. Optionally depth-sorts the particles along
+// the view axis and then fills the instance buffer, applying the alignment modes that
+// need the camera vectors.
+//   p_axis    - camera view axis (negated below, cameras look towards -Z).
+//   p_up_axis - camera up axis, forwarded to the copy shader as align_up.
+// Does nothing unless the draw order is VIEW_DEPTH or the transform align is one of the
+// Z_BILLBOARD modes; in every other case update_particles() already performed the copy.
+void RendererStorageRD::particles_set_view_axis(RID p_particles, const Vector3 &p_axis, const Vector3 &p_up_axis) {
 	Particles *particles = particles_owner.getornull(p_particles);
 	ERR_FAIL_COND(!particles);
 
-	if (particles->draw_order != RS::PARTICLES_DRAW_ORDER_VIEW_DEPTH) {
-		return; //uninteresting for other modes
+	if (particles->draw_order != RS::PARTICLES_DRAW_ORDER_VIEW_DEPTH && particles->transform_align != RS::PARTICLES_TRANSFORM_ALIGN_Z_BILLBOARD && particles->transform_align != RS::PARTICLES_TRANSFORM_ALIGN_Z_BILLBOARD_Y_TO_VELOCITY) {
+		return;
+	}
+
+	if (particles->particle_buffer.is_null()) {
+		return; //particles have not processed yet
 	}
 
+	bool do_sort = particles->draw_order == RS::PARTICLES_DRAW_ORDER_VIEW_DEPTH;
+
 	//copy to sort buffer
-	if (particles->particles_sort_buffer == RID()) {
+	if (do_sort && particles->particles_sort_buffer == RID()) {
 		uint32_t size = particles->amount;
 		if (size & 1) {
 			size++; //make multiple of 16
 		}
 		size *= sizeof(float) * 2;
 		particles->particles_sort_buffer = RD::get_singleton()->storage_buffer_create(size);
+
 		{
 			Vector<RD::Uniform> uniforms;
 
@@ -4611,41 +4712,105 @@ void RendererStorageRD::particles_set_view_axis(RID p_particles, const Vector3 &
 		}
 	}
 
+	ParticlesShader::CopyPushConstant copy_push_constant;
+
+	if (particles->trails_enabled && particles->trail_bind_poses.size() > 1) {
+		//same fallback rate update_particles() uses when no fixed fps is set
+		int fixed_fps = 60;
+		if (particles->fixed_fps > 0) {
+			fixed_fps = particles->fixed_fps;
+		}
+
+		copy_push_constant.trail_size = particles->trail_bind_poses.size();
+		copy_push_constant.trail_total = particles->frame_history.size();
+		copy_push_constant.frame_delta = 1.0 / fixed_fps;
+	} else {
+		copy_push_constant.trail_size = 1;
+		copy_push_constant.trail_total = 1;
+		copy_push_constant.frame_delta = 0.0;
+	}
+	copy_push_constant.frame_remainder = particles->interpolate ? particles->frame_remainder : 0.0;
+	//the sort pass works per particle (not per trail segment), so start with the plain amount
+	copy_push_constant.total_particles = particles->amount;
+
 	Vector3 axis = -p_axis; // cameras look to z negative
 
 	if (particles->use_local_coords) {
 		axis = particles->emission_transform.basis.xform_inv(axis).normalized();
 	}
 
-	ParticlesShader::CopyPushConstant copy_push_constant;
-	copy_push_constant.total_particles = particles->amount;
 	copy_push_constant.sort_direction[0] = axis.x;
 	copy_push_constant.sort_direction[1] = axis.y;
 	copy_push_constant.sort_direction[2] = axis.z;
 
-	RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
-	RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, particles_shader.copy_pipelines[ParticlesShader::COPY_MODE_FILL_SORT_BUFFER]);
-	RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_copy_uniform_set, 0);
-	RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_sort_uniform_set, 1);
-	RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy_push_constant, sizeof(ParticlesShader::CopyPushConstant));
+	copy_push_constant.align_up[0] = p_up_axis.x;
+	copy_push_constant.align_up[1] = p_up_axis.y;
+	copy_push_constant.align_up[2] = p_up_axis.z;
 
-	RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1);
+	copy_push_constant.align_mode = particles->transform_align;
 
-	RD::get_singleton()->compute_list_end();
+	if (do_sort) {
+		//pass 1: write one depth key per particle, then sort the keys
+		RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
+
+		RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, particles_shader.copy_pipelines[ParticlesShader::COPY_MODE_FILL_SORT_BUFFER]);
+		RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_copy_uniform_set, 0);
+		RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_sort_uniform_set, 1);
+		RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->trail_bind_pose_uniform_set, 2);
+		RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy_push_constant, sizeof(ParticlesShader::CopyPushConstant));
+
+		RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1);
 
-	effects.sort_buffer(particles->particles_sort_uniform_set, particles->amount);
+		RD::get_singleton()->compute_list_end();
+		effects.sort_buffer(particles->particles_sort_uniform_set, particles->amount);
+	}
+
+	//pass 2 writes one instance per trail segment, so expand the count from particles to
+	//particles * trail_size (trail_size is 1 without trails, leaving the count unchanged).
+	//This previously multiplied total_particles by itself, dispatching amount^2 threads.
+	copy_push_constant.total_particles *= copy_push_constant.trail_size;
 
-	compute_list = RD::get_singleton()->compute_list_begin();
-	RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, particles_shader.copy_pipelines[ParticlesShader::COPY_MODE_FILL_INSTANCES_WITH_SORT_BUFFER]);
+	RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
+	RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, particles_shader.copy_pipelines[do_sort ? ParticlesShader::COPY_MODE_FILL_INSTANCES_WITH_SORT_BUFFER : ParticlesShader::COPY_MODE_FILL_INSTANCES]);
 	RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_copy_uniform_set, 0);
-	RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_sort_uniform_set, 1);
+	if (do_sort) {
+		RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_sort_uniform_set, 1);
+	}
+	RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->trail_bind_pose_uniform_set, 2);
+
 	RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy_push_constant, sizeof(ParticlesShader::CopyPushConstant));
 
-	RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1);
+	RD::get_singleton()->compute_list_dispatch_threads(compute_list, copy_push_constant.total_particles, 1, 1);
 
 	RD::get_singleton()->compute_list_end();
 }
 
+// Lazily creates the particle state buffer, the instance buffer and the copy-shader
+// uniform set (set 0) that binds both. Does nothing when the system has no particles
+// or when the particle buffer already exists.
+void RendererStorageRD::_particles_update_buffers(Particles *particles) {
+	if (particles->amount <= 0 || !particles->particle_buffer.is_null()) {
+		return;
+	}
+
+	//with trails, every particle owns one slot per trail bind pose
+	const bool has_trails = particles->trails_enabled && particles->trail_bind_poses.size() > 1;
+	int slot_count = particles->amount;
+	if (has_trails) {
+		slot_count *= particles->trail_bind_poses.size();
+	}
+
+	particles->particle_buffer = RD::get_singleton()->storage_buffer_create(sizeof(ParticleData) * slot_count);
+	//per instance: (3 + 1 + 1) blocks of 4 floats
+	particles->particle_instance_buffer = RD::get_singleton()->storage_buffer_create(sizeof(float) * 4 * (3 + 1 + 1) * slot_count);
+	//needs to clear it
+
+	Vector<RD::Uniform> copy_uniforms;
+
+	//binding 1: particle state
+	RD::Uniform particle_data_uniform;
+	particle_data_uniform.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+	particle_data_uniform.binding = 1;
+	particle_data_uniform.ids.push_back(particles->particle_buffer);
+	copy_uniforms.push_back(particle_data_uniform);
+
+	//binding 2: instance data filled by the copy shader
+	RD::Uniform instance_data_uniform;
+	instance_data_uniform.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+	instance_data_uniform.binding = 2;
+	instance_data_uniform.ids.push_back(particles->particle_instance_buffer);
+	copy_uniforms.push_back(instance_data_uniform);
+
+	particles->particles_copy_uniform_set = RD::get_singleton()->uniform_set_create(copy_uniforms, particles_shader.copy_shader.version_get_shader(particles_shader.copy_shader_version, 0), 0);
+}
 void RendererStorageRD::update_particles() {
 	while (particle_update_list) {
 		//use transform feedback to process particles
@@ -4657,6 +4822,8 @@ void RendererStorageRD::update_particles() {
 		particles->update_list = nullptr;
 		particles->dirty = false;
 
+		_particles_update_buffers(particles);
+
 		if (particles->restart_request) {
 			particles->prev_ticks = 0;
 			particles->phase = 0;
@@ -4688,12 +4855,81 @@ void RendererStorageRD::update_particles() {
 			}
 		}
 
+#ifndef _MSC_VER
+#warning Should use display refresh rate for all this
+#endif
+
+		float screen_hz = 60;
+
+		int fixed_fps = 0;
+		if (particles->fixed_fps > 0) {
+			fixed_fps = particles->fixed_fps;
+		} else if (particles->trails_enabled && particles->trail_bind_poses.size() > 1) {
+			fixed_fps = screen_hz;
+		}
+		{
+			//update trails
+			int history_size = 1;
+			int trail_steps = 1;
+			if (particles->trails_enabled && particles->trail_bind_poses.size() > 1) {
+				history_size = MAX(1, int(particles->trail_length * fixed_fps));
+				trail_steps = particles->trail_bind_poses.size();
+			}
+
+			if (uint32_t(history_size) != particles->frame_history.size()) {
+				particles->frame_history.resize(history_size);
+				memset(particles->frame_history.ptr(), 0, sizeof(ParticlesFrameParams) * history_size);
+			}
+
+			if (uint32_t(trail_steps) != particles->trail_params.size() || particles->frame_params_buffer.is_null()) {
+				particles->trail_params.resize(trail_steps);
+				if (particles->frame_params_buffer.is_valid()) {
+					RD::get_singleton()->free(particles->frame_params_buffer);
+				}
+				particles->frame_params_buffer = RD::get_singleton()->storage_buffer_create(sizeof(ParticlesFrameParams) * trail_steps);
+			}
+
+			if (particles->trail_bind_poses.size() > 1 && particles->trail_bind_pose_buffer.is_null()) {
+				particles->trail_bind_pose_buffer = RD::get_singleton()->storage_buffer_create(sizeof(float) * 16 * particles->trail_bind_poses.size());
+				particles->trail_bind_poses_dirty = true;
+			}
+
+			if (particles->trail_bind_pose_uniform_set.is_null()) {
+				Vector<RD::Uniform> uniforms;
+				{
+					RD::Uniform u;
+					u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+					u.binding = 0;
+					if (particles->trail_bind_pose_buffer.is_valid()) {
+						u.ids.push_back(particles->trail_bind_pose_buffer);
+					} else {
+						u.ids.push_back(default_rd_storage_buffer);
+					}
+					uniforms.push_back(u);
+				}
+
+				particles->trail_bind_pose_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, particles_shader.copy_shader.version_get_shader(particles_shader.copy_shader_version, 0), 2);
+			}
+
+			if (particles->trail_bind_pose_buffer.is_valid() && particles->trail_bind_poses_dirty) {
+				if (particles_shader.pose_update_buffer.size() < uint32_t(particles->trail_bind_poses.size()) * 16) {
+					particles_shader.pose_update_buffer.resize(particles->trail_bind_poses.size() * 16);
+				}
+
+				for (int i = 0; i < particles->trail_bind_poses.size(); i++) {
+					store_transform(particles->trail_bind_poses[i], &particles_shader.pose_update_buffer[i * 16]);
+				}
+
+				RD::get_singleton()->buffer_update(particles->trail_bind_pose_buffer, 0, particles->trail_bind_poses.size() * 16 * sizeof(float), particles_shader.pose_update_buffer.ptr());
+			}
+		}
+
 		bool zero_time_scale = Engine::get_singleton()->get_time_scale() <= 0.0;
 
 		if (particles->clear && particles->pre_process_time > 0.0) {
 			float frame_time;
-			if (particles->fixed_fps > 0) {
-				frame_time = 1.0 / particles->fixed_fps;
+			if (fixed_fps > 0) {
+				frame_time = 1.0 / fixed_fps;
 			} else {
 				frame_time = 1.0 / 30.0;
 			}
@@ -4706,14 +4942,14 @@ void RendererStorageRD::update_particles() {
 			}
 		}
 
-		if (particles->fixed_fps > 0) {
+		if (fixed_fps > 0) {
 			float frame_time;
 			float decr;
 			if (zero_time_scale) {
 				frame_time = 0.0;
-				decr = 1.0 / particles->fixed_fps;
+				decr = 1.0 / fixed_fps;
 			} else {
-				frame_time = 1.0 / particles->fixed_fps;
+				frame_time = 1.0 / fixed_fps;
 				decr = frame_time;
 			}
 			float delta = RendererCompositorRD::singleton->get_frame_delta_time();
@@ -4741,16 +4977,39 @@ void RendererStorageRD::update_particles() {
 
 		//copy particles to instance buffer
 
-		if (particles->draw_order != RS::PARTICLES_DRAW_ORDER_VIEW_DEPTH) {
+		if (particles->draw_order != RS::PARTICLES_DRAW_ORDER_VIEW_DEPTH && particles->transform_align != RS::PARTICLES_TRANSFORM_ALIGN_Z_BILLBOARD && particles->transform_align != RS::PARTICLES_TRANSFORM_ALIGN_Z_BILLBOARD_Y_TO_VELOCITY) {
+			//does not need view dependent operation, do copy here
 			ParticlesShader::CopyPushConstant copy_push_constant;
-			copy_push_constant.total_particles = particles->amount;
+
+			int total_amount = particles->amount;
+			if (particles->trails_enabled && particles->trail_bind_poses.size() > 1) {
+				total_amount *= particles->trail_bind_poses.size();
+			}
+
+			copy_push_constant.total_particles = total_amount;
+			copy_push_constant.frame_remainder = particles->interpolate ? particles->frame_remainder : 0.0;
+			copy_push_constant.align_mode = particles->transform_align;
+			copy_push_constant.align_up[0] = 0;
+			copy_push_constant.align_up[1] = 0;
+			copy_push_constant.align_up[2] = 0;
+
+			if (particles->trails_enabled && particles->trail_bind_poses.size() > 1) {
+				copy_push_constant.trail_size = particles->trail_bind_poses.size();
+				copy_push_constant.trail_total = particles->frame_history.size();
+				copy_push_constant.frame_delta = 1.0 / fixed_fps;
+			} else {
+				copy_push_constant.trail_size = 1;
+				copy_push_constant.trail_total = 1;
+				copy_push_constant.frame_delta = 0.0;
+			}
 
 			RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin();
 			RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, particles_shader.copy_pipelines[ParticlesShader::COPY_MODE_FILL_INSTANCES]);
 			RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->particles_copy_uniform_set, 0);
+			RD::get_singleton()->compute_list_bind_uniform_set(compute_list, particles->trail_bind_pose_uniform_set, 2);
 			RD::get_singleton()->compute_list_set_push_constant(compute_list, &copy_push_constant, sizeof(ParticlesShader::CopyPushConstant));
 
-			RD::get_singleton()->compute_list_dispatch_threads(compute_list, particles->amount, 1, 1);
+			RD::get_singleton()->compute_list_dispatch_threads(compute_list, total_amount, 1, 1);
 
 			RD::get_singleton()->compute_list_end();
 		}
@@ -8324,9 +8583,10 @@ bool RendererStorageRD::free(RID p_rid) {
 		light_owner.free(p_rid);
 
 	} else if (particles_owner.owns(p_rid)) {
+		update_particles();
 		Particles *particles = particles_owner.getornull(p_rid);
-		_particles_free_data(particles);
 		particles->dependency.deleted_notify(p_rid);
+		_particles_free_data(particles);
 		particles_owner.free(p_rid);
 	} else if (particles_collision_owner.owns(p_rid)) {
 		ParticlesCollision *particles_collision = particles_collision_owner.getornull(p_rid);
@@ -8863,14 +9123,14 @@ RendererStorageRD::RendererStorageRD() {
 		actions.renames["COLOR"] = "PARTICLE.color";
 		actions.renames["VELOCITY"] = "PARTICLE.velocity";
 		//actions.renames["MASS"] = "mass"; ?
-		actions.renames["ACTIVE"] = "PARTICLE.is_active";
+		actions.renames["ACTIVE"] = "particle_active";
 		actions.renames["RESTART"] = "restart";
 		actions.renames["CUSTOM"] = "PARTICLE.custom";
 		actions.renames["TRANSFORM"] = "PARTICLE.xform";
 		actions.renames["TIME"] = "FRAME.time";
 		actions.renames["LIFETIME"] = "params.lifetime";
 		actions.renames["DELTA"] = "local_delta";
-		actions.renames["NUMBER"] = "particle";
+		actions.renames["NUMBER"] = "particle_number";
 		actions.renames["INDEX"] = "index";
 		//actions.renames["GRAVITY"] = "current_gravity";
 		actions.renames["EMISSION_TRANSFORM"] = "FRAME.emission_transform";

+ 50 - 6
servers/rendering/renderer_rd/renderer_storage_rd.h

@@ -660,6 +660,11 @@ private:
 		float time;
 		float delta;
 
+		uint32_t frame;
+		uint32_t pad0;
+		uint32_t pad1;
+		uint32_t pad2;
+
 		uint32_t random_seed;
 		uint32_t attractor_count;
 		uint32_t collider_count;
@@ -704,10 +709,16 @@ private:
 		AABB custom_aabb = AABB(Vector3(-4, -4, -4), Vector3(8, 8, 8));
 		bool use_local_coords = true;
 		RID process_material;
+		uint32_t frame_counter = 0;
+		RS::ParticlesTransformAlign transform_align = RS::PARTICLES_TRANSFORM_ALIGN_DISABLED;
 
 		RS::ParticlesDrawOrder draw_order = RS::PARTICLES_DRAW_ORDER_INDEX;
 
 		Vector<RID> draw_passes;
+		Vector<Transform> trail_bind_poses;
+		bool trail_bind_poses_dirty = false;
+		RID trail_bind_pose_buffer;
+		RID trail_bind_pose_uniform_set;
 
 		RID particle_buffer;
 		RID particle_instance_buffer;
@@ -739,7 +750,8 @@ private:
 
 		float speed_scale = 1.0;
 
-		int fixed_fps = 0;
+		int fixed_fps = 30;
+		bool interpolate = true;
 		bool fractional_delta = false;
 		float frame_remainder = 0;
 		float collision_base_size = 0.01;
@@ -759,12 +771,19 @@ private:
 
 		Dependency dependency;
 
-		ParticlesFrameParams frame_params;
+		float trail_length = 1.0;
+		bool trails_enabled = false;
+		LocalVector<ParticlesFrameParams> frame_history;
+		LocalVector<ParticlesFrameParams> trail_params;
+
+		Particles() { //empty body: adds nothing beyond the in-class member initializers
+		}
 	};
 
 	void _particles_process(Particles *p_particles, float p_delta);
 	void _particles_allocate_emission_buffer(Particles *particles);
 	void _particles_free_data(Particles *particles);
+	void _particles_update_buffers(Particles *particles);
 
 	struct ParticlesShader {
 		struct PushConstant {
@@ -776,7 +795,7 @@ private:
 			uint32_t use_fractional_delta;
 			uint32_t sub_emitter_mode;
 			uint32_t can_emit;
-			uint32_t pad;
+			uint32_t trail_pass;
 		};
 
 		ParticlesShaderRD shader;
@@ -791,6 +810,14 @@ private:
 		struct CopyPushConstant { //push constant block of the particles copy shader; field order matches `Params` in particles_copy.glsl
 			float sort_direction[3]; //view axis used for the depth sort key (and by the Z billboard alignment)
 			uint32_t total_particles; //number of entries the dispatched pass may touch
+
+			uint32_t trail_size; //trail bind pose count, 1 when trails are not in use
+			uint32_t trail_total; //frame history length, 1 when trails are not in use
+			float frame_delta; //1.0 / fixed fps with trails, 0.0 otherwise
+			float frame_remainder; //particles->frame_remainder, or 0.0 when interpolation is disabled
+
+			float align_up[3]; //camera up axis for the view-dependent alignment modes
+			uint32_t align_mode; //RS::ParticlesTransformAlign value
 		};
 
 		enum {
@@ -804,6 +831,8 @@ private:
 		RID copy_shader_version;
 		RID copy_pipelines[COPY_MODE_MAX];
 
+		LocalVector<float> pose_update_buffer;
+
 	} particles_shader;
 
 	Particles *particle_update_list = nullptr;
@@ -2076,10 +2105,17 @@ public:
 	void particles_set_use_local_coordinates(RID p_particles, bool p_enable);
 	void particles_set_process_material(RID p_particles, RID p_material);
 	void particles_set_fixed_fps(RID p_particles, int p_fps);
+	void particles_set_interpolate(RID p_particles, bool p_enable);
 	void particles_set_fractional_delta(RID p_particles, bool p_enable);
 	void particles_set_collision_base_size(RID p_particles, float p_size);
+	void particles_set_transform_align(RID p_particles, RS::ParticlesTransformAlign p_transform_align);
+
+	void particles_set_trails(RID p_particles, bool p_enable, float p_length);
+	void particles_set_trail_bind_poses(RID p_particles, const Vector<Transform> &p_bind_poses);
+
 	void particles_restart(RID p_particles);
 	void particles_emit(RID p_particles, const Transform &p_transform, const Vector3 &p_velocity, const Color &p_color, const Color &p_custom, uint32_t p_emit_flags);
+
 	void particles_set_subemitter(RID p_particles, RID p_subemitter_particles);
 
 	void particles_set_draw_order(RID p_particles, RS::ParticlesDrawOrder p_order);
@@ -2097,15 +2133,21 @@ public:
 	int particles_get_draw_passes(RID p_particles) const;
 	RID particles_get_draw_pass_mesh(RID p_particles, int p_pass) const;
 
-	void particles_set_view_axis(RID p_particles, const Vector3 &p_axis);
+	void particles_set_view_axis(RID p_particles, const Vector3 &p_axis, const Vector3 &p_up_axis);
 
 	virtual bool particles_is_inactive(RID p_particles) const;
 
-	_FORCE_INLINE_ uint32_t particles_get_amount(RID p_particles) {
+	// Returns the total number of instances of a particle system (particles multiplied
+	// by trail segments) and writes the number of trail segments per particle to
+	// r_trail_divisor. On an invalid RID returns 0 with r_trail_divisor set to 1, so
+	// callers never read an unwritten divisor.
+	_FORCE_INLINE_ uint32_t particles_get_amount(RID p_particles, uint32_t &r_trail_divisor) {
+		r_trail_divisor = 1; //default: no trails; also the value reported on failure
 		Particles *particles = particles_owner.getornull(p_particles);
 		ERR_FAIL_COND_V(!particles, 0);
 
-		return particles->amount;
+		if (particles->trails_enabled && particles->trail_bind_poses.size() > 1) {
+			r_trail_divisor = particles->trail_bind_poses.size();
+		}
+
+		return particles->amount * r_trail_divisor;
 	}
 
 	_FORCE_INLINE_ uint32_t particles_is_using_local_coords(RID p_particles) {
@@ -2119,6 +2161,8 @@ public:
 		Particles *particles = particles_owner.getornull(p_particles);
 		ERR_FAIL_COND_V(!particles, RID());
 		if (particles->particles_transforms_buffer_uniform_set.is_null()) {
+			_particles_update_buffers(particles);
+
 			Vector<RD::Uniform> uniforms;
 
 			{

+ 64 - 19
servers/rendering/renderer_rd/shaders/particles.glsl

@@ -76,6 +76,11 @@ struct FrameParams {
 	float time;
 	float delta;
 
+	uint frame;
+	uint pad0;
+	uint pad1;
+	uint pad2;
+
 	uint random_seed;
 	uint attractor_count;
 	uint collider_count;
@@ -92,10 +97,16 @@ layout(set = 1, binding = 0, std430) restrict buffer FrameHistory {
 }
 frame_history;
 
+#define PARTICLE_FLAG_ACTIVE uint(1) //particle is alive; gates velocity integration and the user START/PROCESS code
+#define PARTICLE_FLAG_STARTED uint(2) //set together with ACTIVE when a particle (re)starts; cleared again in the non-trail pass
+#define PARTICLE_FLAG_TRAILED uint(4) //trail slot that copied the start state of its head particle in the trail pass
+#define PARTICLE_FRAME_MASK uint(0xFFFF) //frame numbers are truncated to 16 bits before being stored in flags
+#define PARTICLE_FRAME_SHIFT uint(16) //the frame/cycle number is stored shifted left by this many bits
+
 struct ParticleData {
 	mat4 xform;
 	vec3 velocity;
-	bool is_active;
+	uint flags; //PARTICLE_FLAG_* bits combined with a frame/cycle number shifted by PARTICLE_FRAME_SHIFT
 	vec4 color;
 	vec4 custom;
 };
@@ -162,7 +173,7 @@ layout(push_constant, binding = 0, std430) uniform Params {
 	bool use_fractional_delta;
 	bool sub_emitter_mode;
 	bool can_emit;
-	uint pad;
+	bool trail_pass;
 }
 params;
 
@@ -201,6 +212,14 @@ bool emit_subparticle(mat4 p_xform, vec3 p_velocity, vec4 p_color, vec4 p_custom
 void main() {
 	uint particle = gl_GlobalInvocationID.x;
 
+	if (params.trail_size > 1) {
+		if (params.trail_pass) {
+			particle += (particle / (params.trail_size - 1)) + 1;
+		} else {
+			particle *= params.trail_size;
+		}
+	}
+
 	if (particle >= params.total_particles * params.trail_size) {
 		return; //discard
 	}
@@ -229,7 +248,7 @@ void main() {
 		PARTICLE.color = vec4(1.0);
 		PARTICLE.custom = vec4(0.0);
 		PARTICLE.velocity = vec3(0.0);
-		PARTICLE.is_active = false;
+		PARTICLE.flags = 0;
 		PARTICLE.xform = mat4(
 				vec4(1.0, 0.0, 0.0, 0.0),
 				vec4(0.0, 1.0, 0.0, 0.0),
@@ -237,6 +256,29 @@ void main() {
 				vec4(0.0, 0.0, 0.0, 1.0));
 	}
 
+	//clear started flag if set
+
+	if (params.trail_pass) {
+		//trail started
+		uint src_idx = index * params.trail_size;
+		if (bool(particles.data[src_idx].flags & PARTICLE_FLAG_STARTED)) {
+			//save start conditions for trails
+			PARTICLE.color = particles.data[src_idx].color;
+			PARTICLE.custom = particles.data[src_idx].custom;
+			PARTICLE.velocity = particles.data[src_idx].velocity;
+			PARTICLE.flags = PARTICLE_FLAG_TRAILED | ((frame_history.data[0].frame & PARTICLE_FRAME_MASK) << PARTICLE_FRAME_SHIFT); //mark it as trailed, save in which frame it will start
+			PARTICLE.xform = particles.data[src_idx].xform;
+		}
+
+		if (bool(PARTICLE.flags & PARTICLE_FLAG_TRAILED) && ((PARTICLE.flags >> PARTICLE_FRAME_SHIFT) == (FRAME.frame & PARTICLE_FRAME_MASK))) { //check this is trailed and see if it should start now
+			// we just assume that this is the first frame of the particle, the rest is deterministic
+			PARTICLE.flags = PARTICLE_FLAG_ACTIVE | (particles.data[src_idx].flags & (PARTICLE_FRAME_MASK << PARTICLE_FRAME_SHIFT));
+			return; //- this appears like it should be correct, but it seems not to be.. wonder why.
+		}
+	} else {
+		PARTICLE.flags &= ~PARTICLE_FLAG_STARTED;
+	}
+
 	bool collided = false;
 	vec3 collision_normal = vec3(0.0);
 	float collision_depth = 0.0;
@@ -245,19 +287,17 @@ void main() {
 
 #if !defined(DISABLE_VELOCITY)
 
-	if (PARTICLE.is_active) {
+	if (bool(PARTICLE.flags & PARTICLE_FLAG_ACTIVE)) {
 		PARTICLE.xform[3].xyz += PARTICLE.velocity * local_delta;
 	}
 #endif
 
-	/* Process physics if active */
-
-	if (params.sub_emitter_mode) {
-		if (!PARTICLE.is_active) {
+	if (!params.trail_pass && params.sub_emitter_mode) {
+		if (!bool(PARTICLE.flags & PARTICLE_FLAG_ACTIVE)) {
 			int src_index = atomicAdd(src_particles.particle_count, -1) - 1;
 
 			if (src_index >= 0) {
-				PARTICLE.is_active = true;
+				PARTICLE.flags = (PARTICLE_FLAG_ACTIVE | PARTICLE_FLAG_STARTED | (FRAME.cycle << PARTICLE_FRAME_SHIFT));
 				restart = true;
 
 				if (bool(src_particles.data[src_index].flags & EMISSION_FLAG_HAS_POSITION)) {
@@ -339,16 +379,12 @@ void main() {
 			}
 		}
 
-		uint current_cycle = FRAME.cycle;
-
-		if (FRAME.system_phase < restart_phase) {
-			current_cycle -= uint(1);
+		if (params.trail_pass) {
+			restart = false;
 		}
 
-		uint particle_number = current_cycle * uint(params.total_particles) + particle;
-
 		if (restart) {
-			PARTICLE.is_active = FRAME.emitting;
+			PARTICLE.flags = FRAME.emitting ? (PARTICLE_FLAG_ACTIVE | PARTICLE_FLAG_STARTED | (FRAME.cycle << PARTICLE_FRAME_SHIFT)) : 0;
 			restart_position = true;
 			restart_rotation_scale = true;
 			restart_velocity = true;
@@ -357,11 +393,15 @@ void main() {
 		}
 	}
 
-	if (restart && PARTICLE.is_active) {
+	bool particle_active = bool(PARTICLE.flags & PARTICLE_FLAG_ACTIVE);
+
+	uint particle_number = (PARTICLE.flags >> PARTICLE_FRAME_SHIFT) * uint(params.total_particles) + index;
+
+	if (restart && particle_active) {
 #CODE : START
 	}
 
-	if (PARTICLE.is_active) {
+	if (particle_active) {
 		for (uint i = 0; i < FRAME.attractor_count; i++) {
 			vec3 dir;
 			float amount;
@@ -539,7 +579,12 @@ void main() {
 		}
 	}
 
-	if (PARTICLE.is_active) {
+	if (particle_active) {
 #CODE : PROCESS
 	}
+
+	PARTICLE.flags &= ~PARTICLE_FLAG_ACTIVE;
+	if (particle_active) {
+		PARTICLE.flags |= PARTICLE_FLAG_ACTIVE;
+	}
 }

+ 96 - 5
servers/rendering/renderer_rd/shaders/particles_copy.glsl

@@ -6,10 +6,14 @@
 
 layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
 
+#define PARTICLE_FLAG_ACTIVE uint(1) //same bit values as the PARTICLE_FLAG_* defines in particles.glsl
+#define PARTICLE_FLAG_STARTED uint(2)
+#define PARTICLE_FLAG_TRAILED uint(4) //entries with ACTIVE or TRAILED set take the transform path in the instance fill
+
 struct ParticleData {
 	mat4 xform;
 	vec3 velocity;
-	bool is_active;
+	uint flags; //PARTICLE_FLAG_* bits written by the particles process shader
 	vec4 color;
 	vec4 custom;
 };
@@ -33,12 +37,30 @@ sort_buffer;
 
 #endif // USE_SORT_BUFFER
 
+layout(set = 2, binding = 0, std430) restrict readonly buffer TrailBindPoses { //bound from particles->trail_bind_pose_uniform_set on the C++ side
+	mat4 data[]; //one matrix (16 floats) per trail bind pose
+}
+trail_bind_poses;
+
 layout(push_constant, binding = 0, std430) uniform Params { //field order matches ParticlesShader::CopyPushConstant on the C++ side
 	vec3 sort_direction; //view axis used for the depth sort key (and by the Z billboard alignment)
 	uint total_particles; //number of entries this pass may touch
+
+	uint trail_size; //trail segments per particle, 1 when trails are not in use
+	uint trail_total; //frame history length, 1 when trails are not in use
+	float frame_delta; //1.0 / fixed fps with trails, 0.0 otherwise
+	float frame_remainder; //0.0 when interpolation is disabled
+
+	vec3 align_up; //camera up axis for the view-dependent alignment modes
+	uint align_mode; //one of the TRANSFORM_ALIGN_* values
 }
 params;
 
+#define TRANSFORM_ALIGN_DISABLED 0 //values for params.align_mode; NOTE(review): presumably mirror RS::ParticlesTransformAlign - confirm against the enum
+#define TRANSFORM_ALIGN_Z_BILLBOARD 1
+#define TRANSFORM_ALIGN_Y_TO_VELOCITY 2
+#define TRANSFORM_ALIGN_Z_BILLBOARD_Y_TO_VELOCITY 3
+
 void main() {
 #ifdef MODE_FILL_SORT_BUFFER
 
@@ -47,7 +69,11 @@ void main() {
 		return; //discard
 	}
 
-	sort_buffer.data[particle].x = dot(params.sort_direction, particles.data[particle].xform[3].xyz);
+	uint src_particle = particle;
+	if (params.trail_size > 1) {
+		src_particle = src_particle * params.trail_size + params.trail_size / 2; //use trail center for sorting
+	}
+	sort_buffer.data[particle].x = dot(params.sort_direction, particles.data[src_particle].xform[3].xyz);
 	sort_buffer.data[particle].y = float(particle);
 #endif
 
@@ -61,13 +87,78 @@ void main() {
 	}
 
 #ifdef USE_SORT_BUFFER
-	particle = uint(sort_buffer.data[particle].y); //use index from sort buffer
+
+	if (params.trail_size > 1) {
+		particle = uint(sort_buffer.data[particle / params.trail_size].y) + (particle % params.trail_size);
+	} else {
+		particle = uint(sort_buffer.data[particle].y); //use index from sort buffer
+	}
 #endif
 
 	mat4 txform;
 
-	if (particles.data[particle].is_active) {
-		txform = transpose(particles.data[particle].xform);
+	if (bool(particles.data[particle].flags & PARTICLE_FLAG_ACTIVE) || bool(particles.data[particle].flags & PARTICLE_FLAG_TRAILED)) {
+		txform = particles.data[particle].xform;
+		if (params.trail_size > 1) {
+			// Since the steps don't fit precisely in the history frames, we must do a tiny bit of
+			// interpolation to get them close to their intended location.
+			uint part_ofs = particle % params.trail_size;
+			float natural_ofs = fract((float(part_ofs) / float(params.trail_size)) * float(params.trail_total)) * params.frame_delta;
+
+			txform[3].xyz -= particles.data[particle].velocity * natural_ofs;
+		}
+
+		switch (params.align_mode) {
+			case TRANSFORM_ALIGN_DISABLED: {
+			} break; //nothing
+			case TRANSFORM_ALIGN_Z_BILLBOARD: {
+				mat3 local = mat3(normalize(cross(params.align_up, params.sort_direction)), params.align_up, params.sort_direction);
+				local = local * mat3(txform);
+				txform[0].xyz = local[0];
+				txform[1].xyz = local[1];
+				txform[2].xyz = local[2];
+
+			} break;
+			case TRANSFORM_ALIGN_Y_TO_VELOCITY: {
+				vec3 v = particles.data[particle].velocity;
+				float s = (length(txform[0]) + length(txform[1]) + length(txform[2])) / 3.0;
+				if (length(v) > 0.0) {
+					txform[1].xyz = normalize(v);
+				} else {
+					txform[1].xyz = normalize(txform[1].xyz);
+				}
+
+				txform[0].xyz = normalize(cross(txform[1].xyz, txform[2].xyz));
+				txform[2].xyz = vec3(0.0, 0.0, 1.0) * s;
+				txform[0].xyz *= s;
+				txform[1].xyz *= s;
+			} break;
+			case TRANSFORM_ALIGN_Z_BILLBOARD_Y_TO_VELOCITY: {
+				vec3 v = particles.data[particle].velocity;
+				vec3 sv = v - params.sort_direction * dot(params.sort_direction, v); //screen velocity
+				float s = (length(txform[0]) + length(txform[1]) + length(txform[2])) / 3.0;
+
+				if (length(sv) == 0) {
+					sv = params.align_up;
+				}
+
+				sv = normalize(sv);
+
+				txform[0].xyz = normalize(cross(sv, params.sort_direction)) * s;
+				txform[1].xyz = sv * s;
+				txform[2].xyz = params.sort_direction * s;
+
+			} break;
+		}
+
+		txform[3].xyz += particles.data[particle].velocity * params.frame_remainder;
+
+		if (params.trail_size > 1) {
+			uint part_ofs = particle % params.trail_size;
+			txform = txform * trail_bind_poses.data[part_ofs];
+		}
+
+		txform = transpose(txform);
 	} else {
 		txform = mat4(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); //zero scale, becomes invisible
 	}

+ 67 - 30
servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl

@@ -48,11 +48,11 @@ layout(location = 8) in vec4 custom2_attrib;
 layout(location = 9) in vec4 custom3_attrib;
 #endif
 
-#if defined(BONES_USED)
+#if defined(BONES_USED) || defined(USE_PARTICLE_TRAILS)
 layout(location = 10) in uvec4 bone_attrib;
 #endif
 
-#if defined(WEIGHTS_USED)
+#if defined(WEIGHTS_USED) || defined(USE_PARTICLE_TRAILS)
 layout(location = 11) in vec4 weight_attrib;
 #endif
 
@@ -125,10 +125,72 @@ void main() {
 
 	if (is_multimesh) {
 		//multimesh, instances are for it
-		uint offset = (instances.data[instance_index].flags >> INSTANCE_FLAGS_MULTIMESH_STRIDE_SHIFT) & INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK;
-		offset *= gl_InstanceIndex;
 
 		mat4 matrix;
+
+#ifdef USE_PARTICLE_TRAILS
+		uint trail_size = (instances.data[instance_index].flags >> INSTANCE_FLAGS_PARTICLE_TRAIL_SHIFT) & INSTANCE_FLAGS_PARTICLE_TRAIL_MASK;
+		uint stride = 3 + 1 + 1; //particles always uses this format
+
+		uint offset = trail_size * stride * gl_InstanceIndex;
+
+#ifdef COLOR_USED
+		vec4 pcolor;
+#endif
+		{
+			uint boffset = offset + bone_attrib.x * stride;
+			matrix = mat4(transforms.data[boffset + 0], transforms.data[boffset + 1], transforms.data[boffset + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weight_attrib.x;
+#ifdef COLOR_USED
+			pcolor = transforms.data[boffset + 3] * weight_attrib.x;
+#endif
+		}
+		if (weight_attrib.y > 0.001) {
+			uint boffset = offset + bone_attrib.y * stride;
+			matrix += mat4(transforms.data[boffset + 0], transforms.data[boffset + 1], transforms.data[boffset + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weight_attrib.y;
+#ifdef COLOR_USED
+			pcolor += transforms.data[boffset + 3] * weight_attrib.y;
+#endif
+		}
+		if (weight_attrib.z > 0.001) {
+			uint boffset = offset + bone_attrib.z * stride;
+			matrix += mat4(transforms.data[boffset + 0], transforms.data[boffset + 1], transforms.data[boffset + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weight_attrib.z;
+#ifdef COLOR_USED
+			pcolor += transforms.data[boffset + 3] * weight_attrib.z;
+#endif
+		}
+		if (weight_attrib.w > 0.001) {
+			uint boffset = offset + bone_attrib.w * stride;
+			matrix += mat4(transforms.data[boffset + 0], transforms.data[boffset + 1], transforms.data[boffset + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weight_attrib.w;
+#ifdef COLOR_USED
+			pcolor += transforms.data[boffset + 3] * weight_attrib.w;
+#endif
+		}
+
+		instance_custom = transforms.data[offset + 4];
+
+#ifdef COLOR_USED
+		color_interp *= pcolor;
+#endif
+
+#else
+		uint stride = 0;
+		{
+			//TODO implement a small lookup table for the stride
+			if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_FORMAT_2D)) {
+				stride += 2;
+			} else {
+				stride += 3;
+			}
+			if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_HAS_COLOR)) {
+				stride += 1;
+			}
+			if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA)) {
+				stride += 1;
+			}
+		}
+
+		uint offset = stride * gl_InstanceIndex;
+
 		if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH_FORMAT_2D)) {
 			matrix = mat4(transforms.data[offset + 0], transforms.data[offset + 1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
 			offset += 2;
@@ -148,6 +210,7 @@ void main() {
 			instance_custom = transforms.data[offset];
 		}
 
+#endif
 		//transpose
 		matrix = transpose(matrix);
 		world_matrix = world_matrix * matrix;
@@ -165,32 +228,6 @@ void main() {
 	vec3 binormal = normalize(cross(normal, tangent) * binormalf);
 #endif
 
-#if 0
-	if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_SKELETON)) {
-		//multimesh, instances are for it
-
-		uvec2 bones_01 = uvec2(bone_attrib.x & 0xFFFF, bone_attrib.x >> 16) * 3;
-		uvec2 bones_23 = uvec2(bone_attrib.y & 0xFFFF, bone_attrib.y >> 16) * 3;
-		vec2 weights_01 = unpackUnorm2x16(bone_attrib.z);
-		vec2 weights_23 = unpackUnorm2x16(bone_attrib.w);
-
-		mat4 m = mat4(transforms.data[bones_01.x], transforms.data[bones_01.x + 1], transforms.data[bones_01.x + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_01.x;
-		m += mat4(transforms.data[bones_01.y], transforms.data[bones_01.y + 1], transforms.data[bones_01.y + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_01.y;
-		m += mat4(transforms.data[bones_23.x], transforms.data[bones_23.x + 1], transforms.data[bones_23.x + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_23.x;
-		m += mat4(transforms.data[bones_23.y], transforms.data[bones_23.y + 1], transforms.data[bones_23.y + 2], vec4(0.0, 0.0, 0.0, 1.0)) * weights_23.y;
-
-		//reverse order because its transposed
-		vertex = (vec4(vertex, 1.0) * m).xyz;
-		normal = (vec4(normal, 0.0) * m).xyz;
-
-#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED)
-
-		tangent = (vec4(tangent, 0.0) * m).xyz;
-		binormal = (vec4(binormal, 0.0) * m).xyz;
-#endif
-	}
-#endif
-
 #ifdef UV_USED
 	uv_interp = uv_attrib;
 #endif

+ 3 - 4
servers/rendering/renderer_rd/shaders/scene_forward_clustered_inc.glsl

@@ -61,12 +61,11 @@ layout(set = 0, binding = 2) uniform sampler shadow_sampler;
 #define INSTANCE_FLAGS_MULTIMESH_FORMAT_2D (1 << 13)
 #define INSTANCE_FLAGS_MULTIMESH_HAS_COLOR (1 << 14)
 #define INSTANCE_FLAGS_MULTIMESH_HAS_CUSTOM_DATA (1 << 15)
-#define INSTANCE_FLAGS_MULTIMESH_STRIDE_SHIFT 16
+#define INSTANCE_FLAGS_PARTICLE_TRAIL_SHIFT 16
 //3 bits of stride
-#define INSTANCE_FLAGS_MULTIMESH_STRIDE_MASK 0x7
+#define INSTANCE_FLAGS_PARTICLE_TRAIL_MASK 0xFF
 
-#define INSTANCE_FLAGS_SKELETON (1 << 19)
-#define INSTANCE_FLAGS_NON_UNIFORM_SCALE (1 << 20)
+#define INSTANCE_FLAGS_NON_UNIFORM_SCALE (1 << 24)
 
 layout(set = 0, binding = 3, std430) restrict readonly buffer OmniLights {
 	LightData data[];

+ 4 - 1
servers/rendering/renderer_scene_cull.cpp

@@ -1427,6 +1427,9 @@ void RendererSceneCull::_update_instance(Instance *p_instance) {
 		pair.pair_mask |= 1 << RS::INSTANCE_LIGHT;
 		pair.pair_mask |= 1 << RS::INSTANCE_GI_PROBE;
 		pair.pair_mask |= 1 << RS::INSTANCE_LIGHTMAP;
+		if (p_instance->base_type == RS::INSTANCE_PARTICLES) {
+			pair.pair_mask |= 1 << RS::INSTANCE_PARTICLES_COLLISION;
+		}
 
 		pair.pair_mask |= geometry_instance_pair_mask;
 
@@ -2410,7 +2413,7 @@ void RendererSceneCull::_frustum_cull(CullData &cull_data, FrustumCullResult &cu
 						cull_data.cull->lock.lock();
 						RSG::storage->particles_request_process(idata.base_rid);
 						cull_data.cull->lock.unlock();
-						RSG::storage->particles_set_view_axis(idata.base_rid, -cull_data.cam_transform.basis.get_axis(2).normalized());
+						RSG::storage->particles_set_view_axis(idata.base_rid, -cull_data.cam_transform.basis.get_axis(2).normalized(), cull_data.cam_transform.basis.get_axis(1).normalized());
 						//particles visible? request redraw
 						RenderingServerDefault::redraw_request();
 					}

+ 2 - 0
servers/rendering/renderer_scene_cull.h

@@ -443,6 +443,7 @@ public:
 					singleton->_instance_queue_update(instance, false, true);
 				} break;
 				case RendererStorage::DEPENDENCY_CHANGED_MESH:
+				case RendererStorage::DEPENDENCY_CHANGED_PARTICLES:
 				case RendererStorage::DEPENDENCY_CHANGED_MULTIMESH:
 				case RendererStorage::DEPENDENCY_CHANGED_DECAL:
 				case RendererStorage::DEPENDENCY_CHANGED_LIGHT:
@@ -661,6 +662,7 @@ public:
 
 		_FORCE_INLINE_ bool operator()(void *p_data) {
 			Instance *p_instance = (Instance *)p_data;
+
 			if (instance != p_instance && instance->transformed_aabb.intersects(p_instance->transformed_aabb) && (pair_mask & (1 << p_instance->base_type))) {
 				//test is more coarse in indexer
 				p_instance->pair_check = pair_pass;

+ 9 - 1
servers/rendering/renderer_storage.h

@@ -43,6 +43,7 @@ public:
 		DEPENDENCY_CHANGED_MESH,
 		DEPENDENCY_CHANGED_MULTIMESH,
 		DEPENDENCY_CHANGED_MULTIMESH_VISIBLE_INSTANCES,
+		DEPENDENCY_CHANGED_PARTICLES,
 		DEPENDENCY_CHANGED_DECAL,
 		DEPENDENCY_CHANGED_SKELETON_DATA,
 		DEPENDENCY_CHANGED_SKELETON_BONES,
@@ -498,8 +499,15 @@ public:
 	virtual void particles_set_use_local_coordinates(RID p_particles, bool p_enable) = 0;
 	virtual void particles_set_process_material(RID p_particles, RID p_material) = 0;
 	virtual void particles_set_fixed_fps(RID p_particles, int p_fps) = 0;
+	virtual void particles_set_interpolate(RID p_particles, bool p_enable) = 0;
 	virtual void particles_set_fractional_delta(RID p_particles, bool p_enable) = 0;
 	virtual void particles_set_collision_base_size(RID p_particles, float p_size) = 0;
+
+	virtual void particles_set_transform_align(RID p_particles, RS::ParticlesTransformAlign p_transform_align) = 0;
+
+	virtual void particles_set_trails(RID p_particles, bool p_enable, float p_length) = 0;
+	virtual void particles_set_trail_bind_poses(RID p_particles, const Vector<Transform> &p_bind_poses) = 0;
+
 	virtual void particles_restart(RID p_particles) = 0;
 	virtual void particles_emit(RID p_particles, const Transform &p_transform, const Vector3 &p_velocity, const Color &p_color, const Color &p_custom, uint32_t p_emit_flags) = 0;
 	virtual void particles_set_subemitter(RID p_particles, RID p_subemitter_particles) = 0;
@@ -520,7 +528,7 @@ public:
 	virtual int particles_get_draw_passes(RID p_particles) const = 0;
 	virtual RID particles_get_draw_pass_mesh(RID p_particles, int p_pass) const = 0;
 
-	virtual void particles_set_view_axis(RID p_particles, const Vector3 &p_axis) = 0;
+	virtual void particles_set_view_axis(RID p_particles, const Vector3 &p_axis, const Vector3 &p_up_axis) = 0;
 
 	virtual void particles_add_collision(RID p_particles, RID p_particles_collision_instance) = 0;
 	virtual void particles_remove_collision(RID p_particles, RID p_particles_collision_instance) = 0;

+ 6 - 0
servers/rendering/rendering_server_default.h

@@ -491,14 +491,20 @@ public:
 	FUNC2(particles_set_use_local_coordinates, RID, bool)
 	FUNC2(particles_set_process_material, RID, RID)
 	FUNC2(particles_set_fixed_fps, RID, int)
+	FUNC2(particles_set_interpolate, RID, bool)
 	FUNC2(particles_set_fractional_delta, RID, bool)
 	FUNC1R(bool, particles_is_inactive, RID)
+	FUNC3(particles_set_trails, RID, bool, float)
+	FUNC2(particles_set_trail_bind_poses, RID, const Vector<Transform> &)
+
 	FUNC1(particles_request_process, RID)
 	FUNC1(particles_restart, RID)
 	FUNC6(particles_emit, RID, const Transform &, const Vector3 &, const Color &, const Color &, uint32_t)
 	FUNC2(particles_set_subemitter, RID, RID)
 	FUNC2(particles_set_collision_base_size, RID, float)
 
+	FUNC2(particles_set_transform_align, RID, RS::ParticlesTransformAlign)
+
 	FUNC2(particles_set_draw_order, RID, RS::ParticlesDrawOrder)
 
 	FUNC2(particles_set_draw_passes, RID, int)

+ 2 - 1
servers/rendering/shader_types.cpp

@@ -219,6 +219,7 @@ ShaderTypes::ShaderTypes() {
 	shader_modes[RS::SHADER_SPATIAL].modes.push_back("shadow_to_opacity");
 
 	shader_modes[RS::SHADER_SPATIAL].modes.push_back("vertex_lighting");
+	shader_modes[RS::SHADER_SPATIAL].modes.push_back("particle_trails");
 
 	shader_modes[RS::SHADER_SPATIAL].modes.push_back("alpha_to_coverage");
 	shader_modes[RS::SHADER_SPATIAL].modes.push_back("alpha_to_coverage_and_one");
@@ -327,7 +328,7 @@ ShaderTypes::ShaderTypes() {
 	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["LIFETIME"] = constt(ShaderLanguage::TYPE_FLOAT);
 	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["DELTA"] = constt(ShaderLanguage::TYPE_FLOAT);
 	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["NUMBER"] = constt(ShaderLanguage::TYPE_UINT);
-	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["INDEX"] = constt(ShaderLanguage::TYPE_INT);
+	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["INDEX"] = constt(ShaderLanguage::TYPE_UINT);
 	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["EMISSION_TRANSFORM"] = constt(ShaderLanguage::TYPE_MAT4);
 	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["RANDOM_SEED"] = constt(ShaderLanguage::TYPE_UINT);
 	shader_modes[RS::SHADER_PARTICLES].functions["start"].built_ins["RESTART_POSITION"] = constt(ShaderLanguage::TYPE_BOOL);

+ 14 - 0
servers/rendering_server.h

@@ -631,8 +631,22 @@ public:
 	virtual void particles_set_use_local_coordinates(RID p_particles, bool p_enable) = 0;
 	virtual void particles_set_process_material(RID p_particles, RID p_material) = 0;
 	virtual void particles_set_fixed_fps(RID p_particles, int p_fps) = 0;
+	virtual void particles_set_interpolate(RID p_particles, bool p_enable) = 0;
 	virtual void particles_set_fractional_delta(RID p_particles, bool p_enable) = 0;
 	virtual void particles_set_collision_base_size(RID p_particles, float p_size) = 0;
+
+	enum ParticlesTransformAlign {
+		PARTICLES_TRANSFORM_ALIGN_DISABLED,
+		PARTICLES_TRANSFORM_ALIGN_Z_BILLBOARD,
+		PARTICLES_TRANSFORM_ALIGN_Y_TO_VELOCITY,
+		PARTICLES_TRANSFORM_ALIGN_Z_BILLBOARD_Y_TO_VELOCITY,
+	};
+
+	virtual void particles_set_transform_align(RID p_particles, ParticlesTransformAlign p_transform_align) = 0;
+
+	virtual void particles_set_trails(RID p_particles, bool p_enable, float p_length_sec) = 0;
+	virtual void particles_set_trail_bind_poses(RID p_particles, const Vector<Transform> &p_bind_poses) = 0;
+
 	virtual bool particles_is_inactive(RID p_particles) = 0;
 	virtual void particles_request_process(RID p_particles) = 0;
 	virtual void particles_restart(RID p_particles) = 0;