Browse Source

Merge pull request #99504 from LunaCapra/audio-metadata

Add metadata tags to WAV and OGG audio streams
Rémi Verschelde 4 months ago
parent
commit
8131883b16

+ 7 - 0
doc/classes/AudioStream.xml

@@ -51,6 +51,13 @@
 				Override this method to customize the name assigned to this audio stream. Unused by the engine.
 			</description>
 		</method>
+		<method name="_get_tags" qualifiers="virtual const">
+			<return type="Dictionary" />
+			<description>
+				Override this method to customize the tags for this audio stream. Should return a [Dictionary] of strings with the tag as the key and its content as the value.
+				Commonly used tags include [code]title[/code], [code]artist[/code], [code]album[/code], [code]tracknumber[/code], and [code]date[/code].
+			</description>
+		</method>
 		<method name="_has_loop" qualifiers="virtual const">
 			<return type="bool" />
 			<description>

+ 6 - 0
doc/classes/AudioStreamWAV.xml

@@ -79,6 +79,12 @@
 		<member name="stereo" type="bool" setter="set_stereo" getter="is_stereo" default="false">
 			If [code]true[/code], audio is stereo.
 		</member>
+		<member name="tags" type="Dictionary" setter="set_tags" getter="get_tags" default="{}">
+			Contains user-defined tags if found in the WAV data.
+			Commonly used tags include [code]title[/code], [code]artist[/code], [code]album[/code], [code]tracknumber[/code], and [code]date[/code] ([code]date[/code] does not have a standard date format).
+			[b]Note:[/b] No tag is [i]guaranteed[/i] to be present in every file, so make sure to account for the keys not always existing.
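+			[b]Note:[/b] Only WAV files using a [code]LIST[/code] chunk with an identifier of [code]INFO[/code] to encode the tags are currently supported. Recognized [code]INFO[/code] identifiers are renamed to the common tag names (for example [code]INAM[/code] becomes [code]title[/code]); unrecognized identifiers are kept as their four-character code.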
+		</member>
 	</members>
 	<constants>
 		<constant name="FORMAT_8_BITS" value="0" enum="Format">

+ 29 - 0
modules/vorbis/audio_stream_ogg_vorbis.cpp

@@ -456,6 +456,23 @@ void AudioStreamOggVorbis::maybe_update_info() {
 		ERR_FAIL_COND_MSG(err != 0, "Error parsing header packet " + itos(i) + ": " + itos(err));
 	}
 
+	Dictionary dictionary; // Collects the "TAG=value" user comments of the parsed comment header.
+	for (int i = 0; i < comment.comments; i++) {
+		String c = String::utf8(comment.user_comments[i]);
+		int equals = c.find_char('='); // Position of the tag/value separator.
+
+		if (equals == -1) {
+			WARN_PRINT("Invalid comment in Ogg Vorbis file.");
+			continue; // No separator: skip this comment but keep parsing the remaining ones.
+		}
+
+		String tag = c.substr(0, equals);
+		String tag_value = c.substr(equals + 1);
+
+		dictionary[tag.to_lower()] = tag_value; // Keys are lowercased; a repeated tag overwrites the earlier value.
+	}
+	tags = dictionary;
+
 	packet_sequence->set_sampling_rate(info.rate);
 
 	vorbis_comment_clear(&comment);
@@ -524,6 +541,14 @@ int AudioStreamOggVorbis::get_bar_beats() const {
 	return bar_beats;
 }
 
+void AudioStreamOggVorbis::set_tags(const Dictionary &p_tags) { // Replaces the stored tag dictionary with p_tags.
+	tags = p_tags;
+}
+
+Dictionary AudioStreamOggVorbis::get_tags() const { // Returns the stored tag dictionary.
+	return tags;
+}
+
 bool AudioStreamOggVorbis::is_monophonic() const {
 	return false;
 }
@@ -692,10 +717,14 @@ void AudioStreamOggVorbis::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("set_bar_beats", "count"), &AudioStreamOggVorbis::set_bar_beats);
 	ClassDB::bind_method(D_METHOD("get_bar_beats"), &AudioStreamOggVorbis::get_bar_beats);
 
+	ClassDB::bind_method(D_METHOD("set_tags", "tags"), &AudioStreamOggVorbis::set_tags);
+	ClassDB::bind_method(D_METHOD("get_tags"), &AudioStreamOggVorbis::get_tags);
+
 	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "packet_sequence", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR), "set_packet_sequence", "get_packet_sequence");
 	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "bpm", PROPERTY_HINT_RANGE, "0,400,0.01,or_greater"), "set_bpm", "get_bpm");
 	ADD_PROPERTY(PropertyInfo(Variant::INT, "beat_count", PROPERTY_HINT_RANGE, "0,512,1,or_greater"), "set_beat_count", "get_beat_count");
 	ADD_PROPERTY(PropertyInfo(Variant::INT, "bar_beats", PROPERTY_HINT_RANGE, "2,32,1,or_greater"), "set_bar_beats", "get_bar_beats");
+	ADD_PROPERTY(PropertyInfo(Variant::DICTIONARY, "tags", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR), "set_tags", "get_tags");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "loop"), "set_loop", "has_loop");
 	ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "loop_offset"), "set_loop_offset", "get_loop_offset");
 }

+ 4 - 0
modules/vorbis/audio_stream_ogg_vorbis.h

@@ -133,6 +133,7 @@ class AudioStreamOggVorbis : public AudioStream {
 	double bpm = 0;
 	int beat_count = 0;
 	int bar_beats = 4;
+	Dictionary tags;
 
 protected:
 	static void _bind_methods();
@@ -156,6 +157,9 @@ public:
 	void set_bar_beats(int p_bar_beats);
 	virtual int get_bar_beats() const override;
 
+	void set_tags(const Dictionary &p_tags);
+	virtual Dictionary get_tags() const override;
+
 	virtual Ref<AudioStreamPlayback> instantiate_playback() override;
 	virtual String get_stream_name() const override;
 

+ 5 - 0
modules/vorbis/doc_classes/AudioStreamOggVorbis.xml

@@ -41,5 +41,10 @@
 		<member name="packet_sequence" type="OggPacketSequence" setter="set_packet_sequence" getter="get_packet_sequence">
 			Contains the raw Ogg data for this stream.
 		</member>
+		<member name="tags" type="Dictionary" setter="set_tags" getter="get_tags" default="{}">
+			Contains user-defined tags if found in the Ogg Vorbis data.
+			Commonly used tags include [code]title[/code], [code]artist[/code], [code]album[/code], [code]tracknumber[/code], and [code]date[/code] ([code]date[/code] does not have a standard date format).
+			[b]Note:[/b] No tag is [i]guaranteed[/i] to be present in every file, so make sure to account for the keys not always existing.
+		</member>
 	</members>
 </class>

+ 83 - 0
scene/resources/audio_stream_wav.cpp

@@ -477,6 +477,18 @@ bool AudioStreamWAV::is_stereo() const {
 	return stereo;
 }
 
+void AudioStreamWAV::set_tags(const Dictionary &p_tags) { // Replaces the stored tag dictionary with p_tags.
+	tags = p_tags;
+}
+
+Dictionary AudioStreamWAV::get_tags() const { // Returns the stored tag dictionary.
+	return tags;
+}
+
+HashMap<String, String>::ConstIterator AudioStreamWAV::remap_tag_id(const String &p_tag_id) { // Looks up p_tag_id in tag_id_remaps; callers test the returned iterator to see whether a remap exists.
+	return tag_id_remaps.find(p_tag_id);
+}
+
 double AudioStreamWAV::get_length() const {
 	int len = data_bytes;
 	switch (format) {
@@ -703,6 +715,8 @@ Ref<AudioStreamWAV> AudioStreamWAV::load_from_buffer(const Vector<uint8_t> &p_st
 
 	Vector<float> data;
 
+	HashMap<String, String> tag_map;
+
 	while (!file->eof_reached()) {
 		/* chunk */
 		char chunk_id[4];
@@ -858,6 +872,40 @@ Ref<AudioStreamWAV> AudioStreamWAV::load_from_buffer(const Vector<uint8_t> &p_st
 				loop_end = file->get_32();
 			}
 		}
+
+		if (chunk_id[0] == 'L' && chunk_id[1] == 'I' && chunk_id[2] == 'S' && chunk_id[3] == 'T') {
+			// RIFF 'LIST' chunk.
+			// See https://www.recordingblogs.com/wiki/list-chunk-of-a-wave-file
+
+			char list_id[4];
+			file->get_buffer((uint8_t *)&list_id, 4);
+
+			if (list_id[0] == 'I' && list_id[1] == 'N' && list_id[2] == 'F' && list_id[3] == 'O') {
+				// 'INFO' list type: a sequence of entries, each made of a 4-byte id,
+				// a 32-bit text size, and the text itself.
+				// The size of an entry can be arbitrary.
+				uint32_t end_of_chunk = file_pos + chunksize - 4;
+				while (file->get_position() < end_of_chunk) {
+					char info_id[4];
+					file->get_buffer((uint8_t *)&info_id, 4);
+
+					uint32_t text_size = file->get_32();
+					if (text_size == 0) {
+						if (file->eof_reached()) {
+							// Truncated file: the reads above returned nothing, so stop
+							// instead of spinning on a position that no longer advances.
+							break;
+						}
+						// Empty entry: there is no text to read, and accessing the
+						// first element of an empty Vector would be out of bounds.
+						continue;
+					}
+
+					// Reject sizes that run past the end of the LIST chunk, so a malformed
+					// file cannot request an arbitrarily large allocation.
+					const uint64_t list_end = (uint64_t)file_pos + chunksize;
+					const uint64_t entry_pos = file->get_position();
+					if (entry_pos > list_end || text_size > list_end - entry_pos) {
+						WARN_PRINT("Invalid 'INFO' entry size in WAV file.");
+						break;
+					}
+
+					Vector<char> text;
+					text.resize(text_size);
+					file->get_buffer((uint8_t *)text.ptrw(), text_size);
+
+					// RIFF pads odd-sized entries with one byte that is not counted in the
+					// size field; skip it so the next entry id is read from the right offset.
+					if (text_size & 1) {
+						file->get_8();
+					}
+
+					// The data is always an ASCII string. ASCII is a subset of UTF-8.
+					String tag;
+					tag.append_utf8(&info_id[0], 4);
+
+					String tag_value;
+					tag_value.append_utf8(text.ptr(), text_size);
+
+					tag_map[tag] = tag_value;
+				}
+			}
+		}
+
 		// Move to the start of the next chunk. Note that RIFF requires a padding byte for odd
 		// chunk sizes.
 		file->seek(file_pos + chunksize + (chunksize & 1));
@@ -1097,6 +1145,18 @@ Ref<AudioStreamWAV> AudioStreamWAV::load_from_buffer(const Vector<uint8_t> &p_st
 	sample->set_loop_begin(loop_begin);
 	sample->set_loop_end(loop_end);
 	sample->set_stereo(format_channels == 2);
+
+	Dictionary tag_dictionary; // Dictionary handed to set_tags() once every parsed entry has been copied over.
+	for (const KeyValue<String, String> &E : tag_map) {
+		HashMap<String, String>::ConstIterator remap = sample->remap_tag_id(E.key);
+		if (remap) {
+			tag_map.replace_key(E.key, remap->value); // NOTE(review): presumably renames the entry in place so E.key below reads the remapped name — confirm against HashMap::replace_key.
+		}
+
+		tag_dictionary[E.key] = E.value; // Ids without a remap entry keep their original id as the key.
+	}
+	sample->set_tags(tag_dictionary);
+
 	return sample;
 }
 
@@ -1131,6 +1191,9 @@ void AudioStreamWAV::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("set_stereo", "stereo"), &AudioStreamWAV::set_stereo);
 	ClassDB::bind_method(D_METHOD("is_stereo"), &AudioStreamWAV::is_stereo);
 
+	ClassDB::bind_method(D_METHOD("set_tags", "tags"), &AudioStreamWAV::set_tags);
+	ClassDB::bind_method(D_METHOD("get_tags"), &AudioStreamWAV::get_tags);
+
 	ClassDB::bind_method(D_METHOD("save_to_wav", "path"), &AudioStreamWAV::save_to_wav);
 
 	ADD_PROPERTY(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "data", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR), "set_data", "get_data");
@@ -1140,6 +1203,7 @@ void AudioStreamWAV::_bind_methods() {
 	ADD_PROPERTY(PropertyInfo(Variant::INT, "loop_end"), "set_loop_end", "get_loop_end");
 	ADD_PROPERTY(PropertyInfo(Variant::INT, "mix_rate"), "set_mix_rate", "get_mix_rate");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "stereo"), "set_stereo", "is_stereo");
+	ADD_PROPERTY(PropertyInfo(Variant::DICTIONARY, "tags", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR), "set_tags", "get_tags");
 
 	BIND_ENUM_CONSTANT(FORMAT_8_BITS);
 	BIND_ENUM_CONSTANT(FORMAT_16_BITS);
@@ -1151,3 +1215,22 @@ void AudioStreamWAV::_bind_methods() {
 	BIND_ENUM_CONSTANT(LOOP_PINGPONG);
 	BIND_ENUM_CONSTANT(LOOP_BACKWARD);
 }
+
+AudioStreamWAV::AudioStreamWAV() { // Fills tag_id_remaps, the table remap_tag_id() looks ids up in.
+	// Used to make the metadata tags more unified across different AudioStreams.
+	// See https://www.recordingblogs.com/wiki/list-chunk-of-a-wave-file
+	tag_id_remaps["IARL"] = "location";
+	tag_id_remaps["IART"] = "artist";
+	tag_id_remaps["ICMS"] = "organization";
+	tag_id_remaps["ICMT"] = "comments";
+	tag_id_remaps["ICOP"] = "copyright";
+	tag_id_remaps["ICRD"] = "date";
+	tag_id_remaps["IGNR"] = "genre";
+	tag_id_remaps["IKEY"] = "keywords";
+	tag_id_remaps["IMED"] = "medium";
+	tag_id_remaps["INAM"] = "title";
+	tag_id_remaps["IPRD"] = "album";
+	tag_id_remaps["ISBJ"] = "description";
+	tag_id_remaps["ISFT"] = "software";
+	tag_id_remaps["ITRK"] = "tracknumber";
+}

+ 10 - 0
scene/resources/audio_stream_wav.h

@@ -124,6 +124,9 @@ private:
 	LocalVector<uint8_t> data;
 	uint32_t data_bytes = 0;
 
+	HashMap<String, String> tag_id_remaps;
+	Dictionary tags;
+
 protected:
 	static void _bind_methods();
 
@@ -149,6 +152,11 @@ public:
 	void set_stereo(bool p_enable);
 	bool is_stereo() const;
 
+	void set_tags(const Dictionary &p_tags);
+	virtual Dictionary get_tags() const override;
+
+	HashMap<String, String>::ConstIterator remap_tag_id(const String &p_tag_id);
+
 	virtual double get_length() const override; //if supported, otherwise return 0
 
 	virtual bool is_monophonic() const override;
@@ -284,6 +292,8 @@ public:
 			dst_ptr += qoa_encode_frame(data16.ptr(), p_desc, frame_len, dst_ptr);
 		}
 	}
+
+	AudioStreamWAV();
 };
 
 VARIANT_ENUM_CAST(AudioStreamWAV::Format)

+ 7 - 0
servers/audio/audio_stream.cpp

@@ -297,6 +297,12 @@ int AudioStream::get_beat_count() const {
 	return ret;
 }
 
+Dictionary AudioStream::get_tags() const { // Base implementation: forwards to the overridable _get_tags() virtual.
+	Dictionary ret; // Presumably stays empty when no _get_tags() override exists — confirm GDVIRTUAL_CALL semantics.
+	GDVIRTUAL_CALL(_get_tags, ret);
+	return ret;
+}
+
 void AudioStream::tag_used(float p_offset) {
 	if (tagged_frame != AudioServer::get_singleton()->get_mixed_frames()) {
 		offset_count = 0;
@@ -350,6 +356,7 @@ void AudioStream::_bind_methods() {
 	GDVIRTUAL_BIND(_is_monophonic);
 	GDVIRTUAL_BIND(_get_bpm)
 	GDVIRTUAL_BIND(_get_beat_count)
+	GDVIRTUAL_BIND(_get_tags);
 	GDVIRTUAL_BIND(_get_parameter_list)
 	GDVIRTUAL_BIND(_has_loop);
 	GDVIRTUAL_BIND(_get_bar_beats);

+ 2 - 0
servers/audio/audio_stream.h

@@ -178,6 +178,7 @@ protected:
 	GDVIRTUAL0RC(bool, _has_loop)
 	GDVIRTUAL0RC(int, _get_bar_beats)
 	GDVIRTUAL0RC(int, _get_beat_count)
+	GDVIRTUAL0RC(Dictionary, _get_tags);
 	GDVIRTUAL0RC(TypedArray<Dictionary>, _get_parameter_list)
 
 public:
@@ -188,6 +189,7 @@ public:
 	virtual bool has_loop() const;
 	virtual int get_bar_beats() const;
 	virtual int get_beat_count() const;
+	virtual Dictionary get_tags() const;
 
 	virtual double get_length() const;
 	virtual bool is_monophonic() const;