Quellcode durchsuchen

Add H265RtpDepacketizer

This commit adds an H265 depacketizer which takes incoming H265 RTP packets and emits H265 access units. It is closely based on the `H264RtpDepacketizer` added by @Sean-Der in https://github.com/paullouisageneau/libdatachannel/pull/1082.

I originally started with a version of this commit that was closer to the `H264RtpDepacketizer` and emitted individual H265 NALUs from `H265RtpDepacketizer::buildFrames()`. This resulted in my `Track::onFrame()` callback being called once per NALU, which did not work well with the decoder that I'm using: it wants to see the VPS/SPS/PPS NALUs as a unit before initializing the decoder (https://intel.github.io/libvpl/v2.10/API_ref/VPL_func_vid_decode.html#mfxvideodecode-decodeheader).

So for the `H265RtpDepacketizer` I've tried to make it emit access units rather than NALUs. An "access unit" is (RFC 7798):

> A set of NAL units that are associated with each other according to a specified classification rule, that are consecutive in decoding order, *and that contain exactly one coded picture.*

"Exactly one coded picture" seems to correspond with what a caller might expect an "onFrame" callback to do. Maybe the `H264RtpDepacketizer` should be revised to similarly emit H264 access units rather than NALUs, too. At least, I could not find a way to receive individual NALUs from the depacketizer and run the VPL decoder without needing to do my own buffering/copying of the NALUs.

With this commit I can now do the following:

* Generate encoded bitstream output from the Intel VPL encoder.
* Pass the output of the encoder one frame at a time to libdatachannel's `Track::send()` on a track with an `H265RtpPacketizer` media handler.
* Transport the video track over a WebRTC connection to a libdatachannel peer.
* Depacketize it with the `H265RtpDepacketizer` media handler in this commit.
* Pass the depacketized output via my `Track::onFrame()` callback to the Intel VPL decoder in "complete frame" mode (https://intel.github.io/libvpl/v2.10/API_ref/VPL_enums.html#_CPPv428MFX_BITSTREAM_COMPLETE_FRAME). Each "onFrame" callback corresponds to a single call to the decoder API to decode a frame.
Robert Edmonds vor 1 Jahr
Ursprung
Commit
d5350373ab
5 geänderte Dateien mit 191 neuen und 1 gelöschten Zeilen
  1. 2 0
      CMakeLists.txt
  2. 44 0
      include/rtc/h265rtpdepacketizer.hpp
  3. 1 0
      include/rtc/rtc.hpp
  4. 1 1
      src/h265nalunit.cpp
  5. 143 0
      src/h265rtpdepacketizer.cpp

+ 2 - 0
CMakeLists.txt

@@ -82,6 +82,7 @@ set(LIBDATACHANNEL_SOURCES
 	${CMAKE_CURRENT_SOURCE_DIR}/src/h264rtpdepacketizer.cpp
 	${CMAKE_CURRENT_SOURCE_DIR}/src/nalunit.cpp
 	${CMAKE_CURRENT_SOURCE_DIR}/src/h265rtppacketizer.cpp
+	${CMAKE_CURRENT_SOURCE_DIR}/src/h265rtpdepacketizer.cpp
 	${CMAKE_CURRENT_SOURCE_DIR}/src/h265nalunit.cpp
 	${CMAKE_CURRENT_SOURCE_DIR}/src/av1rtppacketizer.cpp
 	${CMAKE_CURRENT_SOURCE_DIR}/src/rtcpnackresponder.cpp
@@ -120,6 +121,7 @@ set(LIBDATACHANNEL_HEADERS
 	${CMAKE_CURRENT_SOURCE_DIR}/include/rtc/h264rtpdepacketizer.hpp
 	${CMAKE_CURRENT_SOURCE_DIR}/include/rtc/nalunit.hpp
 	${CMAKE_CURRENT_SOURCE_DIR}/include/rtc/h265rtppacketizer.hpp
+	${CMAKE_CURRENT_SOURCE_DIR}/include/rtc/h265rtpdepacketizer.hpp
 	${CMAKE_CURRENT_SOURCE_DIR}/include/rtc/h265nalunit.hpp
 	${CMAKE_CURRENT_SOURCE_DIR}/include/rtc/av1rtppacketizer.hpp
 	${CMAKE_CURRENT_SOURCE_DIR}/include/rtc/rtcpnackresponder.hpp

+ 44 - 0
include/rtc/h265rtpdepacketizer.hpp

@@ -0,0 +1,44 @@
+/**
+ * Copyright (c) 2020 Staz Modrzynski
+ * Copyright (c) 2020-2024 Paul-Louis Ageneau
+ * Copyright (c) 2024 Robert Edmonds
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef RTC_H265_RTP_DEPACKETIZER_H
+#define RTC_H265_RTP_DEPACKETIZER_H
+
+#if RTC_ENABLE_MEDIA
+
+#include "common.hpp"
+#include "mediahandler.hpp"
+#include "message.hpp"
+#include "rtp.hpp"
+
+#include <iterator>
+
+namespace rtc {
+
+/// Media handler turning incoming H265 RTP packets into depacketized frames.
+///
+/// RTP packets handed to incoming() are held in an internal buffer and grouped by RTP
+/// timestamp; each finished group is replaced by the message(s) produced by buildFrames().
+class RTC_CPP_EXPORT H265RtpDepacketizer : public MediaHandler {
+public:
+	H265RtpDepacketizer() = default;
+	virtual ~H265RtpDepacketizer() = default;
+
+	/// Consumes the RTP packets found in `messages` and appends the frames that became
+	/// available as a result.
+	void incoming(message_vector &messages, const message_callback &send) override;
+
+private:
+	/// Builds the output message(s) for the packets in [begin, end), which all carry the RTP
+	/// timestamp `timestamp`.
+	message_vector buildFrames(message_vector::iterator begin, message_vector::iterator end,
+	                           uint32_t timestamp);
+
+	/// RTP packets received so far that have not yet been turned into a frame.
+	std::vector<message_ptr> mRtpBuffer;
+};
+
+} // namespace rtc
+
+#endif // RTC_ENABLE_MEDIA
+
+#endif // RTC_H265_RTP_DEPACKETIZER_H

+ 1 - 0
include/rtc/rtc.hpp

@@ -32,6 +32,7 @@
 #include "h264rtppacketizer.hpp"
 #include "h264rtpdepacketizer.hpp"
 #include "h265rtppacketizer.hpp"
+#include "h265rtpdepacketizer.hpp"
 #include "mediahandler.hpp"
 #include "plihandler.hpp"
 #include "rembhandler.hpp"

+ 1 - 1
src/h265nalunit.cpp

@@ -34,7 +34,7 @@ H265NalUnitFragment::fragmentsFrom(shared_ptr<H265NalUnit> nalu, uint16_t maxFra
 	auto fragments_count = ceil(double(nalu->size()) / maxFragmentSize);
 	maxFragmentSize = uint16_t(int(ceil(nalu->size() / fragments_count)));
 
-	// 3 bytes for FU indicator and FU header
+	// 3 bytes for NALU header and FU header
 	maxFragmentSize -= (H265_NAL_HEADER_SIZE + H265_FU_HEADER_SIZE);
 	auto f = nalu->forbiddenBit();
 	uint8_t nuhLayerId = nalu->nuhLayerId() & 0x3F;        // 6 bits

+ 143 - 0
src/h265rtpdepacketizer.cpp

@@ -0,0 +1,143 @@
+/**
+ * Copyright (c) 2023-2024 Paul-Louis Ageneau
+ * Copyright (c) 2024 Robert Edmonds
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/.
+ */
+
+#if RTC_ENABLE_MEDIA
+
+#include "h265rtpdepacketizer.hpp"
+#include "h265nalunit.hpp"
+
+#include "impl/internals.hpp"
+
+namespace rtc {
+
+// Four-byte start code written in front of every NAL unit placed into an emitted access unit.
+const binary naluStartCode{byte{0}, byte{0}, byte{0}, byte{1}};
+
+// NAL unit type values of the RTP payload structures that are unpacked by the depacketizer
+// instead of being forwarded unchanged.
+constexpr uint8_t naluTypeAP = 48; // aggregation packet
+constexpr uint8_t naluTypeFU = 49; // fragmentation unit
+
+// Reassembles the RTP packets in [begin, end) into a single H265 access unit.
+//
+// The payload of each packet is handled according to the type in its 2-byte NAL unit header:
+//  - FU (49): fragment payloads are concatenated. Whenever a fragmented NAL unit starts
+//    (S bit of the FU header set), a start code and a NAL unit header carrying the type from
+//    the FU header are emitted first.
+//  - AP (48): every length-prefixed NAL unit inside is emitted behind its own start code.
+//  - 0..47: the payload is emitted unchanged behind a start code.
+//  - 50..63: the packet contributes nothing.
+//
+// Returns a vector holding at most one Binary message: the access unit, tagged with a
+// FrameInfo built from `timestamp`. The vector is empty when no packet contributed data.
+//
+// Throws std::runtime_error when an AP declares a NAL unit larger than the remaining payload.
+// Header bytes are read with bounds-checked at(), which presumably throws std::out_of_range on
+// truncated packets (TODO confirm against the message type).
+message_vector H265RtpDepacketizer::buildFrames(message_vector::iterator begin,
+                                                message_vector::iterator end, uint32_t timestamp) {
+	message_vector out = {};
+	auto accessUnit = binary{};
+	auto frameInfo = std::make_shared<FrameInfo>(timestamp);
+	// Becomes true once any FU fragment has been consumed for this access unit.
+	bool fragmentSeen = false;
+
+	for (auto it = begin; it != end; ++it) {
+		auto pkt = it->get();
+		auto pktParsed = reinterpret_cast<const rtc::RtpHeader *>(pkt->data());
+		auto rtpHeaderSize = pktParsed->getSize() + pktParsed->getExtensionHeaderSize();
+		auto nalUnitHeader =
+		    H265NalUnitHeader{std::to_integer<uint8_t>(pkt->at(rtpHeaderSize)),
+		                      std::to_integer<uint8_t>(pkt->at(rtpHeaderSize + 1))};
+
+		if (nalUnitHeader.unitType() == naluTypeFU) {
+			const auto fuHeaderByte =
+			    std::to_integer<uint8_t>(pkt->at(rtpHeaderSize + sizeof(H265NalUnitHeader)));
+			auto nalUnitFragmentHeader = H265NalUnitFragmentHeader{fuHeaderByte};
+
+			// The most significant bit of the FU header is the S bit, which marks the first
+			// fragment of a fragmented NAL unit.
+			const bool isStartFragment = (fuHeaderByte & 0x80) != 0;
+
+			// An access unit may contain several fragmented NAL units (e.g. multiple slices),
+			// so a new NAL unit must be opened at every start fragment, not only at the first
+			// fragment of the access unit. The very first fragment seen also opens a NAL unit
+			// even without the S bit, so that its data is never glued onto a preceding NAL unit.
+			if (isStartFragment || !fragmentSeen) {
+				accessUnit.insert(accessUnit.end(), naluStartCode.begin(), naluStartCode.end());
+
+				// Rebuild the original NAL unit header: same layer/temporal fields as the FU
+				// payload header, but with the real type taken from the FU header.
+				nalUnitHeader.setUnitType(nalUnitFragmentHeader.unitType());
+				accessUnit.emplace_back(byte(nalUnitHeader._first));
+				accessUnit.emplace_back(byte(nalUnitHeader._second));
+			}
+			fragmentSeen = true;
+
+			accessUnit.insert(accessUnit.end(),
+			                  pkt->begin() + rtpHeaderSize + sizeof(H265NalUnitHeader) +
+			                      sizeof(H265NalUnitFragmentHeader),
+			                  pkt->end());
+		} else if (nalUnitHeader.unitType() == naluTypeAP) {
+			auto currOffset = rtpHeaderSize + sizeof(H265NalUnitHeader);
+
+			// Walk the aggregated units: each one is a 16-bit big-endian size followed by
+			// that many bytes of NAL unit data.
+			while (currOffset + sizeof(uint16_t) < pkt->size()) {
+				auto naluSize = std::to_integer<uint16_t>(pkt->at(currOffset)) << 8 |
+				                std::to_integer<uint16_t>(pkt->at(currOffset + 1));
+
+				currOffset += sizeof(uint16_t);
+
+				if (pkt->size() < currOffset + naluSize) {
+					throw std::runtime_error("H265 AP declared size is larger than buffer");
+				}
+
+				accessUnit.insert(accessUnit.end(), naluStartCode.begin(), naluStartCode.end());
+
+				accessUnit.insert(accessUnit.end(), pkt->begin() + currOffset,
+				                  pkt->begin() + currOffset + naluSize);
+
+				currOffset += naluSize;
+			}
+		} else if (nalUnitHeader.unitType() < naluTypeAP) {
+			// "NAL units with NAL unit type values in the range of 0 to 47, inclusive, may be
+			// passed to the decoder."
+			accessUnit.insert(accessUnit.end(), naluStartCode.begin(), naluStartCode.end());
+			accessUnit.insert(accessUnit.end(), pkt->begin() + rtpHeaderSize, pkt->end());
+		} else {
+			// "NAL-unit-like structures with NAL unit type values in the range of 48 to 63,
+			// inclusive, MUST NOT be passed to the decoder."
+		}
+	}
+
+	if (!accessUnit.empty()) {
+		out.emplace_back(make_message(accessUnit.begin(), accessUnit.end(), Message::Binary, 0,
+		                              nullptr, frameInfo));
+	}
+
+	return out;
+}
+
+// Buffers incoming RTP packets and replaces them in `messages` with reassembled access units.
+//
+// Control messages stay in `messages` untouched. Every other message is removed from
+// `messages`: those shorter than an RTP header are dropped (and logged), the rest are appended
+// to mRtpBuffer. Afterwards, as long as the buffer holds packets with more than one RTP
+// timestamp, the leading run of packets sharing the first packet's timestamp is handed to
+// buildFrames() and the result is appended to `messages`. The last run is kept buffered
+// because further packets with that timestamp may still arrive.
+//
+// The send callback is not used.
+void H265RtpDepacketizer::incoming(message_vector &messages, const message_callback &) {
+	messages.erase(std::remove_if(messages.begin(), messages.end(),
+	                              [&](message_ptr message) {
+		                              if (message->type == Message::Control) {
+			                              return false;
+		                              }
+
+		                              if (message->size() < sizeof(RtpHeader)) {
+			                              PLOG_VERBOSE << "RTP packet is too small, size="
+			                                           << message->size();
+			                              return true;
+		                              }
+
+		                              mRtpBuffer.push_back(std::move(message));
+		                              return true;
+	                              }),
+	               messages.end());
+
+	while (!mRtpBuffer.empty()) {
+		// Read the timestamp from the first buffered packet. 0 is a legitimate RTP timestamp,
+		// so it must not double as an "unset" marker.
+		const uint32_t current_timestamp =
+		    reinterpret_cast<const rtc::RtpHeader *>(mRtpBuffer.front()->data())->timestamp();
+		size_t packets_in_timestamp = 0;
+
+		for (const auto &pkt : mRtpBuffer) {
+			auto p = reinterpret_cast<const rtc::RtpHeader *>(pkt->data());
+
+			if (p->timestamp() != current_timestamp) {
+				break;
+			}
+
+			++packets_in_timestamp;
+		}
+
+		// Every buffered packet shares the timestamp: the frame may still be incomplete.
+		if (packets_in_timestamp == mRtpBuffer.size()) {
+			break;
+		}
+
+		// Detach the run from the buffer before building the frame, so that an exception
+		// thrown by buildFrames() cannot leave the offending packets behind to fail again on
+		// every subsequent call.
+		const auto first = mRtpBuffer.begin();
+		const auto last = first + packets_in_timestamp;
+		message_vector framePackets(std::make_move_iterator(first),
+		                            std::make_move_iterator(last));
+		mRtpBuffer.erase(first, last);
+
+		auto frames = buildFrames(framePackets.begin(), framePackets.end(), current_timestamp);
+		messages.insert(messages.end(), frames.begin(), frames.end());
+	}
+}
+
+} // namespace rtc
+
+#endif // RTC_ENABLE_MEDIA