Browse Source

SIMD vectorized 4x4 pixel upscaling.

David Piuva 4 years ago
parent
commit
38ef749085
1 changed files with 56 additions and 2 deletions
  1. 56 2
      Source/DFPSR/image/draw.cpp

+ 56 - 2
Source/DFPSR/image/draw.cpp

@@ -1199,6 +1199,54 @@ static void blockMagnify_2x2(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& sou
 	#endif
 }
 
+// Pre-condition:
+//   * The source and target images have the same pack order
+//   * Both source and target are 16-byte aligned, but does not have to own their padding
+//   * clipWidth % 4 == 0
+//   * clipHeight % 4 == 0
+static void blockMagnify_4x4(ImageRgbaU8Impl& target, const ImageRgbaU8Impl& source, int clipWidth, int clipHeight) {
+	const SafePointer<uint32_t> sourceRow = imageInternal::getSafeData<uint32_t>(source);
+	SafePointer<uint32_t> targetRowA = imageInternal::getSafeData<uint32_t>(target, 0);
+	SafePointer<uint32_t> targetRowB = imageInternal::getSafeData<uint32_t>(target, 1);
+	SafePointer<uint32_t> targetRowC = imageInternal::getSafeData<uint32_t>(target, 2);
+	SafePointer<uint32_t> targetRowD = imageInternal::getSafeData<uint32_t>(target, 3);
+	int quadTargetStride = target.stride * 4;
+	for (int upperTargetY = 0; upperTargetY + 4 <= clipHeight; upperTargetY+=4) {
+		// Carriage return
+		const SafePointer<uint32_t> sourcePixel = sourceRow;
+		SafePointer<uint32_t> targetPixelA = targetRowA;
+		SafePointer<uint32_t> targetPixelB = targetRowB;
+		SafePointer<uint32_t> targetPixelC = targetRowC;
+		SafePointer<uint32_t> targetPixelD = targetRowD;
+		// Write to whole multiples of 8 pixels
+		int writeLeftX = 0;
+		while (writeLeftX + 4 <= clipWidth) {
+			// Read one pixel at a time
+			uint32_t scalarValue = *sourcePixel;
+			sourcePixel += 1;
+			// Convert scalar to SIMD vector of 4 repeated pixels
+			ALIGN16 U32x4 sourcePixels = U32x4(scalarValue);
+			// Write to 4x4 pixels using 4 SIMD writes
+			sourcePixels.writeAligned(targetPixelA, "blockMagnify_4x4 @ write A");
+			sourcePixels.writeAligned(targetPixelB, "blockMagnify_4x4 @ write B");
+			sourcePixels.writeAligned(targetPixelC, "blockMagnify_4x4 @ write C");
+			sourcePixels.writeAligned(targetPixelD, "blockMagnify_4x4 @ write D");
+			targetPixelA += 4;
+			targetPixelB += 4;
+			targetPixelC += 4;
+			targetPixelD += 4;
+			// Count
+			writeLeftX += 4;
+		}
+		// Line feed
+		sourceRow.increaseBytes(source.stride);
+		targetRowA.increaseBytes(quadTargetStride);
+		targetRowB.increaseBytes(quadTargetStride);
+		targetRowC.increaseBytes(quadTargetStride);
+		targetRowD.increaseBytes(quadTargetStride);
+	}
+}
+
 static void blackEdges(ImageRgbaU8Impl& target, int excludedWidth, int excludedHeight) {
 	// Right side
 	drawSolidRectangleMemset<Color4xU8>(target, excludedWidth, 0, target.width, excludedHeight, 0);
@@ -1214,8 +1262,14 @@ void dsr::imageImpl_blockMagnify(ImageRgbaU8Impl& target, const ImageRgbaU8Impl&
 	int clipWidth = roundDown(std::min(target.width, source.width * pixelWidth), pixelWidth);
 	int clipHeight = roundDown(std::min(target.height, source.height * pixelHeight), pixelHeight);
 	if (sameOrder) {
-		if (imageIs16ByteAligned(source) && imageIs16ByteAligned(target) && pixelWidth == 2 && pixelHeight == 2) {
-			blockMagnify_2x2(target, source, clipWidth, clipHeight);
+		if (imageIs16ByteAligned(source) && imageIs16ByteAligned(target)) {
+			if (pixelWidth == 2 && pixelHeight == 2) {
+				blockMagnify_2x2(target, source, clipWidth, clipHeight);
+			} else if (pixelWidth == 4 && pixelHeight == 4) {
+				blockMagnify_4x4(target, source, clipWidth, clipHeight);
+			} else {
+				blockMagnify_reference<false>(target, source, pixelWidth, pixelHeight, clipWidth, clipHeight);
+			}
 		} else {
 			blockMagnify_reference<false>(target, source, pixelWidth, pixelHeight, clipWidth, clipHeight);
 		}