2 rokov pred · 955532c4ba
--- a/Source/DFPSR/render/shader/Shader.cpp
+++ b/Source/DFPSR/render/shader/Shader.cpp
@@ -136,17 +136,17 @@ inline static void fillQuadSuper(const Shader& shader, int x, SafePointer<uint32
 
				 	if (vis0 || vis1 || vis2 || vis3) {

			
 
				 		if (COLOR_WRITE) {

			
 
				 			// Get the color

			
 
				-			ALIGN16 U32x4 packedColor(0u); // Allow uninitialized memory?

			
 
				+			U32x4 packedColor(0u); // Allow uninitialized memory?

			
 
				 			// Execute the shader

			
 
				-			ALIGN16 Rgba_F32 planarSourceColor = shader.getPixels_2x2(weights);

			
 
				+			Rgba_F32 planarSourceColor = shader.getPixels_2x2(weights);

			
 
				 			// Apply alpha filtering

			
 
				 			if (FILTER == Filter::Alpha) {

			
 
				 				// Get opacity from the source color

			
 
				-				ALIGN16 F32x4 opacity = planarSourceColor.alpha * (1.0f / 255.0f);

			
 
				+				F32x4 opacity = planarSourceColor.alpha * (1.0f / 255.0f);

			
 
				 				// Read the packed colors for alpha blending

			
 
				-				ALIGN16 U32x4 packedTargetColor = clippedRead<CLIP_SIDES>(pixelDataUpper, pixelDataLower, vis0, vis1, vis2, vis3);

			
 
				+				U32x4 packedTargetColor = clippedRead<CLIP_SIDES>(pixelDataUpper, pixelDataLower, vis0, vis1, vis2, vis3);

			
 
				 				// Unpack the target color into planar RGBA format so that it can be mixed with the source color

			
 
				-				ALIGN16 Rgba_F32 planarTargetColor(packedTargetColor, targetPackingOrder);

			
 
				+				Rgba_F32 planarTargetColor(packedTargetColor, targetPackingOrder);

			
 
				 				// Blend linearly using floats

			
 
				 				planarSourceColor = (planarSourceColor * opacity) + (planarTargetColor * (1.0f - opacity));

			
 
				 			}

			
@@ -172,15 +172,15 @@ template<bool CLIP_SIDES, bool COLOR_WRITE, bool DEPTH_READ, bool DEPTH_WRITE, F
 
				 static inline void fillRowSuper(const Shader& shader, SafePointer<uint32_t> pixelDataUpper, SafePointer<uint32_t> pixelDataLower, SafePointer<float> depthDataUpper, SafePointer<float> depthDataLower, FVector3D pWeightUpper, FVector3D pWeightLower, const FVector3D &pWeightDx, int startX, int endX, const RowInterval &upperRow, const RowInterval &lowerRow, const PackOrder &targetPackingOrder) {

			
 
				 	if (AFFINE) {

			
 
				 		FVector3D dx2 = pWeightDx * 2.0f;

			
 
				-		ALIGN16 F32x4 vLinearDepth(pWeightUpper.x, pWeightUpper.x + pWeightDx.x, pWeightLower.x, pWeightLower.x + pWeightDx.x);

			
 
				-		ALIGN16 F32x4 weightB(pWeightUpper.y, pWeightUpper.y + pWeightDx.y, pWeightLower.y, pWeightLower.y + pWeightDx.y);

			
 
				-		ALIGN16 F32x4 weightC(pWeightUpper.z, pWeightUpper.z + pWeightDx.z, pWeightLower.z, pWeightLower.z + pWeightDx.z);

			
 
				+		F32x4 vLinearDepth(pWeightUpper.x, pWeightUpper.x + pWeightDx.x, pWeightLower.x, pWeightLower.x + pWeightDx.x);

			
 
				+		F32x4 weightB(pWeightUpper.y, pWeightUpper.y + pWeightDx.y, pWeightLower.y, pWeightLower.y + pWeightDx.y);

			
 
				+		F32x4 weightC(pWeightUpper.z, pWeightUpper.z + pWeightDx.z, pWeightLower.z, pWeightLower.z + pWeightDx.z);

			
 
				 		for (int x = startX; x < endX; x += 2) {

			
 
				 			// Get the linear depth

			
 
				 			FVector4D depth = vLinearDepth.get();

			
 
				 			// Calculate the weight of the first vertex from the other two

			
 
				-			ALIGN16 F32x4 weightA = 1.0f - (weightB + weightC);

			
 
				-			ALIGN16 F32x4x3 weights(weightA, weightB, weightC);

			
 
				+			F32x4 weightA = 1.0f - (weightB + weightC);

			
 
				+			F32x4x3 weights(weightA, weightB, weightC);

			
 
				 			fillQuadSuper<CLIP_SIDES, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>(shader, x, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, upperRow, lowerRow, targetPackingOrder, depth, weights);

			
 
				 			// Iterate projection

			
 
				 			vLinearDepth = vLinearDepth + dx2.x;

			
@@ -192,21 +192,21 @@ static inline void fillRowSuper(const Shader& shader, SafePointer<uint32_t> pixe
 
				 		}

			
 
				 	} else {

			
 
				 		FVector3D dx2 = pWeightDx * 2.0f;

			
 
				-		ALIGN16 F32x4 vRecDepth(pWeightUpper.x, pWeightUpper.x + pWeightDx.x, pWeightLower.x, pWeightLower.x + pWeightDx.x);

			
 
				-		ALIGN16 F32x4 vRecU(pWeightUpper.y, pWeightUpper.y + pWeightDx.y, pWeightLower.y, pWeightLower.y + pWeightDx.y);

			
 
				-		ALIGN16 F32x4 vRecV(pWeightUpper.z, pWeightUpper.z + pWeightDx.z, pWeightLower.z, pWeightLower.z + pWeightDx.z);

			
 
				+		F32x4 vRecDepth(pWeightUpper.x, pWeightUpper.x + pWeightDx.x, pWeightLower.x, pWeightLower.x + pWeightDx.x);

			
 
				+		F32x4 vRecU(pWeightUpper.y, pWeightUpper.y + pWeightDx.y, pWeightLower.y, pWeightLower.y + pWeightDx.y);

			
 
				+		F32x4 vRecV(pWeightUpper.z, pWeightUpper.z + pWeightDx.z, pWeightLower.z, pWeightLower.z + pWeightDx.z);

			
 
				 		for (int x = startX; x < endX; x += 2) {

			
 
				 			// Get the reciprocal depth

			
 
				 			FVector4D depth = vRecDepth.get();

			
 
				 			// After linearly interpolating (1 / W, U / W, V / W) based on the affine weights...

			
 
				 			// Divide 1 by 1 / W to get the linear depth W

			
 
				-			ALIGN16 F32x4 vLinearDepth = vRecDepth.reciprocal();

			
 
				+			F32x4 vLinearDepth = vRecDepth.reciprocal();

			
 
				 			// Multiply the vertex weights to the second and third edges with the depth to compensate for that we divided them by depth before interpolating.

			
 
				-			ALIGN16 F32x4 weightB = vRecU * vLinearDepth;

			
 
				-			ALIGN16 F32x4 weightC = vRecV * vLinearDepth;

			
 
				+			F32x4 weightB = vRecU * vLinearDepth;

			
 
				+			F32x4 weightC = vRecV * vLinearDepth;

			
 
				 			// Calculate the weight of the first vertex from the other two

			
 
				-			ALIGN16 F32x4 weightA = 1.0f - (weightB + weightC);

			
 
				-			ALIGN16 F32x4x3 weights(weightA, weightB, weightC);

			
 
				+			F32x4 weightA = 1.0f - (weightB + weightC);

			
 
				+			F32x4x3 weights(weightA, weightB, weightC);

			
 
				 			fillQuadSuper<CLIP_SIDES, COLOR_WRITE, DEPTH_READ, DEPTH_WRITE, FILTER, AFFINE>(shader, x, pixelDataUpper, pixelDataLower, depthDataUpper, depthDataLower, upperRow, lowerRow, targetPackingOrder, depth, weights);

			
 
				 			// Iterate projection

			
 
				 			vRecDepth = vRecDepth + dx2.x;