Forráskód Böngészése

Renaming float4_t to simd128_t.

Branimir Karadžić 9 éve
szülő
commit
ce878f9180
2 módosított fájl, 116 hozzáadás és 116 törlés
  1. 38 38
      examples/14-shadowvolumes/shadowvolumes.cpp
  2. 78 78
      src/image.cpp

+ 38 - 38
examples/14-shadowvolumes/shadowvolumes.cpp

@@ -23,7 +23,7 @@ using namespace std::tr1;
 #include <bx/timer.h>
 #include <bx/allocator.h>
 #include <bx/hash.h>
-#include <bx/float4_t.h>
+#include <bx/simd_t.h>
 #include <bx/fpumath.h>
 #include <bx/crtimpl.h>
 #include "entry/entry.h"
@@ -1513,9 +1513,9 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume
 
 			using namespace bx;
 
-			const float4_t lx = float4_splat(_light[0]);
-			const float4_t ly = float4_splat(_light[1]);
-			const float4_t lz = float4_splat(_light[2]);
+			const simd128_t lx = simd_splat(_light[0]);
+			const simd128_t ly = simd_splat(_light[1]);
+			const simd128_t lz = simd_splat(_light[2]);
 
 			for (; ii < numEdgesRounded; ii+=2)
 			{
@@ -1524,47 +1524,47 @@ void shadowVolumeCreate(ShadowVolume& _shadowVolume
 				const Plane* edgePlane0 = &edgePlanes[ii*2];
 				const Plane* edgePlane1 = &edgePlanes[ii*2 + 2];
 
-				const float4_t reverse =
-					float4_ild(edge0.m_faceReverseOrder[0]
+				const simd128_t reverse =
+					simd_ild(edge0.m_faceReverseOrder[0]
 							, edge1.m_faceReverseOrder[0]
 							, edge0.m_faceReverseOrder[1]
 							, edge1.m_faceReverseOrder[1]
 							);
 
-				const float4_t p00 = float4_ld(edgePlane0[0].m_plane);
-				const float4_t p10 = float4_ld(edgePlane1[0].m_plane);
-				const float4_t p01 = float4_ld(edgePlane0[1].m_plane);
-				const float4_t p11 = float4_ld(edgePlane1[1].m_plane);
-
-				const float4_t xxyy0 = float4_shuf_xAyB(p00, p01);
-				const float4_t zzww0 = float4_shuf_zCwD(p00, p01);
-				const float4_t xxyy1 = float4_shuf_xAyB(p10, p11);
-				const float4_t zzww1 = float4_shuf_zCwD(p10, p11);
-
-				const float4_t vX = float4_shuf_xAyB(xxyy0, xxyy1);
-				const float4_t vY = float4_shuf_zCwD(xxyy0, xxyy1);
-				const float4_t vZ = float4_shuf_xAyB(zzww0, zzww1);
-				const float4_t vW = float4_shuf_zCwD(zzww0, zzww1);
-
-				const float4_t r0 = float4_mul(vX, lx);
-				const float4_t r1 = float4_mul(vY, ly);
-				const float4_t r2 = float4_mul(vZ, lz);
-
-				const float4_t dot = float4_add(r0, float4_add(r1, r2) );
-				const float4_t f = float4_add(dot, vW);
-
-				const float4_t zero = float4_zero();
-				const float4_t mask = float4_cmpgt(f, zero);
-				const float4_t onef = float4_splat(1.0f);
-				const float4_t tmp0 = float4_and(mask, onef);
-				const float4_t tmp1 = float4_ftoi(tmp0);
-				const float4_t tmp2 = float4_xor(tmp1, reverse);
-				const float4_t tmp3 = float4_sll(tmp2, 1);
-				const float4_t onei = float4_isplat(1);
-				const float4_t tmp4 = float4_isub(tmp3, onei);
+				const simd128_t p00 = simd_ld(edgePlane0[0].m_plane);
+				const simd128_t p10 = simd_ld(edgePlane1[0].m_plane);
+				const simd128_t p01 = simd_ld(edgePlane0[1].m_plane);
+				const simd128_t p11 = simd_ld(edgePlane1[1].m_plane);
+
+				const simd128_t xxyy0 = simd_shuf_xAyB(p00, p01);
+				const simd128_t zzww0 = simd_shuf_zCwD(p00, p01);
+				const simd128_t xxyy1 = simd_shuf_xAyB(p10, p11);
+				const simd128_t zzww1 = simd_shuf_zCwD(p10, p11);
+
+				const simd128_t vX = simd_shuf_xAyB(xxyy0, xxyy1);
+				const simd128_t vY = simd_shuf_zCwD(xxyy0, xxyy1);
+				const simd128_t vZ = simd_shuf_xAyB(zzww0, zzww1);
+				const simd128_t vW = simd_shuf_zCwD(zzww0, zzww1);
+
+				const simd128_t r0 = simd_mul(vX, lx);
+				const simd128_t r1 = simd_mul(vY, ly);
+				const simd128_t r2 = simd_mul(vZ, lz);
+
+				const simd128_t dot = simd_add(r0, simd_add(r1, r2) );
+				const simd128_t f = simd_add(dot, vW);
+
+				const simd128_t zero = simd_zero();
+				const simd128_t mask = simd_cmpgt(f, zero);
+				const simd128_t onef = simd_splat(1.0f);
+				const simd128_t tmp0 = simd_and(mask, onef);
+				const simd128_t tmp1 = simd_ftoi(tmp0);
+				const simd128_t tmp2 = simd_xor(tmp1, reverse);
+				const simd128_t tmp3 = simd_sll(tmp2, 1);
+				const simd128_t onei = simd_isplat(1);
+				const simd128_t tmp4 = simd_isub(tmp3, onei);
 
 				BX_ALIGN_DECL_16(int32_t res[4]);
-				float4_st(&res, tmp4);
+				simd_st(&res, tmp4);
 
 				for (uint16_t jj = 0; jj < 2; ++jj)
 				{

+ 78 - 78
src/image.cpp

@@ -387,68 +387,68 @@ namespace bgfx
 		const uint8_t* src = (const uint8_t*)_src;
 
 		using namespace bx;
-		const float4_t unpack = float4_ld(1.0f, 1.0f/256.0f, 1.0f/65536.0f, 1.0f/16777216.0f);
-		const float4_t pack   = float4_ld(1.0f, 256.0f*0.5f, 65536.0f, 16777216.0f*0.5f);
-		const float4_t umask  = float4_ild(0xff, 0xff00, 0xff0000, 0xff000000);
-		const float4_t pmask  = float4_ild(0xff, 0x7f80, 0xff0000, 0x7f800000);
-		const float4_t wflip  = float4_ild(0, 0, 0, 0x80000000);
-		const float4_t wadd   = float4_ld(0.0f, 0.0f, 0.0f, 32768.0f*65536.0f);
-		const float4_t gamma  = float4_ld(1.0f/2.2f, 1.0f/2.2f, 1.0f/2.2f, 1.0f);
-		const float4_t linear = float4_ld(2.2f, 2.2f, 2.2f, 1.0f);
-		const float4_t quater = float4_splat(0.25f);
+		const simd128_t unpack = simd_ld(1.0f, 1.0f/256.0f, 1.0f/65536.0f, 1.0f/16777216.0f);
+		const simd128_t pack   = simd_ld(1.0f, 256.0f*0.5f, 65536.0f, 16777216.0f*0.5f);
+		const simd128_t umask  = simd_ild(0xff, 0xff00, 0xff0000, 0xff000000);
+		const simd128_t pmask  = simd_ild(0xff, 0x7f80, 0xff0000, 0x7f800000);
+		const simd128_t wflip  = simd_ild(0, 0, 0, 0x80000000);
+		const simd128_t wadd   = simd_ld(0.0f, 0.0f, 0.0f, 32768.0f*65536.0f);
+		const simd128_t gamma  = simd_ld(1.0f/2.2f, 1.0f/2.2f, 1.0f/2.2f, 1.0f);
+		const simd128_t linear = simd_ld(2.2f, 2.2f, 2.2f, 1.0f);
+		const simd128_t quater = simd_splat(0.25f);
 
 		for (uint32_t yy = 0, ystep = _pitch*2; yy < dstheight; ++yy, src += ystep)
 		{
 			const uint8_t* rgba = src;
 			for (uint32_t xx = 0; xx < dstwidth; ++xx, rgba += 8, dst += 4)
 			{
-				const float4_t abgr0  = float4_splat(rgba);
-				const float4_t abgr1  = float4_splat(rgba+4);
-				const float4_t abgr2  = float4_splat(rgba+_pitch);
-				const float4_t abgr3  = float4_splat(rgba+_pitch+4);
-
-				const float4_t abgr0m = float4_and(abgr0, umask);
-				const float4_t abgr1m = float4_and(abgr1, umask);
-				const float4_t abgr2m = float4_and(abgr2, umask);
-				const float4_t abgr3m = float4_and(abgr3, umask);
-				const float4_t abgr0x = float4_xor(abgr0m, wflip);
-				const float4_t abgr1x = float4_xor(abgr1m, wflip);
-				const float4_t abgr2x = float4_xor(abgr2m, wflip);
-				const float4_t abgr3x = float4_xor(abgr3m, wflip);
-				const float4_t abgr0f = float4_itof(abgr0x);
-				const float4_t abgr1f = float4_itof(abgr1x);
-				const float4_t abgr2f = float4_itof(abgr2x);
-				const float4_t abgr3f = float4_itof(abgr3x);
-				const float4_t abgr0c = float4_add(abgr0f, wadd);
-				const float4_t abgr1c = float4_add(abgr1f, wadd);
-				const float4_t abgr2c = float4_add(abgr2f, wadd);
-				const float4_t abgr3c = float4_add(abgr3f, wadd);
-				const float4_t abgr0n = float4_mul(abgr0c, unpack);
-				const float4_t abgr1n = float4_mul(abgr1c, unpack);
-				const float4_t abgr2n = float4_mul(abgr2c, unpack);
-				const float4_t abgr3n = float4_mul(abgr3c, unpack);
-
-				const float4_t abgr0l = float4_pow(abgr0n, linear);
-				const float4_t abgr1l = float4_pow(abgr1n, linear);
-				const float4_t abgr2l = float4_pow(abgr2n, linear);
-				const float4_t abgr3l = float4_pow(abgr3n, linear);
-
-				const float4_t sum0   = float4_add(abgr0l, abgr1l);
-				const float4_t sum1   = float4_add(abgr2l, abgr3l);
-				const float4_t sum2   = float4_add(sum0, sum1);
-				const float4_t avg0   = float4_mul(sum2, quater);
-				const float4_t avg1   = float4_pow(avg0, gamma);
-
-				const float4_t avg2   = float4_mul(avg1, pack);
-				const float4_t ftoi0  = float4_ftoi(avg2);
-				const float4_t ftoi1  = float4_and(ftoi0, pmask);
-				const float4_t zwxy   = float4_swiz_zwxy(ftoi1);
-				const float4_t tmp0   = float4_or(ftoi1, zwxy);
-				const float4_t yyyy   = float4_swiz_yyyy(tmp0);
-				const float4_t tmp1   = float4_iadd(yyyy, yyyy);
-				const float4_t result = float4_or(tmp0, tmp1);
-
-				float4_stx(dst, result);
+				const simd128_t abgr0  = simd_splat(rgba);
+				const simd128_t abgr1  = simd_splat(rgba+4);
+				const simd128_t abgr2  = simd_splat(rgba+_pitch);
+				const simd128_t abgr3  = simd_splat(rgba+_pitch+4);
+
+				const simd128_t abgr0m = simd_and(abgr0, umask);
+				const simd128_t abgr1m = simd_and(abgr1, umask);
+				const simd128_t abgr2m = simd_and(abgr2, umask);
+				const simd128_t abgr3m = simd_and(abgr3, umask);
+				const simd128_t abgr0x = simd_xor(abgr0m, wflip);
+				const simd128_t abgr1x = simd_xor(abgr1m, wflip);
+				const simd128_t abgr2x = simd_xor(abgr2m, wflip);
+				const simd128_t abgr3x = simd_xor(abgr3m, wflip);
+				const simd128_t abgr0f = simd_itof(abgr0x);
+				const simd128_t abgr1f = simd_itof(abgr1x);
+				const simd128_t abgr2f = simd_itof(abgr2x);
+				const simd128_t abgr3f = simd_itof(abgr3x);
+				const simd128_t abgr0c = simd_add(abgr0f, wadd);
+				const simd128_t abgr1c = simd_add(abgr1f, wadd);
+				const simd128_t abgr2c = simd_add(abgr2f, wadd);
+				const simd128_t abgr3c = simd_add(abgr3f, wadd);
+				const simd128_t abgr0n = simd_mul(abgr0c, unpack);
+				const simd128_t abgr1n = simd_mul(abgr1c, unpack);
+				const simd128_t abgr2n = simd_mul(abgr2c, unpack);
+				const simd128_t abgr3n = simd_mul(abgr3c, unpack);
+
+				const simd128_t abgr0l = simd_pow(abgr0n, linear);
+				const simd128_t abgr1l = simd_pow(abgr1n, linear);
+				const simd128_t abgr2l = simd_pow(abgr2n, linear);
+				const simd128_t abgr3l = simd_pow(abgr3n, linear);
+
+				const simd128_t sum0   = simd_add(abgr0l, abgr1l);
+				const simd128_t sum1   = simd_add(abgr2l, abgr3l);
+				const simd128_t sum2   = simd_add(sum0, sum1);
+				const simd128_t avg0   = simd_mul(sum2, quater);
+				const simd128_t avg1   = simd_pow(avg0, gamma);
+
+				const simd128_t avg2   = simd_mul(avg1, pack);
+				const simd128_t ftoi0  = simd_ftoi(avg2);
+				const simd128_t ftoi1  = simd_and(ftoi0, pmask);
+				const simd128_t zwxy   = simd_swiz_zwxy(ftoi1);
+				const simd128_t tmp0   = simd_or(ftoi1, zwxy);
+				const simd128_t yyyy   = simd_swiz_yyyy(tmp0);
+				const simd128_t tmp1   = simd_iadd(yyyy, yyyy);
+				const simd128_t result = simd_or(tmp0, tmp1);
+
+				simd_stx(dst, result);
 			}
 		}
 	}
@@ -630,8 +630,8 @@ namespace bgfx
 
 		using namespace bx;
 
-		const float4_t mf0f0 = float4_isplat(0xff00ff00);
-		const float4_t m0f0f = float4_isplat(0x00ff00ff);
+		const simd128_t mf0f0 = simd_isplat(0xff00ff00);
+		const simd128_t m0f0f = simd_isplat(0x00ff00ff);
 		const uint8_t* src = (uint8_t*) _src;
 		const uint8_t* next = src + _pitch;
 		uint8_t* dst = (uint8_t*)_dst;
@@ -642,14 +642,14 @@ namespace bgfx
 		{
 			for (uint32_t xx = 0; xx < width; ++xx, src += 16, dst += 16)
 			{
-				const float4_t tabgr = float4_ld(src);
-				const float4_t t00ab = float4_srl(tabgr, 16);
-				const float4_t tgr00 = float4_sll(tabgr, 16);
-				const float4_t tgrab = float4_or(t00ab, tgr00);
-				const float4_t ta0g0 = float4_and(tabgr, mf0f0);
-				const float4_t t0r0b = float4_and(tgrab, m0f0f);
-				const float4_t targb = float4_or(ta0g0, t0r0b);
-				float4_st(dst, targb);
+				const simd128_t tabgr = simd_ld(src);
+				const simd128_t t00ab = simd_srl(tabgr, 16);
+				const simd128_t tgr00 = simd_sll(tabgr, 16);
+				const simd128_t tgrab = simd_or(t00ab, tgr00);
+				const simd128_t ta0g0 = simd_and(tabgr, mf0f0);
+				const simd128_t t0r0b = simd_and(tgrab, m0f0f);
+				const simd128_t targb = simd_or(ta0g0, t0r0b);
+				simd_st(dst, targb);
 			}
 		}
 	}
@@ -3676,24 +3676,24 @@ namespace bgfx
 		const uint8_t* src = (const uint8_t*)_src;
 
 		using namespace bx;
-		const float4_t unpack = float4_ld(1.0f, 1.0f/256.0f, 1.0f/65536.0f, 1.0f/16777216.0f);
-		const float4_t umask  = float4_ild(0xff, 0xff00, 0xff0000, 0xff000000);
-		const float4_t wflip  = float4_ild(0, 0, 0, 0x80000000);
-		const float4_t wadd   = float4_ld(0.0f, 0.0f, 0.0f, 32768.0f*65536.0f);
+		const simd128_t unpack = simd_ld(1.0f, 1.0f/256.0f, 1.0f/65536.0f, 1.0f/16777216.0f);
+		const simd128_t umask  = simd_ild(0xff, 0xff00, 0xff0000, 0xff000000);
+		const simd128_t wflip  = simd_ild(0, 0, 0, 0x80000000);
+		const simd128_t wadd   = simd_ld(0.0f, 0.0f, 0.0f, 32768.0f*65536.0f);
 
 		for (uint32_t yy = 0, ystep = _pitch; yy < dstheight; ++yy, src += ystep)
 		{
 			const uint8_t* rgba = src;
 			for (uint32_t xx = 0; xx < dstwidth; ++xx, rgba += 4, dst += 4)
 			{
-				const float4_t abgr0  = float4_splat(rgba);
-				const float4_t abgr0m = float4_and(abgr0, umask);
-				const float4_t abgr0x = float4_xor(abgr0m, wflip);
-				const float4_t abgr0f = float4_itof(abgr0x);
-				const float4_t abgr0c = float4_add(abgr0f, wadd);
-				const float4_t abgr0n = float4_mul(abgr0c, unpack);
-
-				float4_st(dst, abgr0n);
+				const simd128_t abgr0  = simd_splat(rgba);
+				const simd128_t abgr0m = simd_and(abgr0, umask);
+				const simd128_t abgr0x = simd_xor(abgr0m, wflip);
+				const simd128_t abgr0f = simd_itof(abgr0x);
+				const simd128_t abgr0c = simd_add(abgr0f, wadd);
+				const simd128_t abgr0n = simd_mul(abgr0c, unpack);
+
+				simd_st(dst, abgr0n);
 			}
 		}
 	}