Browse Source

Added a library for Matrix4f

shamanDevel 9 năm trước cách đây
mục cha
commit
250c871cab

+ 307 - 0
jme3-core/src/main/resources/Common/OpenCL/Matrix4f.clh

@@ -9,4 +9,311 @@ typedef float16 mat4;
 
 //All matrix functions are prefixed with mat3 or mat4
 
+//Returns the zero matrix
+inline mat4 mat4Zero() {
+	return (float16)(0);
+}
+
+//Returns the identity matrix
+inline mat4 mat4Identity() {
+	return (float16)
+		(1, 0, 0, 0,
+		 0, 1, 0, 0,
+		 0, 0, 1, 0,
+		 0, 0, 0, 1);
+}
+
+inline mat4 mat4FromRows(float4 row1, float4 row2, float4 row3, float4 row4) {
+	return (float16) (row1, row2, row3, row4);
+}
+
+inline mat4 mat4FromColumns(float4 col1, float4 col2, float4 col3, float4 col4) {
+	return (float16)
+		(col1.x, col2.x, col3.x, col4.x,
+		 col1.y, col2.y, col3.y, col4.y,
+		 col1.z, col2.z, col3.z, col4.z,
+		 col1.w, col2.w, col3.w, col4.w);
+}
+
+inline mat4 mat4FromDiagonal(float4 diag) {
+	return (float16)
+		(diag.x, 0, 0, 0,
+		 0, diag.y, 0, 0,
+		 0, 0, diag.z, 0,
+		 0, 0, 0, diag.w);
+}
+
+//Returns the i-th row (0-based)
+inline float4 mat4GetRow(mat4 mat, int i) {
+	if (i==0) return mat.s0123;
+	else if (i==1) return mat.s4567;
+	else if (i==2) return mat.s89ab;
+	else return mat.scdef;
+}
+
+//Sets the i-th row (0-based)
+inline mat4 mat4SetRow(mat4 mat, int i, float4 row) {
+	if (i==0) mat.s0123 = row;
+	else if (i==1) mat.s4567 = row;
+	else if (i==2) mat.s89ab = row;
+	else mat.scdef = row;
+	return mat;
+}
+
+//Returns the i-th column (0-based)
+inline float4 mat4GetColumn(mat4 mat, int i) {
+	if (i==0) return mat.s048c;
+	else if (i==1) return mat.s159d;
+	else if (i==2) return mat.s26ae;
+	else return mat.s37bf;
+}
+
+//Sets the i-th column (0-based)
+inline mat4 mat4SetColumn(mat4 mat, int i, float4 col) {
+	if (i==0) mat.s048c = col;
+	else if (i==1) mat.s159d = col;
+	else if (i==2) mat.s26ae = col;
+	else mat.s37bf = col;
+	return mat;
+}
+
+//Returns the diagonal
+inline float4 mat4GetDiagonal(mat4 mat) {
+	return mat.s05af;
+}
+
+//Sets the diagonal
+inline mat4 mat3SetDiagonal(mat4 mat, float4 diag) {
+	mat.s05af = diag;
+	return mat;
+}
+
+mat4 mat4FromFrame(float3 location, float3 direction, float3 up, float3 left)
+{
+	float3 fwdVector = direction;
+	float3 leftVector = cross(fwdVector, up);
+	float3 upVector = cross(leftVector, fwdVector);
+
+	return (float16) (
+		leftVector.x,
+		leftVector.y,
+		leftVector.z,
+		dot(-leftVector, location),
+
+		upVector.x,
+		upVector.y,
+		upVector.z,
+		dot(-upVector, location),
+
+		-fwdVector.x,
+		-fwdVector.y,
+		-fwdVector.z,
+		dot(fwdVector, location),
+
+		0, 0, 0, 1
+	);
+}
+
+inline mat4 mat4Transpose(mat4 mat) {
+	return mat.s048c159d26ae37bf; //magic
+}
+
+mat4 mat4FromAngleNormalAxis(float angle, float3 axis) {
+	float fCos = cos(angle);
+	float fSin = sin(angle);
+	float fOneMinusCos = 1.0f - fCos;
+	float fX2 = axis.x * axis.x;
+	float fY2 = axis.y * axis.y;
+	float fZ2 = axis.z * axis.z;
+	float fXYM = axis.x * axis.y * fOneMinusCos;
+	float fXZM = axis.x * axis.z * fOneMinusCos;
+	float fYZM = axis.y * axis.z * fOneMinusCos;
+	float fXSin = axis.x * fSin;
+	float fYSin = axis.y * fSin;
+	float fZSin = axis.z * fSin;
+
+	return (float16) (
+		fX2 * fOneMinusCos + fCos,
+		fXYM - fZSin,
+		fXZM + fYSin,
+		0,
+		fXYM + fZSin,
+		fY2 * fOneMinusCos + fCos,
+		fYZM - fXSin,
+		0,
+		fXZM - fYSin,
+		fYZM + fXSin,
+		fZ2 * fOneMinusCos + fCos,
+		0,
+		0, 0, 0, 1
+	);
+}
+
+mat4 mat4FromAngleAxis(float angle, float3 axis) {
+	return mat4FromAngleNormalAxis(angle, normalize(axis));
+}
+
+inline mat4 mat4Scale(mat4 mat, float s) {
+	return mat * s;
+}
+
+inline mat4 mat4Add(mat4 A, mat4 B) {
+	return A + B;
+}
+
+//Multiplies the two matrices A and B
+inline mat4 mat4Mult(mat4 A, mat4 B) {
+	return (float16) (
+		dot(A.s0123, B.s048c),
+		dot(A.s0123, B.s159d),
+		dot(A.s0123, B.s26ae),
+		dot(A.s0123, B.s37bf),
+
+		dot(A.s4567, B.s048c),
+		dot(A.s4567, B.s159d),
+		dot(A.s4567, B.s26ae),
+		dot(A.s4567, B.s37bf),
+
+		dot(A.s89ab, B.s048c),
+		dot(A.s89ab, B.s159d),
+		dot(A.s89ab, B.s26ae),
+		dot(A.s89ab, B.s37bf),
+
+		dot(A.scdef, B.s048c),
+		dot(A.scdef, B.s159d),
+		dot(A.scdef, B.s26ae),
+		dot(A.scdef, B.s37bf)
+	);
+}
+
+//Computes Av (right multiply of a vector to a matrix)
+inline float4 mat4VMult(mat4 A, float4 v) {
+	return (float4) (
+		dot(A.s0123, v),
+		dot(A.s4567, v),
+		dot(A.s89ab, v),
+		dot(A.scdef, v));
+}
+
+//Computes vA (left multiply of a vector to a matrix)
+inline float4 mat4VMult2(float4 v, mat4 A) {
+	return (float4) (
+		dot(v, A.s048c),
+		dot(v, A.s159d),
+		dot(v, A.s26ae),
+		dot(v, A.s37bf));
+}
+
+inline float4 mat4MultAcross(mat4 mat, float4 v) {
+	return mat4VMult2(v, mat);
+}
+
+inline float3 mat4MultNormal(mat4 mat, float3 v) {
+	return mat4VMult(mat, (float4)(v, 0)).xyz;
+}
+
+inline float3 mat4MultNormalAcross(mat4 mat, float3 v) {
+	return mat4VMult2((float4)(v, 0), mat).xyz;
+}
+
+mat4 mat4Invert(mat4 mat) {
+	float fA0 = mat.s0 * mat.s5 - mat.s1 * mat.s4;
+	float fA1 = mat.s0 * mat.s6 - mat.s2 * mat.s4;
+	float fA2 = mat.s0 * mat.s7 - mat.s3 * mat.s4;
+	float fA3 = mat.s1 * mat.s6 - mat.s2 * mat.s5;
+	float fA4 = mat.s1 * mat.s7 - mat.s3 * mat.s5;
+	float fA5 = mat.s2 * mat.s7 - mat.s3 * mat.s6;
+	float fB0 = mat.s8 * mat.sd - mat.s9 * mat.sc;
+	float fB1 = mat.s8 * mat.se - mat.sa * mat.sc;
+	float fB2 = mat.s8 * mat.sf - mat.sb * mat.sc;
+	float fB3 = mat.s9 * mat.se - mat.sa * mat.sd;
+	float fB4 = mat.s9 * mat.sf - mat.sb * mat.sd;
+	float fB5 = mat.sa * mat.sf - mat.sb * mat.se;
+	float fDet = fA0 * fB5 - fA1 * fB4 + fA2 * fB3 + fA3 * fB2 - fA4 * fB1 + fA5 * fB0;
+
+	if (fabs(fDet) <= 0.000001f) {
+		return mat4Zero();
+	}
+
+	mat4 store;
+	store.s0 = +mat.s5 * fB5 - mat.s6 * fB4 + mat.s7 * fB3;
+	store.s4 = -mat.s4 * fB5 + mat.s6 * fB2 - mat.s7 * fB1;
+	store.s8 = +mat.s4 * fB4 - mat.s5 * fB2 + mat.s7 * fB0;
+	store.sc = -mat.s4 * fB3 + mat.s5 * fB1 - mat.s6 * fB0;
+	store.s1 = -mat.s1 * fB5 + mat.s2 * fB4 - mat.s3 * fB3;
+	store.s5 = +mat.s0 * fB5 - mat.s2 * fB2 + mat.s3 * fB1;
+	store.s9 = -mat.s0 * fB4 + mat.s1 * fB2 - mat.s3 * fB0;
+	store.sd = +mat.s0 * fB3 - mat.s1 * fB1 + mat.s2 * fB0;
+	store.s2 = +mat.sd * fA5 - mat.se * fA4 + mat.sf * fA3;
+	store.s6 = -mat.sc * fA5 + mat.se * fA2 - mat.sf * fA1;
+	store.sa = +mat.sc * fA4 - mat.sd * fA2 + mat.sf * fA0;
+	store.se = -mat.sc * fA3 + mat.sd * fA1 - mat.se * fA0;
+	store.s3 = -mat.s9 * fA5 + mat.sa * fA4 - mat.sb * fA3;
+	store.s7 = +mat.s8 * fA5 - mat.sa * fA2 + mat.sb * fA1;
+	store.sb = -mat.s8 * fA4 + mat.s9 * fA2 - mat.sb * fA0;
+	store.sf = +mat.s8 * fA3 - mat.s9 * fA1 + mat.sa * fA0;
+
+	store /= fDet;
+
+	return store;
+}
+
+mat4 mat4Adjoint(mat4 mat) {
+	float fA0 = mat.s0 * mat.s5 - mat.s1 * mat.s4;
+	float fA1 = mat.s0 * mat.s6 - mat.s2 * mat.s4;
+	float fA2 = mat.s0 * mat.s7 - mat.s3 * mat.s4;
+	float fA3 = mat.s1 * mat.s6 - mat.s2 * mat.s5;
+	float fA4 = mat.s1 * mat.s7 - mat.s3 * mat.s5;
+	float fA5 = mat.s2 * mat.s7 - mat.s3 * mat.s6;
+	float fB0 = mat.s8 * mat.sd - mat.s9 * mat.sc;
+	float fB1 = mat.s8 * mat.se - mat.sa * mat.sc;
+	float fB2 = mat.s8 * mat.sf - mat.sb * mat.sc;
+	float fB3 = mat.s9 * mat.se - mat.sa * mat.sd;
+	float fB4 = mat.s9 * mat.sf - mat.sb * mat.sd;
+	float fB5 = mat.sa * mat.sf - mat.sb * mat.se;
+
+	mat4 store;
+
+	store.s0 = +mat.s5 * fB5 - mat.s6 * fB4 + mat.s7 * fB3;
+	store.s4 = -mat.s4 * fB5 + mat.s6 * fB2 - mat.s7 * fB1;
+	store.s8 = +mat.s4 * fB4 - mat.s5 * fB2 + mat.s7 * fB0;
+	store.sc = -mat.s4 * fB3 + mat.s5 * fB1 - mat.s6 * fB0;
+	store.s1 = -mat.s1 * fB5 + mat.s2 * fB4 - mat.s3 * fB3;
+	store.s5 = +mat.s0 * fB5 - mat.s2 * fB2 + mat.s3 * fB1;
+	store.s9 = -mat.s0 * fB4 + mat.s1 * fB2 - mat.s3 * fB0;
+	store.sd = +mat.s0 * fB3 - mat.s1 * fB1 + mat.s2 * fB0;
+	store.s2 = +mat.sd * fA5 - mat.se * fA4 + mat.sf * fA3;
+	store.s6 = -mat.sc * fA5 + mat.se * fA2 - mat.sf * fA1;
+	store.sa = +mat.sc * fA4 - mat.sd * fA2 + mat.sf * fA0;
+	store.se = -mat.sc * fA3 + mat.sd * fA1 - mat.se * fA0;
+	store.s3 = -mat.s9 * fA5 + mat.sa * fA4 - mat.sb * fA3;
+	store.s7 = +mat.s8 * fA5 - mat.sa * fA2 + mat.sb * fA1;
+	store.sb = -mat.s8 * fA4 + mat.s9 * fA2 - mat.sb * fA0;
+	store.sf = +mat.s8 * fA3 - mat.s9 * fA1 + mat.sa * fA0;
+
+	return store;
+}
+
+float mat4Determinant(mat4 mat) {
+	float fA0 = mat.s0 * mat.s5 - mat.s1 * mat.s4;
+	float fA1 = mat.s0 * mat.s6 - mat.s2 * mat.s4;
+	float fA2 = mat.s0 * mat.s7 - mat.s3 * mat.s4;
+	float fA3 = mat.s1 * mat.s6 - mat.s2 * mat.s5;
+	float fA4 = mat.s1 * mat.s7 - mat.s3 * mat.s5;
+	float fA5 = mat.s2 * mat.s7 - mat.s3 * mat.s6;
+	float fB0 = mat.s8 * mat.sd - mat.s9 * mat.sc;
+	float fB1 = mat.s8 * mat.se - mat.sa * mat.sc;
+	float fB2 = mat.s8 * mat.sf - mat.sb * mat.sc;
+	float fB3 = mat.s9 * mat.se - mat.sa * mat.sd;
+	float fB4 = mat.s9 * mat.sf - mat.sb * mat.sd;
+	float fB5 = mat.sa * mat.sf - mat.sb * mat.se;
+	float fDet = fA0 * fB5 - fA1 * fB4 + fA2 * fB3 + fA3 * fB2 - fA4 * fB1 + fA5 * fB0;
+	return fDet;
+}
+
+inline bool mat4Equals(mat4 A, mat4 B, float epsilon) {
+	return all(isless(fabs(A - B), epsilon));
+}
+
+
 #endif

+ 85 - 1
jme3-examples/src/main/java/jme3test/opencl/TestOpenCLLibraries.java

@@ -36,7 +36,9 @@ import com.jme3.app.SimpleApplication;
 import com.jme3.font.BitmapFont;
 import com.jme3.font.BitmapText;
 import com.jme3.math.ColorRGBA;
+import com.jme3.math.FastMath;
 import com.jme3.math.Matrix3f;
+import com.jme3.math.Matrix4f;
 import com.jme3.opencl.*;
 import com.jme3.system.AppSettings;
 import com.jme3.util.BufferUtils;
@@ -84,6 +86,7 @@ public class TestOpenCLLibraries extends SimpleApplication {
         str.append("\nTests:");
         str.append("\n  Random numbers: ").append(testRandom(clContext, clQueue));
         str.append("\n  Matrix3f: ").append(testMatrix3f(clContext, clQueue));
+        str.append("\n  Matrix4f: ").append(testMatrix4f(clContext, clQueue));
         
         clQueue.release();
         
@@ -279,7 +282,7 @@ public class TestOpenCLLibraries extends SimpleApplication {
                     + "}\n";
             Program program = clContext.createProgramFromSourceCodeWithDependencies(code, assetManager);
             program.build();
-            com.jme3.opencl.Buffer buffer = clContext.createBuffer(4 * 1);
+            com.jme3.opencl.Buffer buffer = clContext.createBuffer(1);
             
             Kernel testMatrix3fKernel1 = program.createKernel("TestMatrix3f_1");
             testMatrix3fKernel1.Run1NoEvent(clQueue, new Kernel.WorkSize(1), buffer);
@@ -316,4 +319,85 @@ public class TestOpenCLLibraries extends SimpleApplication {
         return true;
     }
     
+    private boolean testMatrix4f(Context clContext, CommandQueue clQueue) {
+        try {
+            
+            String code = ""
+                    + "#import \"Common/OpenCL/Matrix4f.clh\"\n"
+                    + "\n"
+                    + "__kernel void TestMatrix4f_1(mat4 m1, __global char* result)\n"
+                    + "{\n"
+                    + "  mat4 id = mat4Identity();\n"
+                    + "  mat4 m1Inv = mat4Invert(m1);\n"
+                    + "  mat4 m1Res = mat4Mult(m1, m1Inv);\n"
+                    + "  result[0] = mat4Equals(id, m1Res, 0.0001f) ? 1 : 0;\n"
+                    + "}\n"
+                    + "\n"
+                    + "__kernel void TestMatrix4f_2(mat4 m1, float d, mat4 m2, mat4 m3, __global char* result)\n"
+                    + "{\n"
+                    + "  float d2 = mat4Determinant(m1);\n"
+                    + "  result[0] = fabs(d - d2) < 0.0001f ? 1 : 0;\n"
+                    + "  mat4 res = mat4Transpose(m1);\n"
+                    + "  result[1] = mat4Equals(res, m2, 0.0001f) ? 1 : 0;\n"
+                    + "  res = mat4Adjoint(m1);\n"
+                    + "  result[2] = mat4Equals(res, m3, 0.0001f) ? 1 : 0;\n"
+                    + "}\n";
+            Program program = clContext.createProgramFromSourceCodeWithDependencies(code, assetManager);
+            program.build();
+            com.jme3.opencl.Buffer buffer = clContext.createBuffer(3);
+            
+            Random rand = new Random(1561);
+            
+            Kernel testMatrix4fKernel1 = program.createKernel("TestMatrix4f_1");
+            Matrix4f m1 = new Matrix4f();
+            do {
+                for (int i=0; i<4; ++i) {
+                    for (int j=0; j<4; ++j) {
+                        m1.set(i, j, rand.nextFloat()*20 - 10);
+                    }
+                }
+            } while (FastMath.abs(m1.determinant()) < 0.00001f);
+            testMatrix4fKernel1.Run1NoEvent(clQueue, new Kernel.WorkSize(1), m1, buffer);
+            ByteBuffer bb = buffer.map(clQueue, MappingAccess.MAP_READ_ONLY);
+            if (bb.get() == 0) {
+                LOG.severe("Matrix inversion failed");
+                return false;
+            }
+            buffer.unmap(clQueue, bb);
+            testMatrix4fKernel1.release();
+            
+            Kernel testMatrix4fKernel2 = program.createKernel("TestMatrix4f_2");
+            for (int i=0; i<4; ++i) {
+                for (int j=0; j<4; ++j) {
+                    m1.set(i, j, rand.nextFloat()*20 - 10);
+                }
+            }
+            testMatrix4fKernel2.Run1NoEvent(clQueue, new Kernel.WorkSize(1), m1, m1.determinant(), m1.transpose(), m1.adjoint(), buffer);
+            bb = buffer.map(clQueue, MappingAccess.MAP_READ_ONLY);
+            if (bb.get() == 0) {
+                LOG.severe("Matrix determinant computation failed");
+                return false;
+            }
+            if (bb.get() == 0) {
+                LOG.severe("Matrix transposing failed");
+                return false;
+            }
+            if (bb.get() == 0) {
+                LOG.severe("Matrix adjoint computation failed");
+                return false;
+            }
+            buffer.unmap(clQueue, bb);
+            testMatrix4fKernel2.release();
+            
+            buffer.release();
+            
+        } catch (AssertionError ex) {
+            LOG.log(Level.SEVERE, "kernel test failed with an assertion error");
+            return false;
+        } catch (Exception ex) {
+            LOG.log(Level.SEVERE, "kernel test failed with:", ex);
+            return false;
+        }
+        return true;
+    }
 }