
Incorporate non-sse functions; compiled and tested on M1 (#1975)

Co-authored-by: Alec Jacobson <[email protected]>
Alec Jacobson, 3 years ago
commit 7fe68a3237
1 file changed, 408 insertions(+), 2 deletions(-)

+ 408 - 2
include/igl/FastWindingNumberForSoups.h
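In the fallback added below, v4sf and v4si become plain four-lane structs and each SSE intrinsic is re-implemented lane by lane. Comparisons return a full 0xFFFFFFFF-or-0 mask per lane (see conditionMask), mirroring SSE's cmpps convention so that mask-based AND/ANDNOT/OR selection still composes. A rough single-lane sketch of that convention (hypothetical helper names, not part of the commit):

    #include <cstdint>

    // All-ones / all-zeros lane mask, as in the fallback's conditionMask().
    static inline int32_t lane_mask(bool c) { return c ? int32_t(0xFFFFFFFF) : 0; }

    // Branch-free select composed from such masks: (m & a) | (~m & b),
    // the single-lane analogue of VM_OR(VM_AND(m, a), VM_ANDNOT(m, b)).
    static inline int32_t lane_select(int32_t m, int32_t a, int32_t b) {
        return (m & a) | (~m & b);
    }
    // lane_select(lane_mask(true), 7, 9) == 7
    // lane_select(lane_mask(false), 7, 9) == 9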

@@ -1,4 +1,4 @@
-// This header created by issuing: `echo "// This header created by issuing: \`$BASH_COMMAND\` $(echo "" | cat - LICENSE README.md | sed -e "s#^..*#\/\/ &#") $(echo "" | cat - SYS_Types.h SYS_Math.h VM_SSEFunc.h VM_SIMD.h UT_Array.h UT_ArrayImpl.h UT_SmallArray.h UT_FixedVector.h UT_ParallelUtil.h UT_BVH.h UT_BVHImpl.h UT_SolidAngle.h UT_Array.cpp UT_SolidAngle.cpp | sed -e "s/^#.*include  *\".*$//g")" > ../FastWindingNumberForSoups.h` 
+// This header created by issuing: `echo "// This header created by issuing: \`$BASH_COMMAND\` $(echo "" | cat - LICENSE README.md | sed -e "s#^..*#\/\/ &#") $(echo "" | cat - SYS_Types.h SYS_Math.h VM_SSEFunc.h VM_SIMDFunc.h VM_SIMD.h UT_Array.h UT_ArrayImpl.h UT_SmallArray.h UT_FixedVector.h UT_ParallelUtil.h UT_BVH.h UT_BVHImpl.h UT_SolidAngle.h UT_Array.cpp UT_SolidAngle.cpp | sed -e "s/^#.*include  *\".*$//g")" > ~/Repos/libigl/include/igl/FastWindingNumberForSoups.h` 
 // MIT License

 // Copyright (c) 2018 Side Effects Software Inc.
@@ -48,7 +48,7 @@

 // Create a single header using:

-//     echo "// This header created by issuing: \`$BASH_COMMAND\` $(echo "" | cat - LICENSE README.md | sed -e "s#^..*#\/\/&#") $(echo "" | cat - SYS_Types.h SYS_Math.h VM_SSEFunc.h VM_SIMD.h UT_Array.h UT_ArrayImpl.h UT_SmallArray.h UT_FixedVector.h UT_ParallelUtil.h UT_BVH.h UT_BVHImpl.h UT_SolidAngle.h UT_Array.cpp UT_SolidAngle.cpp | sed -e "s/^#.*include  *\".*$//g")" 
+//     echo "// This header created by issuing: \`$BASH_COMMAND\` $(echo "" | cat - LICENSE README.md | sed -e "s#^..*#\/\/ &#") $(echo "" | cat - SYS_Types.h SYS_Math.h VM_SSEFunc.h VM_SIMD.h UT_Array.h UT_ArrayImpl.h UT_SmallArray.h UT_FixedVector.h UT_ParallelUtil.h UT_BVH.h UT_BVHImpl.h UT_SolidAngle.h UT_Array.cpp UT_SolidAngle.cpp | sed -e "s/^#.*include  *\".*$//g")" 
 /*
  * Copyright (c) 2018 Side Effects Software Inc.
  *
@@ -357,6 +357,7 @@ static inline fpreal64 SYSabs(fpreal64 a) { return ::fabs(a); }
  */

 #pragma once
+#ifdef __SSE__

 #ifndef __VM_SSEFunc__
 #define __VM_SSEFunc__
@@ -734,6 +735,407 @@ vm_allbits(const v4si &a)
 }}

 #endif
+#endif
+#pragma once
+#ifndef __SSE__
+#ifndef __VM_SIMDFunc__
+#define __VM_SIMDFunc__
+
+
+
+#include <cmath>
+
+namespace igl { namespace FastWindingNumber {
+
+struct v4si {
+	int32 v[4];
+};
+
+struct v4sf {
+	float v[4];
+};
+
+static SYS_FORCE_INLINE v4sf V4SF(const v4si &v) {
+	static_assert(sizeof(v4si) == sizeof(v4sf) && alignof(v4si) == alignof(v4sf), "v4si and v4sf must be compatible");
+	return *(const v4sf*)&v;
+}
+
+static SYS_FORCE_INLINE v4si V4SI(const v4sf &v) {
+	static_assert(sizeof(v4si) == sizeof(v4sf) && alignof(v4si) == alignof(v4sf), "v4si and v4sf must be compatible");
+	return *(const v4si*)&v;
+}
+
+static SYS_FORCE_INLINE int32 conditionMask(bool c) {
+	return c ? int32(0xFFFFFFFF) : 0;
+}
+
+static SYS_FORCE_INLINE v4sf
+VM_SPLATS(float f) {
+	return v4sf{{f, f, f, f}};
+}
+
+static SYS_FORCE_INLINE v4si
+VM_SPLATS(uint32 i) {
+	return v4si{{int32(i), int32(i), int32(i), int32(i)}};
+}
+
+static SYS_FORCE_INLINE v4si
+VM_SPLATS(int32 i) {
+	return v4si{{i, i, i, i}};
+}
+
+static SYS_FORCE_INLINE v4sf
+VM_SPLATS(float a, float b, float c, float d) {
+	return v4sf{{a, b, c, d}};
+}
+
+static SYS_FORCE_INLINE v4si
+VM_SPLATS(uint32 a, uint32 b, uint32 c, uint32 d) {
+	return v4si{{int32(a), int32(b), int32(c), int32(d)}};
+}
+
+static SYS_FORCE_INLINE v4si
+VM_SPLATS(int32 a, int32 b, int32 c, int32 d) {
+	return v4si{{a, b, c, d}};
+}
+
+static SYS_FORCE_INLINE v4si
+VM_LOAD(const int32 v[4]) {
+	return v4si{{v[0], v[1], v[2], v[3]}};
+}
+
+static SYS_FORCE_INLINE v4sf
+VM_LOAD(const float v[4]) {
+	return v4sf{{v[0], v[1], v[2], v[3]}};
+}
+
+
+static inline v4si VM_ICMPEQ(v4si a, v4si b) {
+	return v4si{{
+		conditionMask(a.v[0] == b.v[0]),
+		conditionMask(a.v[1] == b.v[1]),
+		conditionMask(a.v[2] == b.v[2]),
+		conditionMask(a.v[3] == b.v[3])
+	}};
+}
+
+static inline v4si VM_ICMPGT(v4si a, v4si b) {
+	return v4si{{
+		conditionMask(a.v[0] > b.v[0]),
+		conditionMask(a.v[1] > b.v[1]),
+		conditionMask(a.v[2] > b.v[2]),
+		conditionMask(a.v[3] > b.v[3])
+	}};
+}
+
+static inline v4si VM_ICMPLT(v4si a, v4si b) {
+	return v4si{{
+		conditionMask(a.v[0] < b.v[0]),
+		conditionMask(a.v[1] < b.v[1]),
+		conditionMask(a.v[2] < b.v[2]),
+		conditionMask(a.v[3] < b.v[3])
+	}};
+}
+
+static inline v4si VM_IADD(v4si a, v4si b) {
+	return v4si{{
+		(a.v[0] + b.v[0]),
+		(a.v[1] + b.v[1]),
+		(a.v[2] + b.v[2]),
+		(a.v[3] + b.v[3])
+	}};
+}
+
+static inline v4si VM_ISUB(v4si a, v4si b) {
+	return v4si{{
+		(a.v[0] - b.v[0]),
+		(a.v[1] - b.v[1]),
+		(a.v[2] - b.v[2]),
+		(a.v[3] - b.v[3])
+	}};
+}
+
+static inline v4si VM_OR(v4si a, v4si b) {
+	return v4si{{
+		(a.v[0] | b.v[0]),
+		(a.v[1] | b.v[1]),
+		(a.v[2] | b.v[2]),
+		(a.v[3] | b.v[3])
+	}};
+}
+
+static inline v4si VM_AND(v4si a, v4si b) {
+	return v4si{{
+		(a.v[0] & b.v[0]),
+		(a.v[1] & b.v[1]),
+		(a.v[2] & b.v[2]),
+		(a.v[3] & b.v[3])
+	}};
+}
+
+static inline v4si VM_ANDNOT(v4si a, v4si b) {
+	return v4si{{
+		((~a.v[0]) & b.v[0]),
+		((~a.v[1]) & b.v[1]),
+		((~a.v[2]) & b.v[2]),
+		((~a.v[3]) & b.v[3])
+	}};
+}
+
+static inline v4si VM_XOR(v4si a, v4si b) {
+	return v4si{{
+		(a.v[0] ^ b.v[0]),
+		(a.v[1] ^ b.v[1]),
+		(a.v[2] ^ b.v[2]),
+		(a.v[3] ^ b.v[3])
+	}};
+}
+
+static SYS_FORCE_INLINE int
+VM_EXTRACT(const v4si v, int index) {
+	return v.v[index];
+}
+
+static SYS_FORCE_INLINE float
+VM_EXTRACT(const v4sf v, int index) {
+	return v.v[index];
+}
+
+static SYS_FORCE_INLINE v4si
+VM_INSERT(v4si v, int32 value, int index) {
+	v.v[index] = value;
+	return v;
+}
+
+static SYS_FORCE_INLINE v4sf
+VM_INSERT(v4sf v, float value, int index) {
+	v.v[index] = value;
+	return v;
+}
+
+static inline v4si VM_CMPEQ(v4sf a, v4sf b) {
+	return v4si{{
+		conditionMask(a.v[0] == b.v[0]),
+		conditionMask(a.v[1] == b.v[1]),
+		conditionMask(a.v[2] == b.v[2]),
+		conditionMask(a.v[3] == b.v[3])
+	}};
+}
+
+static inline v4si VM_CMPNE(v4sf a, v4sf b) {
+	return v4si{{
+		conditionMask(a.v[0] != b.v[0]),
+		conditionMask(a.v[1] != b.v[1]),
+		conditionMask(a.v[2] != b.v[2]),
+		conditionMask(a.v[3] != b.v[3])
+	}};
+}
+
+static inline v4si VM_CMPGT(v4sf a, v4sf b) {
+	return v4si{{
+		conditionMask(a.v[0] > b.v[0]),
+		conditionMask(a.v[1] > b.v[1]),
+		conditionMask(a.v[2] > b.v[2]),
+		conditionMask(a.v[3] > b.v[3])
+	}};
+}
+
+static inline v4si VM_CMPLT(v4sf a, v4sf b) {
+	return v4si{{
+		conditionMask(a.v[0] < b.v[0]),
+		conditionMask(a.v[1] < b.v[1]),
+		conditionMask(a.v[2] < b.v[2]),
+		conditionMask(a.v[3] < b.v[3])
+	}};
+}
+
+static inline v4si VM_CMPGE(v4sf a, v4sf b) {
+	return v4si{{
+		conditionMask(a.v[0] >= b.v[0]),
+		conditionMask(a.v[1] >= b.v[1]),
+		conditionMask(a.v[2] >= b.v[2]),
+		conditionMask(a.v[3] >= b.v[3])
+	}};
+}
+
+static inline v4si VM_CMPLE(v4sf a, v4sf b) {
+	return v4si{{
+		conditionMask(a.v[0] <= b.v[0]),
+		conditionMask(a.v[1] <= b.v[1]),
+		conditionMask(a.v[2] <= b.v[2]),
+		conditionMask(a.v[3] <= b.v[3])
+	}};
+}
+
+static inline v4sf VM_ADD(v4sf a, v4sf b) {
+	return v4sf{{
+		(a.v[0] + b.v[0]),
+		(a.v[1] + b.v[1]),
+		(a.v[2] + b.v[2]),
+		(a.v[3] + b.v[3])
+	}};
+}
+
+static inline v4sf VM_SUB(v4sf a, v4sf b) {
+	return v4sf{{
+		(a.v[0] - b.v[0]),
+		(a.v[1] - b.v[1]),
+		(a.v[2] - b.v[2]),
+		(a.v[3] - b.v[3])
+	}};
+}
+
+static inline v4sf VM_NEG(v4sf a) {
+	return v4sf{{
+		(-a.v[0]),
+		(-a.v[1]),
+		(-a.v[2]),
+		(-a.v[3])
+	}};
+}
+
+static inline v4sf VM_MUL(v4sf a, v4sf b) {
+	return v4sf{{
+		(a.v[0] * b.v[0]),
+		(a.v[1] * b.v[1]),
+		(a.v[2] * b.v[2]),
+		(a.v[3] * b.v[3])
+	}};
+}
+
+static inline v4sf VM_DIV(v4sf a, v4sf b) {
+	return v4sf{{
+		(a.v[0] / b.v[0]),
+		(a.v[1] / b.v[1]),
+		(a.v[2] / b.v[2]),
+		(a.v[3] / b.v[3])
+	}};
+}
+
+static inline v4sf VM_MADD(v4sf a, v4sf b, v4sf c) {
+	return v4sf{{
+		(a.v[0] * b.v[0]) + c.v[0],
+		(a.v[1] * b.v[1]) + c.v[1],
+		(a.v[2] * b.v[2]) + c.v[2],
+		(a.v[3] * b.v[3]) + c.v[3]
+	}};
+}
+
+static inline v4sf VM_ABS(v4sf a) {
+	return v4sf{{
+		(a.v[0] < 0) ? -a.v[0] : a.v[0],
+		(a.v[1] < 0) ? -a.v[1] : a.v[1],
+		(a.v[2] < 0) ? -a.v[2] : a.v[2],
+		(a.v[3] < 0) ? -a.v[3] : a.v[3]
+	}};
+}
+
+static inline v4sf VM_MAX(v4sf a, v4sf b) {
+	return v4sf{{
+		(a.v[0] < b.v[0]) ? b.v[0] : a.v[0],
+		(a.v[1] < b.v[1]) ? b.v[1] : a.v[1],
+		(a.v[2] < b.v[2]) ? b.v[2] : a.v[2],
+		(a.v[3] < b.v[3]) ? b.v[3] : a.v[3]
+	}};
+}
+
+static inline v4sf VM_MIN(v4sf a, v4sf b) {
+	return v4sf{{
+		(a.v[0] > b.v[0]) ? b.v[0] : a.v[0],
+		(a.v[1] > b.v[1]) ? b.v[1] : a.v[1],
+		(a.v[2] > b.v[2]) ? b.v[2] : a.v[2],
+		(a.v[3] > b.v[3]) ? b.v[3] : a.v[3]
+	}};
+}
+
+static inline v4sf VM_INVERT(v4sf a) {
+	return v4sf{{
+		(1.0f/a.v[0]),
+		(1.0f/a.v[1]),
+		(1.0f/a.v[2]),
+		(1.0f/a.v[3])
+	}};
+}
+
+static inline v4sf VM_SQRT(v4sf a) {
+	return v4sf{{
+		std::sqrt(a.v[0]),
+		std::sqrt(a.v[1]),
+		std::sqrt(a.v[2]),
+		std::sqrt(a.v[3])
+	}};
+}
+
+static inline v4si VM_INT(v4sf a) {
+	return v4si{{
+		int32(a.v[0]),
+		int32(a.v[1]),
+		int32(a.v[2]),
+		int32(a.v[3])
+	}};
+}
+
+static inline v4sf VM_IFLOAT(v4si a) {
+	return v4sf{{
+		float(a.v[0]),
+		float(a.v[1]),
+		float(a.v[2]),
+		float(a.v[3])
+	}};
+}
+
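+// These mirror the SSE path's floor-mode setup/teardown hooks that bracket
+// VM_FLOOR; the scalar fallback has no rounding-mode state to manage, so
+// both are empty.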
+static SYS_FORCE_INLINE void VM_P_FLOOR() {}
+
+static SYS_FORCE_INLINE int32 singleIntFloor(float f) {
+	// Casting to int32 usually truncates toward zero, instead of rounding down,
+	// so subtract one if the result is above f.
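+	// For example, f = -1.5: int32(f) truncates to -1, and float(-1) > -1.5,
+	// so one is subtracted, giving floor(-1.5) = -2.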
+	int32 i = int32(f);
+	i -= (float(i) > f);
+	return i;
+}
+static inline v4si VM_FLOOR(v4sf a) {
+	return v4si{{
+		singleIntFloor(a.v[0]),
+		singleIntFloor(a.v[1]),
+		singleIntFloor(a.v[2]),
+		singleIntFloor(a.v[3])
+	}};
+}
+
+static SYS_FORCE_INLINE void VM_E_FLOOR() {}
+
+static SYS_FORCE_INLINE bool vm_allbits(v4si a) {
+	return (
+		(a.v[0] == -1) && 
+		(a.v[1] == -1) && 
+		(a.v[2] == -1) && 
+		(a.v[3] == -1)
+	);
+}
+
+int SYS_FORCE_INLINE _mm_movemask_ps(const v4si& v) {
+	return (
+		int(v.v[0] < 0) |
+		(int(v.v[1] < 0)<<1) |
+		(int(v.v[2] < 0)<<2) |
+		(int(v.v[3] < 0)<<3)
+	);
+}
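+// e.g. v = {-1, 0, -1, 0} gives 0b0101 == 5, matching SSE's per-lane
+// sign-bit mask convention.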
+
+int SYS_FORCE_INLINE _mm_movemask_ps(const v4sf& v) {
+	// Use std::signbit just in case it needs to distinguish between +0 and -0
+	// or between positive and negative NaN values (e.g. these could really
+	// be integers instead of floats).
+	return (
+		int(std::signbit(v.v[0])) |
+		(int(std::signbit(v.v[1]))<<1) |
+		(int(std::signbit(v.v[2]))<<2) |
+		(int(std::signbit(v.v[3]))<<3)
+	);
+}
+}}
+#endif
+#endif
 /*
  * Copyright (c) 2018 Side Effects Software Inc.
  *
@@ -770,6 +1172,8 @@ vm_allbits(const v4si &a)
 //#define FORCE_NON_SIMD


+
+
 namespace igl { namespace FastWindingNumber {

 class v4uf;
@@ -986,11 +1390,13 @@ public:
         return base;
     }

+#ifdef __SSE__
     template <int A, int B, int C, int D>
     SYS_FORCE_INLINE v4uf swizzle() const
     {
         return VM_SHUFFLE<A,B,C,D>(vector);
     }
+#endif

     SYS_FORCE_INLINE v4uu isFinite() const
     {
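A quick smoke test of the new scalar path might look like the sketch below (hypothetical test file, assuming libigl's include directory is on the include path; on a target where __SSE__ is defined, the same calls compile against the intrinsic versions instead):

    #include <cstdio>
    #include <igl/FastWindingNumberForSoups.h>

    int main() {
      using namespace igl::FastWindingNumber;
      v4sf a = VM_SPLATS(1.0f, 2.0f, 3.0f, 4.0f);
      v4sf b = VM_SPLATS(0.5f);                  // broadcast one value to all lanes
      v4sf r = VM_MADD(a, b, VM_SPLATS(1.0f));   // per lane: a*b + 1
      for (int i = 0; i < 4; ++i)
        std::printf("%g ", VM_EXTRACT(r, i));    // expected: 1.5 2 2.5 3
      std::printf("\n");
      return 0;
    }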