Browse Source

Added findLSB tests, faster ceilPowerOfTwo

Christophe Riccio 11 years ago
parent
commit
3420d691f5

+ 29 - 29
glm/gtc/integer.inl

@@ -29,43 +29,43 @@
 namespace glm{
 namespace detail
 {
-	template <typename T, precision P, template <class, precision> class vecType, bool isSigned = true>
+	template <typename T, precision P, template <typename, precision> class vecType, bool compute = false>
+	struct compute_ceilShift // Optional shift-and-OR step; this primary template (compute == false) is the no-op variant.
+	{
+		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T) // The shift amount is unnamed and ignored here.
+		{
+			return v; // Hand the input back unchanged.
+		}
+	};
+
+	template <typename T, precision P, template <typename, precision> class vecType>
+	struct compute_ceilShift<T, P, vecType, true> // Specialization for compute == true: actually performs the step.
+	{
+		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T Shift)
+		{
+			return v | (v >> Shift); // OR v with a copy of itself shifted right by Shift bits.
+		}
+	};
+
+	template <typename T, precision P, template <typename, precision> class vecType, bool isSigned = true>
 	struct compute_ceilPowerOfTwo{}; // Primary template is empty and has no call(); specializations supply it.
 
-	template <typename T, precision P, template <class, precision> class vecType>
+	template <typename T, precision P, template <typename, precision> class vecType>
 	struct compute_ceilPowerOfTwo<T, P, vecType, false> // Specialization selected when isSigned == false.
 	{
-		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v)
+		GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & x)
 		{
-			GLM_STATIC_ASSERT(
-				!std::numeric_limits<genFIType>::is_iec559,
-				"'ceilPowerOfTwo' only accept integer scalar or vector inputs");
-
-			template <typename T, precision P, template <class, precision> class vecType, bool compute = false>
-			struct compute_ceil_shift
-			{
-				GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T)
-				{
-					return v;
-				}
-			};
-
-			template <typename T, precision P, template <class, precision> class vecType, bool compute = true>
-			struct compute_ceil_shift
-			{
-				GLM_FUNC_QUALIFIER static vecType<T, P> call(vecType<T, P> const & v, T Shift)
-				{
-					return v | (v >> Shift);
-				}
-			};
+			GLM_STATIC_ASSERT(!std::numeric_limits<T>::is_iec559, "'ceilPowerOfTwo' only accept integer scalar or vector inputs");
+
+			vecType<T, P> v(x); // Mutable working copy of the input.
 
 			v = v - static_cast<T>(1); // Subtract one first so a value that is already a power of two maps to itself.
 			v = v | (v >> static_cast<T>(1)); // Copy the highest set bit into every lower bit position...
 			v = v | (v >> static_cast<T>(2));
 			v = v | (v >> static_cast<T>(4));
-			v = compute_ceil_shift<T, P, vecType, sizeof(T) >= 2>::call(v, 8);
-			v = compute_ceil_shift<T, P, vecType, sizeof(T) >= 4>::call(v, 16);
-			v = compute_ceil_shift<T, P, vecType, sizeof(T) >= 8>::call(v, 32);
+			v = compute_ceilShift<T, P, vecType, sizeof(T) >= 2>::call(v, 8); // ...the wider shifts only apply when T has at least 2 bytes,
+			v = compute_ceilShift<T, P, vecType, sizeof(T) >= 4>::call(v, 16); // at least 4 bytes,
+			v = compute_ceilShift<T, P, vecType, sizeof(T) >= 8>::call(v, 32); // or at least 8 bytes; otherwise they are no-ops.
 			return v + static_cast<T>(1); // All bits below the top one are set, so adding one yields a single set bit.
 		}
 	};
@@ -94,13 +94,13 @@ namespace detail
 	template <typename genType>
 	GLM_FUNC_QUALIFIER genType ceilPowerOfTwo(genType value) // Scalar overload.
 	{
-		return isPowerOfTwo(value) ? value : highestBitValue(value) << 1;
+		return detail::compute_ceilPowerOfTwo<genType, defaultp, tvec1, std::numeric_limits<genType>::is_signed>::call(tvec1<genType, defaultp>(value)).x; // Wrap the scalar in a tvec1, run the vector implementation, then read back .x.
 	}
 
 	template <typename T, precision P, template <typename, precision> class vecType>
 	GLM_FUNC_QUALIFIER vecType<T, P> ceilPowerOfTwo(vecType<T, P> const & v) // Vector overload.
 	{
-		return detail::functor1<T, T, P, vecType>::call(ceilPowerOfTwo, v);
+		return detail::compute_ceilPowerOfTwo<T, P, vecType, std::numeric_limits<T>::is_signed>::call(v); // Dispatch on the signedness of T.
 	}
 
 	///////////////////

+ 1 - 0
test/core/CMakeLists.txt

@@ -20,6 +20,7 @@ glmCreateTestGTC(core_func_common)
 glmCreateTestGTC(core_func_exponential)
 glmCreateTestGTC(core_func_geometric)
 glmCreateTestGTC(core_func_integer)
+glmCreateTestGTC(core_func_integer_find_lsb)
 glmCreateTestGTC(core_func_matrix)
 glmCreateTestGTC(core_func_noise)
 glmCreateTestGTC(core_func_packing)

+ 300 - 43
test/core/core_func_integer.cpp

@@ -232,58 +232,314 @@ namespace findMSB
 		genType		Return;
 	};
 
-	type<int> const DataI32[] =
+	template <typename genIUType>
+	GLM_FUNC_QUALIFIER int findMSB_095(genIUType Value) // Loop-based findMSB; presumably the GLM 0.9.5 version, timed by perf_950().
 	{
-		{0x00000000, -1},
-		{0x00000001,  0},
-		{0x00000002,  1},
-		{0x00000003,  1},
-		{0x00000004,  2},
-		{0x00000005,  2},
-		{0x00000007,  2},
-		{0x00000008,  3},
-		{0x00000010,  4},
-		{0x00000020,  5},
-		{0x00000040,  6},
-		{0x00000080,  7},
-		{0x00000100,  8},
-		{0x00000200,  9},
-		{0x00000400, 10},
-		{0x00000800, 11},
-		{0x00001000, 12},
-		{0x00002000, 13},
-		{0x00004000, 14},
-		{0x00008000, 15},
-		{0x00010000, 16},
-		{0x00020000, 17},
-		{0x00040000, 18},
-		{0x00080000, 19},
-		{0x00100000, 20},
-		{0x00200000, 21},
-		{0x00400000, 22},
-		{0x00800000, 23},
-		{0x01000000, 24},
-		{0x02000000, 25},
-		{0x04000000, 26},
-		{0x08000000, 27},
-		{0x10000000, 28},
-		{0x20000000, 29},
-		{0x40000000, 30}
-	};
+		GLM_STATIC_ASSERT(std::numeric_limits<genIUType>::is_integer, "'findMSB' only accept integer values");
+		
+		if(Value == genIUType(0) || Value == genIUType(-1)) // All bits clear or all bits set: no answer, report -1.
+			return -1;
+		else if(Value > 0)
+		{
+			genIUType Bit = genIUType(-1); // Starts at -1 so the first loop iteration brings it to 0.
+			for(genIUType tmp = Value; tmp > 0; tmp >>= 1, ++Bit){} // Count how many right shifts empty the value.
+			return Bit;
+		}
+		else //if(Value < 0)
+		{
+			int const BitCount(sizeof(genIUType) * 8);
+			int MostSignificantBit(-1);
+			for(int BitIndex(0); BitIndex < BitCount; ++BitIndex)
+				MostSignificantBit = (Value & (1 << BitIndex)) ? MostSignificantBit : BitIndex; // Remember the highest index whose bit is 0.
+			assert(MostSignificantBit >= 0);
+			return MostSignificantBit;
+		}
+	}
 
-	int test()
+	// findMSB candidate built on the "nlz1" leading-zero count.
+	//
+	// Returns the index of the most significant set bit of x, or -1 when
+	// x == 0, which is what the expected-value tables in this file hold.
+	// The value is processed as an unsigned bit pattern and the mask
+	// constants assume a 32-bit word.
+	template <typename genIUType>
+	GLM_FUNC_QUALIFIER int findMSB_nlz1(genIUType x)
+	{
+		GLM_STATIC_ASSERT(std::numeric_limits<genIUType>::is_integer, "'findMSB' only accept integer values");
+
+		if (x == 0)
+			return -1;
+
+		// Work on an unsigned copy so the left shifts below are well defined
+		// even when genIUType is a signed type.
+		unsigned int v = static_cast<unsigned int>(x);
+
+		// n accumulates the number of leading zero bits.
+		int n = 0;
+		if (v <= 0x0000FFFFu) {n = n +16; v = v <<16;}
+		if (v <= 0x00FFFFFFu) {n = n + 8; v = v << 8;}
+		if (v <= 0x0FFFFFFFu) {n = n + 4; v = v << 4;}
+		if (v <= 0x3FFFFFFFu) {n = n + 2; v = v << 2;}
+		if (v <= 0x7FFFFFFFu) {n = n + 1;}
+
+		// Convert the leading-zero count into a bit index.
+		return 31 - n;
+	}
+
+	// findMSB candidate built on the "nlz2" leading-zero count.
+	//
+	// Returns the index of the most significant set bit of x, or -1 when
+	// x == 0, which is what the expected-value tables in this file hold.
+	int findMSB_nlz2(unsigned int x)
+	{
+		unsigned int y;
+		int n = 32;
+
+		// Binary search: whenever the upper half is non-zero, keep it and
+		// lower the running leading-zero estimate by the half's width.
+		y = x >>16;  if (y != 0) {n = n -16;  x = y;}
+		y = x >> 8;  if (y != 0) {n = n - 8;  x = y;}
+		y = x >> 4;  if (y != 0) {n = n - 4;  x = y;}
+		y = x >> 2;  if (y != 0) {n = n - 2;  x = y;}
+		y = x >> 1;
+
+		// x is now 0..3; finish the leading-zero count from its last two bits.
+		int const LeadingZeros = (y != 0) ? n - 2 : n - static_cast<int>(x);
+
+		// Convert the leading-zero count into a bit index.
+		return 31 - LeadingZeros;
+	}
+
+	// Times findMSB_095 over a table of (value, expected result) pairs,
+	// repeated 10,000,000 times, prints the elapsed std::clock() ticks and
+	// returns the number of mismatches.
+	int perf_950()
 	{
+		type<glm::uint> const Data[] =
+		{
+			// The expected result is stored as glm::uint; the explicit cast
+			// keeps -1 from being a narrowing conversion in brace-initialization.
+			{0x00000000, static_cast<glm::uint>(-1)},
+			{0x00000001,  0},
+			{0x00000002,  1},
+			{0x00000003,  1},
+			{0x00000004,  2},
+			{0x00000005,  2},
+			{0x00000007,  2},
+			{0x00000008,  3},
+			{0x00000010,  4},
+			{0x00000020,  5},
+			{0x00000040,  6},
+			{0x00000080,  7},
+			{0x00000100,  8},
+			{0x00000200,  9},
+			{0x00000400, 10},
+			{0x00000800, 11},
+			{0x00001000, 12},
+			{0x00002000, 13},
+			{0x00004000, 14},
+			{0x00008000, 15},
+			{0x00010000, 16},
+			{0x00020000, 17},
+			{0x00040000, 18},
+			{0x00080000, 19},
+			{0x00100000, 20},
+			{0x00200000, 21},
+			{0x00400000, 22},
+			{0x00800000, 23},
+			{0x01000000, 24},
+			{0x02000000, 25},
+			{0x04000000, 26},
+			{0x08000000, 27},
+			{0x10000000, 28},
+			{0x20000000, 29},
+			{0x40000000, 30}
+		};
+
 		int Error(0);
 
-		for(std::size_t i = 0; i < sizeof(DataI32) / sizeof(type<int>); ++i)
+		std::clock_t Timestamps1 = std::clock();
+
+		// Element count is derived from the array itself so it cannot drift
+		// from the element type.
+		for(std::size_t k = 0; k < 10000000; ++k)
+		for(std::size_t i = 0; i < sizeof(Data) / sizeof(Data[0]); ++i)
 		{
-			int Result = glm::findMSB(DataI32[i].Value);
-			Error += DataI32[i].Return == Result ? 0 : 1;
+			int Result = findMSB_095(Data[i].Value);
+			Error += Data[i].Return == Result ? 0 : 1;
+		}
+
+		std::clock_t Timestamps2 = std::clock();
+
+		// std::clock_t is not int; convert explicitly to match "%d".
+		printf("findMSB - 0.9.5: %d clocks\n", static_cast<int>(Timestamps2 - Timestamps1));
+
+		return Error;
+	}
+
+	// Times findMSB_nlz1 over a table of (value, expected result) pairs,
+	// repeated 10,000,000 times, prints the elapsed std::clock() ticks and
+	// returns the number of mismatches.
+	int perf_ops()
+	{
+		type<int> const Data[] =
+		{
+			{0x00000000, -1},
+			{0x00000001,  0},
+			{0x00000002,  1},
+			{0x00000003,  1},
+			{0x00000004,  2},
+			{0x00000005,  2},
+			{0x00000007,  2},
+			{0x00000008,  3},
+			{0x00000010,  4},
+			{0x00000020,  5},
+			{0x00000040,  6},
+			{0x00000080,  7},
+			{0x00000100,  8},
+			{0x00000200,  9},
+			{0x00000400, 10},
+			{0x00000800, 11},
+			{0x00001000, 12},
+			{0x00002000, 13},
+			{0x00004000, 14},
+			{0x00008000, 15},
+			{0x00010000, 16},
+			{0x00020000, 17},
+			{0x00040000, 18},
+			{0x00080000, 19},
+			{0x00100000, 20},
+			{0x00200000, 21},
+			{0x00400000, 22},
+			{0x00800000, 23},
+			{0x01000000, 24},
+			{0x02000000, 25},
+			{0x04000000, 26},
+			{0x08000000, 27},
+			{0x10000000, 28},
+			{0x20000000, 29},
+			{0x40000000, 30}
+		};
+
+		int Error(0);
+
+		std::clock_t Timestamps1 = std::clock();
+
+		for(std::size_t k = 0; k < 10000000; ++k)
+		for(std::size_t i = 0; i < sizeof(Data) / sizeof(Data[0]); ++i)
+		{
+			int Result = findMSB_nlz1(Data[i].Value);
+			Error += Data[i].Return == Result ? 0 : 1;
+		}
+
+		std::clock_t Timestamps2 = std::clock();
+
+		// std::clock_t is not int; convert explicitly to match "%d".
+		printf("findMSB - nlz1: %d clocks\n", static_cast<int>(Timestamps2 - Timestamps1));
+
+		return Error;
+	}
+
+
+	// Checks glm::findMSB against a table of (value, expected result) pairs
+	// and returns the number of mismatches.
+	int test_findMSB()
+	{
+		type<glm::uint> const Data[] =
+		{
+			// The expected result is stored as glm::uint; the explicit cast
+			// keeps -1 from being a narrowing conversion in brace-initialization.
+			{0x00000000, static_cast<glm::uint>(-1)},
+			{0x00000001,  0},
+			{0x00000002,  1},
+			{0x00000003,  1},
+			{0x00000004,  2},
+			{0x00000005,  2},
+			{0x00000007,  2},
+			{0x00000008,  3},
+			{0x00000010,  4},
+			{0x00000020,  5},
+			{0x00000040,  6},
+			{0x00000080,  7},
+			{0x00000100,  8},
+			{0x00000200,  9},
+			{0x00000400, 10},
+			{0x00000800, 11},
+			{0x00001000, 12},
+			{0x00002000, 13},
+			{0x00004000, 14},
+			{0x00008000, 15},
+			{0x00010000, 16},
+			{0x00020000, 17},
+			{0x00040000, 18},
+			{0x00080000, 19},
+			{0x00100000, 20},
+			{0x00200000, 21},
+			{0x00400000, 22},
+			{0x00800000, 23},
+			{0x01000000, 24},
+			{0x02000000, 25},
+			{0x04000000, 26},
+			{0x08000000, 27},
+			{0x10000000, 28},
+			{0x20000000, 29},
+			{0x40000000, 30}
+		};
+
+		int Error(0);
+
+		// Element count is derived from the array itself so it cannot drift
+		// from the element type.
+		for(std::size_t i = 0; i < sizeof(Data) / sizeof(Data[0]); ++i)
+		{
+			int Result = glm::findMSB(Data[i].Value);
+			Error += Data[i].Return == Result ? 0 : 1;
 			assert(!Error);
 		}
 
 		return Error;
 	}
+
+	// Checks findMSB_nlz2 against the findMSB table of (value, expected
+	// result) pairs and returns the number of mismatches.
+	int test_nlz1()
+	{
+		type<glm::uint> const Data[] =
+		{
+			// The expected result is stored as glm::uint; the explicit cast
+			// keeps -1 from being a narrowing conversion in brace-initialization.
+			{0x00000000, static_cast<glm::uint>(-1)},
+			{0x00000001,  0},
+			{0x00000002,  1},
+			{0x00000003,  1},
+			{0x00000004,  2},
+			{0x00000005,  2},
+			{0x00000007,  2},
+			{0x00000008,  3},
+			{0x00000010,  4},
+			{0x00000020,  5},
+			{0x00000040,  6},
+			{0x00000080,  7},
+			{0x00000100,  8},
+			{0x00000200,  9},
+			{0x00000400, 10},
+			{0x00000800, 11},
+			{0x00001000, 12},
+			{0x00002000, 13},
+			{0x00004000, 14},
+			{0x00008000, 15},
+			{0x00010000, 16},
+			{0x00020000, 17},
+			{0x00040000, 18},
+			{0x00080000, 19},
+			{0x00100000, 20},
+			{0x00200000, 21},
+			{0x00400000, 22},
+			{0x00800000, 23},
+			{0x01000000, 24},
+			{0x02000000, 25},
+			{0x04000000, 26},
+			{0x08000000, 27},
+			{0x10000000, 28},
+			{0x20000000, 29},
+			{0x40000000, 30}
+		};
+
+		int Error(0);
+
+		// Element count is derived from the array itself so it cannot drift
+		// from the element type.
+		for(std::size_t i = 0; i < sizeof(Data) / sizeof(Data[0]); ++i)
+		{
+			int Result = findMSB_nlz2(Data[i].Value);
+			Error += Data[i].Return == Result ? 0 : 1;
+		}
+
+		return Error;
+	}
+
+	int test() // Runs the findMSB correctness checks and returns the summed mismatch count.
+	{
+		int Error(0);
+
+		Error += test_findMSB();
+		Error += test_nlz1();
+
+		return Error;
+	}
+
+	int perf() // Runs the findMSB timing loops and returns the summed mismatch count.
+	{
+		int Error(0);
+
+		Error += perf_950();
+		Error += perf_ops();
+
+		return Error;
+	}
 }//findMSB
 
 namespace findLSB
@@ -690,6 +946,9 @@ int main()
 {
 	int Error = 0;
 
+	Error += ::findMSB::test();
+	Error += ::findMSB::perf();
+	Error += ::findLSB::test();
 	Error += ::umulExtended::test();
 	Error += ::imulExtended::test();
 	Error += ::uaddCarry::test();
@@ -699,8 +958,6 @@ int main()
 	Error += ::bitfieldReverse::test();
 	Error += ::bitCount::test();
 	Error += ::bitCount::perf();
-	Error += ::findMSB::test();
-	Error += ::findLSB::test();
 
 	return Error;
 }

+ 443 - 0
test/core/core_func_integer_find_lsb.cpp

@@ -0,0 +1,443 @@
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// OpenGL Mathematics Copyright (c) 2005 - 2014 G-Truc Creation (www.g-truc.net)
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Created : 2014-10-27
+// Updated : 2014-10-27
+// Licence : This source is under MIT licence
+// File    : test/core/core_func_integer_find_lsb.cpp
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+// This has the programs for computing the number of leading zeros
+// in a word.
+// Max line length is 57, to fit in hacker.book.
+// Compile with g++, not gcc.
+#include <cstdio>
+#include <cstdlib>     // To define "exit", req'd by XLC.
+#include <ctime>     // To define "clock" and "clock_t", used for timing.
+
+#define LE 1            // 1 for little-endian, 0 for big-endian.
+
+int pop(unsigned x) { // Population count: number of 1-bits in x (masks are 32 bits wide).
+   x = x - ((x >> 1) & 0x55555555); // Each 2-bit field now holds the count of its own set bits.
+   x = (x & 0x33333333) + ((x >> 2) & 0x33333333); // Add neighbouring 2-bit counts into 4-bit fields.
+   x = (x + (x >> 4)) & 0x0F0F0F0F; // Add neighbouring nibbles into one count per byte.
+   x = x + (x << 8);
+   x = x + (x << 16); // After both adds the top byte holds the sum of all four byte counts.
+   return x >> 24;
+}
+
+int nlz1(unsigned x) { // Number of leading zeros of x; returns 32 for x == 0.
+   int n;
+
+   if (x == 0) return(32);
+   n = 0;
+   if (x <= 0x0000FFFF) {n = n +16; x = x <<16;} // Upper 16 bits are clear: count them and shift them out.
+   if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;}
+   if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;}
+   if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;}
+   if (x <= 0x7FFFFFFF) {n = n + 1;} // Last step only counts; nothing is left to shift.
+   return n;
+}
+
+int nlz1a(unsigned x) { // nlz1 variant that tests the upper bits with right shifts instead of compares.
+   int n;
+
+/* if (x == 0) return(32); */
+   if ((int)x <= 0) return (~x >> 26) & 32; // x == 0 gives 32; a set top bit gives 0.
+   n = 1; // Starts at 1; the final step subtracts the top bit.
+   if ((x >> 16) == 0) {n = n +16; x = x <<16;}
+   if ((x >> 24) == 0) {n = n + 8; x = x << 8;}
+   if ((x >> 28) == 0) {n = n + 4; x = x << 4;}
+   if ((x >> 30) == 0) {n = n + 2; x = x << 2;}
+   n = n - (x >> 31);
+   return n;
+}
+// On basic Risc, 12 to 20 instructions.
+
+int nlz2(unsigned x) { // Number of leading zeros via a binary search with right shifts.
+   unsigned y;
+   int n;
+
+   n = 32;
+   y = x >>16;  if (y != 0) {n = n -16;  x = y;} // Upper half non-zero: keep it and drop 16 from the count.
+   y = x >> 8;  if (y != 0) {n = n - 8;  x = y;}
+   y = x >> 4;  if (y != 0) {n = n - 4;  x = y;}
+   y = x >> 2;  if (y != 0) {n = n - 2;  x = y;}
+   y = x >> 1;  if (y != 0) return n - 2;
+   return n - x; // x is 0 or 1 here.
+}
+
+// As above but coded as a loop for compactness:
+// 23 to 33 basic Risc instructions.
+int nlz2a(unsigned x) { // Loop form of the same binary search.
+   unsigned y;
+   int n, c;
+
+   n = 32;
+   c = 16; // Current shift width: 16, 8, 4, 2, 1.
+   do {
+      y = x >> c;  if (y != 0) {n = n - c;  x = y;}
+      c = c >> 1;
+   } while (c != 0);
+   return n - x;
+}
+
+int nlz3(int x) { // Counts by shifting x left and a copy y right, one bit per iteration.
+   int y, n;
+
+   n = 0;
+   y = x;
+L: if (x < 0) return n; // Sign bit of x reached: n left shifts were needed.
+   if (y == 0) return 32 - n; // y ran out of bits after n right shifts.
+   n = n + 1;
+   x = x << 1;
+   y = y >> 1;
+   goto L;
+}
+
+int nlz4(unsigned x) { // Variant written without any conditional statements.
+   int y, m, n;
+
+   y = -(x >> 16);      // If left half of x is 0,
+   m = (y >> 16) & 16;  // set n = 16.  If left half
+   n = 16 - m;          // is nonzero, set n = 0 and
+   x = x >> m;          // shift x right 16.
+                        // Now x is of the form 0000xxxx.
+   y = x - 0x100;       // If positions 8-15 are 0,
+   m = (y >> 16) & 8;   // add 8 to n and shift x left 8.
+   n = n + m;
+   x = x << m;
+
+   y = x - 0x1000;      // If positions 12-15 are 0,
+   m = (y >> 16) & 4;   // add 4 to n and shift x left 4.
+   n = n + m;
+   x = x << m;
+
+   y = x - 0x4000;      // If positions 14-15 are 0,
+   m = (y >> 16) & 2;   // add 2 to n and shift x left 2.
+   n = n + m;
+   x = x << m;
+
+   y = x >> 14;         // Set y = 0, 1, 2, or 3.
+   m = y & ~(y >> 1);   // Set m = 0, 1, 2, or 2 resp.
+   return n + 2 - m;
+}
+
+int nlz5(unsigned x) { // Number of leading zeros computed with pop().
+   int pop(unsigned x);
+
+   x = x | (x >> 1); // Copy the highest set bit into every lower position.
+   x = x | (x >> 2);
+   x = x | (x >> 4);
+   x = x | (x >> 8);
+   x = x | (x >>16);
+   return pop(~x); // Only the leading zeros are still 0, so count the 1-bits of the complement.
+}
+
+/* The four programs below are not valid ANSI C programs.  This is
+because they refer to the same storage locations as two different types.
+However, they work with xlc/AIX, gcc/AIX, and gcc/NT.  If you try to
+code them more compactly by declaring a variable xx to be "double," and
+then using
+
+   n = 1054 - (*((unsigned *)&xx + LE) >> 20);
+
+then you are violating not only the rule above, but also the ANSI C
+rule that pointer arithmetic can be performed only on pointers to
+array elements.
+   When coded with the above statement, the program fails with xlc,
+gcc/AIX, and gcc/NT, at some optimization levels.
+   BTW, these programs use the "anonymous union" feature of C++, not
+available in C. */
+
+int nlz6(unsigned k) { // Derives the count from the bits of (double)k + 0.5.
+   union {
+      unsigned asInt[2];
+      double asDouble;
+   };
+   int n;
+
+   asDouble = (double)k + 0.5;
+   n = 1054 - (asInt[LE] >> 20); // Uses the top 12 bits of word LE; presumably the IEEE-754 sign/exponent field - TODO confirm.
+   return n;
+}
+
+int nlz7(unsigned k) { // Like nlz6 but converts k without adding 0.5, then adjusts n afterwards.
+   union {
+      unsigned asInt[2];
+      double asDouble;
+   };
+   int n;
+
+   asDouble = (double)k;
+   n = 1054 - (asInt[LE] >> 20);
+   n = (n & 31) + (n >> 9); // Post-correction; presumably handles k == 0 - TODO confirm.
+   return n;
+}
+
+   /* In single precision, round-to-nearest mode, the basic method fails for:
+   k = 0, k = 01FFFFFF, 03FFFFFE <= k <= 03FFFFFF,
+                        07FFFFFC <= k <= 07FFFFFF,
+                        0FFFFFF8 <= k <= 0FFFFFFF,
+                                   ...
+                        7FFFFFC0 <= k <= 7FFFFFFF.
+                        FFFFFF80 <= k <= FFFFFFFF.
+   For k = 0 it gives 158, and for the other values it is too low by 1. */
+
+int nlz8(unsigned k) { // Single-precision counterpart: reads the bits of (float)k + 0.5f.
+   union {
+      unsigned asInt;
+      float asFloat;
+   };
+   int n;
+
+   k = k & ~(k >> 1);           /* Fix problem with rounding. */
+   asFloat = (float)k + 0.5f;
+   n = 158 - (asInt >> 23);
+   return n;
+}
+
+/* The example below shows how to make a macro for nlz.  It uses an
+extension to the C and C++ languages that is provided by the GNU C/C++
+compiler, namely, that of allowing statements and declarations in
+expressions (see "Using and Porting GNU CC", by Richard M. Stallman
+(1998)).  The underscores are necessary to protect against the
+possibility that the macro argument will conflict with one of its local
+variables, e.g., NLZ(k).
+   NOTE: no NLZ macro is defined in this file; the code that follows
+this comment is the plain function nlz9. */
+
+int nlz9(unsigned k) { // Like nlz8 but converts k without adding 0.5f, then adjusts n afterwards.
+   union {
+      unsigned asInt;
+      float asFloat;
+   };
+   int n;
+
+   k = k & ~(k >> 1);           /* Fix problem with rounding. */
+   asFloat = (float)k;
+   n = 158 - (asInt >> 23);
+   n = (n & 31) + (n >> 6);     /* Fix problem with k = 0. */
+   return n;
+}
+
+/* Below are three nearly equivalent programs for computing the number
+of leading zeros in a word. This material is not in HD, but may be in a
+future edition.
+   Immediately below is Robert Harley's algorithm, found at the
+comp.arch newsgroup entry dated 7/12/96, pointed out to me by Norbert
+Juffa.
+   Table entries marked "u" are unused. 14 ops including a multiply,
+plus an indexed load.
+   The smallest multiplier that works is 0x045BCED1 = 17*65*129*513 (all
+of form 2**k + 1). There are no multipliers of three terms of the form
+2**k +- 1 that work, with a table size of 64 or 128. There are some,
+with a table size of 64, if you precede the multiplication with x = x -
+(x >> 1), but that seems less elegant. There are also some if you use a
+table size of 256, the smallest is 0x01033CBF = 65*255*1025 (this would
+save two instructions in the form of this algorithm with the
+multiplication expanded into shifts and adds, but the table size is
+getting a bit large). */
+
+#define u 99
+int nlz10(unsigned x) { // Table lookup: the top 6 bits of a product index a 64-entry table.
+
+   static char table[64] =
+     {32,31, u,16, u,30, 3, u,  15, u, u, u,29,10, 2, u,
+       u, u,12,14,21, u,19, u,   u,28, u,25, u, 9, 1, u,
+      17, u, 4, u, u, u,11, u,  13,22,20, u,26, u, u,18,
+       5, u, u,23, u,27, u, 6,   u,24, 7, u, 8, u, 0, u};
+
+   x = x | (x >> 1);    // Propagate leftmost
+   x = x | (x >> 2);    // 1-bit to the right.
+   x = x | (x >> 4);
+   x = x | (x >> 8);
+   x = x | (x >>16);
+   x = x*0x06EB14F9;    // Multiplier is 7*255**3.
+   return table[x >> 26];
+}
+
+/* Harley's algorithm with multiply expanded.
+19 elementary ops plus an indexed load. */
+
+int nlz10a(unsigned x) { // Same table as nlz10; the multiply is written out as shifts and subtracts.
+
+   static char table[64] =
+     {32,31, u,16, u,30, 3, u,  15, u, u, u,29,10, 2, u,
+       u, u,12,14,21, u,19, u,   u,28, u,25, u, 9, 1, u,
+      17, u, 4, u, u, u,11, u,  13,22,20, u,26, u, u,18,
+       5, u, u,23, u,27, u, 6,   u,24, 7, u, 8, u, 0, u};
+
+   x = x | (x >> 1);    // Propagate leftmost
+   x = x | (x >> 2);    // 1-bit to the right.
+   x = x | (x >> 4);
+   x = x | (x >> 8);
+   x = x | (x >> 16);
+   x = (x << 3) - x;    // Multiply by 7.
+   x = (x << 8) - x;    // Multiply by 255.
+   x = (x << 8) - x;    // Again.
+   x = (x << 8) - x;    // Again.
+   return table[x >> 26];
+}
+
+/* Julius Goryavsky's version of Harley's algorithm.
+17 elementary ops plus an indexed load, if the machine
+has "and not." */
+
+int nlz10b(unsigned x) { // Table lookup with its own table and multiplier; the last propagation step is an AND-NOT.
+
+   static char table[64] =
+     {32,20,19, u, u,18, u, 7,  10,17, u, u,14, u, 6, u,
+       u, 9, u,16, u, u, 1,26,   u,13, u, u,24, 5, u, u,
+       u,21, u, 8,11, u,15, u,   u, u, u, 2,27, 0,25, u,
+      22, u,12, u, u, 3,28, u,  23, u, 4,29, u, u,30,31};
+
+   x = x | (x >> 1);    // Propagate leftmost
+   x = x | (x >> 2);    // 1-bit to the right.
+   x = x | (x >> 4);
+   x = x | (x >> 8);
+   x = x & ~(x >> 16);
+   x = x*0xFD7049FF;    // Activate this line or the following 3.
+// x = (x << 9) - x;    // Multiply by 511.
+// x = (x << 11) - x;   // Multiply by 2047.
+// x = (x << 14) - x;   // Multiply by 16383.
+   return table[x >> 26];
+}
+
+int errors; // Number of mismatches reported through error().
+void error(int x, int y) { // Records one failure and prints the input x and the result y that was obtained.
+   errors = errors + 1;
+   printf("Error for x = %08x, got %d\n", x, y);
+}
+
+// Runs one leading-zero-count implementation `fn` over the (input, expected)
+// pairs stored back to back in `data` (`count` entries in total), repeating
+// the whole table `iterations` times. Every mismatch is reported through
+// error(); the elapsed std::clock() ticks are printed under `name`.
+template <typename Fn>
+static void bench(char const * name, Fn fn, unsigned const * data, int count, std::size_t iterations)
+{
+	std::clock_t const TimestampBeg = std::clock();
+	for (std::size_t k = 0; k < iterations; ++k)
+	for (int i = 0; i < count; i += 2)
+	{
+		int const Result = fn(data[i]);
+		if (Result != static_cast<int>(data[i + 1]))
+			error(static_cast<int>(data[i]), Result);
+	}
+	std::clock_t const TimestampEnd = std::clock();
+
+	// std::clock_t is not int; convert explicitly to match "%d".
+	printf("%s: %d clocks\n", name, static_cast<int>(TimestampEnd - TimestampBeg));
+}
+
+// Checks and times every nlz implementation in this file against a table of
+// (input, expected leading-zero count) pairs. Returns 0 when every
+// implementation matched the table and 1 otherwise, so the test runner can
+// see failures.
+int main()
+{
+	static unsigned test[] = {0,32, 1,31, 2,30, 3,30, 4,29, 5,29, 6,29,
+		7,29, 8,28, 9,28, 16,27, 32,26, 64,25, 128,24, 255,24, 256,23,
+		512,22, 1024,21, 2048,20, 4096,19, 8192,18, 16384,17, 32768,16,
+		65536,15, 0x20000,14, 0x40000,13, 0x80000,12, 0x100000,11,
+		0x200000,10, 0x400000,9, 0x800000,8, 0x1000000,7, 0x2000000,6,
+		0x4000000,5, 0x8000000,4, 0x0FFFFFFF,4, 0x10000000,3,
+		0x3000FFFF,2, 0x50003333,1, 0x7FFFFFFF,1, 0x80000000,0,
+		0xFFFFFFFF,0};
+	std::size_t const Count = 10000000;
+
+	// Number of table entries (two per case), independent of sizeof(unsigned).
+	int const n = static_cast<int>(sizeof(test) / sizeof(test[0]));
+
+	bench("nlz1", nlz1, test, n, Count);
+	bench("nlz1a", nlz1a, test, n, Count);
+	bench("nlz2", nlz2, test, n, Count);
+	bench("nlz2a", nlz2a, test, n, Count);
+	bench("nlz3", nlz3, test, n, Count);
+	bench("nlz4", nlz4, test, n, Count);
+	bench("nlz5", nlz5, test, n, Count);
+	bench("nlz6", nlz6, test, n, Count);
+	bench("nlz7", nlz7, test, n, Count);
+	bench("nlz8", nlz8, test, n, Count);
+	bench("nlz9", nlz9, test, n, Count);
+	bench("nlz10", nlz10, test, n, Count);
+	bench("nlz10a", nlz10a, test, n, Count);
+	bench("nlz10b", nlz10b, test, n, Count);
+
+	if (errors == 0)
+		printf("Passed all %d cases.\n", n / 2);
+
+	// Collapse the count to 0/1: a raw count could wrap to 0 in the exit status.
+	return errors == 0 ? 0 : 1;
+}

+ 94 - 0
test/gtc/gtc_integer.cpp

@@ -10,6 +10,8 @@
 #include <glm/gtc/integer.hpp>
 #include <glm/gtc/type_precision.hpp>
 #include <glm/gtc/vec1.hpp>
+#include <ctime>
+#include <vector>
 
 namespace isPowerOfTwo
 {
@@ -149,11 +151,103 @@ namespace isPowerOfTwo
 	}
 }//isPowerOfTwo
 
+namespace ceilPowerOfTwo
+{
+	template <typename genIUType>
+	GLM_FUNC_QUALIFIER genIUType highestBitValue(genIUType Value) // Returns the value of the highest set bit of Value (0 when Value is 0).
+	{
+		genIUType tmp = Value;
+		genIUType result = genIUType(0);
+		while(tmp) // Strip set bits from the bottom up; the last one removed is the highest.
+		{
+			result = (tmp & (~tmp + 1)); // grab lowest bit
+			tmp &= ~result; // clear lowest bit
+		}
+		return result;
+	}
+
+	template <typename genType>
+	GLM_FUNC_QUALIFIER genType ceilPowerOfTwo_loop(genType value) // Loop-based version that perf() times against glm::ceilPowerOfTwo.
+	{
+		return glm::isPowerOfTwo(value) ? value : highestBitValue(value) << 1; // Powers of two map to themselves; otherwise double the highest set bit.
+	}
+
+	template <typename genType>
+	struct type // One test case: an input and the result expected for it.
+	{
+		genType		Value; // Input passed to the function under test.
+		genType		Return; // Expected result.
+	};
+
+	int test_uint32() // Checks glm::ceilPowerOfTwo on 32-bit unsigned inputs; returns the number of mismatches.
+	{
+		type<glm::uint32> const Data[] =
+		{
+			{0x00000001, 0x00000001},
+			{0x00000002, 0x00000002},
+			{0x00000004, 0x00000004},
+			{0x00000007, 0x00000008},
+			{0x0000ffff, 0x00010000},
+			{0x0000fff0, 0x00010000},
+			{0x0000f000, 0x00010000},
+			{0x80000000, 0x80000000}, // Largest power of two that fits in 32 bits.
+			{0x00000000, 0x00000000}, // Zero is expected to map to zero.
+			{0x00000003, 0x00000004}
+		};
+
+		int Error(0);
+
+		for(std::size_t i = 0, n = sizeof(Data) / sizeof(type<glm::uint32>); i < n; ++i)
+		{
+			glm::uint32 Result = glm::ceilPowerOfTwo(Data[i].Value);
+			Error += Data[i].Return == Result ? 0 : 1; // Count one error per mismatching case.
+		}
+
+		return Error;
+	}
+
+	// Times ceilPowerOfTwo_loop against glm::ceilPowerOfTwo over the inputs
+	// 0 .. 99,999,999 and prints the elapsed std::clock() ticks of each run.
+	// No result is checked, so the returned error count is always 0.
+	int perf()
+	{
+		int Error(0);
+
+		// Results are written into the vector so the calls have an
+		// observable effect.
+		std::vector<glm::uint> v;
+		v.resize(100000000);
+
+		std::clock_t Timestamp0 = std::clock();
+
+		for(glm::uint32 i = 0, n = static_cast<glm::uint>(v.size()); i < n; ++i)
+			v[i] = ceilPowerOfTwo_loop(i);
+
+		std::clock_t Timestamp1 = std::clock();
+
+		for(glm::uint32 i = 0, n = static_cast<glm::uint>(v.size()); i < n; ++i)
+			v[i] = glm::ceilPowerOfTwo(i);
+
+		std::clock_t Timestamp2 = std::clock();
+
+		// std::clock_t is not int; convert explicitly to match "%d".
+		printf("ceilPowerOfTwo_loop: %d clocks\n", static_cast<int>(Timestamp1 - Timestamp0));
+		printf("glm::ceilPowerOfTwo: %d clocks\n", static_cast<int>(Timestamp2 - Timestamp1));
+
+		return Error;
+	}
+
+	int test() // Runs the ceilPowerOfTwo correctness checks and returns the mismatch count.
+	{
+		int Error(0);
+
+		Error += test_uint32();
+
+		return Error;
+	}
+}//namespace ceilPowerOfTwo
+
 int main()
 {
 	int Error(0); // Summed error count; becomes the process exit status.
 
 	Error += isPowerOfTwo::test();
+	Error += ceilPowerOfTwo::test();
+	Error += ceilPowerOfTwo::perf(); // Timing run; its result is added like any other check.
 
 	return Error;
 }

+ 3 - 3
test/gtc/gtc_reciprocal.cpp

@@ -8,10 +8,10 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 #include <glm/gtc/reciprocal.hpp>
+#include <ctime>
 
 int main()
 {
-	int Error(0);
-
-	return Error;
+	return 0; // Nothing is checked here; always reports success.
 }
+