|
|
@@ -34,7 +34,7 @@ int nlz1a(unsigned x) {
|
|
|
int n;
|
|
|
|
|
|
/* if (x == 0) return(32); */
|
|
|
- if ((int)x <= 0) return (~x >> 26) & 32;
|
|
|
+ if (static_cast<int>(x) <= 0) return (~x >> 26) & 32;
|
|
|
n = 1;
|
|
|
if ((x >> 16) == 0) {n = n +16; x = x <<16;}
|
|
|
if ((x >> 24) == 0) {n = n + 8; x = x << 8;}
|
|
|
@@ -141,29 +141,31 @@ gcc/AIX, and gcc/NT, at some optimization levels.
|
|
|
BTW, these programs use the "anonymous union" feature of C++, not
|
|
|
available in C. */
|
|
|
|
|
|
-int nlz6(unsigned k) {
|
|
|
- union {
|
|
|
- unsigned asInt[2];
|
|
|
- double asDouble;
|
|
|
- };
|
|
|
- int n;
|
|
|
-
|
|
|
- asDouble = (double)k + 0.5;
|
|
|
- n = 1054 - (asInt[LE] >> 20);
|
|
|
- return n;
|
|
|
+int nlz6(unsigned k)
|
|
|
+{
|
|
|
+ union {
|
|
|
+ unsigned asInt[2];
|
|
|
+ double asDouble;
|
|
|
+ };
|
|
|
+ int n;
|
|
|
+
|
|
|
+ asDouble = static_cast<double>(k) + 0.5;
|
|
|
+ n = 1054 - (asInt[LE] >> 20);
|
|
|
+ return n;
|
|
|
}
|
|
|
|
|
|
-int nlz7(unsigned k) {
|
|
|
- union {
|
|
|
- unsigned asInt[2];
|
|
|
- double asDouble;
|
|
|
- };
|
|
|
- int n;
|
|
|
-
|
|
|
- asDouble = (double)k;
|
|
|
- n = 1054 - (asInt[LE] >> 20);
|
|
|
- n = (n & 31) + (n >> 9);
|
|
|
- return n;
|
|
|
+int nlz7(unsigned k)
|
|
|
+{
|
|
|
+ union {
|
|
|
+ unsigned asInt[2];
|
|
|
+ double asDouble;
|
|
|
+ };
|
|
|
+ int n;
|
|
|
+
|
|
|
+ asDouble = static_cast<double>(k);
|
|
|
+ n = 1054 - (asInt[LE] >> 20);
|
|
|
+ n = (n & 31) + (n >> 9);
|
|
|
+ return n;
|
|
|
}
|
|
|
|
|
|
/* In single precision, round-to-nearest mode, the basic method fails for:
|
|
|
@@ -175,17 +177,18 @@ int nlz7(unsigned k) {
|
|
|
FFFFFF80 <= k <= FFFFFFFF.
|
|
|
For k = 0 it gives 158, and for the other values it is too low by 1. */
|
|
|
|
|
|
-int nlz8(unsigned k) {
|
|
|
- union {
|
|
|
- unsigned asInt;
|
|
|
- float asFloat;
|
|
|
- };
|
|
|
- int n;
|
|
|
-
|
|
|
- k = k & ~(k >> 1); /* Fix problem with rounding. */
|
|
|
- asFloat = (float)k + 0.5f;
|
|
|
- n = 158 - (asInt >> 23);
|
|
|
- return n;
|
|
|
+int nlz8(unsigned k)
|
|
|
+{
|
|
|
+ union {
|
|
|
+ unsigned asInt;
|
|
|
+ float asFloat;
|
|
|
+ };
|
|
|
+ int n;
|
|
|
+
|
|
|
+ k = k & ~(k >> 1); /* Fix problem with rounding. */
|
|
|
+ asFloat = static_cast<float>(k) + 0.5f;
|
|
|
+ n = 158 - (asInt >> 23);
|
|
|
+ return n;
|
|
|
}
|
|
|
|
|
|
/* The example below shows how to make a macro for nlz. It uses an
|
|
|
@@ -196,18 +199,19 @@ expressions (see "Using and Porting GNU CC", by Richard M. Stallman
|
|
|
possibility that the macro argument will conflict with one of its local
|
|
|
variables, e.g., NLZ(k). */
|
|
|
|
|
|
-int nlz9(unsigned k) {
|
|
|
- union {
|
|
|
- unsigned asInt;
|
|
|
- float asFloat;
|
|
|
- };
|
|
|
- int n;
|
|
|
-
|
|
|
- k = k & ~(k >> 1); /* Fix problem with rounding. */
|
|
|
- asFloat = (float)k;
|
|
|
- n = 158 - (asInt >> 23);
|
|
|
- n = (n & 31) + (n >> 6); /* Fix problem with k = 0. */
|
|
|
- return n;
|
|
|
+int nlz9(unsigned k)
|
|
|
+{
|
|
|
+ union {
|
|
|
+ unsigned asInt;
|
|
|
+ float asFloat;
|
|
|
+ };
|
|
|
+ int n;
|
|
|
+
|
|
|
+ k = k & ~(k >> 1); /* Fix problem with rounding. */
|
|
|
+ asFloat = static_cast<float>(k);
|
|
|
+ n = 158 - (asInt >> 23);
|
|
|
+ n = (n & 31) + (n >> 6); /* Fix problem with k = 0. */
|
|
|
+ return n;
|
|
|
}
|
|
|
|
|
|
/* Below are three nearly equivalent programs for computing the number
|
|
|
@@ -229,74 +233,75 @@ multiplication expanded into shifts and adds, but the table size is
|
|
|
getting a bit large). */
|
|
|
|
|
|
#define u 99
|
|
|
-int nlz10(unsigned x) {
|
|
|
-
|
|
|
- static char table[64] =
|
|
|
- {32,31, u,16, u,30, 3, u, 15, u, u, u,29,10, 2, u,
|
|
|
- u, u,12,14,21, u,19, u, u,28, u,25, u, 9, 1, u,
|
|
|
- 17, u, 4, u, u, u,11, u, 13,22,20, u,26, u, u,18,
|
|
|
- 5, u, u,23, u,27, u, 6, u,24, 7, u, 8, u, 0, u};
|
|
|
-
|
|
|
- x = x | (x >> 1); // Propagate leftmost
|
|
|
- x = x | (x >> 2); // 1-bit to the right.
|
|
|
- x = x | (x >> 4);
|
|
|
- x = x | (x >> 8);
|
|
|
- x = x | (x >>16);
|
|
|
- x = x*0x06EB14F9; // Multiplier is 7*255**3.
|
|
|
- return table[x >> 26];
|
|
|
+int nlz10(unsigned x)
|
|
|
+{
|
|
|
+ static char table[64] =
|
|
|
+ {32,31, u,16, u,30, 3, u, 15, u, u, u,29,10, 2, u,
|
|
|
+ u, u,12,14,21, u,19, u, u,28, u,25, u, 9, 1, u,
|
|
|
+ 17, u, 4, u, u, u,11, u, 13,22,20, u,26, u, u,18,
|
|
|
+ 5, u, u,23, u,27, u, 6, u,24, 7, u, 8, u, 0, u};
|
|
|
+
|
|
|
+ x = x | (x >> 1); // Propagate leftmost
|
|
|
+ x = x | (x >> 2); // 1-bit to the right.
|
|
|
+ x = x | (x >> 4);
|
|
|
+ x = x | (x >> 8);
|
|
|
+ x = x | (x >>16);
|
|
|
+ x = x*0x06EB14F9; // Multiplier is 7*255**3.
|
|
|
+ return table[x >> 26];
|
|
|
}
|
|
|
|
|
|
/* Harley's algorithm with multiply expanded.
|
|
|
19 elementary ops plus an indexed load. */
|
|
|
|
|
|
-int nlz10a(unsigned x) {
|
|
|
-
|
|
|
- static char table[64] =
|
|
|
- {32,31, u,16, u,30, 3, u, 15, u, u, u,29,10, 2, u,
|
|
|
- u, u,12,14,21, u,19, u, u,28, u,25, u, 9, 1, u,
|
|
|
- 17, u, 4, u, u, u,11, u, 13,22,20, u,26, u, u,18,
|
|
|
- 5, u, u,23, u,27, u, 6, u,24, 7, u, 8, u, 0, u};
|
|
|
-
|
|
|
- x = x | (x >> 1); // Propagate leftmost
|
|
|
- x = x | (x >> 2); // 1-bit to the right.
|
|
|
- x = x | (x >> 4);
|
|
|
- x = x | (x >> 8);
|
|
|
- x = x | (x >> 16);
|
|
|
- x = (x << 3) - x; // Multiply by 7.
|
|
|
- x = (x << 8) - x; // Multiply by 255.
|
|
|
- x = (x << 8) - x; // Again.
|
|
|
- x = (x << 8) - x; // Again.
|
|
|
- return table[x >> 26];
|
|
|
+int nlz10a(unsigned x)
|
|
|
+{
|
|
|
+ static char table[64] =
|
|
|
+ {32,31, u,16, u,30, 3, u, 15, u, u, u,29,10, 2, u,
|
|
|
+ u, u,12,14,21, u,19, u, u,28, u,25, u, 9, 1, u,
|
|
|
+ 17, u, 4, u, u, u,11, u, 13,22,20, u,26, u, u,18,
|
|
|
+ 5, u, u,23, u,27, u, 6, u,24, 7, u, 8, u, 0, u};
|
|
|
+
|
|
|
+ x = x | (x >> 1); // Propagate leftmost
|
|
|
+ x = x | (x >> 2); // 1-bit to the right.
|
|
|
+ x = x | (x >> 4);
|
|
|
+ x = x | (x >> 8);
|
|
|
+ x = x | (x >> 16);
|
|
|
+ x = (x << 3) - x; // Multiply by 7.
|
|
|
+ x = (x << 8) - x; // Multiply by 255.
|
|
|
+ x = (x << 8) - x; // Again.
|
|
|
+ x = (x << 8) - x; // Again.
|
|
|
+ return table[x >> 26];
|
|
|
}
|
|
|
|
|
|
/* Julius Goryavsky's version of Harley's algorithm.
|
|
|
17 elementary ops plus an indexed load, if the machine
|
|
|
has "and not." */
|
|
|
|
|
|
-int nlz10b(unsigned x) {
|
|
|
-
|
|
|
- static char table[64] =
|
|
|
- {32,20,19, u, u,18, u, 7, 10,17, u, u,14, u, 6, u,
|
|
|
- u, 9, u,16, u, u, 1,26, u,13, u, u,24, 5, u, u,
|
|
|
- u,21, u, 8,11, u,15, u, u, u, u, 2,27, 0,25, u,
|
|
|
- 22, u,12, u, u, 3,28, u, 23, u, 4,29, u, u,30,31};
|
|
|
-
|
|
|
- x = x | (x >> 1); // Propagate leftmost
|
|
|
- x = x | (x >> 2); // 1-bit to the right.
|
|
|
- x = x | (x >> 4);
|
|
|
- x = x | (x >> 8);
|
|
|
- x = x & ~(x >> 16);
|
|
|
- x = x*0xFD7049FF; // Activate this line or the following 3.
|
|
|
-// x = (x << 9) - x; // Multiply by 511.
|
|
|
-// x = (x << 11) - x; // Multiply by 2047.
|
|
|
-// x = (x << 14) - x; // Multiply by 16383.
|
|
|
- return table[x >> 26];
|
|
|
+int nlz10b(unsigned x)
|
|
|
+{
|
|
|
+ static char table[64] =
|
|
|
+ {32,20,19, u, u,18, u, 7, 10,17, u, u,14, u, 6, u,
|
|
|
+ u, 9, u,16, u, u, 1,26, u,13, u, u,24, 5, u, u,
|
|
|
+ u,21, u, 8,11, u,15, u, u, u, u, 2,27, 0,25, u,
|
|
|
+ 22, u,12, u, u, 3,28, u, 23, u, 4,29, u, u,30,31};
|
|
|
+
|
|
|
+ x = x | (x >> 1); // Propagate leftmost
|
|
|
+ x = x | (x >> 2); // 1-bit to the right.
|
|
|
+ x = x | (x >> 4);
|
|
|
+ x = x | (x >> 8);
|
|
|
+ x = x & ~(x >> 16);
|
|
|
+ x = x*0xFD7049FF; // Activate this line or the following 3.
|
|
|
+ // x = (x << 9) - x; // Multiply by 511.
|
|
|
+ // x = (x << 11) - x; // Multiply by 2047.
|
|
|
+ // x = (x << 14) - x; // Multiply by 16383.
|
|
|
+ return table[x >> 26];
|
|
|
}
|
|
|
|
|
|
int errors;
|
|
|
-void error(int x, int y) {
|
|
|
- errors = errors + 1;
|
|
|
- printf("Error for x = %08x, got %d\n", x, y);
|
|
|
+void error(int x, int y)
|
|
|
+{
|
|
|
+ errors = errors + 1;
|
|
|
+ printf("Error for x = %08x, got %d\n", x, y);
|
|
|
}
|
|
|
|
|
|
int main()
|