Browse Source

Optimizations to improve start-up time (esp. in JavaScript)

rdb 10 years ago
parent
commit
41184b1189

+ 1 - 0
dtool/src/dtoolbase/p3dtoolbase_composite2.cxx

@@ -2,6 +2,7 @@
 #include "mutexWin32Impl.cxx"
 #include "mutexSpinlockImpl.cxx"
 #include "neverFreeMemory.cxx"
+#include "pdtoa.c"
 #include "pstrtod.cxx"
 #include "register_type.cxx"
 #include "typeHandle.cxx"

+ 435 - 0
dtool/src/dtoolbase/pdtoa.c

@@ -0,0 +1,435 @@
+/*
+See pdtoa.h for explanation.
+
+Copyright (C) 2014 Milo Yip
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "pdtoa.h"
+#include "cmath.h"
+
+#include <assert.h>
+#include <math.h>
+#include <stdint.h>
+#include <string.h>
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#endif
+
+// Builds a 64-bit unsigned constant from its upper (h) and lower (l) 32-bit
+// halves.
+#define UINT64_C2(h, l) ((static_cast<uint64_t>(h) << 32) | static_cast<uint64_t>(l))
+
+// Minimal floating-point value used by the Grisu2 routines in this file: an
+// unsigned 64-bit significand f paired with a binary exponent e.
+struct DiyFp {
+  // Leaves f and e uninitialized.
+  DiyFp() {}
+
+  DiyFp(uint64_t f, int e) : f(f), e(e) {}
+
+  // Splits the bit pattern of d into a significand and an exponent.  Only the
+  // exponent and significand fields are read; the sign bit is ignored.
+  DiyFp(double d) {
+    // Read the raw bits of the double through a union.
+    union {
+      double d;
+      uint64_t u64;
+    } u = { d };
+
+    int biased_e = (u.u64 & kDpExponentMask) >> kDpSignificandSize;
+    uint64_t significand = (u.u64 & kDpSignificandMask);
+    if (biased_e != 0) {
+      // Non-zero exponent field: add the hidden bit and remove the bias.
+      f = significand + kDpHiddenBit;
+      e = biased_e - kDpExponentBias;
+    }
+    else {
+      // Zero exponent field: no hidden bit, fixed minimum exponent.
+      f = significand;
+      e = kDpMinExponent + 1;
+    }
+  }
+
+  // Subtracts significands.  Both operands must share the same exponent and
+  // rhs.f must not exceed f (both asserted).
+  DiyFp operator-(const DiyFp& rhs) const {
+    assert(e == rhs.e);
+    assert(f >= rhs.f);
+    return DiyFp(f - rhs.f, e);
+  }
+
+  // Multiplies two values, keeping the upper 64 bits of the 128-bit product
+  // of the significands (rounded up when bit 63 of the lower half is set) and
+  // adding 64 to the summed exponents.  Three implementations are selected
+  // by the preprocessor: an MSVC x64 intrinsic, a GCC __int128 version, and a
+  // portable version built from 32-bit partial products.
+  DiyFp operator*(const DiyFp& rhs) const {
+#if defined(_MSC_VER) && defined(_M_AMD64)
+    uint64_t h;
+    uint64_t l = _umul128(f, rhs.f, &h);
+    if (l & (uint64_t(1) << 63)) // rounding
+      h++;
+    return DiyFp(h, e + rhs.e + 64);
+#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
+    unsigned __int128 p = static_cast<unsigned __int128>(f) * static_cast<unsigned __int128>(rhs.f);
+    uint64_t h = p >> 64;
+    uint64_t l = static_cast<uint64_t>(p);
+    if (l & (uint64_t(1) << 63)) // rounding
+      h++;
+    return DiyFp(h, e + rhs.e + 64);
+#else
+    // Split each significand into 32-bit halves (a:b and c:d) and combine the
+    // four partial products.
+    const uint64_t M32 = 0xFFFFFFFF;
+    const uint64_t a = f >> 32;
+    const uint64_t b = f & M32;
+    const uint64_t c = rhs.f >> 32;
+    const uint64_t d = rhs.f & M32;
+    const uint64_t ac = a * c;
+    const uint64_t bc = b * c;
+    const uint64_t ad = a * d;
+    const uint64_t bd = b * d;
+    uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
+    tmp += 1U << 31;  /// mult_round
+    return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
+#endif
+  }
+
+  // Returns an equal value whose significand has been shifted left, with the
+  // exponent lowered by the same amount.  NOTE(review): f must be non-zero;
+  // the portable loop below would never terminate for f == 0, and the
+  // bit-scan variants are presumably undefined for zero as well.
+  DiyFp Normalize() const {
+#if defined(_MSC_VER) && defined(_M_AMD64)
+    unsigned long index;
+    _BitScanReverse64(&index, f);
+    return DiyFp(f << (63 - index), e - (63 - index));
+#elif defined(__GNUC__)
+    int s = __builtin_clzll(f);
+    return DiyFp(f << s, e - s);
+#else
+    DiyFp res = *this;
+    // First bring the leading bit up to the hidden-bit position ...
+    while (!(res.f & kDpHiddenBit)) {
+      res.f <<= 1;
+      res.e--;
+    }
+    // ... then shift it the rest of the way to bit 63.
+    res.f <<= (kDiySignificandSize - kDpSignificandSize - 1);
+    res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 1);
+    return res;
+#endif
+  }
+
+  // Same as Normalize(), but the portable path expects a significand that is
+  // one bit wider (it searches for kDpHiddenBit << 1).  Used on the boundary
+  // values built in NormalizedBoundaries().
+  DiyFp NormalizeBoundary() const {
+#if defined(_MSC_VER) && defined(_M_AMD64)
+    unsigned long index;
+    _BitScanReverse64(&index, f);
+    return DiyFp (f << (63 - index), e - (63 - index));
+#else
+    DiyFp res = *this;
+    while (!(res.f & (kDpHiddenBit << 1))) {
+      res.f <<= 1;
+      res.e--;
+    }
+    res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
+    res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
+    return res;
+#endif
+  }
+
+  // Computes the two boundary values around this one and stores them in
+  // *minus and *plus, both expressed with the same exponent.
+  void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
+    // Upper boundary: (2f + 1) at exponent e - 1, normalized.
+    DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
+    // Lower boundary: (2f - 1) at e - 1, or (4f - 1) at e - 2 when f is
+    // exactly the hidden bit.
+    DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
+    // Align the lower boundary to the upper boundary's exponent.
+    mi.f <<= mi.e - pl.e;
+    mi.e = pl.e;
+    *plus = pl;
+    *minus = mi;
+  }
+
+  // Width of f in bits, and the layout of the double bit pattern that the
+  // DiyFp(double) constructor decodes.
+  static const int kDiySignificandSize = 64;
+  static const int kDpSignificandSize = 52;
+  static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
+  static const int kDpMinExponent = -kDpExponentBias;
+  static const uint64_t kDpExponentMask = UINT64_C2(0x7FF00000, 0x00000000);
+  static const uint64_t kDpSignificandMask = UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
+  static const uint64_t kDpHiddenBit = UINT64_C2(0x00100000, 0x00000000);
+
+  uint64_t f;
+  int e;
+};
+
+// Selects a precomputed power of ten for the binary exponent e.
+//   e - binary exponent used to choose the table entry.
+//   K - receives the decimal exponent computed from the chosen table index
+//       (see the assignment to *K below).
+// Returns the table entry as a DiyFp (significand from kCachedPowers_F,
+// binary exponent from kCachedPowers_E).
+inline static DiyFp GetCachedPower(int e, int* K) {
+  // 10^-348, 10^-340, ..., 10^340
+  static const uint64_t kCachedPowers_F[] = {
+    UINT64_C2(0xfa8fd5a0, 0x081c0288), UINT64_C2(0xbaaee17f, 0xa23ebf76),
+    UINT64_C2(0x8b16fb20, 0x3055ac76), UINT64_C2(0xcf42894a, 0x5dce35ea),
+    UINT64_C2(0x9a6bb0aa, 0x55653b2d), UINT64_C2(0xe61acf03, 0x3d1a45df),
+    UINT64_C2(0xab70fe17, 0xc79ac6ca), UINT64_C2(0xff77b1fc, 0xbebcdc4f),
+    UINT64_C2(0xbe5691ef, 0x416bd60c), UINT64_C2(0x8dd01fad, 0x907ffc3c),
+    UINT64_C2(0xd3515c28, 0x31559a83), UINT64_C2(0x9d71ac8f, 0xada6c9b5),
+    UINT64_C2(0xea9c2277, 0x23ee8bcb), UINT64_C2(0xaecc4991, 0x4078536d),
+    UINT64_C2(0x823c1279, 0x5db6ce57), UINT64_C2(0xc2109436, 0x4dfb5637),
+    UINT64_C2(0x9096ea6f, 0x3848984f), UINT64_C2(0xd77485cb, 0x25823ac7),
+    UINT64_C2(0xa086cfcd, 0x97bf97f4), UINT64_C2(0xef340a98, 0x172aace5),
+    UINT64_C2(0xb23867fb, 0x2a35b28e), UINT64_C2(0x84c8d4df, 0xd2c63f3b),
+    UINT64_C2(0xc5dd4427, 0x1ad3cdba), UINT64_C2(0x936b9fce, 0xbb25c996),
+    UINT64_C2(0xdbac6c24, 0x7d62a584), UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
+    UINT64_C2(0xf3e2f893, 0xdec3f126), UINT64_C2(0xb5b5ada8, 0xaaff80b8),
+    UINT64_C2(0x87625f05, 0x6c7c4a8b), UINT64_C2(0xc9bcff60, 0x34c13053),
+    UINT64_C2(0x964e858c, 0x91ba2655), UINT64_C2(0xdff97724, 0x70297ebd),
+    UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), UINT64_C2(0xf8a95fcf, 0x88747d94),
+    UINT64_C2(0xb9447093, 0x8fa89bcf), UINT64_C2(0x8a08f0f8, 0xbf0f156b),
+    UINT64_C2(0xcdb02555, 0x653131b6), UINT64_C2(0x993fe2c6, 0xd07b7fac),
+    UINT64_C2(0xe45c10c4, 0x2a2b3b06), UINT64_C2(0xaa242499, 0x697392d3),
+    UINT64_C2(0xfd87b5f2, 0x8300ca0e), UINT64_C2(0xbce50864, 0x92111aeb),
+    UINT64_C2(0x8cbccc09, 0x6f5088cc), UINT64_C2(0xd1b71758, 0xe219652c),
+    UINT64_C2(0x9c400000, 0x00000000), UINT64_C2(0xe8d4a510, 0x00000000),
+    UINT64_C2(0xad78ebc5, 0xac620000), UINT64_C2(0x813f3978, 0xf8940984),
+    UINT64_C2(0xc097ce7b, 0xc90715b3), UINT64_C2(0x8f7e32ce, 0x7bea5c70),
+    UINT64_C2(0xd5d238a4, 0xabe98068), UINT64_C2(0x9f4f2726, 0x179a2245),
+    UINT64_C2(0xed63a231, 0xd4c4fb27), UINT64_C2(0xb0de6538, 0x8cc8ada8),
+    UINT64_C2(0x83c7088e, 0x1aab65db), UINT64_C2(0xc45d1df9, 0x42711d9a),
+    UINT64_C2(0x924d692c, 0xa61be758), UINT64_C2(0xda01ee64, 0x1a708dea),
+    UINT64_C2(0xa26da399, 0x9aef774a), UINT64_C2(0xf209787b, 0xb47d6b85),
+    UINT64_C2(0xb454e4a1, 0x79dd1877), UINT64_C2(0x865b8692, 0x5b9bc5c2),
+    UINT64_C2(0xc83553c5, 0xc8965d3d), UINT64_C2(0x952ab45c, 0xfa97a0b3),
+    UINT64_C2(0xde469fbd, 0x99a05fe3), UINT64_C2(0xa59bc234, 0xdb398c25),
+    UINT64_C2(0xf6c69a72, 0xa3989f5c), UINT64_C2(0xb7dcbf53, 0x54e9bece),
+    UINT64_C2(0x88fcf317, 0xf22241e2), UINT64_C2(0xcc20ce9b, 0xd35c78a5),
+    UINT64_C2(0x98165af3, 0x7b2153df), UINT64_C2(0xe2a0b5dc, 0x971f303a),
+    UINT64_C2(0xa8d9d153, 0x5ce3b396), UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
+    UINT64_C2(0xbb764c4c, 0xa7a44410), UINT64_C2(0x8bab8eef, 0xb6409c1a),
+    UINT64_C2(0xd01fef10, 0xa657842c), UINT64_C2(0x9b10a4e5, 0xe9913129),
+    UINT64_C2(0xe7109bfb, 0xa19c0c9d), UINT64_C2(0xac2820d9, 0x623bf429),
+    UINT64_C2(0x80444b5e, 0x7aa7cf85), UINT64_C2(0xbf21e440, 0x03acdd2d),
+    UINT64_C2(0x8e679c2f, 0x5e44ff8f), UINT64_C2(0xd433179d, 0x9c8cb841),
+    UINT64_C2(0x9e19db92, 0xb4e31ba9), UINT64_C2(0xeb96bf6e, 0xbadf77d9),
+    UINT64_C2(0xaf87023b, 0x9bf0ee6b)
+  };
+  // Binary exponents paired index-for-index with kCachedPowers_F.
+  static const int16_t kCachedPowers_E[] = {
+    -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007,  -980,
+     -954,  -927,  -901,  -874,  -847,  -821,  -794,  -768,  -741,  -715,
+     -688,  -661,  -635,  -608,  -582,  -555,  -529,  -502,  -475,  -449,
+     -422,  -396,  -369,  -343,  -316,  -289,  -263,  -236,  -210,  -183,
+     -157,  -130,  -103,   -77,   -50,   -24,     3,    30,    56,    83,
+      109,   136,   162,   189,   216,   242,   269,   295,   322,   348,
+      375,   402,   428,   455,   481,   508,   534,   561,   588,   614,
+      641,   667,   694,   720,   747,   774,   800,   827,   853,   880,
+      907,   933,   960,   986,  1013,  1039,  1066
+  };
+
+  //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
+  double dk = (-61 - e) * 0.30102999566398114 + 347;  // dk must be positive, so can do ceiling in positive
+  // Manual ceiling: truncate, then bump by one if anything was cut off.
+  int k = static_cast<int>(dk);
+  if (k != dk)
+    k++;
+
+  // Table entries are eight decimal exponents apart, hence the shift by 3.
+  unsigned index = static_cast<unsigned>((k >> 3) + 1);
+  *K = -(-348 + static_cast<int>(index << 3));  // decimal exponent no need lookup table
+
+  // The bound is only checked in builds where assert() is active.
+  assert(index < sizeof(kCachedPowers_F) / sizeof(kCachedPowers_F[0]));
+  return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
+}
+
+// Adjusts the last generated digit.  While the loop condition holds, the
+// final character (buffer[len - 1]) is decremented by one and rest is
+// increased by ten_kappa.
+//   buffer, len - digits produced so far; len must be at least 1 if the loop
+//                 body runs, since buffer[len - 1] is written.
+//   delta, rest, ten_kappa, wp_w - unsigned quantities supplied by DigitGen;
+//                 all comparisons are done in unsigned 64-bit arithmetic.
+inline static void GrisuRound(char* buffer, int len, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t wp_w) {
+  while (rest < wp_w && delta - rest >= ten_kappa &&
+       (rest + ten_kappa < wp_w ||  /// closer
+      wp_w - rest > rest + ten_kappa - wp_w)) {
+    buffer[len - 1]--;
+    rest += ten_kappa;
+  }
+}
+
+// Returns how many decimal digits are needed to print n (1 for values below
+// ten, up to 10 for values of one billion or more).
+inline static unsigned CountDecimalDigit32(uint32_t n) {
+  // Smallest value that needs 2, 3, ..., 10 digits respectively.
+  static const uint32_t kFirstWithMoreDigits[9] = {
+    10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000
+  };
+
+  unsigned digits = 1;
+  for (unsigned i = 0; i < 9 && n >= kFirstWithMoreDigits[i]; ++i) {
+    ++digits;
+  }
+  return digits;
+}
+
+// Generates decimal digits for Mp into buffer, stopping as soon as the part
+// not yet emitted is within delta, then lets GrisuRound() adjust the last
+// digit.
+//   W      - value used together with Mp to form wp_w = Mp - W, which is
+//            handed to GrisuRound().
+//   Mp     - value whose digits are generated; Mp.e must be negative, since
+//            it is negated to form shift counts.
+//   delta  - stop threshold, expressed in the same scale as Mp.f.
+//   buffer - receives the digits (no terminator is written here).
+//   len    - receives the number of digits written.
+//   K      - decimal exponent; the remaining kappa is added to it on return.
+inline static void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buffer, int* len, int* K) {
+  static const uint32_t kPow10[] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 };
+  // "one" is 1.0 at Mp's exponent; it separates Mp into an integral part p1
+  // and a fractional part p2.
+  const DiyFp one(uint64_t(1) << -Mp.e, Mp.e);
+  const DiyFp wp_w = Mp - W;
+  uint32_t p1 = static_cast<uint32_t>(Mp.f >> -one.e);
+  uint64_t p2 = Mp.f & (one.f - 1);
+  int kappa = static_cast<int>(CountDecimalDigit32(p1));
+  *len = 0;
+
+  // Integral part: peel off one decimal digit per iteration, most
+  // significant first.
+  while (kappa > 0) {
+    uint32_t d;
+    switch (kappa) {
+      case 10: d = p1 / 1000000000; p1 %= 1000000000; break;
+      case  9: d = p1 /  100000000; p1 %=  100000000; break;
+      case  8: d = p1 /   10000000; p1 %=   10000000; break;
+      case  7: d = p1 /    1000000; p1 %=    1000000; break;
+      case  6: d = p1 /     100000; p1 %=     100000; break;
+      case  5: d = p1 /      10000; p1 %=      10000; break;
+      case  4: d = p1 /       1000; p1 %=       1000; break;
+      case  3: d = p1 /        100; p1 %=        100; break;
+      case  2: d = p1 /         10; p1 %=         10; break;
+      case  1: d = p1;              p1 =           0; break;
+      default:
+#if defined(_MSC_VER)
+        __assume(0);
+#elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
+        __builtin_unreachable();
+#else
+        d = 0;
+#endif
+    }
+    // Leading zeros are not stored.
+    if (d || *len)
+      buffer[(*len)++] = '0' + static_cast<char>(d);
+    kappa--;
+    uint64_t tmp = (static_cast<uint64_t>(p1) << -one.e) + p2;
+    if (tmp <= delta) {
+      *K += kappa;
+      GrisuRound(buffer, *len, delta, tmp, static_cast<uint64_t>(kPow10[kappa]) << -one.e, wp_w.f);
+      return;
+    }
+  }
+
+  // kappa = 0
+  // Fractional part.  wp_w.f has to be scaled by the same power of ten as
+  // delta before it is handed to GrisuRound().  It is scaled incrementally
+  // here rather than looked up as kPow10[-kappa]: -kappa grows by one per
+  // fractional digit and can exceed the last index of that 10-entry table,
+  // which would read past its end.  For every index inside the table the
+  // incremental product is the same 64-bit value as the lookup.
+  uint64_t wp_w_scaled = wp_w.f;
+  for (;;) {
+    p2 *= 10;
+    delta *= 10;
+    wp_w_scaled *= 10;
+    char d = static_cast<char>(p2 >> -one.e);
+    if (d || *len)
+      buffer[(*len)++] = '0' + d;
+    p2 &= one.f - 1;
+    kappa--;
+    if (p2 < delta) {
+      *K += kappa;
+      GrisuRound(buffer, *len, delta, p2, one.f, wp_w_scaled);
+      return;
+    }
+  }
+}
+
+// Converts value into a run of decimal digits plus a decimal exponent.
+//   buffer - receives the digits (not terminated here).
+//   length - receives the number of digits written.
+//   K      - receives the decimal exponent that goes with those digits.
+// The sign bit of value is not examined by the DiyFp decomposition, and the
+// caller in this file filters out zero, infinity and NaN beforehand.
+inline static void Grisu2(double value, char* buffer, int* length, int* K) {
+  // Decompose the input and compute its lower and upper boundaries.
+  const DiyFp input(value);
+  DiyFp lower, upper;
+  input.NormalizedBoundaries(&lower, &upper);
+
+  // Scale the value and both boundaries by the same cached power of ten.
+  const DiyFp power = GetCachedPower(upper.e, K);
+  const DiyFp scaled = input.Normalize() * power;
+  DiyFp scaled_upper = upper * power;
+  DiyFp scaled_lower = lower * power;
+
+  // Pull each boundary in by one unit before generating digits.
+  scaled_lower.f += 1;
+  scaled_upper.f -= 1;
+  DigitGen(scaled, scaled_upper, scaled_upper.f - scaled_lower.f, buffer, length, K);
+}
+
+// Two-character decimal text for every value 0..99: the digits of n are
+// stored at cDigitsLut[2 * n] and cDigitsLut[2 * n + 1].  The table is not
+// NUL-terminated.
+static const char cDigitsLut[200] = {
+  '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9',
+  '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9',
+  '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9',
+  '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9',
+  '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9',
+  '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9',
+  '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9',
+  '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9',
+  '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9',
+  '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9'
+};
+
+// Writes the decimal exponent K as text at buffer, followed by a NUL.
+// A leading '-' is written for negative K; no '+' is written for positive K,
+// and no leading zeros are produced.  The hundreds digit is written as a
+// single character, so K is expected to have at most three digits.
+inline void WriteExponent(int K, char* buffer) {
+  char* out = buffer;
+
+  if (K < 0) {
+    *out++ = '-';
+    K = -K;
+  }
+
+  // Emit the hundreds digit first, if there is one.
+  const bool has_hundreds = (K >= 100);
+  if (has_hundreds) {
+    *out++ = '0' + static_cast<char>(K / 100);
+    K %= 100;
+  }
+
+  if (has_hundreds || K >= 10) {
+    // Two digits: after a hundreds digit the tens position is always
+    // written, even when it is zero.
+    const char* pair = cDigitsLut + K * 2;
+    *out++ = pair[0];
+    *out++ = pair[1];
+  }
+  else {
+    *out++ = '0' + static_cast<char>(K);
+  }
+
+  *out = '\0';
+}
+
+// Rewrites, in place, the `length` raw digits held in buffer into their final
+// NUL-terminated text form, given the decimal exponent k that goes with them.
+// Depending on kk = length + k the result is a plain integer with a trailing
+// ".0", a number with an embedded decimal point, a "0.000ddd" form, or
+// exponent notation.  No buffer size is passed in, so the caller must provide
+// room for the expanded text.
+inline static void Prettify(char* buffer, int length, int k) {
+  const int kk = length + k;  // 10^(kk-1) <= v < 10^kk
+
+  if (length <= kk && kk <= 21) {
+    // 1234e7 -> 12340000000
+    // Pad with zeros up to the decimal point, then append ".0".
+    for (int i = length; i < kk; i++)
+      buffer[i] = '0';
+    buffer[kk] = '.';
+    buffer[kk + 1] = '0';
+    buffer[kk + 2] = '\0';
+  }
+  else if (0 < kk && kk <= 21) {
+    // 1234e-2 -> 12.34
+    // Shift the fractional digits right by one to make room for the point.
+    memmove(&buffer[kk + 1], &buffer[kk], length - kk);
+    buffer[kk] = '.';
+    buffer[length + 1] = '\0';
+  }
+  else if (-6 < kk && kk <= 0) {
+    // 1234e-6 -> 0.001234
+    // Shift all digits right to make room for "0." and the leading zeros.
+    const int offset = 2 - kk;
+    memmove(&buffer[offset], &buffer[0], length);
+    buffer[0] = '0';
+    buffer[1] = '.';
+    for (int i = 2; i < offset; i++)
+      buffer[i] = '0';
+    buffer[length + offset] = '\0';
+  }
+  else if (length == 1) {
+    // 1e30
+    // WriteExponent() also writes the terminating NUL.
+    buffer[1] = 'e';
+    WriteExponent(kk - 1, &buffer[2]);
+  }
+  else {
+    // 1234e30 -> 1.234e33
+    // Insert the point after the first digit, then append the exponent.
+    memmove(&buffer[2], &buffer[1], length - 1);
+    buffer[1] = '.';
+    buffer[length + 1] = 'e';
+    WriteExponent(kk - 1, &buffer[0 + length + 2]);
+  }
+}
+
+// Writes a NUL-terminated text form of value into buffer.
+// A set sign bit produces a leading '-' (this includes negative zero and
+// NaNs with the sign bit set).  Infinity is written as "inf", NaN as "nan",
+// zero as "0.0" and one as "1.0"; everything else goes through Grisu2() and
+// Prettify().  No buffer size is passed in, so the caller must supply enough
+// room.
+void pdtoa(double value, char *buffer) {
+#ifdef _MSC_VER
+  const bool negative = copysign(1.0, value) < 0;
+#else
+  const bool negative = signbit(value);
+#endif
+  if (negative) {
+    *buffer++ = '-';
+    value = -value;
+  }
+
+  // Values that are written as a fixed three-character string.
+  const char *fixed = 0;
+  if (cinf(value)) {
+    fixed = "inf";
+  } else if (cnan(value)) {
+    fixed = "nan";
+  } else if (value == 0.0) {
+    fixed = "0.0";
+  } else if (value == 1.0) {
+    fixed = "1.0";
+  }
+
+  if (fixed != 0) {
+    // Copy the three characters plus the terminating NUL.
+    for (int i = 0; i < 4; ++i) {
+      buffer[i] = fixed[i];
+    }
+    return;
+  }
+
+  int digit_count, exponent;
+  Grisu2(value, buffer, &digit_count, &exponent);
+  Prettify(buffer, digit_count, exponent);
+}

+ 24 - 0
dtool/src/dtoolbase/pdtoa.h

@@ -0,0 +1,24 @@
+/*
+  This is a double-to-string conversion implementation by Milo Yip from:
+    https://github.com/miloyip/dtoa-benchmark
+
+  I introduced it because the ostringstream implementation is just too
+  darned slow, especially when compiled to JavaScript.
+*/
+
+#ifndef PDTOA_H
+#define PDTOA_H
+
+#include "dtoolsymbols.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+  Writes a NUL-terminated text representation of value into buffer.
+  No buffer length is passed, so the caller is responsible for providing
+  enough room.  NOTE(review): the callers added alongside this header use
+  32-byte buffers per value; confirm the worst-case output length before
+  using anything smaller.
+*/
+EXPCL_DTOOL void pdtoa(double value, char *buffer);
+
+#ifdef __cplusplus
+};  /* end of extern "C" */
+#endif
+
+#endif  // PDTOA_H

+ 78 - 0
dtool/src/dtoolutil/string_utils.I

@@ -20,3 +20,81 @@ format_string(const Thing &thing) {
   str << thing;
   return str.str();
 }
+
+// Overload for values that are already strings: returns a copy of value
+// unchanged.
+INLINE string
+format_string(const string &value) {
+  return value;
+}
+
+// Formats a float through pdtoa().
+// NOTE(review): the value is widened to double before formatting, so the
+// text reflects the widened double rather than a float-specific rendering.
+INLINE string
+format_string(float value) {
+  // NOTE(review): 32 bytes is assumed to cover the longest text pdtoa()
+  // produces; pdtoa() itself performs no bounds checking.
+  char text[32];
+  pdtoa(static_cast<double>(value), text);
+  return string(text);
+}
+
+// Formats a double through pdtoa().
+INLINE string
+format_string(double value) {
+  // NOTE(review): 32 bytes is assumed to cover the longest text pdtoa()
+  // produces; pdtoa() itself performs no bounds checking.
+  char text[32];
+  pdtoa(value, text);
+  return string(text);
+}
+
+// Formats an unsigned int as decimal text without using a stream.
+INLINE string
+format_string(unsigned int value) {
+  // Room for 10 digits plus the terminator.
+  // NOTE(review): this assumes unsigned int is no wider than 32 bits.
+  char digits[11];
+  char *cursor = &digits[10];
+  *cursor = 0;
+
+  // Digits come out least-significant first, so fill the buffer backwards.
+  // At least one digit is always written, which makes zero print as "0".
+  for (;;) {
+    --cursor;
+    *cursor = '0' + (value % 10);
+    value /= 10;
+    if (value == 0) {
+      break;
+    }
+  }
+
+  return string(cursor);
+}
+
+// Formats an int as decimal text without using a stream.
+// Negative values get a leading '-'.
+INLINE string
+format_string(int value) {
+  // Room for a sign, 10 digits and the terminator.
+  // NOTE(review): this assumes int is no wider than 32 bits.
+  char buffer[12];
+  char *p = buffer + 11;
+  *p = 0;
+
+  // Take the magnitude in unsigned arithmetic.  Negating the signed value
+  // directly overflows for the most negative int; converting first and then
+  // subtracting from zero is well defined and yields the right magnitude for
+  // every input.
+  const bool negative = (value < 0);
+  unsigned int magnitude = (unsigned int)value;
+  if (negative) {
+    magnitude = 0u - magnitude;
+  }
+
+  // Digits come out least-significant first, so fill the buffer backwards.
+  do {
+    *--p = '0' + (magnitude % 10);
+    magnitude /= 10;
+  } while (magnitude > 0);
+
+  if (negative) {
+    *--p = '-';
+  }
+
+  return string(p);
+}
+
+// Formats a 64-bit signed integer as decimal text without using a stream.
+// Negative values get a leading '-'.
+INLINE string
+format_string(PN_int64 value) {
+  // Room for a sign, 19 digits and the terminator.
+  char buffer[21];
+  char *p = buffer + 20;
+  *p = 0;
+
+  // Take the magnitude in unsigned arithmetic.  Negating the signed value
+  // directly overflows for the most negative value; converting first and
+  // then subtracting from zero is well defined and yields the right
+  // magnitude for every input.
+  const bool negative = (value < 0);
+  PN_uint64 magnitude = (PN_uint64)value;
+  if (negative) {
+    magnitude = (PN_uint64)0 - magnitude;
+  }
+
+  // Digits come out least-significant first, so fill the buffer backwards.
+  do {
+    *--p = '0' + (magnitude % 10);
+    magnitude /= 10;
+  } while (magnitude > 0);
+
+  if (negative) {
+    *--p = '-';
+  }
+
+  return string(p);
+}

+ 9 - 1
dtool/src/dtoolutil/string_utils.h

@@ -19,6 +19,7 @@
 
 #include <string>
 #include "vector_string.h"
+#include "pdtoa.h"
 
 // Case-insensitive string comparison, from Stroustrup's C++ third edition.
 // Works like strcmp().
@@ -66,7 +67,14 @@ EXPCL_DTOOL bool string_to_stdfloat(const string &str, PN_stdfloat &result);
 template<class Thing>
 INLINE string format_string(const Thing &thing);
 
+// Fast specializations for some primitive types.
+INLINE string format_string(const string &value);
+INLINE string format_string(float value);
+INLINE string format_string(double value);
+INLINE string format_string(unsigned int value);
+INLINE string format_string(int value);
+INLINE string format_string(PN_int64 value);
+
 #include "string_utils.I"
 
 #endif
-

+ 5 - 15
dtool/src/prc/configDeclaration.cxx

@@ -16,7 +16,7 @@
 #include "configVariableCore.h"
 #include "config_prc.h"
 #include "pstrtod.h"
-
+#include "string_utils.h"
 
 ////////////////////////////////////////////////////////////////////
 //     Function: ConfigDeclaration::Constructor
@@ -93,11 +93,7 @@ set_string_word(int n, const string &value) {
 ////////////////////////////////////////////////////////////////////
 void ConfigDeclaration::
 set_bool_word(int n, bool value) {
-  if (value) {
-    set_string_word(n, "1");
-  } else {
-    set_string_word(n, "0");
-  }
+  set_string_word(n, value ? "1" : "0");
 
   _words[n]._flags |= (F_checked_bool | F_valid_bool);
   _words[n]._bool = value;
@@ -112,9 +108,7 @@ set_bool_word(int n, bool value) {
 ////////////////////////////////////////////////////////////////////
 void ConfigDeclaration::
 set_int_word(int n, int value) {
-  ostringstream strm;
-  strm << value;
-  set_string_word(n, strm.str());
+  set_string_word(n, format_string(value));
 
   _words[n]._flags |= (F_checked_int | F_valid_int);
   _words[n]._int = value;
@@ -129,9 +123,7 @@ set_int_word(int n, int value) {
 ////////////////////////////////////////////////////////////////////
 void ConfigDeclaration::
 set_int64_word(int n, PN_int64 value) {
-  ostringstream strm;
-  strm << value;
-  set_string_word(n, strm.str());
+  set_string_word(n, format_string(value));
 
   _words[n]._flags |= (F_checked_int64 | F_valid_int64);
   _words[n]._int_64 = value;
@@ -146,9 +138,7 @@ set_int64_word(int n, PN_int64 value) {
 ////////////////////////////////////////////////////////////////////
 void ConfigDeclaration::
 set_double_word(int n, double value) {
-  ostringstream strm;
-  strm << value;
-  set_string_word(n, strm.str());
+  set_string_word(n, format_string(value));
 
   _words[n]._flags |= (F_checked_double | F_valid_double);
   _words[n]._double = value;

+ 2 - 4
dtool/src/prc/configVariableDouble.cxx

@@ -13,6 +13,7 @@
 ////////////////////////////////////////////////////////////////////
 
 #include "configVariableDouble.h"
+#include "string_utils.h"
 
 ////////////////////////////////////////////////////////////////////
 //     Function: ConfigVariableDouble::set_default_value
@@ -21,8 +22,5 @@
 ////////////////////////////////////////////////////////////////////
 void ConfigVariableDouble::
 set_default_value(double default_value) {
-  ostringstream strm;
-  strm << default_value;
-
-  _core->set_default_value(strm.str());
+  _core->set_default_value(format_string(default_value));
 }

+ 2 - 4
dtool/src/prc/configVariableInt.cxx

@@ -13,6 +13,7 @@
 ////////////////////////////////////////////////////////////////////
 
 #include "configVariableInt.h"
+#include "string_utils.h"
 
 ////////////////////////////////////////////////////////////////////
 //     Function: ConfigVariableInt::set_default_value
@@ -21,8 +22,5 @@
 ////////////////////////////////////////////////////////////////////
 void ConfigVariableInt::
 set_default_value(int default_value) {
-  ostringstream strm;
-  strm << default_value;
-
-  _core->set_default_value(strm.str());
+  _core->set_default_value(format_string(default_value));
 }

+ 2 - 4
dtool/src/prc/configVariableInt64.cxx

@@ -13,6 +13,7 @@
 ////////////////////////////////////////////////////////////////////
 
 #include "configVariableInt64.h"
+#include "string_utils.h"
 
 ////////////////////////////////////////////////////////////////////
 //     Function: ConfigVariableInt64::set_default_value
@@ -21,8 +22,5 @@
 ////////////////////////////////////////////////////////////////////
 void ConfigVariableInt64::
 set_default_value(PN_int64 default_value) {
-  ostringstream strm;
-  strm << default_value;
-
-  _core->set_default_value(strm.str());
+  _core->set_default_value(format_string(default_value));
 }

+ 4 - 8
panda/src/gobj/texture.cxx

@@ -8146,9 +8146,7 @@ do_fillin_body(CData *cdata, DatagramIterator &scan, BamReader *manager) {
 
     size_t u_size = scan.get_uint32();
     PTA_uchar image = PTA_uchar::empty_array(u_size, get_class_type());
-    for (size_t u_idx = 0; u_idx < u_size; ++u_idx) {
-      image[(int)u_idx] = scan.get_uint8();
-    }
+    scan.extract_bytes(image.p(), u_size);
 
     cdata->_simple_ram_image._image = image;
     cdata->_simple_ram_image._page_size = u_size;
@@ -8201,13 +8199,11 @@ do_fillin_rawdata(CData *cdata, DatagramIterator &scan, BamReader *manager) {
       cdata->_ram_images[n]._page_size = scan.get_uint32();
     }
 
-    size_t u_size = scan.get_uint32();
-
     // fill the cdata->_image buffer with image data
+    size_t u_size = scan.get_uint32();
     PTA_uchar image = PTA_uchar::empty_array(u_size, get_class_type());
-    for (size_t u_idx = 0; u_idx < u_size; ++u_idx) {
-      image[(int)u_idx] = scan.get_uint8();
-    }
+    scan.extract_bytes(image.p(), u_size);
+
     cdata->_ram_images[n]._image = image;
   }
   cdata->_loaded_from_image = true;

+ 17 - 3
panda/src/linmath/configVariableColor.cxx

@@ -13,6 +13,7 @@
 ////////////////////////////////////////////////////////////////////
 
 #include "configVariableColor.h"
+#include "pdtoa.h"
 
 ////////////////////////////////////////////////////////////////////
 //     Function: ConfigVariableColor::set_default_value
@@ -21,8 +22,21 @@
 ////////////////////////////////////////////////////////////////////
 void ConfigVariableColor::
 set_default_value(const LColor &default_value) {
-  ostringstream strm;
-  strm << default_value;
+  char buffer[128];
+  char *p = buffer;
+  pdtoa(default_value[0], p);
 
-  _core->set_default_value(strm.str());
+  p += strlen(p);
+  *p++ = ' ';
+  pdtoa(default_value[1], p);
+
+  p += strlen(p);
+  *p++ = ' ';
+  pdtoa(default_value[2], p);
+
+  p += strlen(p);
+  *p++ = ' ';
+  pdtoa(default_value[3], p);
+
+  _core->set_default_value(buffer);
 }

+ 5 - 0
panda/src/linmath/dblnames.h

@@ -38,8 +38,13 @@
 #undef FLOATTOKEN
 #undef FLOATCONST
 #undef FLOATTYPE_IS_INT
+#undef STRINGIFY
+#undef FLOATNAME_STR
 
 #define FLOATTYPE double
 #define FLOATNAME(ARG) ARG##d
 #define FLOATTOKEN 'd'
 #define FLOATCONST(ARG) ARG
+
+#define STRINGIFY(ARG) #ARG
+#define FLOATNAME_STR(ARG) STRINGIFY(ARG##d)

+ 5 - 0
panda/src/linmath/fltnames.h

@@ -38,8 +38,13 @@
 #undef FLOATTOKEN
 #undef FLOATCONST
 #undef FLOATTYPE_IS_INT
+#undef STRINGIFY
+#undef FLOATNAME_STR
 
 #define FLOATTYPE float
 #define FLOATNAME(ARG) ARG##f
 #define FLOATTOKEN 'f'
 #define FLOATCONST(ARG) ARG##f
+
+#define STRINGIFY(ARG) #ARG
+#define FLOATNAME_STR(ARG) STRINGIFY(ARG##f)

+ 5 - 0
panda/src/linmath/intnames.h

@@ -38,9 +38,14 @@
 #undef FLOATTOKEN
 #undef FLOATCONST
 #undef FLOATTYPE_IS_INT
+#undef STRINGIFY
+#undef FLOATNAME_STR
 
 #define FLOATTYPE int
 #define FLOATNAME(ARG) ARG##i
 #define FLOATTOKEN 'i'
 #define FLOATCONST(ARG) ARG
 #define FLOATTYPE_IS_INT
+
+#define STRINGIFY(ARG) #ARG
+#define FLOATNAME_STR(ARG) STRINGIFY(ARG##i)

+ 1 - 3
panda/src/linmath/lmatrix3_src.cxx

@@ -512,8 +512,6 @@ void FLOATNAME(LMatrix3)::
 init_type() {
   if (_type_handle == TypeHandle::none()) {
     // Format a string to describe the type.
-    string name = "LMatrix3";
-    name += FLOATTOKEN;
-    register_type(_type_handle, name);
+    register_type(_type_handle, FLOATNAME_STR(LMatrix3));
   }
 }

+ 2 - 6
panda/src/linmath/lmatrix4_src.cxx

@@ -593,9 +593,7 @@ void FLOATNAME(LMatrix4)::
 init_type() {
   if (_type_handle == TypeHandle::none()) {
     // Format a string to describe the type.
-    string name = "LMatrix4";
-    name += FLOATTOKEN;
-    register_type(_type_handle, name);
+    register_type(_type_handle, FLOATNAME_STR(LMatrix4));
   }
 }
 
@@ -608,8 +606,6 @@ void FLOATNAME(UnalignedLMatrix4)::
 init_type() {
   if (_type_handle == TypeHandle::none()) {
     // Format a string to describe the type.
-    string name = "UnalignedLMatrix4";
-    name += FLOATTOKEN;
-    register_type(_type_handle, name);
+    register_type(_type_handle, FLOATNAME_STR(UnalignedLMatrix4));
   }
 }

+ 1 - 3
panda/src/linmath/lorientation_src.cxx

@@ -23,9 +23,7 @@ void FLOATNAME(LOrientation)::
 init_type() {
   if (_type_handle == TypeHandle::none()) {
     FLOATNAME(LQuaternion)::init_type();
-    string name = "LOrientation";
-    name += FLOATTOKEN;
-    register_type(_type_handle, name,
+    register_type(_type_handle, FLOATNAME_STR(LOrientation),
                   FLOATNAME(LQuaternion)::get_class_type());
   }
 }

+ 1 - 3
panda/src/linmath/lpoint2_src.cxx

@@ -23,9 +23,7 @@ void FLOATNAME(LPoint2)::
 init_type() {
   if (_type_handle == TypeHandle::none()) {
     FLOATNAME(LVecBase2)::init_type();
-    string name = "LPoint2";
-    name += FLOATTOKEN;
-    register_type(_type_handle, name,
+    register_type(_type_handle, FLOATNAME_STR(LPoint2),
                   FLOATNAME(LVecBase2)::get_class_type());
   }
 }

+ 1 - 3
panda/src/linmath/lpoint3_src.cxx

@@ -23,9 +23,7 @@ void FLOATNAME(LPoint3)::
 init_type() {
   if (_type_handle == TypeHandle::none()) {
     FLOATNAME(LVecBase3)::init_type();
-    string name = "LPoint3";
-    name += FLOATTOKEN;
-    register_type(_type_handle, name,
+    register_type(_type_handle, FLOATNAME_STR(LPoint3),
                   FLOATNAME(LVecBase3)::get_class_type());
   }
 }

+ 1 - 3
panda/src/linmath/lpoint4_src.cxx

@@ -23,9 +23,7 @@ void FLOATNAME(LPoint4)::
 init_type() {
   if (_type_handle == TypeHandle::none()) {
     FLOATNAME(LVecBase4)::init_type();
-    string name = "LPoint4";
-    name += FLOATTOKEN;
-    register_type(_type_handle, name,
+    register_type(_type_handle, FLOATNAME_STR(LPoint4),
                   FLOATNAME(LVecBase4)::get_class_type());
   }
 }

+ 1 - 3
panda/src/linmath/lquaternion_src.cxx

@@ -323,9 +323,7 @@ void FLOATNAME(LQuaternion)::
 init_type() {
   if (_type_handle == TypeHandle::none()) {
     FLOATNAME(LVecBase4)::init_type();
-    string name = "LQuaternion";
-    name += FLOATTOKEN;
-    register_type(_type_handle, name,
+    register_type(_type_handle, FLOATNAME_STR(LQuaternion),
                   FLOATNAME(LVecBase4)::get_class_type());
   }
 }

+ 1 - 3
panda/src/linmath/lrotation_src.cxx

@@ -24,9 +24,7 @@ init_type() {
   if (_type_handle == TypeHandle::none()) {
     FLOATNAME(LQuaternion)::init_type();
     // Format a string to describe the type.
-    string name = "LRotation";
-    name += FLOATTOKEN;
-    register_type(_type_handle, name,
+    register_type(_type_handle, FLOATNAME_STR(LRotation),
                   FLOATNAME(LQuaternion)::get_class_type());
   }
 }

+ 1 - 3
panda/src/linmath/lvecBase2_src.cxx

@@ -30,8 +30,6 @@ void FLOATNAME(LVecBase2)::
 init_type() {
   if (_type_handle == TypeHandle::none()) {
     // Format a string to describe the type.
-    string name = "LVecBase2";
-    name += FLOATTOKEN;
-    register_type(_type_handle, name);
+    register_type(_type_handle, FLOATNAME_STR(LVecBase2));
   }
 }

+ 1 - 3
panda/src/linmath/lvecBase3_src.cxx

@@ -33,9 +33,7 @@ void FLOATNAME(LVecBase3)::
 init_type() {
   if (_type_handle == TypeHandle::none()) {
     // Format a string to describe the type.
-    string name = "LVecBase3";
-    name += FLOATTOKEN;
-    register_type(_type_handle, name);
+    register_type(_type_handle, FLOATNAME_STR(LVecBase3));
   }
 }
 

+ 2 - 6
panda/src/linmath/lvecBase4_src.cxx

@@ -35,9 +35,7 @@ void FLOATNAME(LVecBase4)::
 init_type() {
   if (_type_handle == TypeHandle::none()) {
     // Format a string to describe the type.
-    string name = "LVecBase4";
-    name += FLOATTOKEN;
-    register_type(_type_handle, name);
+    register_type(_type_handle, FLOATNAME_STR(LVecBase4));
   }
 }
 
@@ -51,9 +49,7 @@ void FLOATNAME(UnalignedLVecBase4)::
 init_type() {
   if (_type_handle == TypeHandle::none()) {
     // Format a string to describe the type.
-    string name = "UnalignedLVecBase4";
-    name += FLOATTOKEN;
-    register_type(_type_handle, name);
+    register_type(_type_handle, FLOATNAME_STR(UnalignedLVecBase4));
   }
 }
 

+ 1 - 3
panda/src/linmath/lvector2_src.cxx

@@ -23,9 +23,7 @@ void FLOATNAME(LVector2)::
 init_type() {
   if (_type_handle == TypeHandle::none()) {
     FLOATNAME(LVecBase2)::init_type();
-    string name = "LVector2";
-    name += FLOATTOKEN;
-    register_type(_type_handle, name,
+    register_type(_type_handle, FLOATNAME_STR(LVector2),
                   FLOATNAME(LVecBase2)::get_class_type());
   }
 }

+ 1 - 3
panda/src/linmath/lvector3_src.cxx

@@ -23,9 +23,7 @@ void FLOATNAME(LVector3)::
 init_type() {
   if (_type_handle == TypeHandle::none()) {
     FLOATNAME(LVecBase3)::init_type();
-    string name = "LVector3";
-    name += FLOATTOKEN;
-    register_type(_type_handle, name,
+    register_type(_type_handle, FLOATNAME_STR(LVector3),
                   FLOATNAME(LVecBase3)::get_class_type());
   }
 }

+ 1 - 3
panda/src/linmath/lvector4_src.cxx

@@ -23,9 +23,7 @@ void FLOATNAME(LVector4)::
 init_type() {
   if (_type_handle == TypeHandle::none()) {
     FLOATNAME(LVecBase4)::init_type();
-    string name = "LVector4";
-    name += FLOATTOKEN;
-    register_type(_type_handle, name,
+    register_type(_type_handle, FLOATNAME_STR(LVector4),
                   FLOATNAME(LVecBase4)::get_class_type());
   }
 }

+ 17 - 0
panda/src/pstatclient/pStatClient.cxx

@@ -879,6 +879,10 @@ stop(int collector_index, int thread_index, double as_of) {
 ////////////////////////////////////////////////////////////////////
 void PStatClient::
 clear_level(int collector_index, int thread_index) {
+  if (!client_is_connected()) {
+    return;
+  }
+
 #ifdef _DEBUG
   nassertv(collector_index >= 0 && collector_index < AtomicAdjust::get(_num_collectors));
   nassertv(thread_index >= 0 && thread_index < AtomicAdjust::get(_num_threads));
@@ -903,6 +907,10 @@ clear_level(int collector_index, int thread_index) {
 ////////////////////////////////////////////////////////////////////
 void PStatClient::
 set_level(int collector_index, int thread_index, double level) {
+  if (!client_is_connected()) {
+    return;
+  }
+
 #ifdef _DEBUG
   nassertv(collector_index >= 0 && collector_index < AtomicAdjust::get(_num_collectors));
   nassertv(thread_index >= 0 && thread_index < AtomicAdjust::get(_num_threads));
@@ -936,6 +944,10 @@ set_level(int collector_index, int thread_index, double level) {
 ////////////////////////////////////////////////////////////////////
 void PStatClient::
 add_level(int collector_index, int thread_index, double increment) {
+  if (!client_is_connected()) {
+    return;
+  }
+
 #ifdef _DEBUG
   nassertv(collector_index >= 0 && collector_index < AtomicAdjust::get(_num_collectors));
   nassertv(thread_index >= 0 && thread_index < AtomicAdjust::get(_num_threads));
@@ -955,12 +967,17 @@ add_level(int collector_index, int thread_index, double increment) {
 //     Function: PStatClient::get_level
 //       Access: Private
 //  Description: Returns the current level value of the given collector.
+//               Returns 0.0 if the pstats client is not connected.
 //
 //               Normally you would not use this interface directly;
 //               instead, call PStatCollector::get_level().
 ////////////////////////////////////////////////////////////////////
 double PStatClient::
 get_level(int collector_index, int thread_index) const {
+  if (!client_is_connected()) {
+    return 0.0;
+  }
+
 #ifdef _DEBUG
   nassertr(collector_index >= 0 && collector_index < AtomicAdjust::get(_num_collectors), 0.0f);
   nassertr(thread_index >= 0 && thread_index < AtomicAdjust::get(_num_threads), 0.0f);