浏览代码

pstats: Optimize PStatFrameData (de-)serialization

This can be slow (hundreds of us, up to multiple ms) with large amounts of data points, this makes it an order of magnitude more efficient
rdb 3 年之前
父节点
当前提交
3254c6d329
共有 1 个文件被更改,包括 70 次插入22 次删除
  1. 70 22
      panda/src/pstatclient/pStatFrameData.cxx

+ 70 - 22
panda/src/pstatclient/pStatFrameData.cxx

@@ -34,7 +34,6 @@ sort_time() {
  */
 bool PStatFrameData::
 write_datagram(Datagram &destination, PStatClient *client) const {
-  Data::const_iterator di;
   if (_time_data.size() >= 65536 || _level_data.size() >= 65536) {
     pstats_cat.info()
       << "Dropping frame with " << _time_data.size()
@@ -43,16 +42,62 @@ write_datagram(Datagram &destination, PStatClient *client) const {
     return false;
   }
 
+#if !defined(WORDS_BIGENDIAN) || defined(__GNUC__)
+  // Hand-roll this, significantly more efficient for many data points
+  size_t size = (_time_data.size() + _level_data.size()) * 6 + 4;
+  PTA_uchar array = destination.modify_array();
+  size_t offset = array.size();
+  array.resize(offset + size);
+  unsigned char *data = &array[0] + offset;
+
+  uint16_t *ptr = (uint16_t *)data;
+
+#ifdef WORDS_BIGENDIAN
+  *ptr++ = __builtin_bswap16(_time_data.size());
+
+  for (const DataPoint &dp : _time_data) {
+    *ptr++ = __builtin_bswap16(dp._index);
+    PN_float32 v = (PN_float32)dp._value;
+    *(uint32_t *)ptr = __builtin_bswap32(reinterpret_cast<uint32_t &>(v));
+    ptr += 2;
+  }
+
+  *ptr++ = __builtin_bswap16(_level_data.size());
+  for (const DataPoint &dp : _level_data) {
+    *ptr++ = __builtin_bswap16(dp._index);
+    PN_float32 v = (PN_float32)dp._value;
+    *(uint32_t *)ptr = __builtin_bswap32(reinterpret_cast<uint32_t &>(v));
+    ptr += 2;
+  }
+#else
+  *ptr++ = _time_data.size();
+
+  for (const DataPoint &dp : _time_data) {
+    *ptr++ = dp._index;
+    *(PN_float32 *)ptr = dp._value;
+    ptr += 2;
+  }
+
+  *ptr++ = _level_data.size();
+  for (const DataPoint &dp : _level_data) {
+    *ptr++ = dp._index;
+    *(PN_float32 *)ptr = dp._value;
+    ptr += 2;
+  }
+#endif
+
+#else
   destination.add_uint16(_time_data.size());
-  for (di = _time_data.begin(); di != _time_data.end(); ++di) {
-    destination.add_uint16((*di)._index);
-    destination.add_float32((*di)._value);
+  for (const DataPoint &dp : _time_data) {
+    destination.add_uint16(dp._index);
+    destination.add_float32(dp._value);
   }
   destination.add_uint16(_level_data.size());
-  for (di = _level_data.begin(); di != _level_data.end(); ++di) {
-    destination.add_uint16((*di)._index);
-    destination.add_float32((*di)._value);
+  for (const DataPoint &dp : _level_data) {
+    destination.add_uint16(dp._index);
+    destination.add_float32(dp._value);
   }
+#endif
 
   return true;
 }
@@ -64,22 +109,25 @@ void PStatFrameData::
 read_datagram(DatagramIterator &source, PStatClientVersion *) {
   clear();
 
-  int i;
-  int time_size = source.get_uint16();
-  for (i = 0; i < time_size; i++) {
-    nassertv(source.get_remaining_size() > 0);
-    DataPoint dp;
-    dp._index = source.get_uint16();
-    dp._value = source.get_float32();
-    _time_data.push_back(dp);
+  {
+    size_t time_size = source.get_uint16();
+    _time_data.resize(time_size);
+    for (DataPoint &dp : _time_data) {
+      nassertv(source.get_remaining_size() > 0);
+      dp._index = source.get_uint16();
+      dp._value = source.get_float32();
+    }
   }
-  int level_size = source.get_uint16();
-  for (i = 0; i < level_size; i++) {
-    nassertv(source.get_remaining_size() > 0);
-    DataPoint dp;
-    dp._index = source.get_uint16();
-    dp._value = source.get_float32();
-    _level_data.push_back(dp);
+
+  {
+    size_t level_size = source.get_uint16();
+    _level_data.resize(level_size);
+    for (DataPoint &dp : _level_data) {
+      nassertv(source.get_remaining_size() > 0);
+      dp._index = source.get_uint16();
+      dp._value = source.get_float32();
+    }
   }
+
   nassertv(source.get_remaining_size() == 0);
 }