profile.h 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "default.h"
  5. namespace embree
  6. {
  7. /*! helper structure for the implementation of the profile functions below */
  8. struct ProfileTimer
  9. {
  10. static const size_t N = 20;
  11. ProfileTimer () {}
  12. ProfileTimer (const size_t numSkip) : i(0), j(0), maxJ(0), numSkip(numSkip), t0(0)
  13. {
  14. for (size_t i=0; i<N; i++) names[i] = nullptr;
  15. for (size_t i=0; i<N; i++) dt_fst[i] = 0.0;
  16. for (size_t i=0; i<N; i++) dt_min[i] = pos_inf;
  17. for (size_t i=0; i<N; i++) dt_avg[i] = 0.0;
  18. for (size_t i=0; i<N; i++) dt_max[i] = neg_inf;
  19. }
  20. __forceinline void begin()
  21. {
  22. j=0;
  23. t0 = tj = getSeconds();
  24. }
  25. __forceinline void end() {
  26. absolute("total");
  27. i++;
  28. }
  29. __forceinline void operator() (const char* name) {
  30. relative(name);
  31. }
  32. __forceinline void absolute (const char* name)
  33. {
  34. const double t1 = getSeconds();
  35. const double dt = t1-t0;
  36. assert(names[j] == nullptr || names[j] == name);
  37. names[j] = name;
  38. if (i == 0) dt_fst[j] = dt;
  39. if (i>=numSkip) {
  40. dt_min[j] = min(dt_min[j],dt);
  41. dt_avg[j] = dt_avg[j] + dt;
  42. dt_max[j] = max(dt_max[j],dt);
  43. }
  44. j++;
  45. maxJ = max(maxJ,j);
  46. }
  47. __forceinline void relative (const char* name)
  48. {
  49. const double t1 = getSeconds();
  50. const double dt = t1-tj;
  51. tj = t1;
  52. assert(names[j] == nullptr || names[j] == name);
  53. names[j] = name;
  54. if (i == 0) dt_fst[j] = dt;
  55. if (i>=numSkip) {
  56. dt_min[j] = min(dt_min[j],dt);
  57. dt_avg[j] = dt_avg[j] + dt;
  58. dt_max[j] = max(dt_max[j],dt);
  59. }
  60. j++;
  61. maxJ = max(maxJ,j);
  62. }
  63. void print(size_t numElements)
  64. {
  65. for (size_t k=0; k<N; k++)
  66. dt_avg[k] /= double(i-numSkip);
  67. printf(" profile [M/s]:\n");
  68. for (size_t j=0; j<maxJ; j++)
  69. printf("%20s: fst = %7.2f M/s, min = %7.2f M/s, avg = %7.2f M/s, max = %7.2f M/s\n",
  70. names[j],numElements/dt_fst[j]*1E-6,numElements/dt_max[j]*1E-6,numElements/dt_avg[j]*1E-6,numElements/dt_min[j]*1E-6);
  71. printf(" profile [ms]:\n");
  72. for (size_t j=0; j<maxJ; j++)
  73. printf("%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n",
  74. names[j],1000.0*dt_fst[j],1000.0*dt_min[j],1000.0*dt_avg[j],1000.0*dt_max[j]);
  75. }
  76. void print()
  77. {
  78. printf(" profile:\n");
  79. for (size_t k=0; k<N; k++)
  80. dt_avg[k] /= double(i-numSkip);
  81. for (size_t j=0; j<maxJ; j++) {
  82. printf("%20s: fst = %7.2f ms, min = %7.2f ms, avg = %7.2f ms, max = %7.2fms\n",
  83. names[j],1000.0*dt_fst[j],1000.0*dt_min[j],1000.0*dt_avg[j],1000.0*dt_max[j]);
  84. }
  85. }
  86. double avg() {
  87. return dt_avg[maxJ-1]/double(i-numSkip);
  88. }
  89. private:
  90. size_t i;
  91. size_t j;
  92. size_t maxJ;
  93. size_t numSkip;
  94. double t0;
  95. double tj;
  96. const char* names[N];
  97. double dt_fst[N];
  98. double dt_min[N];
  99. double dt_avg[N];
  100. double dt_max[N];
  101. };
  102. /*! This function executes some code block multiple times and measured sections of it.
  103. Use the following way:
  104. profile(1,10,1000,[&](ProfileTimer& timer) {
  105. // code
  106. timer("A");
  107. // code
  108. timer("B");
  109. });
  110. */
  111. template<typename Closure>
  112. void profile(const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure)
  113. {
  114. ProfileTimer timer(numSkip);
  115. for (size_t i=0; i<numSkip+numIter; i++)
  116. {
  117. timer.begin();
  118. closure(timer);
  119. timer.end();
  120. }
  121. timer.print(numElements);
  122. }
  123. /*! similar as the function above, but the timer object comes externally */
  124. template<typename Closure>
  125. void profile(ProfileTimer& timer, const size_t numSkip, const size_t numIter, const size_t numElements, const Closure& closure)
  126. {
  127. timer = ProfileTimer(numSkip);
  128. for (size_t i=0; i<numSkip+numIter; i++)
  129. {
  130. timer.begin();
  131. closure(timer);
  132. timer.end();
  133. }
  134. timer.print(numElements);
  135. }
  136. }