// gpu_profiler.h
  1. /*
  2. * Copyright (c) 2019 ARM Limited.
  3. *
  4. * SPDX-License-Identifier: MIT
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a copy
  7. * of this software and associated documentation files (the "Software"), to
  8. * deal in the Software without restriction, including without limitation the
  9. * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  10. * sell copies of the Software, and to permit persons to whom the Software is
  11. * furnished to do so, subject to the following conditions:
  12. *
  13. * The above copyright notice and this permission notice shall be included in all
  14. * copies or substantial portions of the Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22. * SOFTWARE.
  23. */
  24. #pragma once
  25. #include "value.h"
  26. #include <string>
  27. #include <unordered_map>
  28. #include <unordered_set>
  29. namespace hwcpipe
  30. {
  31. // The available GPU counters. Profiler implementations will support a subset of them.
  32. enum class GpuCounter
  33. {
  34. GpuCycles,
  35. VertexComputeCycles,
  36. FragmentCycles,
  37. TilerCycles,
  38. VertexComputeJobs,
  39. FragmentJobs,
  40. Pixels,
  41. Tiles,
  42. TransactionEliminations,
  43. EarlyZTests,
  44. EarlyZKilled,
  45. LateZTests,
  46. LateZKilled,
  47. Instructions,
  48. DivergedInstructions,
  49. ShaderCycles,
  50. ShaderArithmeticCycles,
  51. ShaderLoadStoreCycles,
  52. ShaderTextureCycles,
  53. CacheReadLookups,
  54. CacheWriteLookups,
  55. ExternalMemoryReadAccesses,
  56. ExternalMemoryWriteAccesses,
  57. ExternalMemoryReadStalls,
  58. ExternalMemoryWriteStalls,
  59. ExternalMemoryReadBytes,
  60. ExternalMemoryWriteBytes,
  61. MaxValue
  62. };
  63. // Mapping from GPU counter names to enum values. Used for JSON initialization.
  64. const std::unordered_map<std::string, GpuCounter> gpu_counter_names{
  65. {"GpuCycles", GpuCounter::GpuCycles},
  66. {"VertexComputeCycles", GpuCounter::VertexComputeCycles},
  67. {"FragmentCycles", GpuCounter::FragmentCycles},
  68. {"TilerCycles", GpuCounter::TilerCycles},
  69. {"VertexComputeJobs", GpuCounter::VertexComputeJobs},
  70. {"Tiles", GpuCounter::Tiles},
  71. {"TransactionEliminations", GpuCounter::TransactionEliminations},
  72. {"FragmentJobs", GpuCounter::FragmentJobs},
  73. {"Pixels", GpuCounter::Pixels},
  74. {"EarlyZTests", GpuCounter::EarlyZTests},
  75. {"EarlyZKilled", GpuCounter::EarlyZKilled},
  76. {"LateZTests", GpuCounter::LateZTests},
  77. {"LateZKilled", GpuCounter::LateZKilled},
  78. {"Instructions", GpuCounter::Instructions},
  79. {"DivergedInstructions", GpuCounter::DivergedInstructions},
  80. {"ShaderCycles", GpuCounter::ShaderCycles},
  81. {"ShaderArithmeticCycles", GpuCounter::ShaderArithmeticCycles},
  82. {"ShaderLoadStoreCycles", GpuCounter::ShaderLoadStoreCycles},
  83. {"ShaderTextureCycles", GpuCounter::ShaderTextureCycles},
  84. {"CacheReadLookups", GpuCounter::CacheReadLookups},
  85. {"CacheWriteLookups", GpuCounter::CacheWriteLookups},
  86. {"ExternalMemoryReadAccesses", GpuCounter::ExternalMemoryReadAccesses},
  87. {"ExternalMemoryWriteAccesses", GpuCounter::ExternalMemoryWriteAccesses},
  88. {"ExternalMemoryReadStalls", GpuCounter::ExternalMemoryReadStalls},
  89. {"ExternalMemoryWriteStalls", GpuCounter::ExternalMemoryWriteStalls},
  90. {"ExternalMemoryReadBytes", GpuCounter::ExternalMemoryReadBytes},
  91. {"ExternalMemoryWriteBytes", GpuCounter::ExternalMemoryWriteBytes},
  92. };
  93. // A hash function for GpuCounter values
  94. struct GpuCounterHash
  95. {
  96. template <typename T>
  97. std::size_t operator()(T t) const
  98. {
  99. return static_cast<std::size_t>(t);
  100. }
  101. };
  102. struct GpuCounterInfo
  103. {
  104. std::string desc;
  105. std::string unit;
  106. };
  107. // Mapping from each counter to its corresponding information (description and unit)
  108. const std::unordered_map<GpuCounter, GpuCounterInfo, GpuCounterHash> gpu_counter_info{
  109. {GpuCounter::GpuCycles, {"Number of GPU cycles", "cycles"}},
  110. {GpuCounter::VertexComputeCycles, {"Number of vertex/compute cycles", "cycles"}},
  111. {GpuCounter::FragmentCycles, {"Number of fragment cycles", "cycles"}},
  112. {GpuCounter::TilerCycles, {"Number of tiler cycles", "cycles"}},
  113. {GpuCounter::VertexComputeJobs, {"Number of vertex/compute jobs", "jobs"}},
  114. {GpuCounter::Tiles, {"Number of physical tiles written", "tiles"}},
  115. {GpuCounter::TransactionEliminations, {"Number of transaction eliminations", "tiles"}},
  116. {GpuCounter::FragmentJobs, {"Number of fragment jobs", "jobs"}},
  117. {GpuCounter::Pixels, {"Number of pixels shaded", "cycles"}},
  118. {GpuCounter::EarlyZTests, {"Early-Z tests performed", "tests"}},
  119. {GpuCounter::EarlyZKilled, {"Early-Z tests resulting in a kill", "tests"}},
  120. {GpuCounter::LateZTests, {"Late-Z tests performed", "tests"}},
  121. {GpuCounter::LateZKilled, {"Late-Z tests resulting in a kill", "tests"}},
  122. {GpuCounter::Instructions, {"Number of shader instructions", "instructions"}},
  123. {GpuCounter::DivergedInstructions, {"Number of diverged shader instructions", "instructions"}},
  124. {GpuCounter::ShaderCycles, {"Shader total cycles", "cycles"}},
  125. {GpuCounter::ShaderArithmeticCycles, {"Shader arithmetic cycles", "cycles"}},
  126. {GpuCounter::ShaderLoadStoreCycles, {"Shader load/store cycles", "cycles"}},
  127. {GpuCounter::ShaderTextureCycles, {"Shader texture cycles", "cycles"}},
  128. {GpuCounter::CacheReadLookups, {"Cache read lookups", "lookups"}},
  129. {GpuCounter::CacheWriteLookups, {"Cache write lookups", "lookups"}},
  130. {GpuCounter::ExternalMemoryReadAccesses, {"Reads from external memory", "accesses"}},
  131. {GpuCounter::ExternalMemoryWriteAccesses, {"Writes to external memory", "accesses"}},
  132. {GpuCounter::ExternalMemoryReadStalls, {"Stalls when reading from external memory", "stalls"}},
  133. {GpuCounter::ExternalMemoryWriteStalls, {"Stalls when writing to external memory", "stalls"}},
  134. {GpuCounter::ExternalMemoryReadBytes, {"Bytes read to external memory", "B"}},
  135. {GpuCounter::ExternalMemoryWriteBytes, {"Bytes written to external memory", "B"}},
  136. };
  137. typedef std::unordered_set<GpuCounter, GpuCounterHash> GpuCounterSet;
  138. typedef std::unordered_map<GpuCounter, Value, GpuCounterHash> GpuMeasurements;
  139. /** An interface for classes that collect GPU performance data. */
  140. class GpuProfiler
  141. {
  142. public:
  143. virtual ~GpuProfiler() = default;
  144. // Returns the enabled counters
  145. virtual const GpuCounterSet &enabled_counters() const = 0;
  146. // Returns the counters that the platform supports
  147. virtual const GpuCounterSet &supported_counters() const = 0;
  148. // Sets the enabled counters after initialization
  149. virtual void set_enabled_counters(GpuCounterSet counters) = 0;
  150. // Starts a profiling session
  151. virtual void run() = 0;
  152. // Sample the counters. Returns a map of measurements for the counters
  153. // that are both available and enabled.
  154. // A profiling session must be running when sampling the counters.
  155. virtual const GpuMeasurements &sample() = 0;
  156. // Stops the active profiling session
  157. virtual void stop() = 0;
  158. };
  159. } // namespace hwcpipe