| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509 |
- #ifdef _WIN32
- #include <assert.h>
- #include <d3d11.h>
- #include <d3dcompiler.h>
- #include <stdio.h>
- #include <cassert>
- #include <cmath>
- #include <algorithm>
- #include <vector>
- #include "../src/meshoptimizer.h"
- #include "fast_obj.h"
- #pragma comment(lib, "d3d11.lib")
- #pragma comment(lib, "d3dcompiler.lib")
- #pragma comment(lib, "dxgi.lib")
// Appends triangle-list indices covering the cells [x0, x1) x [y0, y1) of a regular
// grid whose rows contain (width + 1) vertices each.
//
// When `prefetch` is set, one degenerate triangle (x, x, x + 1) per column is emitted
// before the real geometry; these use the raw column numbers, i.e. vertices of row 0.
// Each cell is then emitted as two triangles, row by row, left to right.
void stripGen(std::vector<unsigned int>& indices, int x0, int x1, int y0, int y1, int width, bool prefetch)
{
    if (prefetch)
    {
        for (int col = x0; col < x1; ++col)
        {
            const int degenerate[3] = {col, col, col + 1};

            for (int k = 0; k < 3; ++k)
                indices.push_back(static_cast<unsigned int>(degenerate[k]));
        }
    }

    const int stride = width + 1; // vertices per grid row

    for (int row = y0; row < y1; ++row)
    {
        const int top = stride * row;
        const int bottom = stride * (row + 1);

        for (int col = x0; col < x1; ++col)
        {
            // two triangles per cell: (tl, bl, tr) and (tr, bl, br)
            const int quad[6] = {
                top + col, bottom + col, top + col + 1,
                top + col + 1, bottom + col, bottom + col + 1};

            for (int k = 0; k < 6; ++k)
                indices.push_back(static_cast<unsigned int>(quad[k]));
        }
    }
}
- void gridGen(std::vector<unsigned int>& indices, int x0, int x1, int y0, int y1, int width, int cacheSize, bool prefetch)
- {
- if (x1 - x0 + 1 < cacheSize)
- {
- bool prefetchStrip = 2 * (x1 - x0) + 1 > cacheSize && prefetch;
- stripGen(indices, x0, x1, y0, y1, width, prefetchStrip);
- }
- else
- {
- int xm = x0 + cacheSize - 2;
- gridGen(indices, x0, xm, y0, y1, width, cacheSize, prefetch);
- gridGen(indices, xm, x1, y0, y1, width, cacheSize, prefetch);
- }
- }
- unsigned int queryVSInvocations(ID3D11Device* device, ID3D11DeviceContext* context, const unsigned int* indices, size_t index_count)
- {
- if (index_count == 0)
- return 0;
- ID3D11Buffer* ib = 0;
- {
- D3D11_BUFFER_DESC bd = {};
- bd.Usage = D3D11_USAGE_DYNAMIC;
- bd.ByteWidth = index_count * 4;
- bd.BindFlags = D3D11_BIND_INDEX_BUFFER;
- bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
- device->CreateBuffer(&bd, 0, &ib);
- D3D11_MAPPED_SUBRESOURCE ms;
- context->Map(ib, 0, D3D11_MAP_WRITE_DISCARD, 0, &ms);
- memcpy(ms.pData, indices, index_count * 4);
- context->Unmap(ib, 0);
- }
- context->IASetPrimitiveTopology(D3D10_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
- context->IASetIndexBuffer(ib, DXGI_FORMAT_R32_UINT, 0);
- D3D11_QUERY_DESC qdesc = {D3D11_QUERY_PIPELINE_STATISTICS};
- ID3D11Query* query = 0;
- device->CreateQuery(&qdesc, &query);
- context->Begin(query);
- context->DrawIndexed(index_count, 0, 0);
- context->End(query);
- D3D11_QUERY_DATA_PIPELINE_STATISTICS stats = {};
- while (S_FALSE == context->GetData(query, &stats, sizeof(stats), 0))
- ;
- query->Release();
- ib->Release();
- assert(stats.IAVertices == index_count);
- return stats.VSInvocations;
- }
- void setupShaders(ID3D11Device* device, ID3D11DeviceContext* context)
- {
- // load and compile the two shaders
- const char* shaders =
- "#define ATTRIBUTES 5\n"
- "struct Foo { float4 v[ATTRIBUTES]; };"
- "float4 VS(uint index: SV_VertexId, out Foo foo: FOO): SV_Position { uint i = index % 3; [unroll] for (int j = 0; j < ATTRIBUTES; j++) foo.v[j] = j; return float4(i != 0, i != 2, 0, 1); }"
- "float4 PS(Foo foo: FOO): SV_Target { float4 result = 0; [unroll] for (int j = 0; j < ATTRIBUTES; j++) result += foo.v[j]; return result; }";
- ID3DBlob* vsblob = 0;
- ID3DBlob* psblob = 0;
- D3DCompile(shaders, strlen(shaders), 0, 0, 0, "VS", "vs_5_0", 0, 0, &vsblob, 0);
- D3DCompile(shaders, strlen(shaders), 0, 0, 0, "PS", "ps_5_0", 0, 0, &psblob, 0);
- ID3D11VertexShader* vs = 0;
- ID3D11PixelShader* ps = 0;
- device->CreateVertexShader(vsblob->GetBufferPointer(), vsblob->GetBufferSize(), 0, &vs);
- device->CreatePixelShader(psblob->GetBufferPointer(), psblob->GetBufferSize(), 0, &ps);
- context->VSSetShader(vs, 0, 0);
- context->PSSetShader(ps, 0, 0);
- }
- template <typename Cache>
- void inspectCache(Cache cache)
- {
- unsigned int max_cache_size = 200;
- unsigned int grid_size = 100;
- for (unsigned int cache_size = 3; cache_size <= max_cache_size; cache_size += 1)
- {
- std::vector<unsigned int> grid1;
- gridGen(grid1, 0, grid_size, 0, grid_size, grid_size, cache_size, true);
- std::vector<unsigned int> grid2;
- gridGen(grid2, 0, grid_size, 0, grid_size, grid_size, cache_size, false);
- std::vector<unsigned int> grid3;
- gridGen(grid3, 0, grid_size, 0, grid_size, grid_size, grid_size * 4, false); // this generates a simple indexed grid without striping/degenerate triangles
- meshopt_optimizeVertexCacheFifo(&grid3[0], &grid3[0], grid3.size(), (grid_size + 1) * (grid_size + 1), cache_size);
- std::vector<unsigned int> grid4;
- gridGen(grid4, 0, grid_size, 0, grid_size, grid_size, grid_size * 4, false); // this generates a simple indexed grid without striping/degenerate triangles
- meshopt_optimizeVertexCache(&grid4[0], &grid4[0], grid4.size(), (grid_size + 1) * (grid_size + 1));
- unsigned int invocations1 = cache(&grid1[0], grid1.size());
- unsigned int invocations2 = cache(&grid2[0], grid2.size());
- unsigned int invocations3 = cache(&grid3[0], grid3.size());
- unsigned int invocations4 = cache(&grid4[0], grid4.size());
- unsigned int ideal_invocations = (grid_size + 1) * (grid_size + 1);
- printf("%d, %f, %f, %f, %f\n", cache_size,
- double(invocations1) / double(ideal_invocations),
- double(invocations2) / double(ideal_invocations),
- double(invocations3) / double(ideal_invocations),
- double(invocations4) / double(ideal_invocations));
- }
- }
- void testCache(IDXGIAdapter* adapter)
- {
- ID3D11Device* device = 0;
- ID3D11DeviceContext* context = 0;
- D3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, 0, 0, 0, 0, D3D11_SDK_VERSION, &device, 0, &context);
- setupShaders(device, context);
- inspectCache([&](const unsigned int* indices, size_t index_count) { return queryVSInvocations(device, context, indices, index_count); });
- }
- void testCacheSequence(IDXGIAdapter* adapter, int argc, char** argv)
- {
- ID3D11Device* device = 0;
- ID3D11DeviceContext* context = 0;
- D3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, 0, 0, 0, 0, D3D11_SDK_VERSION, &device, 0, &context);
- setupShaders(device, context);
- std::vector<unsigned int> ib;
- for (int i = 2; i < argc; ++i)
- {
- char* end;
- int i0 = strtol(argv[i], &end, 10);
- if (end[0] == '-')
- {
- int i1 = strtol(end + 1, &end, 10);
- if (end[0] != 0)
- {
- printf("Unrecognized index range: %s\n", argv[i]);
- return;
- }
- if (i0 < i1)
- {
- for (int ii = i0; ii <= i1; ++ii)
- ib.push_back(ii);
- }
- else
- {
- for (int ii = i0; ii >= i1; --ii)
- ib.push_back(ii);
- }
- }
- else if (end[0] == '*')
- {
- int i1 = strtol(end + 1, &end, 10);
- if (end[0] != 0 || i1 == 0)
- {
- printf("Unrecognized index range: %s\n", argv[i]);
- return;
- }
- for (int ii = 0; ii < i1; ++ii)
- ib.push_back(i0);
- }
- else if (end[0] == 'x')
- {
- int i1 = strtol(end + 1, &end, 10);
- if (end[0] != 0)
- {
- printf("Unrecognized index range: %s\n", argv[i]);
- return;
- }
- stripGen(ib, 0, i0, 0, i1, i0, true);
- }
- else if (end[0] == 0)
- {
- ib.push_back(i0);
- }
- else
- {
- printf("Unrecognized index range: %s\n", argv[i]);
- return;
- }
- }
- if (ib.size() % 3)
- ib.resize(ib.size() - ib.size() % 3);
- std::vector<bool> xformed(ib.size());
- for (size_t i = 0; i < ib.size(); i += 3)
- {
- unsigned int inv0 = i == 0 ? 0 : queryVSInvocations(device, context, ib.data(), i);
- unsigned int inv1 = queryVSInvocations(device, context, ib.data(), i + 3);
- assert(inv0 <= inv1);
- assert(inv0 + 3 >= inv1);
- switch (inv1 - inv0)
- {
- case 0:
- xformed[i + 0] = xformed[i + 1] = xformed[i + 2] = false;
- break;
- case 3:
- xformed[i + 0] = xformed[i + 1] = xformed[i + 2] = true;
- break;
- case 1:
- case 2:
- {
- unsigned int a = ib[i + 0];
- unsigned int b = ib[i + 1];
- unsigned int c = ib[i + 2];
- ib[i + 0] = ib[i + 1] = ib[i + 2] = a;
- unsigned int inva = queryVSInvocations(device, context, ib.data(), i + 3);
- ib[i + 1] = ib[i + 2] = b;
- unsigned int invb = queryVSInvocations(device, context, ib.data(), i + 3);
- ib[i + 2] = c;
- unsigned int invc = queryVSInvocations(device, context, ib.data(), i + 3);
- assert(inv0 <= inva && inva <= inv1);
- assert(inv0 <= invb && invb <= inv1);
- assert(inv0 <= invc && invc <= inv1);
- if (inv1 - inv0 == 1 && a == c && inva == inv1 && invb == inv0 && invc == inv1)
- {
- xformed[i + 0] = false;
- xformed[i + 1] = false;
- xformed[i + 2] = true;
- }
- else
- {
- assert(inva <= invb);
- assert(invb <= invc);
- xformed[i + 0] = inva == inv0 + 1;
- xformed[i + 1] = invb == inva + 1;
- xformed[i + 2] = invc == invb + 1;
- }
- break;
- }
- }
- }
- unsigned int xformed_total = 0;
- for (size_t i = 0; i < ib.size(); ++i)
- xformed_total += xformed[i];
- printf("// Sequence: %d indices", int(ib.size()));
- for (size_t i = 0; i < ib.size(); ++i)
- {
- if (i % 12 == 0)
- {
- printf("\n// %3d*3:", int(i / 3));
- }
- if (xformed[i])
- printf(" %3d*", ib[i]);
- else
- printf(" %3d ", ib[i]);
- }
- printf("\n");
- std::vector<unsigned int> cached;
- for (size_t i = 0; i < ib.size(); ++i)
- {
- unsigned int index = ib[i];
- unsigned int inv0 = queryVSInvocations(device, context, ib.data(), ib.size());
- ib.push_back(index);
- ib.push_back(index);
- ib.push_back(index);
- unsigned int inv1 = queryVSInvocations(device, context, ib.data(), ib.size());
- ib.resize(ib.size() - 3);
- if (inv1 == inv0)
- cached.push_back(index);
- }
- std::sort(cached.begin(), cached.end());
- cached.erase(std::unique(cached.begin(), cached.end()), cached.end());
- printf("// Cached :");
- for (size_t i = 0; i < cached.size(); ++i)
- printf(" %d", cached[i]);
- printf(" (%d)\n", int(cached.size()));
- unsigned int invocations = queryVSInvocations(device, context, ib.data(), ib.size());
- printf("// Invocations: %d\n", invocations);
- assert(xformed_total == invocations);
- }
- void testCacheMeshes(IDXGIAdapter* adapter, int argc, char** argv)
- {
- ID3D11Device* device = 0;
- ID3D11DeviceContext* context = 0;
- D3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, 0, 0, 0, 0, D3D11_SDK_VERSION, &device, 0, &context);
- setupShaders(device, context);
- bool stat = false;
- double atvr_sum = 0;
- double atvr_count = 0;
- unsigned int total_invocations = 0;
- unsigned int total_vertices = 0;
- for (int i = 1; i < argc; ++i)
- {
- const char* path = argv[i];
- if (strcmp(path, "--stat") == 0)
- {
- stat = true;
- continue;
- }
- fastObjMesh* obj = fast_obj_read(path);
- if (!obj)
- {
- printf("Error loading %s: file not found\n", path);
- continue;
- }
- std::vector<unsigned int> ib1;
- size_t index_offset = 0;
- for (unsigned int i = 0; i < obj->face_count; ++i)
- {
- for (unsigned int j = 0; j < obj->face_vertices[i]; ++j)
- {
- fastObjIndex gi = obj->indices[index_offset + j];
- // triangulate polygon on the fly; offset-3 is always the first polygon vertex
- if (j >= 3)
- {
- unsigned int i0 = ib1[ib1.size() - 3];
- unsigned int i1 = ib1[ib1.size() - 1];
- ib1.push_back(i0);
- ib1.push_back(i1);
- }
- ib1.push_back(gi.p);
- }
- index_offset += obj->face_vertices[i];
- }
- unsigned int vertex_count = obj->position_count;
- unsigned int index_count = ib1.size();
- unsigned int invocations1 = queryVSInvocations(device, context, ib1.data(), index_count);
- if (stat)
- {
- std::vector<unsigned int> ib2(ib1.size());
- meshopt_optimizeVertexCache(&ib2[0], &ib1[0], ib1.size(), vertex_count);
- unsigned int invocations = queryVSInvocations(device, context, ib2.data(), index_count);
- atvr_sum += double(invocations) / double(vertex_count);
- atvr_count += 1;
- total_invocations += invocations;
- total_vertices += vertex_count;
- }
- else
- {
- printf("%s: baseline %f\n", path, double(invocations1) / double(vertex_count));
- std::vector<unsigned int> ib3(ib1.size());
- meshopt_optimizeVertexCache(&ib3[0], &ib1[0], ib1.size(), vertex_count);
- unsigned int invocations3 = queryVSInvocations(device, context, ib3.data(), index_count);
- printf("%s: forsyth %f\n", path, double(invocations3) / double(vertex_count));
- for (unsigned int cache_size = 12; cache_size <= 24; ++cache_size)
- {
- std::vector<unsigned int> ib2(ib1.size());
- meshopt_optimizeVertexCacheFifo(&ib2[0], &ib1[0], ib1.size(), vertex_count, cache_size);
- unsigned int invocations2 = queryVSInvocations(device, context, ib2.data(), index_count);
- printf("%s: tipsify(%d) %f\n", path, cache_size, double(invocations2) / double(vertex_count));
- }
- }
- }
- if (stat)
- {
- printf("ATVR: average %f cumulative %f; %d vertices\n", atvr_sum / atvr_count, double(total_invocations) / double(total_vertices), total_vertices);
- }
- }
- int main(int argc, char** argv)
- {
- IDXGIFactory1* factory = 0;
- CreateDXGIFactory1(__uuidof(IDXGIFactory1), (void**)&factory);
- IDXGIAdapter* adapter = NULL;
- for (unsigned int index = 0; SUCCEEDED(factory->EnumAdapters(index, &adapter)); ++index)
- {
- DXGI_ADAPTER_DESC ad = {};
- adapter->GetDesc(&ad);
- if (ad.VendorId == 0x1414 && ad.DeviceId == 0x8c)
- continue; // Skip Microsoft Basic Render Driver
- printf("// GPU %d: %S (Vendor %04x Device %04x)\n", index, ad.Description, ad.VendorId, ad.DeviceId);
- if (argc == 1)
- {
- testCache(adapter);
- }
- else if (argc > 1 && strcmp(argv[1], "--") == 0)
- {
- testCacheSequence(adapter, argc, argv);
- }
- else
- {
- testCacheMeshes(adapter, argc, argv);
- }
- }
- }
- #endif
|