// This file renders vertex buffers, converts raw meshes
// to GL meshes, and manages threads that do the raw-mesh
// building (found in cave_mesher.c)
- #include "stb_voxel_render.h"
- #define STB_GLEXT_DECLARE "glext_list.h"
- #include "stb_gl.h"
- #include "stb_image.h"
- #include "stb_glprog.h"
- #include "caveview.h"
- #include "cave_parse.h"
- #include "stb.h"
- #include "sdl.h"
- #include "sdl_thread.h"
- #include <math.h>
- #include <assert.h>
- //#define STBVOX_CONFIG_TEX1_EDGE_CLAMP
- // currently no dynamic way to set mesh cache size or view distance
- //#define SHORTVIEW
- stbvox_mesh_maker g_mesh_maker;
- GLuint main_prog;
- GLint uniform_locations[64];
- //#define MAX_QUADS_PER_DRAW (65536 / 4) // assuming 16-bit indices, 4 verts per quad
- //#define FIXED_INDEX_BUFFER_SIZE (MAX_QUADS_PER_DRAW * 6 * 2) // 16*1024 * 12 == ~192KB
- // while uploading texture data, this holds our each texture
- #define TEX_SIZE 64
- uint32 texture[TEX_SIZE][TEX_SIZE];
- GLuint voxel_tex[2];
// State of one slot in the chunk-mesh cache.
enum
{
   STATE_invalid = 0,   // slot holds no GL objects (see free_chunk)
   STATE_needed,        // in range and waiting for a free mesh worker
   STATE_requested,     // handed to a mesh worker via request_chunk
   STATE_abandoned,     // not referenced by the code in this file
   STATE_valid          // mesh uploaded and drawable (see upload_mesh_data)
};
- // mesh is 32x32x255 ... this is hardcoded in that
- // a mesh covers 2x2 minecraft chunks, no #defines for it
- typedef struct
- {
- int state;
- int chunk_x, chunk_y;
- int num_quads;
- float priority;
- int vbuf_size, fbuf_size;
- float transform[3][3];
- float bounds[2][3];
- GLuint vbuf;// vbuf_tex;
- GLuint fbuf, fbuf_tex;
- } chunk_mesh;
- void scale_texture(unsigned char *src, int x, int y, int w, int h)
- {
- int i,j,k;
- assert(w == 256 && h == 256);
- for (j=0; j < TEX_SIZE; ++j) {
- for (i=0; i < TEX_SIZE; ++i) {
- uint32 val=0;
- for (k=0; k < 4; ++k) {
- val >>= 8;
- val += src[ 4*(x+(i>>2)) + 4*w*(y+(j>>2)) + k]<<24;
- }
- texture[j][i] = val;
- }
- }
- }
- void build_base_texture(int n)
- {
- int x,y;
- uint32 color = stb_rand() | 0x808080;
- for (y=0; y<TEX_SIZE; ++y)
- for (x=0; x<TEX_SIZE; ++x) {
- texture[y][x] = (color + (stb_rand()&0x1f1f1f))|0xff000000;
- }
- }
- void build_overlay_texture(int n)
- {
- int x,y;
- uint32 color = stb_rand();
- if (color & 16)
- color = 0xff000000;
- else
- color = 0xffffffff;
- for (y=0; y<TEX_SIZE; ++y)
- for (x=0; x<TEX_SIZE; ++x) {
- texture[y][x] = 0;
- }
- for (y=0; y < TEX_SIZE/8; ++y) {
- for (x=0; x < TEX_SIZE; ++x) {
- texture[y][x] = color;
- texture[TEX_SIZE-1-y][x] = color;
- texture[x][y] = color;
- texture[x][TEX_SIZE-1-y] = color;
- }
- }
- }
- // view radius of about 1024 = 2048 columns / 32 columns-per-mesh = 2^11 / 2^5 = 64x64
- // so we need bigger than 64x64 so we can precache, which means we have to be
- // non-power-of-two, or we have to be pretty huge
- #define CACHED_MESH_NUM_X 128
- #define CACHED_MESH_NUM_Y 128
- chunk_mesh cached_chunk_mesh[CACHED_MESH_NUM_Y][CACHED_MESH_NUM_X];
- void free_chunk(int slot_x, int slot_y)
- {
- chunk_mesh *cm = &cached_chunk_mesh[slot_y][slot_x];
- if (cm->state == STATE_valid) {
- glDeleteTextures(1, &cm->fbuf_tex);
- glDeleteBuffersARB(1, &cm->vbuf);
- glDeleteBuffersARB(1, &cm->fbuf);
- cached_chunk_mesh[slot_y][slot_x].state = STATE_invalid;
- }
- }
- void upload_mesh(chunk_mesh *cm, uint8 *build_buffer, uint8 *face_buffer)
- {
- glGenBuffersARB(1, &cm->vbuf);
- glBindBufferARB(GL_ARRAY_BUFFER_ARB, cm->vbuf);
- glBufferDataARB(GL_ARRAY_BUFFER_ARB, cm->num_quads*4*sizeof(uint32), build_buffer, GL_STATIC_DRAW_ARB);
- glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
- glGenBuffersARB(1, &cm->fbuf);
- glBindBufferARB(GL_TEXTURE_BUFFER_ARB, cm->fbuf);
- glBufferDataARB(GL_TEXTURE_BUFFER_ARB, cm->num_quads*sizeof(uint32), face_buffer , GL_STATIC_DRAW_ARB);
- glBindBufferARB(GL_TEXTURE_BUFFER_ARB, 0);
- glGenTextures(1, &cm->fbuf_tex);
- glBindTexture(GL_TEXTURE_BUFFER_ARB, cm->fbuf_tex);
- glTexBufferARB(GL_TEXTURE_BUFFER_ARB, GL_RGBA8UI, cm->fbuf);
- glBindTexture(GL_TEXTURE_BUFFER_ARB, 0);
- }
- static void upload_mesh_data(raw_mesh *rm)
- {
- int cx = rm->cx;
- int cy = rm->cy;
- int slot_x = (cx >> 1) & (CACHED_MESH_NUM_X-1);
- int slot_y = (cy >> 1) & (CACHED_MESH_NUM_Y-1);
- chunk_mesh *cm;
- free_chunk(slot_x, slot_y);
- cm = &cached_chunk_mesh[slot_y][slot_x];
- cm->num_quads = rm->num_quads;
- upload_mesh(cm, rm->build_buffer, rm->face_buffer);
- cm->vbuf_size = rm->num_quads*4*sizeof(uint32);
- cm->fbuf_size = rm->num_quads*sizeof(uint32);
- cm->priority = 100000;
- cm->chunk_x = cx;
- cm->chunk_y = cy;
- memcpy(cm->bounds, rm->bounds, sizeof(cm->bounds));
- memcpy(cm->transform, rm->transform, sizeof(cm->transform));
- // write barrier here
- cm->state = STATE_valid;
- }
- GLint uniform_loc[16];
- float table3[128][3];
- float table4[64][4];
- GLint tablei[2];
- float step=0;
- #ifdef SHORTVIEW
- int view_dist_in_chunks = 50;
- #else
- int view_dist_in_chunks = 80;
- #endif
- void setup_uniforms(float pos[3])
- {
- int i,j;
- step += 1.0f/60.0f;
- for (i=0; i < STBVOX_UNIFORM_count; ++i) {
- stbvox_uniform_info raw, *ui=&raw;
- stbvox_get_uniform_info(&raw, i);
- uniform_loc[i] = -1;
- if (i == STBVOX_UNIFORM_texscale || i == STBVOX_UNIFORM_texgen || i == STBVOX_UNIFORM_color_table)
- continue;
- if (ui) {
- void *data = ui->default_value;
- uniform_loc[i] = stbgl_find_uniform(main_prog, ui->name);
- switch (i) {
- case STBVOX_UNIFORM_face_data:
- tablei[0] = 2;
- data = tablei;
- break;
- case STBVOX_UNIFORM_tex_array:
- glActiveTextureARB(GL_TEXTURE0_ARB);
- glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[0]);
- glActiveTextureARB(GL_TEXTURE1_ARB);
- glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[1]);
- glActiveTextureARB(GL_TEXTURE0_ARB);
- tablei[0] = 0;
- tablei[1] = 1;
- data = tablei;
- break;
- case STBVOX_UNIFORM_color_table:
- data = ui->default_value;
- ((float *)data)[63*4+3] = 2.0f; // emissive
- break;
- case STBVOX_UNIFORM_camera_pos:
- data = table3[0];
- table3[0][0] = pos[0];
- table3[0][1] = pos[1];
- table3[0][2] = pos[2];
- table3[0][3] = stb_max(0,(float)sin(step*2)*0.125f);
- break;
- case STBVOX_UNIFORM_ambient: {
- float bright = 1.0;
- //float bright = 0.75;
- float amb[3][3];
- // ambient direction is sky-colored upwards
- // "ambient" lighting is from above
- table4[0][0] = 0.3f;
- table4[0][1] = -0.5f;
- table4[0][2] = 0.9f;
- amb[1][0] = 0.3f; amb[1][1] = 0.3f; amb[1][2] = 0.3f; // dark-grey
- amb[2][0] = 1.0; amb[2][1] = 1.0; amb[2][2] = 1.0; // white
- // convert so (table[1]*dot+table[2]) gives
- // above interpolation
- // lerp((dot+1)/2, amb[1], amb[2])
- // amb[1] + (amb[2] - amb[1]) * (dot+1)/2
- // amb[1] + (amb[2] - amb[1]) * dot/2 + (amb[2]-amb[1])/2
- for (j=0; j < 3; ++j) {
- table4[1][j] = (amb[2][j] - amb[1][j])/2 * bright;
- table4[2][j] = (amb[1][j] + amb[2][j])/2 * bright;
- }
- // fog color
- table4[3][0] = 0.6f, table4[3][1] = 0.7f, table4[3][2] = 0.9f;
- table4[3][3] = 1.0f / (view_dist_in_chunks * 16);
- table4[3][3] *= table4[3][3];
- data = table4;
- break;
- }
- }
- switch (ui->type) {
- case STBVOX_UNIFORM_TYPE_sampler: stbglUniform1iv(uniform_loc[i], ui->array_length, data); break;
- case STBVOX_UNIFORM_TYPE_vec2: stbglUniform2fv(uniform_loc[i], ui->array_length, data); break;
- case STBVOX_UNIFORM_TYPE_vec3: stbglUniform3fv(uniform_loc[i], ui->array_length, data); break;
- case STBVOX_UNIFORM_TYPE_vec4: stbglUniform4fv(uniform_loc[i], ui->array_length, data); break;
- }
- }
- }
- }
- GLuint unitex[64], unibuf[64];
- void make_texture_buffer_for_uniform(int uniform, int slot)
- {
- GLenum type;
- stbvox_uniform_info raw, *ui=&raw;
- GLint uloc;
-
- stbvox_get_uniform_info(ui, uniform);
- uloc = stbgl_find_uniform(main_prog, ui->name);
- if (uniform == STBVOX_UNIFORM_color_table)
- ((float *)ui->default_value)[63*4+3] = 2.0f; // emissive
- glGenBuffersARB(1, &unibuf[uniform]);
- glBindBufferARB(GL_ARRAY_BUFFER_ARB, unibuf[uniform]);
- glBufferDataARB(GL_ARRAY_BUFFER_ARB, ui->array_length * ui->bytes_per_element, ui->default_value, GL_STATIC_DRAW_ARB);
- glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
- glGenTextures(1, &unitex[uniform]);
- glBindTexture(GL_TEXTURE_BUFFER_ARB, unitex[uniform]);
- switch (ui->type) {
- case STBVOX_UNIFORM_TYPE_vec2: type = GL_RG32F; break;
- case STBVOX_UNIFORM_TYPE_vec3: type = GL_RGB32F; break;
- case STBVOX_UNIFORM_TYPE_vec4: type = GL_RGBA32F; break;
- default: assert(0);
- }
- glTexBufferARB(GL_TEXTURE_BUFFER_ARB, type, unibuf[uniform]);
- glBindTexture(GL_TEXTURE_BUFFER_ARB, 0);
- glActiveTextureARB(GL_TEXTURE0 + slot);
- glBindTexture(GL_TEXTURE_BUFFER_ARB, unitex[uniform]);
- glActiveTextureARB(GL_TEXTURE0);
- stbglUseProgram(main_prog);
- stbglUniform1i(uloc, slot);
- }
- #define MAX_MESH_WORKERS 8
- #define MAX_CHUNK_LOAD_WORKERS 2
- int num_mesh_workers;
- int num_chunk_load_workers;
- typedef struct
- {
- int state;
- int request_cx;
- int request_cy;
- int padding[13];
- SDL_sem * request_received;
- SDL_sem * chunk_server_done_processing;
- int chunk_action;
- int chunk_request_x;
- int chunk_request_y;
- fast_chunk *chunks[4][4];
- int padding2[16];
- raw_mesh rm;
- int padding3[16];
- uint8 *build_buffer;
- uint8 *face_buffer ;
- } mesh_worker;
// State of a mesh worker (mesh_worker.state).
enum
{
   WSTATE_idle = 0,      // free; request_chunk() may hand it work
   WSTATE_requested,     // a request was written and the semaphore posted
   WSTATE_running,       // the worker thread picked the request up
   WSTATE_mesh_ready     // raw mesh finished, waiting for the render thread
};
- mesh_worker mesh_data[MAX_MESH_WORKERS];
- int num_meshes_started; // stats
- int request_chunk(int chunk_x, int chunk_y);
- void update_meshes_from_render_thread(void);
// RGBA texel for each of the 64 layers of the second texture array.
unsigned char tex2_data[64][4];

// Fill the RGB channels of tex2_data with a four-band gradient, 16 layers
// per band. Alpha (index 3) is left alone; set_tex2_alpha() writes it.
void init_tex2_gradient(void)
{
   int layer;
   for (layer = 0; layer < 64; ++layer) {
      unsigned char *rgb = tex2_data[layer];
      int t = layer & 15;          // position inside the band, 0..15

      switch (layer >> 4) {        // which band
         case 0:
            rgb[0] = 64 + 12*t;
            rgb[1] = 32;
            rgb[2] = 64;
            break;
         case 1:
            rgb[0] = 255;
            rgb[1] = 32 + 8*t;
            rgb[2] = 64;
            break;
         case 2:
            rgb[0] = 255;
            rgb[1] = 160;
            rgb[2] = 64 + 12*t;
            break;
         default:
            rgb[0] = 255;
            rgb[1] = 160 + 6*t;
            rgb[2] = 255;
            break;
      }
   }
}
- void set_tex2_alpha(float fa)
- {
- int i;
- int a = (int) stb_lerp(fa, 0, 255);
- if (a < 0) a = 0; else if (a > 255) a = 255;
- glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[1]);
- for (i=0; i < 64; ++i) {
- tex2_data[i][3] = a;
- glTexSubImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, 0,0,i, 1,1,1, GL_RGBA, GL_UNSIGNED_BYTE, tex2_data[i]);
- }
- }
- void render_init(void)
- {
- int i;
- char *binds[] = { "attr_vertex", "attr_face", NULL };
- char *vertex;
- char *fragment;
- int w=0,h=0;
- unsigned char *texdata = stbi_load("terrain.png", &w, &h, NULL, 4);
- stbvox_init_mesh_maker(&g_mesh_maker);
- for (i=0; i < num_mesh_workers; ++i) {
- stbvox_init_mesh_maker(&mesh_data[i].rm.mm);
- }
- vertex = stbvox_get_vertex_shader();
- fragment = stbvox_get_fragment_shader();
- {
- char error_buffer[1024];
- char *main_vertex[] = { vertex, NULL };
- char *main_fragment[] = { fragment, NULL };
- main_prog = stbgl_create_program(main_vertex, main_fragment, binds, error_buffer, sizeof(error_buffer));
- if (main_prog == 0) {
- ods("Compile error for main shader: %s\n", error_buffer);
- assert(0);
- exit(1);
- }
- }
- //init_index_buffer();
- make_texture_buffer_for_uniform(STBVOX_UNIFORM_texscale , 3);
- make_texture_buffer_for_uniform(STBVOX_UNIFORM_texgen , 4);
- make_texture_buffer_for_uniform(STBVOX_UNIFORM_color_table , 5);
- glGenTextures(2, voxel_tex);
- glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[0]);
- glTexImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, GL_RGBA,
- TEX_SIZE,TEX_SIZE,256,
- 0,GL_RGBA,GL_UNSIGNED_BYTE,NULL);
- for (i=0; i < 256; ++i) {
- if (texdata)
- scale_texture(texdata, (i&15)*w/16, (h/16)*(i>>4), w,h);
- else
- build_base_texture(i);
- glTexSubImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, 0,0,i, TEX_SIZE,TEX_SIZE,1, GL_RGBA, GL_UNSIGNED_BYTE, texture[0]);
- }
- glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
- glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
- glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MAX_ANISOTROPY_EXT, 16);
- #ifdef STBVOX_CONFIG_TEX1_EDGE_CLAMP
- glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
- glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
- #endif
- glGenerateMipmapEXT(GL_TEXTURE_2D_ARRAY_EXT);
- glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[1]);
- glTexImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, GL_RGBA,
- 1,1,64,
- 0,GL_RGBA,GL_UNSIGNED_BYTE,NULL);
- init_tex2_gradient();
- set_tex2_alpha(0.0);
- #if 0
- for (i=0; i < 128; ++i) {
- //build_overlay_texture(i);
- glTexSubImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, 0,0,i, TEX_SIZE,TEX_SIZE,1, GL_RGBA, GL_UNSIGNED_BYTE, texture[0]);
- }
- #endif
- glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
- glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
- glGenerateMipmapEXT(GL_TEXTURE_2D_ARRAY_EXT);
- }
- void world_init(void)
- {
- int a,b,x,y;
- Uint64 start_time, end_time;
- #ifdef NDEBUG
- int range = 32;
- #else
- int range = 12;
- #endif
- start_time = SDL_GetPerformanceCounter();
- // iterate in 8x8 clusters of qchunks at a time to get better converted-chunk-cache reuse
- // than a purely row-by-row ordering is (single-threaded this is a bigger win than
- // any of the above optimizations were, since it halves zlib/mc-conversion costs)
- for (x=-range; x <= range; x += 16)
- for (y=-range; y <= range; y += 16)
- for (b=y; b < y+16 && b <= range; b += 2)
- for (a=x; a < x+16 && a <= range; a += 2)
- while (!request_chunk(a, b)) { // if request fails, all threads are busy
- update_meshes_from_render_thread();
- SDL_Delay(1);
- }
- // wait until all the workers are done,
- // (this is only needed if we want to time
- // when the build finishes, or when we want to reset the
- // cache size; otherwise we could just go ahead and
- // start rendering whatever we've got)
- for(;;) {
- int i;
- update_meshes_from_render_thread();
- for (i=0; i < num_mesh_workers; ++i)
- if (mesh_data[i].state != WSTATE_idle)
- break;
- if (i == num_mesh_workers)
- break;
- SDL_Delay(3);
- }
- end_time = SDL_GetPerformanceCounter();
- ods("Build time: %7.2fs\n", (end_time - start_time) / (float) SDL_GetPerformanceFrequency());
- // don't waste lots of storage on chunk caches once it's finished starting-up;
- // this was only needed to be this large because we worked in large blocks
- // to maximize sharing
- reset_cache_size(32);
- }
- extern SDL_mutex * chunk_cache_mutex;
- int mesh_worker_handler(void *data)
- {
- mesh_worker *mw = data;
- mw->face_buffer = malloc(FACE_BUFFER_SIZE);
- mw->build_buffer = malloc(BUILD_BUFFER_SIZE);
- // this loop only works because the compiler can't
- // tell that the SDL_calls don't access mw->state;
- // really we should barrier that stuff
- for(;;) {
- int i,j;
- int cx,cy;
- // wait for a chunk request
- SDL_SemWait(mw->request_received);
- // analyze the chunk request
- assert(mw->state == WSTATE_requested);
- cx = mw->request_cx;
- cy = mw->request_cy;
- // this is inaccurate as it can block while another thread has the cache locked
- mw->state = WSTATE_running;
- // get the chunks we need (this takes a lock and caches them)
- for (j=0; j < 4; ++j)
- for (i=0; i < 4; ++i)
- mw->chunks[j][i] = get_converted_fastchunk(cx-1 + i, cy-1 + j);
- // build the mesh based on the chunks
- mw->rm.build_buffer = mw->build_buffer;
- mw->rm.face_buffer = mw->face_buffer;
- build_chunk(cx, cy, mw->chunks, &mw->rm);
- mw->state = WSTATE_mesh_ready;
- // don't need to notify of this, because it gets polled
- // when done, free the chunks
- // for efficiency we just take the mutex once around the whole thing,
- // though this spreads the mutex logic over two files
- SDL_LockMutex(chunk_cache_mutex);
- for (j=0; j < 4; ++j)
- for (i=0; i < 4; ++i) {
- deref_fastchunk(mw->chunks[j][i]);
- mw->chunks[j][i] = NULL;
- }
- SDL_UnlockMutex(chunk_cache_mutex);
- }
- return 0;
- }
- int request_chunk(int chunk_x, int chunk_y)
- {
- int i;
- for (i=0; i < num_mesh_workers; ++i) {
- mesh_worker *mw = &mesh_data[i];
- if (mw->state == WSTATE_idle) {
- mw->request_cx = chunk_x;
- mw->request_cy = chunk_y;
- mw->state = WSTATE_requested;
- SDL_SemPost(mw->request_received);
- ++num_meshes_started;
- return 1;
- }
- }
- return 0;
- }
- void prepare_threads(void)
- {
- int i;
- int num_proc = SDL_GetCPUCount();
- if (num_proc > 6)
- num_mesh_workers = num_proc/2;
- else if (num_proc > 4)
- num_mesh_workers = 4;
- else
- num_mesh_workers = num_proc-1;
- // @TODO
- // Thread usage is probably pretty terrible; need to make a
- // separate queue of needed chunks, instead of just generating
- // one request per thread per frame, and a separate queue of
- // results. (E.g. If it takes 1.5 frames to build mesh, thread
- // is idle for 0.5 frames.) To fake this for now, I've just
- // doubled the number of threads to let those serve as a 'queue',
- // but that's dumb.
- num_mesh_workers *= 2; // try to get better thread usage
- if (num_mesh_workers > MAX_MESH_WORKERS)
- num_mesh_workers = MAX_MESH_WORKERS;
- for (i=0; i < num_mesh_workers; ++i) {
- mesh_worker *data = &mesh_data[i];
- data->request_received = SDL_CreateSemaphore(0);
- data->chunk_server_done_processing = SDL_CreateSemaphore(0);
- SDL_CreateThread(mesh_worker_handler, "mesh worker", data);
- }
- }
- // "better" buffer uploading
- #if 0
- if (glBufferStorage) {
- glDeleteBuffersARB(1, &vb->vbuf);
- glGenBuffersARB(1, &vb->vbuf);
- glBindBufferARB(GL_ARRAY_BUFFER_ARB, vb->vbuf);
- glBufferStorage(GL_ARRAY_BUFFER_ARB, sizeof(build_buffer), build_buffer, 0);
- glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
- } else {
- glBindBufferARB(GL_ARRAY_BUFFER_ARB, vb->vbuf);
- glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(build_buffer), build_buffer, GL_STATIC_DRAW_ARB);
- glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
- }
- #endif
// Plane coefficients; test_plane() treats a point (X,Y,Z) as being on the
// inside when x*X + y*Y + z*Z + w >= 0.
typedef struct
{
   float x;
   float y;
   float z;
   float w;
} plane;

// The six planes filled in by compute_frustum().
static plane frustum[6];
// 4x4 double matrix product with mixed index conventions:
//
//    out[r][c] = sum over k of src1[k][r] * src2[c][k]
//
// i.e. both inputs are read with the first index as the column, while the
// result is written with the first index as the row. Results are stored
// while the inputs are still being read, so `out` should not alias either
// input.
static void matd_mul(double out[4][4], double src1[4][4], double src2[4][4])
{
   int r, c;
   for (c = 0; c < 4; ++c) {
      for (r = 0; r < 4; ++r) {
         double acc = 0;
         int k = 0;
         while (k < 4) {
            acc += src1[k][r] * src2[c][k];
            ++k;
         }
         out[r][c] = acc;
      }
   }
}
- // https://fgiesen.wordpress.com/2012/08/31/frustum-planes-from-the-projection-matrix/
- static void compute_frustum(void)
- {
- int i;
- GLdouble mv[4][4],proj[4][4], mvproj[4][4];
- glGetDoublev(GL_MODELVIEW_MATRIX , mv[0]);
- glGetDoublev(GL_PROJECTION_MATRIX, proj[0]);
- matd_mul(mvproj, proj, mv);
- for (i=0; i < 4; ++i) {
- (&frustum[0].x)[i] = (float) (mvproj[3][i] + mvproj[0][i]);
- (&frustum[1].x)[i] = (float) (mvproj[3][i] - mvproj[0][i]);
- (&frustum[2].x)[i] = (float) (mvproj[3][i] + mvproj[1][i]);
- (&frustum[3].x)[i] = (float) (mvproj[3][i] - mvproj[1][i]);
- (&frustum[4].x)[i] = (float) (mvproj[3][i] + mvproj[2][i]);
- (&frustum[5].x)[i] = (float) (mvproj[3][i] - mvproj[2][i]);
- }
- }
- static int test_plane(plane *p, float x0, float y0, float z0, float x1, float y1, float z1)
- {
- // return false if the box is entirely behind the plane
- float d=0;
- assert(x0 <= x1 && y0 <= y1 && z0 <= z1);
- if (p->x > 0) d += x1*p->x; else d += x0*p->x;
- if (p->y > 0) d += y1*p->y; else d += y0*p->y;
- if (p->z > 0) d += z1*p->z; else d += z0*p->z;
- return d + p->w >= 0;
- }
- static int is_box_in_frustum(float *bmin, float *bmax)
- {
- int i;
- for (i=0; i < 6; ++i)
- if (!test_plane(&frustum[i], bmin[0], bmin[1], bmin[2], bmax[0], bmax[1], bmax[2]))
- return 0;
- return 1;
- }
- float compute_priority(int cx, int cy, float x, float y)
- {
- float distx, disty, dist2;
- distx = (cx*16+8) - x;
- disty = (cy*16+8) - y;
- dist2 = distx*distx + disty*disty;
- return view_dist_in_chunks*view_dist_in_chunks * 16 * 16 - dist2;
- }
// Per-frame statistics, reset at the top of render_caves().
int chunk_locations;
int chunks_considered;
int chunks_in_frustum;
int quads_considered;
int quads_rendered;
int chunk_storage_rendered;
int chunk_storage_considered;
int chunk_storage_total;

int update_frustum = 1;

// Storage limits in bytes: new meshes are only requested while the total
// is below max_chunk_storage; freeing is considered once it reaches
// min_chunk_storage.
#ifdef SHORTVIEW
int max_chunk_storage = 450 << 20;
int min_chunk_storage = 350 << 20;
#else
int max_chunk_storage = 900 << 20;
int min_chunk_storage = 800 << 20;
#endif

float min_priority = -500; // this really wants to be in unit space, not squared space

// Meshes uploaded by update_meshes_from_render_thread().
int num_meshes_uploaded;
- void update_meshes_from_render_thread(void)
- {
- int i;
- for (i=0; i < num_mesh_workers; ++i) {
- mesh_worker *mw = &mesh_data[i];
- if (mw->state == WSTATE_mesh_ready) {
- upload_mesh_data(&mw->rm);
- ++num_meshes_uploaded;
- mw->state = WSTATE_idle;
- }
- }
- }
- extern float tex2_alpha;
- extern int global_hack;
- int num_threads_active;
- float chunk_server_activity;
- void render_caves(float campos[3])
- {
- float x = campos[0], y = campos[1];
- int qchunk_x, qchunk_y;
- int cam_x, cam_y;
- int i,j, rad;
- compute_frustum();
- chunk_locations = chunks_considered = chunks_in_frustum = 0;
- quads_considered = quads_rendered = 0;
- chunk_storage_total = chunk_storage_considered = chunk_storage_rendered = 0;
- cam_x = (int) floor(x+0.5);
- cam_y = (int) floor(y+0.5);
- qchunk_x = (((int) floor(x)+16) >> 5) << 1;
- qchunk_y = (((int) floor(y)+16) >> 5) << 1;
- glEnable(GL_ALPHA_TEST);
- glAlphaFunc(GL_GREATER, 0.5);
- stbglUseProgram(main_prog);
- setup_uniforms(campos); // set uniforms to default values inefficiently
- glActiveTextureARB(GL_TEXTURE2_ARB);
- stbglEnableVertexAttribArray(0);
- {
- float lighting[2][3] = { { campos[0],campos[1],campos[2] }, { 0.75,0.75,0.65f } };
- float bright = 8;
- lighting[1][0] *= bright;
- lighting[1][1] *= bright;
- lighting[1][2] *= bright;
- stbglUniform3fv(stbgl_find_uniform(main_prog, "light_source"), 2, lighting[0]);
- }
- if (global_hack)
- set_tex2_alpha(tex2_alpha);
- num_meshes_uploaded = 0;
- update_meshes_from_render_thread();
- // traverse all in-range chunks and analyze them
- for (j=-view_dist_in_chunks; j <= view_dist_in_chunks; j += 2) {
- for (i=-view_dist_in_chunks; i <= view_dist_in_chunks; i += 2) {
- float priority;
- int cx = qchunk_x + i;
- int cy = qchunk_y + j;
- priority = compute_priority(cx, cy, x, y);
- if (priority >= min_priority) {
- int slot_x = (cx>>1) & (CACHED_MESH_NUM_X-1);
- int slot_y = (cy>>1) & (CACHED_MESH_NUM_Y-1);
- chunk_mesh *cm = &cached_chunk_mesh[slot_y][slot_x];
- ++chunk_locations;
- if (cm->state == STATE_valid && priority >= 0) {
- // check if chunk pos actually matches
- if (cm->chunk_x != cx || cm->chunk_y != cy) {
- // we have a stale chunk we need to recreate
- free_chunk(slot_x, slot_y); // it probably will have already gotten freed, but just in case
- }
- }
- if (cm->state == STATE_invalid) {
- cm->chunk_x = cx;
- cm->chunk_y = cy;
- cm->state = STATE_needed;
- }
- cm->priority = priority;
- }
- }
- }
- // draw front-to-back
- for (rad = 0; rad <= view_dist_in_chunks; rad += 2) {
- for (j=-rad; j <= rad; j += 2) {
- // if j is +- rad, then iterate i through all values
- // if j isn't +-rad, then i should be only -rad & rad
- int step = 2;
- if (abs(j) != rad)
- step = 2*rad;
- for (i=-rad; i <= rad; i += step) {
- int cx = qchunk_x + i;
- int cy = qchunk_y + j;
- int slot_x = (cx>>1) & (CACHED_MESH_NUM_X-1);
- int slot_y = (cy>>1) & (CACHED_MESH_NUM_Y-1);
- chunk_mesh *cm = &cached_chunk_mesh[slot_y][slot_x];
- if (cm->state == STATE_valid && cm->priority >= 0) {
- ++chunks_considered;
- quads_considered += cm->num_quads;
- if (is_box_in_frustum(cm->bounds[0], cm->bounds[1])) {
- ++chunks_in_frustum;
- // @TODO if in range
- stbglUniform3fv(uniform_loc[STBVOX_UNIFORM_transform], 3, cm->transform[0]);
- glBindBufferARB(GL_ARRAY_BUFFER_ARB, cm->vbuf);
- glVertexAttribIPointer(0, 1, GL_UNSIGNED_INT, 4, (void*) 0);
- glBindTexture(GL_TEXTURE_BUFFER_ARB, cm->fbuf_tex);
- glDrawArrays(GL_QUADS, 0, cm->num_quads*4);
- quads_rendered += cm->num_quads;
- chunk_storage_rendered += cm->vbuf_size + cm->fbuf_size;
- }
- chunk_storage_considered += cm->vbuf_size + cm->fbuf_size;
- }
- }
- }
- }
- stbglDisableVertexAttribArray(0);
- glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
- glActiveTextureARB(GL_TEXTURE0_ARB);
- stbglUseProgram(0);
- num_meshes_started = 0;
- {
- #define MAX_QUEUE 8
- float highest_priority[MAX_QUEUE];
- int highest_i[MAX_QUEUE], highest_j[MAX_QUEUE];
- float lowest_priority = view_dist_in_chunks * view_dist_in_chunks * 16 * 16.0f;
- int lowest_i = -1, lowest_j = -1;
- for (i=0; i < MAX_QUEUE; ++i) {
- highest_priority[i] = min_priority;
- highest_i[i] = -1;
- highest_j[i] = -1;
- }
- for (j=0; j < CACHED_MESH_NUM_Y; ++j) {
- for (i=0; i < CACHED_MESH_NUM_X; ++i) {
- chunk_mesh *cm = &cached_chunk_mesh[j][i];
- if (cm->state == STATE_valid) {
- cm->priority = compute_priority(cm->chunk_x, cm->chunk_y, x, y);
- chunk_storage_total += cm->vbuf_size + cm->fbuf_size;
- if (cm->priority < lowest_priority) {
- lowest_priority = cm->priority;
- lowest_i = i;
- lowest_j = j;
- }
- }
- if (cm->state == STATE_needed) {
- cm->priority = compute_priority(cm->chunk_x, cm->chunk_y, x, y);
- if (cm->priority < min_priority)
- cm->state = STATE_invalid;
- else if (cm->priority > highest_priority[0]) {
- int k;
- highest_priority[0] = cm->priority;
- highest_i[0] = i;
- highest_j[0] = j;
- // bubble this up to right place
- for (k=0; k < MAX_QUEUE-1; ++k) {
- if (highest_priority[k] > highest_priority[k+1]) {
- highest_priority[k] = highest_priority[k+1];
- highest_priority[k+1] = cm->priority;
- highest_i[k] = highest_i[k+1];
- highest_i[k+1] = i;
- highest_j[k] = highest_j[k+1];
- highest_j[k+1] = j;
- } else {
- break;
- }
- }
- }
- }
- }
- }
- // I couldn't find any straightforward logic that avoids
- // the hysteresis problem of continually creating & freeing
- // a block on the margin, so I just don't free a block until
- // it's out of range, but this doesn't actually correctly
- // handle when the cache is too small for the given range
- if (chunk_storage_total >= min_chunk_storage && lowest_i >= 0) {
- if (cached_chunk_mesh[lowest_j][lowest_i].priority < -1200) // -1000? 0?
- free_chunk(lowest_i, lowest_j);
- }
- if (chunk_storage_total < max_chunk_storage && highest_i[0] >= 0) {
- for (j=MAX_QUEUE-1; j >= 0; --j) {
- if (highest_j[0] >= 0) {
- chunk_mesh *cm = &cached_chunk_mesh[highest_j[j]][highest_i[j]];
- if (request_chunk(cm->chunk_x, cm->chunk_y)) {
- cm->state = STATE_requested;
- } else {
- // if we couldn't queue this one, skip the remainder
- break;
- }
- }
- }
- }
- }
- update_meshes_from_render_thread();
- num_threads_active = 0;
- for (i=0; i < num_mesh_workers; ++i) {
- num_threads_active += (mesh_data[i].state == WSTATE_running);
- }
- }
|