/******************************************************************************/
#include "stdafx.h"
#include "../Shaders/!Header CPU.h"
namespace EE{
// Clear values for velocity (VEL) and normal (NRM) render targets.
// NOTE(review): the 'S' prefixed variants look like the versions for signed render target formats (centered at 0 instead of 0.5) - confirm against usage.
#define SVEL_CLEAR Vec4Zero
#define  VEL_CLEAR Vec4(0.5f, 0.5f, 0.5f, 0)
#define SNRM_CLEAR Vec4(0   , 0   , -1, 0) // set Z to 0   to set VecZero normals, however Z set to -1 makes ambient occlusion more precise when it uses normals (because ambient occlusion will not work well on the VecZero normals)
#define  NRM_CLEAR Vec4(0.5f, 0.5f,  0, 0) // set Z to 0.5 to set VecZero normals, however Z set to  0 makes ambient occlusion more precise when it uses normals (because ambient occlusion will not work well on the VecZero normals)
#define  NRM_CLEAR_START 1 // 1 works faster on GeForce 650m GT, TODO: check on newer hardware
// Returns true when ambient occlusion is wanted together with 'D.ambientNormal', or when the renderer 'stage' is RS_NORMAL.
inline Bool ClearNrm()
{
   if(D.aoWant() && D.ambientNormal())return true;
   return Renderer.stage==RS_NORMAL;
}
/******************************************************************************

   Graphics API differences:
      Reading and Writing to the same Render Target           - Yes: DX9, GL          No: DX10+
      null Color Render Target                                - Yes: DX10+, GL        No: DX9 (Render Target always must be specified, IMAGE_NULL IMAGE_TYPE is used instead, which driver interprets as empty render target - video memory shouldn't be allocated)
      Depth Textures                                          - Yes: DX10+, GL        Limited: DX9 (only INTZ - GeForce>=8 Radeon>=4000, RAWZ - GeForce 6 and 7, DF24 Radeon x1000..3000) do not work with Multi-Sampling
      Deferred Multi-Sampling                                 - Yes: DX>=10.1         No: DX9, DX10.0, GL
      Lost/Reset of D3DPOOL_DEFAULT (IMAGE_RT, dynamic VB/IB) - Yes: DX9              No: DX10+, GL
      Multiple Render Targets                                 - Yes: DX9, DX10+, GL   No: GLES2

   TODO: remove RT_SIMPLE, simplePrecision, simpleVertexFog

/******************************************************************************/
RendererClass Renderer; // renderer object referenced throughout this file
MeshRender    MshrBox , // created in 'InitMshr' from 'Box(1)'
              MshrBoxR, // created in 'InitMshr' from 'Box(1)' after calling 'reverse' on the source mesh
              MshrBall; // created in 'InitMshr' from 'Ball(1)'
/******************************************************************************/
static void InitMshr()
{
   MeshBase mshb;

   mshb.createFast(Box(1));
                   MshrBox .create(mshb);
   mshb.reverse(); MshrBoxR.create(mshb);

   mshb.create(Ball(1), 0, 12);
   MshrBall.create(mshb);
}
// Deletes the helper meshes created by 'InitMshr'.
static void ShutMshr()
{
   MshrBox.del();
   MshrBoxR.del();
   MshrBall.del();
}
/******************************************************************************/
// Sets the default values of renderer members.
RendererClass::RendererClass() : ambient_color(null), highlight(null), material_color(null)
{
   stage           =RS_DEFAULT;
  _solid_mode_index=RM_SIMPLE;

   lowest_visible_point=-DBL_MAX;

  _first_pass=true;
  _get_target=false;
  _set_depth_needed=false;
  _stereo=false;
  _eye=0; _eye_num=1;

  _type=_cur_type=(MOBILE ? RT_SIMPLE : RT_DEFERRED); // mobile builds start with RT_SIMPLE, all others with RT_DEFERRED

#if MOBILE
  _simple_prec=false; // flag controlled by 'simplePrecision' (false = not per-pixel)
#else
  _simple_prec=true; // flag controlled by 'simplePrecision' (true = per-pixel)
#endif

  _mesh_blend_alpha  =ALPHA_BLEND_FACTOR;
  _mesh_stencil_value=STENCIL_REF_ZERO;
  _mesh_stencil_mode =STENCIL_NONE;
  _mesh_highlight    .zero();
  _mesh_draw_mask    =0xFFFFFFFF;
  _shader_param_changes=null;
   SetVariation(0);

   // '_cull_mode[1]' is later adjusted in 'mode' depending on mirror rendering
#if DX9
  _cull_mode[0]=D3DCULL_NONE;
  _cull_mode[1]=D3DCULL_CCW ;
#elif DX11
  _cull_mode[0]=0;
  _cull_mode[1]=1;
#endif

   // default vertex fog range (fractions, multiplied by 'D.viewRange' when used) and color
  _vtx_fog_start=0.80f;
  _vtx_fog_end  =1.00f;
  _vtx_fog_color.set(0.40f, 0.48f, 0.64f);

   // initially both the gui and current main targets point to the main color/depth images
  _gui   =_cur_main   =&_main;
  _gui_ds=_cur_main_ds=&_main_ds;
}
// Shuts down the renderer: deletes Sky/Astros/Clouds/Water, runs the 'Shut*' routines and finally calls 'rtDel'.
void RendererClass::del()
{
   Sky   .del();
   Astros.del();
   Clouds.del();
   Water .del();

   ShutInstances();
   ShutMshr     ();
   ShutParticles();
   ShutMesh     ();
   ShutMaterial ();
   ShutLight    ();

   rtDel();
}
// Creates the renderer: obtains the default shadow (and, if supported, early-Z) shaders, runs the 'Init*' routines and creates Sky/Clouds/Water.
void RendererClass::create()
{
   if(LogInit)LogN("RendererClass.create");

   // disabled one-time conversion tool, kept for reference
#if 0 // convert DDS from SMAA source to Esenthel Image format
   Str in="D:/!/SMAA/", out="C:/Esenthel/Data/Img/";
   Image img;
   img.Import(in+"SearchTex.dds", -1, IMAGE_2D); img._type=IMAGE_R8;
   img.save(out+"SMAA Search.img");
   img.Import(in+"AreaTexDX10.dds", IMAGE_R8G8, IMAGE_2D);
   img.save(out+"SMAA Area.img");
#endif

   // shaders for the default material, position-only vertices (plus skinning for the '_skin' variant)
#if SUPPORT_EARLY_Z
  _shader_early_z     =DefaultShaders(&MaterialDefault, VTX_POS         , 0, false).EarlyZ();
#endif
  _shader_shd_map     =DefaultShaders(&MaterialDefault, VTX_POS         , 0, false).Shadow();
  _shader_shd_map_skin=DefaultShaders(&MaterialDefault, VTX_POS|VTX_SKIN, 0, false).Shadow();

   InitCamera   ();
   InitLight    ();
   InitMaterial ();
   InitFur      ();
   InitMshr     ();
   InitInstances();

   Sky   .create();
   Clouds.create();
   Water .create();
}
// Changes the renderer type.
// 'type' is clamped to the valid range; a request for RT_DEFERRED is ignored when 'D.deferredUnavailable'.
// Returns this object to allow chaining.
RendererClass& RendererClass::type(RENDER_TYPE type)
{
   Clamp(type, RENDER_TYPE(0), RENDER_TYPE(RT_NUM-1));

   const Bool want_deferred=(type==RT_DEFERRED);
   if(want_deferred && D.deferredUnavailable())return T; // deferred requested but not available
   if(T._type==type)return T; // already using this type

   T._type=T._cur_type=type;
   if(want_deferred && D.deferredMSUnavailable())D.samples(1); // disable multi-sampling if we can't support it
   D.setShader(); // needed because shaders are set only for current renderer type
   return T;
}
// Sets the current rendering mode and updates all state derived from it (palette flag, velocity shader flag, solid mode index, cull mode, lod factor, material).
void RendererClass::mode(RENDER_MODE mode)
{
   T._mode            =mode;
   T._palette_mode    =(mode==RM_PALETTE || mode==RM_PALETTE1);
   T._mesh_shader_vel =(_vel && (mode==RM_SOLID || mode==RM_BLEND)); // only when '_vel' is available and only for solid/blend modes
   T._solid_mode_index=((_cur_type==RT_SIMPLE) ? RM_SIMPLE : mirror() ? RM_SOLID_M : RM_SOLID);
   // when rendering a mirror the culling direction is flipped, except for shadow mode
#if DX9
   T._cull_mode[1]    =((mirror() && mode!=RM_SHADOW) ? D3DCULL_CW : D3DCULL_CCW);
#elif DX11
   T._cull_mode[1]    =((mirror() && mode!=RM_SHADOW) ? 2 : 1);
#elif GL
   D.cullGL();
#endif
   Bool cull=D._cull; D.cull(false); D.cull(cull); // force reset
   D.lodSetCurrentFactor();
   MaterialClear();
}
// Sets the '_simple_prec' flag to 'per_pixel'; 'D.setShader' is called only when the value actually changes.
// Returns this object to allow chaining.
RendererClass& RendererClass::simplePrecision(Bool per_pixel)
{
   if(T._simple_prec==per_pixel)return T; // nothing to change

   T._simple_prec=per_pixel;
   D.setShader();
   return T;
}
/******************************************************************************/
// Stores the vertex fog start/end fractions (no validation is performed here).
// Returns this object to allow chaining.
RendererClass& RendererClass::simpleVertexFogRange(Flt start_frac, Flt end_frac)
{
   T._vtx_fog_start=start_frac;
   T._vtx_fog_end=end_frac;
   return T;
}
// Stores the vertex fog color.
// Returns this object to allow chaining.
RendererClass& RendererClass::simpleVertexFogColor(C Vec &fog_color)
{
   T._vtx_fog_color=fog_color;
   return T;
}
/******************************************************************************/
// Registers a mirror request.
// The request is stored when there is no request yet, or when 'priority' is greater than the priority of the stored one.
void RendererClass::requestMirror(C PlaneM &plane, Int priority, Bool shadows, Int resolution)
{
   if(_mirror_want && priority<=T._mirror_priority)return; // keep the existing request

   _mirror_want=true;
   _mirror_priority=priority;
   _mirror_plane=plane;
   _mirror_shadows=shadows;
   _mirror_resolution=resolution;
}
/******************************************************************************/
// Draws 'depth' into 'dest' using one of the 'LinearizeDepth' shaders.
// The shader variant is selected by the fov perspective flag and by the multi-sampling of source/destination (and on DX9 by the RAWZ depth type).
void RendererClass::linearizeDepth(Image &dest, Image &depth)
{
   D.alpha(ALPHA_NONE);
   set(&dest, null, true);

#if DX9
   if(depth.type()==IMAGE_RAWZ)Sh.h_LinearizeDepthRAWZ[FovPerspective(D.viewFovMode())]->draw(depth);else // 1s->1s
#endif
   if(!depth.multiSample() || depth.size()!=dest.size())Sh.h_LinearizeDepth[FovPerspective(D.viewFovMode())][0]->draw(depth);else // 1s->1s, if we're resizing then we also need to use the simple version
   if(!dest .multiSample()                             )Sh.h_LinearizeDepth[FovPerspective(D.viewFovMode())][1]->draw(depth);else // ms->1s
                                                        Sh.h_LinearizeDepth[FovPerspective(D.viewFovMode())][2]->draw(depth);     // ms->ms
   // reset the shader images to null after drawing
   Sh.h_ImageCol[0]->set(null);
   Sh.h_ImageColMS ->set(null);
}
// If '_set_depth_needed' is set, clears that flag and draws '_ds_1s' with the 'SetDepth' shader (when both are available).
// Color output is disabled for the draw (DX9: color writes off, others: no color render target), and previous state is restored afterwards.
void RendererClass::setDepthForDebugDrawing()
{
   if(_set_depth_needed)
   {
     _set_depth_needed=false;
      if(_ds_1s)if(Shader *shader=Sh.h_SetDepth)
      {
      #if DX9
         D.colWrite(0); // DX9 always requires a RT set
      #else
         Image *rt=_cur[0]; set(null, _cur_ds, true); // remember current color RT and draw with depth only
      #endif
         // draw with alpha disabled, depth locked and always passing depth test, then restore alpha and depth settings
         ALPHA_MODE alpha=D.alpha(ALPHA_NONE); D.depthLock  (true); D.depthFunc(FUNC_ALWAYS); shader->draw(_ds_1s);
                          D.alpha(alpha     ); D.depthUnlock(    ); D.depthFunc(FUNC_LESS  );
      #if DX9
         D.colWrite(COL_WRITE_RGBA);
      #else
         set(rt, _cur_ds, true); // restore color RT
      #endif
      }
   }
}
// Returns a copy of the current color render target ('_cur[0]') in a new IMAGERT_RGBA image of the same size, or null when there is no current target.
ImageRTPtr RendererClass::getBackBuffer() // this may get called during rendering and outside of it
{
   Image *src=_cur[0];
   if(!src)return null;

   ImageRTPtr copy(ImageRTDesc(src->w(), src->h(), IMAGERT_RGBA)); // here Alpha is used for storing opacity
   src->copyHw(*copy, true);
   return copy;
}
// Applies eye adaptation from 'src' into 'dest':
//  1. 'src' is down-sampled in steps of 4 through square F32 targets until a 1x1 image is reached
//  2. the '_eye_adapt_scale' pair is swapped and the new current one is updated with the 'HdrUpdate' shader
//  3. 'src' is drawn to 'dest' with the 'Hdr' shader using the updated scale image
void RendererClass::adaptEye(ImageRC &src, Image &dest)
{
   Hdr.load();
   ImageRTPtr temp=&src;
   VecI2    size=RoundPos(fx()*(D.viewRect().size()/D.size2())); // calculate viewport size in pixels
   Int  max_size=size.min()/4;
   Int  s=1, num=1; for(;;){Int next_size=s*4; if(next_size>max_size)break; s=next_size; num++;} // go from 1 up to 'max_size', increase *4 in each step
   FREP(num) // now go backwards, from up to 'max_size' to 1 inclusive
   {
      ImageRTPtr next=temp; next.get(ImageRTDesc(s, s, IMAGERT_F32)); s/=4; // we could use 16-bit as according to calculations, the max error for 1920x1080, starting with 256x256 as first step and going down to 1x1, with average luminance of 1.0 (255 byte) is 0.00244140625 at the final stage, which gives 410 possible colors, however we may use some special tricks in the shader that requires higher precision (for example BRIGHT with Sqr and Sqrt later, or use Linear/sRGB)
      set(next(), null, false);
      // first step reads from 'src' limited to the viewport UV rectangle, following steps read the previous down-sampled image
      if(i)Hdr.h_HdrDS[1]->draw(temp());
      else Hdr.h_HdrDS[0]->draw(temp(), null, D.screenToUV(D.viewRect()));
      temp=next;
   }
   Sh.h_Step    ->set(Pow(Mid(1/D.eyeAdaptationSpeed(), EPS, 1.0f), Time.d())); // can use EPS and not EPS_GPU because we're using Pow here and not on GPU
   Sh.h_ImageLum->set(_eye_adapt_scale[_eye_adapt_scale_cur]); _eye_adapt_scale_cur^=1; _eye_adapt_scale[_eye_adapt_scale_cur].discard(); set(&_eye_adapt_scale[_eye_adapt_scale_cur], null, false); Hdr.h_HdrUpdate->draw(temp());
   Sh.h_ImageLum->set(_eye_adapt_scale[_eye_adapt_scale_cur]);                                                                            set(&dest                                  , null, true ); Hdr.h_Hdr      ->draw(src   ); // TODO: for simple mode we could do ALPHA_MUL_KEEP (if that would be faster)
   MaterialClear();
}
// Returns the shader stored in 'Sh.h_BloomDS' for the given options; when SLOW_SHADER_LOAD is enabled a missing shader is first obtained through 'Sh.getBloomDS' and stored.
INLINE Shader* GetBloomDS(Bool glow, Bool viewport_clamp, Bool half, Bool saturate)
{
   Shader* &shader=Sh.h_BloomDS[glow][viewport_clamp][half][saturate];
   if(SLOW_SHADER_LOAD)
   {
      if(!shader)shader=Sh.getBloomDS(glow, viewport_clamp, half, saturate);
   }
   return shader;
}
// Returns the shader stored in 'Sh.h_Bloom' for the given 'dither' option; when SLOW_SHADER_LOAD is enabled a missing shader is first obtained through 'Sh.getBloom' and stored.
INLINE Shader* GetBloom(Bool dither)
{
   Shader* &shader=Sh.h_Bloom[dither];
   if(SLOW_SHADER_LOAD)
   {
      if(!shader)shader=Sh.getBloom(dither);
   }
   return shader;
}
// !! Assumes that 'ColClamp' was already set !!
// Applies bloom from 'src' into 'dest':
//  - 'src' is down-sampled into a half or quarter sized helper target (depending on 'D.bloomHalf')
//  - optional maximum passes and blur passes ping-pong between the two helper targets 'rt0' and 'rt1'
//  - the result in 'rt0' is merged with 'src' into 'dest' by the 'Bloom' shader
// When there is neither glow nor bloom scale, 'rt0' is just cleared before the final merge.
void RendererClass::bloom(Image &src, Image &dest, Bool dither)
{
   const Int     shift=(D.bloomHalf() ? 1 : 2);
   ImageRTDesc   rt_desc(fxW()>>shift, fxH()>>shift, IMAGERT_RGB);
   ImageRTPtrRef rt0(D.bloomHalf() ? _h0 : _q0); rt0.get(rt_desc);
   ImageRTPtrRef rt1(D.bloomHalf() ? _h1 : _q1); rt1.get(rt_desc); Bool discard=false; // we've already discarded in 'get' so no need to do it again

   if(_has_glow || D.bloomScale()) // if we have something there
   {
      set(rt0(), null, false);

      Rect ext_rect, *rect=null; // set rect, after setting render target
      if(!D._view_main.full){ext_rect=D.viewRect(); rect=&ext_rect.extend(Renderer.pixelToScreenSize((D.bloomMaximum()+D.bloomBlurs())*SHADER_BLUR_RANGE+1));} // when not rendering entire viewport, then extend the rectangle, add +1 because of texture filtering, have to use 'Renderer.pixelToScreenSize' and not 'D.pixelToScreenSize'

      Bool half=(Flt(src.h())/rt0->h() <= 2.5f); // half=scale 2, ..3.., quarter=scale 4, 2.5 was the biggest scale that didn't cause jittering when using half down-sampling
      // params: X=original intensity, Y=bloom scale (adjusted for the down-sample factor), Z=-cut*scale
      Sh.h_BloomParams->setConditional(Vec(D.bloomOriginal(), _has_glow ? D.bloomScale()/Sqr(half ? 2 : 4) : half ? D.bloomScale() : D.bloomScale()/4, -D.bloomCut()*D.bloomScale()));
      GetBloomDS(_has_glow, !D._view_main.full, half, D.bloomSaturate())->draw(src, rect);
      if(D.bloomMaximum())
      { // 'discard' before 'set' because it already may have requested discard, and if we 'discard' manually after 'set' then we might discard 2 times
                         set(rt1(), null, false); Sh.h_MaxX->draw(rt0(), rect); discard=true; // discard next time
         rt0->discard(); set(rt0(), null, false); Sh.h_MaxY->draw(rt1(), rect);
      }
      REP(D.bloomBlurs())
      { // 'discard' before 'set' because it already may have requested discard, and if we 'discard' manually after 'set' then we might discard 2 times
         if(discard)rt1->discard(); set(rt1(), null, false); Sh.h_BlurX[D.bloomSamples()]->draw(rt0(), rect); discard=true; // discard next time
                    rt0->discard(); set(rt0(), null, false); Sh.h_BlurY[D.bloomSamples()]->draw(rt1(), rect);
      }
   }else
   {
      rt0()->clearFull();
   }
   set(&dest, null, true);
   Sh.h_ImageCol[1]->set(rt0()); MaterialClear();
   GetBloom(dither /*&& (src.highPrecision() || rt0->highPrecision())*/ && !dest.highPrecision())->draw(src); // merging 2 RT's ('src' and 'rt0') with some scaling factors will give us high precision
}
// Converts a pixel range to scale: 'pixels'=max blur range in pixels in one direction, 'res'=total resolution
static Flt PixelsToScale(Flt pixels, Int res)
{
   return pixels*2/res;
}
// Converts scale back to a pixel range for resolution 'res' (inverse of 'PixelsToScale')
static Flt ScaleToPixels(Flt scale, Int res)
{
   return scale*res/2;
}
// Sets the motion blur shader parameters ('MotionUVMulAdd', 'MotionVelScaleLimit', 'MotionPixelSize') for a blur range of 'pixels'.
// Reads 'D._view_active', so it has to be called after the active viewport was set.
static void SetMotionBlurParams(Flt pixels) // !! this needs to be called when the RT is 'D.motionRes' sized because it needs that size and not the full size !!
{
   // see "C:\Users\Greg\SkyDrive\Code\Tests\Motion Blur.cpp"
   const Flt scale=PixelsToScale(MAX_MOTION_BLUR_PIXEL_RANGE, 1080); // scale should be small, because inside the shader we do "x/(1 +- blur.z)"
   const Flt limit=pixels/MAX_MOTION_BLUR_PIXEL_RANGE; // if we're using only 'pixels' then we have to limit from the full 0..1 range to the fraction

   Vec2 viewport_center=D._view_active.recti.centerF()/Renderer.res(), size2=D._unscaled_size*(2/scale)*limit;
   // pos=(inTex-viewport_center)*size2;
   // pos=inTex*size2 - viewport_center*size2;
   Mtn.h_MotionUVMulAdd     ->setConditional(Vec4(size2.x, size2.y, -viewport_center.x*size2.x, -viewport_center.y*size2.y)); // XY=mul, ZW=add, as in the formula above

   Mtn.h_MotionVelScaleLimit->setConditional(Vec4(D.scale()/D.viewFovTanFull().x*limit, -D.scale()/D.viewFovTanFull().y*limit, scale, limit));
   Mtn.h_MotionPixelSize    ->setConditional(Flt(MAX_MOTION_BLUR_PIXEL_RANGE)/Renderer.res()); // the same value is used for 'SetDirs' (D.motionRes) and 'Blur' (D.res)
}
// !! Assumes that 'ColClamp' was already set !!
// Applies motion blur from 'src' into 'dest' in these steps:
//  1. convert velocities ('_vel', or camera-only velocities when '_vel' is not available) into a reduced resolution target
//  2. dilate the converted velocities (only when '_vel' was available)
//  3. calculate blur directions with 'SetDirs'
//  4. blur 'src' into 'dest'
// Returns true when one of the intermediate results was requested through 'stage' (RS_VEL, RS_VEL_CONVERT, RS_VEL_DILATED, RS_VEL_LEAK) and 'set' succeeded for it, in which case the blur itself was not drawn; false after the blur was drawn.
Bool RendererClass::motionBlur(Image &src, Image &dest, Bool dither)
{
   if(stage==RS_VEL && set(_vel))return true;

   Mtn.load();

   Bool camera_object=(_vel!=null); // remember blur mode because it depends on '_vel' which gets cleared

   VecI2 res;
   res.y=Min(ByteScaleRes(fxH(), D._mtn_res), 1080); // only up to 1080 is supported, because shaders support only up to MAX_MOTION_BLUR_PIXEL_RANGE pixels, but if we enable higher resolution then it would require more pixels
   res.x=Max(1, Round(res.y*D._unscaled_size.div())); // calculate proportionally to 'res.y' and current mode aspect (do not use 'D.aspectRatio' because that's the entire monitor screen aspect, and not application window), all of this is needed because we need to have square pixels for motion blur render targets, however the main application resolution may not have square pixels
   const Flt pixels=res.y*(Flt(MAX_MOTION_BLUR_PIXEL_RANGE)/1080);
   const Int dilate_round_range=1; // this value should be the same as "Int range" in "Dilate_PS" Motion Blur shader
         Int dilate_round_steps;
   switch(D.motionDilate()) // get round dilate steps
   {
      default:
      case DILATE_ORTHO :
      case DILATE_ORTHO2: dilate_round_steps=                                    0; break; // zero round steps
      case DILATE_MIXED : dilate_round_steps=Round(pixels/dilate_round_range*0.3f); break; // 0.3f was chosen to achieve quality/performance that's between orthogonal and round mode
      case DILATE_ROUND : dilate_round_steps=Round(pixels/dilate_round_range     ); break; // 'Round' should be enough
   }
   Int  dilate_round_pixels=dilate_round_steps*dilate_round_range, dilate_ortho_pixels=Max(Round(pixels)-dilate_round_pixels, 0);
   Bool diagonal=(D.motionDilate()==DILATE_ORTHO2);
 C MotionBlur::Pixel *ortho=Mtn.pixel(dilate_ortho_pixels, diagonal); if(ortho)dilate_ortho_pixels=ortho->pixels; // reset 'dilate_ortho_pixels' because it can actually be bigger based on what is supported
   const Int total_pixels=dilate_round_pixels+dilate_ortho_pixels; // round+ortho
   DEBUG_ASSERT(D.motionDilate()==DILATE_ROUND ? dilate_ortho_pixels==0 : true, "Ortho should be zero in round mode");

   // step 1: convert velocities
   ImageRTDesc rt_desc(res.x, res.y, D.signedVelRT() ? IMAGERT_RGB_S : IMAGERT_RGB); // Alpha not used (XY=Dir, Z=Dir.length)
   ImageRTPtr  converted(rt_desc);
   Shader     *shader;
   if(camera_object)shader=Mtn.h_Convert[true ][!D._view_main.full];else
   {                shader=Mtn.h_Convert[false][!D._view_main.full];
                    SetFastVel();
   }
   set(converted(), null, false);
   Rect ext_rect, *rect=null;
   if(D._view_main.full)REPS(_eye, _eye_num)
   {
      Rect *eye_rect=setEyeParams();
      SetMotionBlurParams(pixels); // call after 'setEyeParams' because we need to set 'D._view_active'
      shader->draw(_vel, eye_rect);
   }else
   {
      SetMotionBlurParams(pixels);
      ext_rect=D.viewRect(); rect=&ext_rect.extend(Renderer.pixelToScreenSize(total_pixels+1)); // when not rendering entire viewport, then extend the rectangle because of 'Dilate' and 'SetDirs' checking neighbors, add +1 because of texture filtering, we can ignore stereoscopic there because that's always disabled for not full viewports, have to use 'Renderer.pixelToScreenSize' and not 'D.pixelToScreenSize'
      shader->draw(_vel, rect);
   }
  _vel.clear();

   if(stage==RS_VEL_CONVERT && set(converted))return true;

   // step 2: dilate
   ImageRTPtr dilated=converted, helper;

   if(camera_object) // we apply Dilation only in MOTION_CAMERA_OBJECT mode, for MOTION_CAMERA it's not needed
   {
      rt_desc.rt_type=(D.signedVelRT() ? IMAGERT_RGB_S : IMAGERT_RGB); // Alpha not used (XY=Dir, Z=Max Dir length of all nearby pixels)
      // we need to apply Dilation, for example, if a ball object has movement, then it should be blurred, and also pixels around the ball should be blurred too
      // however velocity for pixels around the ball (the background) may be zero, so we need to apply dilation and apply the velocity onto neighboring pixels
      // remember that it doesn't make sense to perform depth based tests on not first steps "dilated!=converted",
      // because the depth tests compare only center pixels and the pixels around it, however if in step #0 we dilate velocity from pixel with X coordinate=2,
      // onto pixel X=1, then in step #1, we dilate pixel X=1 onto X=0 (the velocity is carried over from pixel X=2 into X=0, however we don't store information
      // about what was the depth of that velocity, so in step #1 we're comparing depth of pixel X=0 with X=1, however we're using velocity from X=2,
      // so we should have information about X=2 depth, however there's no easy way to do that
      // TODO: check if depth tests are useful for the first step ("dilated==converted")

      if(ortho) // do orthogonal first (this will result in slightly less artifacts when the camera is moving)
      {
         helper .get(rt_desc); set(helper (), null, false); ortho->h_DilateX[diagonal]->draw(dilated(), rect);
         dilated.get(rt_desc); set(dilated(), null, false); ortho->h_DilateY[diagonal]->draw(helper (), rect);
      }
      REP(dilate_round_steps)
      {
         if(!helper || helper==converted)helper.get(rt_desc);else helper->discard(); // don't write to original 'converted' in the next step, because we need it later
         set(helper(), null, false); Mtn.h_Dilate->draw(dilated(), rect); Swap(dilated, helper);
      }
   }
   if(stage==RS_VEL_DILATED && set(dilated))return true;

   // step 3: set directions
   // check how far can we go (remove leaks)
   Sh.h_ImageCol[1]->set(dilated()); MaterialClear();
   rt_desc.rt_type=(D.signedVelRT() ? IMAGERT_RGBA_S : IMAGERT_RGBA); // XY=Dir#0, ZW=Dir#1
   helper.get(rt_desc); // we always need to call this because 'helper' can be set to 'converted'
   set(helper(), null, false); Mtn.h_SetDirs[!D._view_main.full]->draw(converted(), rect);
   if(stage==RS_VEL_LEAK && set(helper))return true;

   // step 4: blur
   Sh.h_ImageCol[1]->set(helper()); MaterialClear();
   set(&dest, null, true);
   Mtn.h_Blur[dither /*&& src.highPrecision()*/ && !dest.highPrecision()]->draw(src); // here blurring may generate high precision values

   return false;
}
// Returns the shader stored in 'Dof.h_DofDS' for the given options; when SLOW_SHADER_LOAD is enabled a missing shader is first obtained through 'Dof.getDS' and stored.
INLINE Shader* GetDofDS(Bool clamp, Bool realistic, Bool half)
{
   Shader* &shader=Dof.h_DofDS[clamp][realistic][half];
   if(SLOW_SHADER_LOAD)
   {
      if(!shader)shader=Dof.getDS(clamp, realistic, half);
   }
   return shader;
}
// Returns the shader stored in 'Dof.h_Dof' for the given options; when SLOW_SHADER_LOAD is enabled a missing shader is first obtained through 'Dof.get' and stored.
INLINE Shader* GetDof(Bool dither, Bool realistic)
{
   Shader* &shader=Dof.h_Dof[dither][realistic];
   if(SLOW_SHADER_LOAD)
   {
      if(!shader)shader=Dof.get(dither, realistic);
   }
   return shader;
}
// !! Assumes that 'ColClamp' was already set !!
// Applies depth of field from 'src' into 'dest':
//  - 'src' is down-sampled into a half sized helper target 'rt0' (alpha stores the blur amount)
//  - the helper is blurred horizontally into 'rt1' and vertically back into 'rt0'
//  - the blurred result is merged with 'src' into 'dest' by the 'Dof' shader
void RendererClass::dof(Image &src, Image &dest, Bool dither)
{ // Depth of Field shader does not require stereoscopic processing because it just reads the depth buffer
   const Int   shift=1; // half
   ImageRTDesc rt_desc(fxW()>>shift, fxH()>>shift, src.highPrecision() ? IMAGERT_RGBA_H : IMAGERT_RGBA); // here Alpha is used to store amount of Blur, use high precision if source is to don't lose smooth gradients when having full blur (especially visible on sky), IMAGERT_RGBA_H vs IMAGERT_RGBA has no significant difference on GeForce 1050Ti
   ImageRTPtr  rt0(rt_desc),
               rt1(rt_desc);

   Bool half=(Flt(src.h())/rt0->h() <= 2.5f); // half=scale 2, ..3.., quarter=scale 4, 2.5 was the biggest scale that didn't cause jittering when using half down-sampling
   Dof.load();
 C DepthOfField::Pixel &pixel=Dof.pixel(Round(fxH()*(5.0f/1080))); // use 5 pixel range blur on a 1080 resolution

   Flt range_inv=1.0f/Max(D.dofRange(), EPS); // 'Max' with EPS avoids division by zero
   Dof.h_DofParams->setConditional(Vec4(D.dofIntensity(), D.dofFocus(), range_inv, -D.dofFocus()*range_inv));

   set(rt0(), null, false); Rect ext_rect, *rect=null; if(!D._view_main.full){ext_rect=D.viewRect(); rect=&ext_rect.extend(Renderer.pixelToScreenSize(pixel.pixels+1));} // when not rendering entire viewport, then extend the rectangle because of blurs checking neighbors, add +1 because of texture filtering, we can ignore stereoscopic there because that's always disabled for not full viewports, have to use 'Renderer.pixelToScreenSize' and not 'D.pixelToScreenSize' and call after setting RT
     GetDofDS(!D._view_main.full, D.dofFocusMode(), half)->draw(src  , rect);
   set(rt1(), null, false);                 pixel.h_BlurX->draw(rt0(), rect);
   set(rt0(), null, false); rt0->discard(); pixel.h_BlurY->draw(rt1(), rect);

   set(&dest, null, true);
   Sh.h_ImageCol[1]->set(rt0()); MaterialClear();
   GetDof(dither && (src.highPrecision() || rt0->highPrecision()) && !dest.highPrecision(), D.dofFocusMode())->draw(src);
}
// Returns 'Sh.h_Combine'; when SLOW_SHADER_LOAD is enabled a missing shader is first obtained by name through 'Sh.get' and stored.
INLINE Shader* GetCombine()
{
   Shader* &shader=Sh.h_Combine;
   if(SLOW_SHADER_LOAD)
   {
      if(!shader)shader=Sh.get("Combine");
   }
   return shader;
}
// Returns 'Sh.h_CombineMS'; when SLOW_SHADER_LOAD is enabled a missing shader is first obtained by name through 'Sh.get' and stored.
INLINE Shader* GetCombineMS()
{
   Shader* &shader=Sh.h_CombineMS;
   if(SLOW_SHADER_LOAD)
   {
      if(!shader)shader=Sh.get("CombineMS");
   }
   return shader;
}
// Returns 'Sh.h_CombineSS'; when SLOW_SHADER_LOAD is enabled a missing shader is first obtained by name through 'Sh.get' and stored.
INLINE Shader* GetCombineSS()
{
   Shader* &shader=Sh.h_CombineSS;
   if(SLOW_SHADER_LOAD)
   {
      if(!shader)shader=Sh.get("CombineSS");
   }
   return shader;
}
// Returns 'Sh.h_CombineSSAlpha'; when SLOW_SHADER_LOAD is enabled a missing shader is first obtained by name through 'Sh.get' and stored.
INLINE Shader* GetCombineSSAlpha()
{
   Shader* &shader=Sh.h_CombineSSAlpha;
   if(SLOW_SHADER_LOAD)
   {
      if(!shader)shader=Sh.get("CombineSSAlpha");
   }
   return shader;
}
// Combines the rendered '_col' onto '_final' with blending. Four cases are handled:
//  - multi-sampled depth: resolve with the 'CombineMS'/'Combine' shaders (directly onto '_final' when compatible with '_ds_1s', otherwise into a temporary target)
//  - '_col' smaller than '_final': combine into a temporary target of '_col' size, which is up-scaled afterwards
//  - '_ds' bigger than '_final' (super-sampling): alpha is first resolved to a temporary target, then 'CombineSS' draws onto '_final'
//  - same size: 'Combine' draws directly onto '_final'
// If a temporary target was used, it is then drawn onto '_final' (with the density filter when up-scaling).
// On exit '_col' is set to '_final'.
void RendererClass::Combine()
{
   Bool alpha_premultiplied=false; // set when a temporary target was written with ALPHA_SETBLEND_SET, so the final draw uses ALPHA_MERGE
   if(_ds->multiSample() && Sh.h_CombineMS) // '_col' could have been resolved already, so check '_ds' instead
   {
      ImageRTPtr resolve=_final; if(resolve->compatible(*_ds_1s))D.alpha(ALPHA_BLEND);else
      {
         resolve.get(ImageRTDesc(_ds_1s->w(), _ds_1s->h(), IMAGERT_RGBA)); // resolve to a temp RT and apply that later, here Alpha is used for storing image opacity
         D.alpha(ALPHA_SETBLEND_SET); alpha_premultiplied=true;
      }
      set(resolve(), _ds_1s(), true, NEED_DEPTH_READ);
      if(hasStencilAttached())
      {
         // with stencil: multi-sampled shader only where the MSAA stencil test passes, simple shader for stencil reference 0
         D.stencil(STENCIL_MSAA_TEST, STENCIL_REF_MSAA); GetCombineMS()->draw(_col);
                         D.stencilRef(0               ); GetCombine  ()->draw(_col);
         D.stencil(STENCIL_NONE     );
      }else
      {
         GetCombineMS()->draw(_col); // we have to run all at multi-sampled frequency
      }
     _col=resolve;
   }else
   if(_col->w()<_final->w()) // resolve first to small buffer
   {
      ImageRTPtr resolve=_col; resolve.get(ImageRTDesc(_col->w(), _col->h(), IMAGERT_RGBA)); // here Alpha is used for storing image opacity
      set(resolve(), null, false); // request full viewport because we will need it below, when drawing black borders
      D.alpha(ALPHA_SETBLEND_SET); alpha_premultiplied=true;
      GetCombine()->draw(_col, &D.viewRect());
     _col=resolve;
   }else
   if(_ds->w()>_final->w()) // resolve Alpha first to full buffer, check '_ds' because '_col' could have been already downsampled
   {
      ImageRTPtr alpha; alpha.get(ImageRTDesc(_ds_1s->w(), _ds_1s->h(), IMAGERT_ONE));
      set(alpha(), null, true);
      D.alpha(ALPHA_NONE);
      GetCombineSSAlpha()->draw();
      Sh.h_ImageCol[1]->set(alpha); MaterialClear();
      set(_final(), null, true);
      D.alpha(ALPHA_BLEND);
      GetCombineSS()->draw(_col);
     _col=_final;
   }else
   {
      set(_final(), null, true);
      D.alpha(ALPHA_BLEND);
      GetCombine()->draw(_col);
     _col=_final;
   }
   if(_col!=_final) // a temporary target was used above, so it still has to be drawn onto '_final'
   {
      Shader *shader=null;
      Bool    upscale_none=false; // if point filtering was enabled below and needs to be restored after drawing
      if(_col->w()<_final->w()) // upscale
      {
         Bool dither=(D.dither() && !_final->highPrecision()); // disable dithering if destination has high precision
         Int  pixels=1+1; // 1 for filtering + 1 for borders (because source is smaller and may not cover the entire range for dest, for example in dest we want 100 pixels, but 1 source pixel covers 30 dest pixels, so we may get only 3 source pixels covering 90 dest pixels)
         switch(D.densityFilter()) // remember that cubic shaders are optional and can be null if failed to load
         {
            case FILTER_NONE:
            {
               upscale_none=true;
               pixels=1; // 1 for borders
            #if DX9
               Sh.h_ImageCol[0]->_sampler=&SamplerPoint;
            #elif DX11
               SamplerPoint.setPS(SSI_DEFAULT);
            #elif GL // in GL 'ShaderImage.Sampler' does not affect filtering, so modify it manually
               D.texBind(GL_TEXTURE_2D, _col->_txtr); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
            #endif
            }break;

            case FILTER_CUBIC_FAST       :
            case FILTER_CUBIC_FAST_SMOOTH:
            case FILTER_CUBIC_FAST_SHARP :
               pixels=2+1; // 2 for filtering + 1 for borders
               shader=(dither ? Sh.h_DrawTexCubicFastD : Sh.h_DrawTexCubicFast1); // this doesn't need to check for "_col->highPrecision" because resizing and cubic filtering generates smooth values
            break;

            case FILTER_BEST       :
            case FILTER_CUBIC      :
            case FILTER_CUBIC_SHARP:
               pixels=3+1; // 3 for filtering + 1 for borders
               Sh.loadCubicShaders(); shader=(dither ? Sh.h_DrawTexCubicD : Sh.h_DrawTexCubic1); // this doesn't need to check for "_col->highPrecision" because resizing and cubic filtering generates smooth values
            break;
         }
         if(!D._view_main.full)
         {
            set(_col(), null, false); // need full viewport
            D.viewRect().drawBorder(TRANSPARENT, Renderer.pixelToScreenSize(-pixels)); // draw black border around the viewport to clear and prevent from potential artifacts on viewport edges
         }
      }
      if(!shader)shader=Sh.h_Draw; // ignore dithering for simple filtering because we've resolved this to low precision RT

      set(_final(), null, true);
      D.alpha(alpha_premultiplied ? ALPHA_MERGE : ALPHA_BLEND);
      shader->draw(_col);
      if(upscale_none) // restore the filtering changed for FILTER_NONE
      {
         #if DX9
            Sh.h_ImageCol[0]->_sampler=null;
         #elif DX11
            SamplerLinearClamp.setPS(SSI_DEFAULT);
         #elif GL
            if(!GL_ES || ImageTI[_col->hwType()].precision<IMAGE_PRECISION_32) // GLES2/3 don't support filtering F32 textures, without this check reading from F32 textures will fail - https://www.khronos.org/registry/OpenGL-Refpages/es3.0/html/glTexImage2D.xhtml
               {D.texBind(GL_TEXTURE_2D, _col->_txtr); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);}
         #endif
      }
     _col=_final;
   }
}
/******************************************************************************/
// Applies 'lod' as the D3DSAMP_MAXMIPLEVEL sampler state on DX9 for 16 samplers; on other APIs this function does nothing.
inline void TexLod(Int lod)
{
#if DX9
   REP(16)D3D->SetSamplerState(i, D3DSAMP_MAXMIPLEVEL, lod); // this is 'D3D11_SAMPLER_DESC.MinLOD'
#elif DX11
   // handled in 'CreateAnisotropicSampler'
#endif
}
// this sets the 'D.texLod' into device settings, it's handled only during rendering to affect only 3D graphics and not the 2D interface (DX11 version operates on 'AnisotropicSampler' which is used only in 3D graphics)
// nothing is done when 'D.texLod' is zero
inline void TexLodLimit()
{
   if(!D.texLod())return;
   TexLod(D.texLod());
}
// this resets the device settings to have full quality (all LOD's)
// nothing is done when 'D.texLod' is zero, because in that case no limit was applied
inline void TexLodFull()
{
   if(!D.texLod())return;
   TexLod(0);
}

// Clears the render target pointers used during rendering (except '_ds_1s'), clears 'Lights' and calls 'ClearInstances'.
void RendererClass::cleanup()
{
  _ds          .clear();
//_ds_1s       .clear(); do not clear '_ds_1s' because 'setDepthForDebugDrawing' may be called after rendering finishes, also 'capture' makes use of it
  _nrm         .clear();
  _vel         .clear();
  _lum         .clear();
  _lum_1s      .clear();
  _shd_1s      .clear();
  _shd_ms      .clear();
  _water_col   .clear();
  _water_nrm   .clear();
  _water_ds    .clear();
  _water_lum   .clear();
  _vol         .clear();
  _ao          .clear();
  _mirror_rt   .clear();
  _outline_rt  .clear();
  _sky_coverage.clear();
   Lights.clear();
   ClearInstances();
}
// Renders the scene using 'render' as the drawing callback (stored in '_render' for the duration of rendering) and returns this renderer.
// Steps performed here:
//  -(DEBUG only) selects a debug 'stage' based on numpad keys
//  -updates shadow range and vertex fog shader parameters
//  -sets per-render state ('_render', '_stereo', '_eye_num', '_final', ..) and calls 'Water.prepare'
//  -runs the rendering stages in order (reflection, prepare, solid, overlay, sky velocity/normal clear, lights, water, edge detect, sky, blend, palettes, behind, outline, 2D effects),
//      a stage that returns true, or a debug 'stage' view that was displayed using 'set', jumps to 'finished' and skips the remaining stages
//  -restores default states, calls 'cleanup' and for stereo rendering restores centered viewport/camera
// Calls 'Exit' if '_shader_param_changes' is still set after rendering.
RendererClass& RendererClass::operator()(void (&render)())
{
#if DEBUG
   // debug stage selection using numpad keys, NOTE(review): presumably 'Kb.b' = key is held and 'Kb.br' = key was released - confirm against Keyboard API
   if(Kb.b(KB_NP0))stage=RS_DEPTH;else
   if(Kb.b(KB_NP1))stage=RS_COLOR;else
   if(Kb.b(KB_NP2))stage=RS_NORMAL;else
   if(Kb.b(KB_NP3))stage=RS_VEL;else
   if(Kb.b(KB_NP4))stage=(Kb.b(KB_NP5) ? RS_LIGHT_AO : RS_LIGHT);else
   if(Kb.b(KB_NP5))stage=RS_AO;else
   if(Kb.b(KB_NP6))stage=RS_LIT_COLOR;else
   if(Kb.b(KB_NP7))stage=RS_WATER_COLOR;else
   if(Kb.b(KB_NP8))stage=RS_WATER_NORMAL;else
   if(Kb.b(KB_NP9))stage=RS_WATER_LIGHT;else
   if(Kb.b(KB_NPDIV))stage=RS_REFLECTION;else
   if(Kb.br(KB_NP0) || Kb.br(KB_NP1) || Kb.br(KB_NP2) || Kb.br(KB_NP3) || Kb.br(KB_NP4) || Kb.br(KB_NP5) || Kb.br(KB_NP6) || Kb.br(KB_NP7) || Kb.br(KB_NP8) || Kb.br(KB_NP9)
   || Kb.br(KB_NPDIV))stage=RS_DEFAULT;
#endif

   // Shadow Settings
   Sh.h_ShdRange->setConditional(T._shd_range=D._shd_frac*D.viewRange());
   {
      // shader param is a (mul, add) pair applied to squared distance, so that the result is 0 at 'from' and 1 at 'to'
      Flt from=T._shd_range*D._shd_fade,
          to  =T._shd_range;
      if(from>=D.viewRange()-EPSL) // disabled
      {
         Sh.h_ShdRangeMulAdd->setConditional(Vec2(0));
      }else
      {
         MAX(to, from+0.01f);
         from*=from; to*=to;
         Flt mul=1/(to-from), add=-from*mul;
         Sh.h_ShdRangeMulAdd->setConditional(Vec2(mul, add));
      }
   }

   // vertex fog settings
   {
      Flt from=D.viewRange()*simpleVertexFogStart(),
          to  =D.viewRange()*simpleVertexFogEnd  (); MAX(to, from+0.01f);

      // remember that GL ES MP precision is in range Pow2(2, -14) .. Pow2(2, 14) : (0.000061035 .. 16384)

      // only the first '#if' branch below is compiled, the remaining ones are kept as reference for alternative formulas
   #if 1 // quadratic mul add (currently used) (does not qualify for MP), must be HP because of high values !!
    //Flt fog_intensity=Length2(O.pos)*VertexFogMulAdd.x+VertexFogMulAdd.y;
    //                0=Sqr    (from )*VertexFogMulAdd.x+VertexFogMulAdd.y;
    //                1=Sqr    (to   )*VertexFogMulAdd.x+VertexFogMulAdd.y;
      Flt mul, add;
      if(simpleVertexFogStart()>=1){mul=0; add=1;}else // no fog
      { // fog
         to*=to; from*=from;
         mul=1/(to-from); add=-from*mul;

         // now reverse (fog_intensity -> 1-fog_intensity)
         mul=-mul  ; // for viewRange(350).fogRange(1, 1) mul = ~ -0.14269789
         add=-add+1; // for viewRange(350).fogRange(1, 1) add = ~  17481.490
      }
   #elif 0 // linear mul add (does not qualify for MP)
    //Flt fog_intensity=Length(O.pos)*VertexFogMulAdd.x+VertexFogMulAdd.y;
    //                0=      (from )*VertexFogMulAdd.x+VertexFogMulAdd.y;
    //                1=      (to   )*VertexFogMulAdd.x+VertexFogMulAdd.y;
      Flt mul=1/(to-from), add=-from*mul;

      // now reverse (fog_intensity -> 1-fog_intensity)
      mul=-mul  ; // for viewRange(350).fogRange(1, 1) mul = ~ -  100
      add=-add+1; // for viewRange(350).fogRange(1, 1) add = ~ -35000
   #elif 0 // quadratic add mul (does not qualify for MP)
    //Flt fog_intensity=(Length2(O.pos)+VertexFogMulAdd.y)*VertexFogMulAdd.x;
    //                0=(Sqr    (from )+VertexFogMulAdd.y)*VertexFogMulAdd.x;
    //                1=(Sqr    (to   )+VertexFogMulAdd.y)*VertexFogMulAdd.x;
      //Flt add=-Sqr(from), mul=1.0f/(Sqr(to)-Sqr(from));

      // reversed (fog_intensity -> 1-fog_intensity)
      Flt add=-Sqr(to),                 // for viewRange(350).fogRange(1, 1) add = ~ -122507    , for viewRange(2000).fogRange(0, 1); add = ~ -4000000
          mul=1.0f/(Sqr(from)-Sqr(to)); // for viewRange(350).fogRange(1, 1) mul = ~ -0.14271572, for viewRange(2000).fogRange(0, 1); mul = ~ -2.5000000e-007
   #else // linear add mul (qualifies for MP) (however since Length(Vec) is required, it can't be MP because Length(Vec(128)) = Sqrt(Dot(Vec(128), Vec(128))) where Dot would result in 16384 values which is the MP limit (and making only first 128 meters usable)
    //Flt fog_intensity=(Length(O.pos)+VertexFogMulAdd.y)*VertexFogMulAdd.x;
    //                0=(      (from )+VertexFogMulAdd.y)*VertexFogMulAdd.x;
    //                1=(      (to   )+VertexFogMulAdd.y)*VertexFogMulAdd.x;
      //Flt add=-from, mul=1.0f/(to-from);

      // reversed (fog_intensity -> 1-fog_intensity)
      Flt add=-to,            // for viewRange(350).fogRange(1, 1) add = ~ -350, for viewRange(2000).fogRange(0, 1); add = ~ -2000
          mul=1.0f/(from-to); // for viewRange(350).fogRange(1, 1) mul = ~ -100, for viewRange(2000).fogRange(0, 1); mul = ~ -0.0005
   #endif

      Sh.h_VertexFogMulAdd->setConditional(Vec2(mul, add));
      Sh.h_VertexFogColor ->setConditional(_vtx_fog_color);
   }

   // prepare
  _render     =render;
  _stereo     =(VR.active() && D._view_main.full && !combine && !target && !_get_target && D._allow_stereo); // use stereo only for full viewport, if we're not combining (games may use combining to display 3D items/characters in Gui)
  _eye_num    =_stereo+1; // _stereo ? 2 : 1
  _has_glow   =false;
  _fur_is     =false;
  _mirror_want=false;
  _outline    =0;
  _final      =(target ? target : _stereo ? VR.getNewRender() : _cur_main);

   if(VR.active())D.setViewFovTan(); // !! call after setting _stereo and _render !!

   // !! it is important to call this as the first thing during rendering, because 'Game.WorldManager.draw' assumes so, if this needs to be changed then rework 'Game.WorldManager.draw' !!
   // set water
   Water.prepare();

// when '_t_measure' is enabled: calls 'D.finish', adds to 'x' the time elapsed since previous measurement and restarts the timer 't'
#define MEASURE(x) if(_t_measure){D.finish(); Dbl c=Time.curTime(); x+=c-t; t=c;}

   // render
   {
      // 't', 'temp' and 'water' are initialized and used only when '_t_measure' is enabled
      Dbl t; Flt temp, water;
      if(_t_measure){D.finish(); t=Time.curTime(); temp=water=0; _t_measures[0]++;}

      TexLodLimit();
      if(reflection())goto finished; MEASURE(_t_reflection[1])

      prepare(); MEASURE(_t_prepare[1])
      solid  (); MEASURE(_t_solid  [1])
   #if GL_ES && !WEB // we need to make sure that depth RT is flushed to depth texture on tile-based deferred renderers, this is because on those devices the RT's (including depth buffer) are stored in fast on-chip memory and to be able to read from them, we need to flush them to the texture memory. This is done after reading solid's and before we may read from the depth buffer. No need to do on WEB because there we can never read from depth while writing to it.
      if(canReadDepth())
         if(D.edgeDetect() || D.particlesSoft() || Sky.wantDepth() || Clouds.wantDepth() || Fog.draw/* || Sun.wantDepth()*/) // here we need to check only effects that may read from depth without changing any RT's, because on any RT change the depth is flushed. Sun doesn't bind DS to FBO when drawing rays. TODO: we wouldn't need to do this if all shaders reading from the depth would use gl_LastFragDepth - https://www.khronos.org/registry/OpenGL/extensions/ARM/ARM_shader_framebuffer_fetch_depth_stencil.txt
      { // unbinding will force the flush (calling just 'glFlush' was not enough)
         glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT  , GL_RENDERBUFFER, 0);
         glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_RENDERBUFFER, 0);
        _cur_ds=null; _cur_ds_id=0;
      }
   #endif
      overlay(); MEASURE(_t_overlay[1])

      // set background sky pixels not drawn by foreground object meshes (everything at the end of depth buffer), this needs to be done before 'blend' because fur may set depth buffer without setting velocities, and before water surface
      {
         // normals are cleared here only if they were not already cleared at start (NRM_CLEAR_START) in 'prepare'
         Bool clear_nrm=(_nrm && !NRM_CLEAR_START && ClearNrm());
         if(  clear_nrm || _vel)
         {
            D.alpha    (ALPHA_NONE);
            D.depth2DOn(true);
            if(clear_nrm)
            {
               set(_nrm(), _ds(), true); Sh.clear(D.signedNrmRT() ? SNRM_CLEAR : NRM_CLEAR); // use DS because we use it for 'D.depth2D' optimization
            }
            if(_vel)
            {
               Mtn.load(); set(_vel(), _ds(), true); Mtn.h_ClearSkyVel->draw(null); // use DS because we use it for 'D.depth2D' optimization
            }
            D.depth2DOff();
         }
      }

      // debug stage views available before lighting, 'set' returns true if the image was displayed on '_final'
      if(stage)switch(stage)
      {
         case RS_COLOR : if(_cur_type==RT_DEFERRED && set(_col  ))goto finished; break; // only on deferred renderer the color is unlit
         case RS_NORMAL: if(                          set(_nrm  ))goto finished; break;
         case RS_DEPTH : if(                          set(_ds_1s))goto finished; break; // this may be affected by test blend materials later
      }
      waterPreLight(); MEASURE(water)
      light        (); MEASURE(_t_light[1])
      // debug stage views available after lighting
      if(stage)switch(stage)
      {
         case RS_LIT_COLOR: if(set(_col))goto finished; break;

         case RS_LIGHT: if(_lum_1s)
         {
            if(_ao) // if there's AO available, then it means that ambient hasn't been applied yet to '_lum_1s', to keep consistency with '_lum_1s' when AO is disabled, apply flat ambient here
            {
               set(_lum_1s(), null, true);
               D.alpha(ALPHA_ADD);
               Sh.clear(Vec4(D.ambientColor(), 0));
            }
            if(set(_lum_1s))goto finished;
         }break;

         case RS_AO: if(_ao)
         {
            set(_final(), null, true); D.alpha(ALPHA_NONE); VI.shader(GPU_API(Sh.h_DrawTexW, Sh.h_DrawTexX, Sh.h_DrawTexX)); _ao->drawFs(FIT_FULL, FILTER_LINEAR); // DX9 uses A8 while others use R8 RT
            goto finished;
         }break;

         case RS_LIGHT_AO: if(_lum_1s)
         {
            if(_ao)
            {
               set(_lum_1s(), null, true); D.alpha(ALPHA_ADD);
               Sh.h_Color[0]->set(Vec4(D.ambientColor(), 0));
               Sh.h_Color[1]->set(Vec4Zero                 );
               Sh.GPU_API(h_DrawTexWC, h_DrawTexXC, h_DrawTexXC)->draw(_ao); // DX9 uses A8 while others use R8 RT
            }
            if(set(_lum_1s))goto finished;
         }break;
      }
      // 'water' holds the time measured for 'waterPreLight' above, it's added to the water total here
      if(waterPostLight())goto finished; MEASURE(_t_water[1])if(_t_measure)_t_water[1]+=water;
         edgeDetect    ();

      MEASURE(temp)

      TexLodFull();
      sky(); MEASURE(_t_sky[1])
      TexLodLimit();

      blend(); MEASURE(_t_blend[1])
    /*if(stage)switch(stage)
      {
         case RS_DEPTH: if(set(_ds_1s))goto finished; break;
      }*/

      palette(0);
      palette(1); MEASURE(_t_palette[1])
      behind ( ); MEASURE(_t_behind [1])
      outline( );

      // 2D
      TexLodFull   ();
      finalizeGlow (); // !! assume that nothing below can trigger glow on the scene !!
      applyOutline ();
      edgeSoften   (); MEASURE(temp)
      // all following effects below that modify '_col' (and not create new '_col') should call 'downSample' first, otherwise they should call 'resolveMultiSample'
      if(AstroDrawRays())goto finished; MEASURE(_t_rays[1])
      volumetric   (); MEASURE(_t_volumetric[1])
      refract      (); MEASURE(_t_refract[1])
      postProcess  (); MEASURE(_t_post_process[1])
   finished:;
   }

   // cleanup
   {
      TexLodFull();
     _render=null; // this specifies that we're outside of Rendering
     _final.clear();
      D.alpha(ALPHA_BLEND); mode(RM_SIMPLE);
      set(_cur_main, _cur_main_ds, true);

      Sky.setFracMulAdd(); // in case user draws billboards/particles outside of Renderer, call before 'cleanup' because this relies on depth buffer being available
      cleanup();

      if(VR.active())
      {
         D.setViewFovTan(); // !! call after clearing _render !!
         if(_stereo)
         {
            D.clearCol(); // normally after rendering has finished we expect to draw on top of the result, however for stereoscopic, we've rendered to a separate render target, that won't be used for 2D drawing now, instead we're now back to '_cur_main' which is not yet initialized, so we need to clear it here
            // restore settings to centered (not one of the eyes)
            D._view_main.setShader().setProjMatrix(false); // viewport
            SetCam(ActiveCam.matrix, false); Frustum=FrustumMain; // camera and frustum
         }
      }
   }
   if(_shader_param_changes)Exit("'LinkShaderParamChanges' was called without 'UnlinkShaderParamChanges'.");
   return T;
}
// Renders the scene with 'render' while '_get_target' is enabled and returns the '_col' render target.
// '_col' is cleared in the renderer before returning, so only the returned pointer keeps referencing the result.
ImageRTPtr RendererClass::get(void (&render)())
{
  _get_target=true;
   T(render);
  _get_target=false;

   ImageRTPtr result=_col; // remember the result before clearing renderer's reference
  _col.clear();
   return result;
}
// Renders the mirror reflection into '_mirror_rt' when '_mirror_want' is set.
// Steps:
//  -remembers current camera and a set of renderer/display settings, then overrides them (renderer type is set to 'Water.reflectionRenderer', listed effects are disabled, density is reduced by shifting right by '_mirror_resolution')
//  -mirrors the active camera by '_mirror_plane', sets the clip plane and disables back flipping
//  -runs a reduced set of stages: prepare, solid, light, sky, blend, AstroDrawRays
//  -calls 'cleanup', moves the result from '_col' into '_mirror_rt' and restores all remembered settings
// Returns true only if 'stage' is RS_REFLECTION and the reflection was displayed using 'set' (the caller then skips the rest of rendering), otherwise false.
Bool RendererClass::reflection()
{
   // render reflection
   if(_mirror_want)
   {
      // remember current settings and disable fancy effects
      // each line below: type, backup variable = current value; followed by the override used for reflection rendering
      Camera           cam            =ActiveCam            ;
      Bool             combine        =T. combine           ;                     T. combine        =false             ;
      Bool             stereo         =T._stereo            ;                     T._stereo         =false             ; // don't reset FOV because we want to render the reflection with the same exact FOV settings as only one eye, because this reflection will be reused for both eyes
      Int              eye_num        =T._eye_num           ;                     T._eye_num        =1                 ;
      RENDER_TYPE      render_type    =T._cur_type          ;                     T._cur_type       =Water.reflectionRenderer();
      Bool             hp_col_rt      =D.highPrecColRT    ();                     D._hp_col_rt      =false             ;
      Bool             hp_nrm_rt      =D.highPrecNrmRT    ();                     D._hp_nrm_rt      =false             ;
      Bool             hp_lum_rt      =D.highPrecLumRT    ();                     D._hp_lum_rt      =false             ;
      IMAGE_PRECISION  lit_col_rt_prec=D.litColRTPrecision();                     D._lit_col_rt_prec=IMAGE_PRECISION_8 ;
      Bool             hp_nrm_calc    =D.highPrecNrmCalc  ();                     D.highPrecNrmCalc (false            );
      Bool             eye_adapt      =D.eyeAdaptation    ();                     D.eyeAdaptation   (false            );
      Bool             vol_light      =D.volLight         ();                     D.volLight        (false            ); // if it will be enabled, then calling 'volumetric' is required and clearing Renderer._vol_is
      AMBIENT_MODE     amb_mode       =D.ambientMode      ();                     D.ambientMode     (AMBIENT_FLAT     );
      MOTION_MODE      mtn_mode       =D.motionMode       ();                     D.motionMode      (MOTION_NONE      );
      SHADOW_MODE      shd_mode       =D.shadowMode       (); if(!_mirror_shadows)D.shadowMode      (SHADOW_NONE      );
      Byte             shd_soft       =D.shadowSoft       ();                     D.shadowSoft      (0                );
      Bool             shd_jitter     =D.shadowJitter     ();                     D.shadowJitter    (false            );
      EDGE_SOFTEN_MODE edge_soft      =D.edgeSoften       ();                     D._edge_soften    =EDGE_SOFTEN_NONE  ;
      Bool             tesselation    =D.tesselationAllow ();                     D.tesselationAllow(false            );
      Byte             density        =D.densityByte      ();                     D.densityFast     (Mid(((D.densityByte()+1)>>_mirror_resolution)-1, 0, 255));

      // set new settings
     _mirror=true;                                                                 // set before viewport and camera
      // <- change viewport here if needed
      ConstCast(ActiveCam).matrix.mirror(_mirror_plane); SetCam(ActiveCam.matrix); // set mirrored camera
      D.clipPlane(_mirror_plane);                                                  // set clip plane after viewport and camera
      Sh.h_AllowBackFlip->set(1);                                                  // disable back flipping

      // render !! adding new modes here will require setting there D.clipPlane !!
      prepare();
      solid  ();
      light  ();
      sky    ();
      blend  ();
      AstroDrawRays();

      // cleanup
      cleanup(); Swap(_mirror_rt, _col); // 'cleanup' clears '_mirror_rt' so we need to set it after calling the function

      // restore effects (before viewport and camera, because stereoscopic affects viewport fov)
      T. combine        =combine        ;
      T._stereo         =stereo         ;
      T._eye_num        =eye_num        ;
      T._cur_type       =render_type    ;
      D._hp_col_rt      =hp_col_rt      ;
      D._hp_nrm_rt      =hp_nrm_rt      ;
      D._hp_lum_rt      =hp_lum_rt      ;
      D._lit_col_rt_prec=lit_col_rt_prec;
      D.highPrecNrmCalc (hp_nrm_calc   );
      D.eyeAdaptation   (eye_adapt     );
      D.volLight        (vol_light     );
      D.ambientMode     (amb_mode      );
      D.motionMode      (mtn_mode      );
      D.shadowMode      (shd_mode      );
      D.shadowSoft      (shd_soft      );
      D.shadowJitter    (shd_jitter    );
      D._edge_soften    =edge_soft      ;
      D.tesselationAllow(tesselation   );
      D.densityFast     (density       );

      // restore previous settings (mirror, viewport and camera)
      Sh.h_AllowBackFlip->set(-1); // re-enable back flipping
     _mirror=false;                // !! set before viewport and camera, because it affects the Frustum, and after 'cleanup' !!
      // <- reset viewport here if needed
      cam.set();                   // camera

      // debug view of the reflection
      if(stage==RS_REFLECTION && set(_mirror_rt))return true;
   }
   return false;
}
/******************************************************************************/
// edge softening is performed only if it's wanted ('wantEdgeSoften') and we're not in 'fastCombine' mode
Bool RendererClass::hasEdgeSoften()C
{
   if(!wantEdgeSoften())return false;
   if( fastCombine   ())return false;
   return true;
}
// Returns if the edge softening mode selected in 'D.edgeSoften' can be used, which requires all shaders/images needed by that mode to be available.
// Any other mode (including MLAA when SUPPORT_MLAA is disabled) returns false.
Bool RendererClass::wantEdgeSoften()C
{
   const EDGE_SOFTEN_MODE soften=D.edgeSoften();

   if(soften==EDGE_SOFTEN_FXAA)return Sh.h_FXAA!=null;
#if SUPPORT_MLAA
   if(soften==EDGE_SOFTEN_MLAA)return Sh.h_MLAAEdge && Sh.h_MLAABlend && Sh.h_MLAA && _mlaa_area;
#endif
   if(soften==EDGE_SOFTEN_SMAA)return Sh.h_SMAAEdge && Sh.h_SMAABlend && Sh.h_SMAA && _smaa_area && _smaa_search;

   return false; // none or unsupported
}
// Returns if any of the known effects wants to access the depth buffer.
// TODO: even though we check all known things here, there are still some things about we don't know up-front (like local fog, decals, Image.drawVolume, ..)
Bool RendererClass::wantDepth()C
{
   // checks are performed in the same order as listed
   if(wantMotion() || wantDof())return true;
   if(D.aoWant() || D.edgeDetect() || D.particlesSoft() || D.volLight())return true;
   if(Sky.wantDepth() || Clouds.wantDepth() || Fog.draw || Sun.wantDepth())return true;
   if(!Water.max1Light())return true;
   return false;
}
// Returns if '_ds' is a depth texture, '_ds' is not checked for null here (see 'safeCanReadDepth').
// Have to check '_ds' because this is the original source depth, it can be multi-sampled (in that case it's possible depth reading won't be available), but '_ds_1s' is 1-sampled (and could have depth-reads even if '_ds' doesn't)
Bool RendererClass::canReadDepth()C
{
   return _ds->depthTexture();
}
// same as 'canReadDepth' but returns false when '_ds' is not available
Bool RendererClass::safeCanReadDepth()C
{
   if(!_ds)return false;
   if(!_ds->depthTexture())return false;
   return true;
}
// Returns if depth is attached ('hasDepthAttached') and the hardware type of '_cur_ds' has a non-zero 's' entry in 'ImageTI' (NOTE(review): presumably the stencil bits of the format - confirm)
Bool RendererClass::hasStencilAttached()C
{
   if(!hasDepthAttached())return false;
   if(!ImageTI[_cur_ds->hwType()].s)return false;
   return true;
}
// returns if depth buffer is currently attached
Bool RendererClass::hasDepthAttached()C
{
#if !GL
   // we have to check '_cur_ds_id' because on DX10 it can be null if read-only is not supported
   return _cur_ds_id!=null;
#else
   // check both '_cur_ds' and '_cur_ds_id' because '_cur_ds_id' will be 0 when Image is a RenderBuffer or temporarily unbound Texture (only Textures can be temporarily unbound), this will work OK for RenderBuffers because both '_cur_ds_id' and 'cur_ds->_txtr' will be zero
   return _cur_ds && _cur_ds->_txtr==_cur_ds_id;
#endif
}
// Returns if '_ds_1s' can be read at this moment: depth reading must be available ('canReadDepth') and on DX11/WEB '_ds_1s' must not be currently bound as Depth RT.
Bool RendererClass::canReadDepth1S()C
{
   if(!canReadDepth())return false;
#if DX11
   if(_ds_1s->_dsv ==_cur_ds_id)return false; // we always read from '_ds_1s', we can do that only if it's not bound as Depth RT, on purpose we check '_dsv' and not '_rdsv' because '_rdsv' IS allowed
#elif WEB
   if(_ds_1s->_txtr==_cur_ds_id)return false; // we always read from '_ds_1s', we can do that only if it's not bound as Depth RT
#endif
   return true;
}

// bloom is wanted when 'D.bloomUsed'
Bool RendererClass::wantBloom()C
{
   return D.bloomUsed();
}
// bloom is performed only if it's wanted and we're not in 'fastCombine' mode
Bool RendererClass::hasBloom()C
{
   if(!wantBloom  ())return false;
   if( fastCombine())return false;
   return true;
}
// eye adaptation is wanted when it's enabled in the display settings and '_eye_adapt_scale[0]' is available
Bool RendererClass::wantEyeAdapt()C
{
   if(!D.eyeAdaptation       ())return false;
   if(!_eye_adapt_scale[0].is())return false;
   return true;
}
// eye adaptation is performed only if it's wanted and we're not in 'fastCombine' mode
Bool RendererClass::hasEyeAdapt()C
{
   if(!wantEyeAdapt())return false;
   if( fastCombine ())return false;
   return true;
}
// motion blur is wanted when motion mode is non-zero and the viewport fov mode passes the 'FovPerspective' test
Bool RendererClass::wantMotion()C
{
   if(!D.motionMode())return false;
   if(!FovPerspective(D.viewFovMode()))return false;
   return true;
}
// motion blur is performed only if it's wanted, depth can be read and we're not in 'fastCombine' mode
Bool RendererClass::hasMotion()C
{
   if(!wantMotion  ())return false;
   if(!canReadDepth())return false;
   if( fastCombine ())return false;
   return true;
}
// depth of field is wanted when 'D.dofWant'
Bool RendererClass::wantDof()C
{
   return D.dofWant();
}
// depth of field is performed only if it's wanted, depth can be read and we're not in 'fastCombine' mode
Bool RendererClass::hasDof()C
{
   if(!wantDof     ())return false;
   if(!canReadDepth())return false;
   if( fastCombine ())return false;
   return true;
}
// ambient occlusion is performed only if 'D.aoWant', depth can be read and we're not in 'fastCombine' mode
Bool RendererClass::hasAO()C
{
   if(!D.aoWant    ())return false;
   if(!canReadDepth())return false;
   if( fastCombine ())return false;
   return true;
}

// fast combine = 'combine' is enabled and we're rendering directly to '_final' ('_col' is the same as '_final')
Bool RendererClass::fastCombine()C
{
   if(!combine)return false;
   return _col==_final;
}
// slow combine = 'combine' is enabled, 'fastCombine' is not possible and depth can be read
Bool RendererClass::slowCombine()C
{
   if(!combine       )return false;
   if( fastCombine ())return false;
   if(!canReadDepth())return false;
   return true;
}
// volumetric lighting is performed only if it's enabled in the display settings and depth can be read
Bool RendererClass::hasVolLight()C
{
   if(!D.volLight  ())return false;
   if(!canReadDepth())return false;
   return true;
}
// returns if deferred renderer is selected either as the renderer 'type' or as the water reflection renderer
Bool RendererClass::anyDeferred()C
{
   if(type()==RT_DEFERRED)return true;
   return Water.reflectionRenderer()==RT_DEFERRED;
}
// returns if forward renderer is selected either as the renderer 'type' or as the water reflection renderer
Bool RendererClass::anyForward()C
{
   if(type()==RT_FORWARD)return true;
   return Water.reflectionRenderer()==RT_FORWARD;
}
// Returns if the main depth buffer '_main_ds' is of IMAGE_D16 type.
// This can happen only on Android, and there we do have information about the depth buffer
Bool RendererClass::lowDepthPrecision()C
{
   const Bool is_d16=(_main_ds.type()==IMAGE_D16);
   return is_d16;
}
/******************************************************************************/
// Displays 'image' on the '_final' render target (used for debug 'stage' views).
//  -depth images are drawn using "DrawDepth" shader, this requires the image to be a depth texture
//  -signed images (IMAGE_R8G8B8A8_SIGN, IMAGE_R8G8_SIGN, IMAGE_R8_SIGN) are drawn as image*0.5+0.5
//  -all other images are copied using 'copyHw' into the view rectangle
// Returns true if the image was displayed, false if 'image' is null or it is a depth image that can't be read.
Bool RendererClass::set(C ImageRTPtr &image)
{
   if(!image)return false;

   if(ImageTI[image->hwType()].d) // depth
   {
      if(!image->depthTexture())return false; // can't read
      set(_final(), null, true);
      D.alpha(ALPHA_NONE);
      Sh.get("DrawDepth")->draw(image);
      return true;
   }

   if(image->type()==IMAGE_R8G8B8A8_SIGN
   || image->type()==IMAGE_R8G8_SIGN
   || image->type()==IMAGE_R8_SIGN) // signed
   {
      set(_final(), null, true);
      D.alpha(ALPHA_NONE);
      // we need to draw image*0.5f+0.5f
      Sh.h_Color[0]->set (Vec4(0.5f, 0.5f, 0.5f, 0));
      Sh.h_Color[1]->set (Vec4(0.5f, 0.5f, 0.5f, 1));
      Sh.h_DrawC   ->draw(image);
   }else
   {
      image->copyHw(*_final, false, D.viewRect());
   }
   return true;
}
// Swaps renderer's 1-sampled depth buffer '_ds_1s' with 'ds_1s' and updates the depth shader images.
// If '_ds' was the same as '_ds_1s' then '_ds' is set to 'ds_1s' too (and also '_cur_main_ds' if it was equal to '_ds').
// Returns false without changing anything if current '_ds_1s' is not 'accessible', otherwise true.
Bool RendererClass::swapDS1S(ImageRTPtr &ds_1s)
{
   if(!_ds_1s->accessible())return false; // can't swap if current DS is not accessible

   if(_ds==_ds_1s) // both depth buffers are the same image, so redirect '_ds' as well
   {
      if(_ds==_cur_main_ds)_cur_main_ds=ds_1s.rc();
     _ds=ds_1s;
      Sh.h_ImageDepthMS->set(_ds);
   }

   Swap(_ds_1s, ds_1s);
   Sh.h_ImageDepth->set(_ds_1s);
   return true;
}
// Selects the depth buffer '_ds' that matches the current color render target '_col'.
void RendererClass::setDS()
{
   if(_col==&_main) // we should always pair '_main' with '_main_ds', even if it's not 'accessible'
   {
     _ds=&_main_ds;
      return;
   }
   if(_col==_cur_main) // reuse '_cur_main_ds' if we're rendering to '_cur_main'
   {
     _ds=_cur_main_ds;
      return;
   }
   // create a new one
  _ds.getDS(_col->w(), _col->h(), _col->samples());
}
// Prepares render targets and states before drawing solid objects:
//  -calculates requested render target size and number of samples
//  -selects '_col' (either '_final' directly, or a new render target if any of the listed conditions requires it)
//  -selects depth buffers '_ds' and '_ds_1s', if depth can't be read then deferred renderer is changed to forward and selection is restarted
//  -sets RM_PREPARE mode, calls 'AstroPrepare' and calls '_render' once
//  -optionally draws early-z instances (SUPPORT_EARLY_Z)
//  -binds render targets for current renderer type ('_cur_type') and clears color/normals/depth when needed
void RendererClass::prepare()
{
   Byte  samples=(mirror() ? 1 : D.samples()); // disable multi-sampling for reflection
   VecI2 rt_size; if(VR.active() && D._allow_stereo) // following conditions affect this: _stereo, _allow_stereo, mirror()
   { /* We want this case when:
         -rendering to VR    'GuiTexture'
         -rendering to VR 'RenderTexture'
         -rendering to mirror reflection to be used for VR 'RenderTexture'
        Remember that '_stereo' gets temporarily disabled when rendering mirror reflection */
      rt_size=_final->size();
      if(D.densityUsed())
      {
         // scale by (densityByte+1)/128 with rounding, and clamp to 1..D.maxTexSize
         Int mul=D.densityByte()+1;
         rt_size.set(Mid((rt_size.x*mul+64)/128, 1, D.maxTexSize()),
                     Mid((rt_size.y*mul+64)/128, 1, D.maxTexSize()));
      }
   }else rt_size=D.render();

// this label is jumped to again after falling back from deferred to forward renderer (see below)
start:
   IMAGE_PRECISION prec=((_cur_type==RT_DEFERRED) ? D.highPrecColRT() ? IMAGE_PRECISION_10 : IMAGE_PRECISION_8 : D.litColRTPrecision()); // for deferred renderer we first render to col and only after that we mix it with light, other modes render color already mixed with light, for high precision we need only 10-bit, no need for 16-bit
  _col=_final;
   if(_cur_type==RT_DEFERRED || mirror() || _get_target // <- these always require
   || _col->size()!=rt_size || _col->samples()!=samples || _col->precision()<prec // if current RT does not match the requested rendering settings
   || wantBloom() || wantEdgeSoften() || wantMotion() || wantDof() || wantEyeAdapt() // if we want to perform post process effects then we will be rendering to a secondary RT anyway, so let's start with secondary with a chance that during the effect we can render directly to '_final'
   || (D.glowAllow() && ImageTI[_col->hwType()].a<8) // we need alpha for glow, this check is needed for example if we have IMAGE_R10G10B10A2
   || (_col==&_main && !_main_ds.depthTexture() && wantDepth()) // if we're setting '_main' which is always paired with '_main_ds', and that is not a depth texture but we need to access depth, then try getting custom RT with depth texture (don't check for '_cur_main' and '_cur_main_ds' because depth buffers other than '_main_ds' are always tried to be created as depth texture first, so if that failed, then there's no point in trying to create one again)
   )_col.get(ImageRTDesc(rt_size.x, rt_size.y, GetImageRTType(D.glowAllow(), prec), samples)); // here Alpha is used for glow

   // depth stencil buffer
   setDS();
   if(combine && !canReadDepth() && _col!=_final) // if we need to combine (treat combine with priority), but after getting the depth buffer it turns out we can't read it (can't do combine), then we must render to final render target directly
   {
     _col=_final; setDS();
   }

   if(!canReadDepth() && _cur_type==RT_DEFERRED) // if depth access is not available and we want deferred renderer then fall back to forward renderer
   {
      // this changes the renderer settings ('type' and 'Water.reflectionRenderer'), not only '_cur_type' for this render
      if(type                    ()==RT_DEFERRED)type                    (RT_FORWARD);
      if(Water.reflectionRenderer()==RT_DEFERRED)Water.reflectionRenderer(RT_FORWARD);
                                            _cur_type                    =RT_FORWARD ;
      goto start;
   }

   // 1-sampled depth buffer
   if(!_ds->multiSample ())_ds_1s=_ds;else // if the source is not multisampled then reuse it
   if( _ds->depthTexture())_ds_1s.getDS(_ds->w(), _ds->h());else // create new only if we can resolve multi-sample onto 1-sample
                           _ds_1s.clear(); // there's no point in creating a 1-sampled depth buffer if we can't resolve from the multi-sampled
   Sh.h_ImageDepth  ->set(_ds_1s);
   Sh.h_ImageDepthMS->set(_ds   );

  _set_depth_needed=(_ds!=_cur_main_ds && _ds_1s!=_cur_main_ds && canReadDepth()); // if we're not rendering to currently main depth buffer and we have depth access

   D.alpha(ALPHA_NONE);

   mode(RM_PREPARE); AstroPrepare(); // !! call after obtaining '_col', '_ds' and '_ds_1s' because we rely on having them, and after RM_PREPARE because we may add lights !!
  _eye=0; if(_stereo)SetCam(EyeMatrix[_eye], false); // start with the first eye and set camera so we can calculate view_matrix for instances, this is important, because precalculated view_matrix is assumed to be for the first eye, so when rendering instances we need to adjust the projection matrix for next eye, this affects 'BeginPrecomputedViewMatrix'
  _render(); // we can call '_render' only once for RM_PREPARE
   Bool clear_ds=true; // if need to clear depth

#if SUPPORT_EARLY_Z
   if(HasEarlyZInstances())
   {
      set(GPU_API(_col(), null, null), _ds(), true); // DX9 always requires a RT to be set
      D.clearDS(); clear_ds=false; // already cleared so no need anymore
      D.set3D(); if(DX9)D.colWrite(0);

   // repeated for every eye, '_eye' was set to 0 above
   early_z:
      setEyeViewport();
      DrawEarlyZInstances();
      if(++_eye<_eye_num)goto early_z;

      ClearEarlyZInstances();
      D.set2D(); if(DX9)D.colWrite(COL_WRITE_RGBA);
   }
#endif

   const Bool clear_col=((!Sky.isActual() || stage==RS_COLOR || stage==RS_LIT_COLOR || _col->multiSample()) && !fastCombine()); // performance tests suggested it's better don't clear unless necessary, instead 'Image.discard' is used and improves performance (at least on Mobile), always have to clear for multi-sampled to allow for proper detection of MSAA pixels using 'Sh.h_DetectMSCol' (this is needed for all renderers, not only Deferred, without this edges of sky/meshes may not get multi-sampled, especially when there's small variation in material color texture or no texture at all having just a single color)
   switch(_cur_type)
   {
      case RT_DEFERRED:
      {
         // 'merged_clear' = clear all render targets after binding them together (using "D.clearCol(Int i, .."), instead of clearing each one separately
         const Bool merged_clear=D._view_main.full // use when possible, should improve performance on tile-based renderers
                           #if GL && WINDOWS
                              && glClearBufferfv!=null // on Desktop GL we need this function to make "D.clearCol(Int i, .." work, on GLES3 it's always available, on GLES2 it's not but it doesn't have deferred renderer either
                           #endif
                   , clear_nrm  =(NRM_CLEAR_START && ClearNrm()),
                     clear_vel  =false; // this is not needed because "ClearSkyVel" is used later, performance tests suggested it's better don't clear unless necessary, instead 'Image.discard' is used and improves performance (at least on Mobile)

         if(D.motionMode()==MOTION_CAMERA_OBJECT && hasMotion() && D._max_rt>=3)
         {
           _vel.get(ImageRTDesc(_col->w(), _col->h(), D.signedVelRT() ? IMAGERT_RGB_S : IMAGERT_RGB, _col->samples())); // "_vel!=null" is treated as MOTION_CAMERA_OBJECT mode across the engine, doesn't use Alpha
            if(clear_vel && !merged_clear)_vel->clearViewport(D.signedVelRT() ? SVEL_CLEAR : VEL_CLEAR);
         }

        _nrm.get(ImageRTDesc(_col->w(), _col->h(), D.signedNrmRT() ? (D.highPrecNrmRT() ? IMAGERT_RGBA_SP : IMAGERT_RGBA_S) : (D.highPrecNrmRT() ? IMAGERT_RGBA_P : IMAGERT_RGBA), _col->samples())); // here Alpha is used for specular
         if(clear_nrm && !merged_clear)_nrm->clearViewport(D.signedNrmRT() ? SNRM_CLEAR : NRM_CLEAR);
         Sh.h_ImageNrmMS->set(_nrm);

         if(clear_col && !merged_clear)_col->clearViewport();

         set(_col(), _nrm(), _vel(), null, _ds(), true);
         if(merged_clear)
         {
            // indexes match the render target order used in 'set' above: 0=col, 1=nrm, 2=vel
            if(clear_col)D.clearCol(0, Vec4Zero);
            if(clear_nrm)D.clearCol(1, D.signedNrmRT() ? SNRM_CLEAR : NRM_CLEAR);
            if(clear_vel)D.clearCol(2, D.signedVelRT() ? SVEL_CLEAR : VEL_CLEAR);
         }
      }break;

      case RT_FORWARD:
      {
         set(_col(), _ds(), true);
         if(clear_col)D.clearCol(combine ? TRANSPARENT : Color(clear_color.r, clear_color.g, clear_color.b, 0));
      }break;

      case RT_SIMPLE: // same as RT_FORWARD
      {
         set(_col(), _ds(), true);
         if(clear_col)D.clearCol(combine ? TRANSPARENT : Color(clear_color.r, clear_color.g, clear_color.b, 0));
      }break;
   }
   if(clear_ds)D.clearDS();
}
// Draws solid objects (RM_SOLID mode) according to the current renderer type ('_cur_type'):
//  RT_DEFERRED - draws solid instances and calls '_render' for each eye, then resolves depth
//  RT_FORWARD  - limits and sorts lights, draws with a starting directional light (or without any light), applies ambient occlusion if available, then draws remaining lights additively
//  RT_SIMPLE   - sorts lights, sets a single directional light (0-th light if it's directional, otherwise a default one), draws solid instances and calls '_render' for each eye, then resolves depth
void RendererClass::solid()
{
   switch(_cur_type)
   {
      case RT_DEFERRED:
      {
         D.stencil(STENCIL_ALWAYS_SET, 0); D.set3D(); mode(RM_SOLID);
         REPS(_eye, _eye_num)
         {
            setEyeViewport();
            DrawSolidInstances(); _render();
         }
         ClearSolidInstances();
         D.stencil(STENCIL_NONE); D.set2D();

         resolveDepth();
      }break;

      case RT_FORWARD:
      {
         // Lights + Solid
         LimitLights();
          SortLights();

         // find initial directional light
         Int start_light=-1; // index in 'Lights' of the light drawn together with solid objects, -1=none
         if(Lights.elms() && !hasAO()) // if we do AO then first we need to draw without lights (ambient only)
         {
            if(Lights[0].type==LIGHT_DIR && Lights[0].shadow) // for shadow mapping 0-th light is the most significant, its shadow map must be rendered last to be used by BLEND_LIGHT
            {
               start_light=0; // assume it's 0-th
               REPA(Lights)if(i!=0) // check all other lights
                  if(Lights[i].shadow) // if at least one has shadows
               {
                  if(Lights[i].type==LIGHT_DIR){start_light=i; break;} // if it's some other directional light, we can draw it first
                  start_light=-1; // if it's not directional light, we can't use 0-th light as the starting light, but keep on checking for other directional lights
               }
            }else // most significant light doesn't require shadows, so pick any directional light
            {
               REPA(Lights)if(Lights[i].type==LIGHT_DIR){start_light=i; break;} // find any directional light
            }
         }

         // draw main light
         if(start_light>=0)
         {
            Lights[start_light].drawForward(_col(), fastCombine() ? ALPHA_NONE_ADD : ALPHA_NONE);
         }else // no light
         {
           _frst_light_offset=OFFSET(FRST, none);
            D.alpha(fastCombine() ? ALPHA_NONE_ADD : ALPHA_NONE);
            D.stencil(STENCIL_ALWAYS_SET, 0); D.set3D(); mode(RM_SOLID);
            REPS(_eye, _eye_num)
            {
               setEyeViewport();
               DrawSolidInstances(); _render();
            }
            ClearSolidInstances();
            D.stencil(STENCIL_NONE); D.set2D();

            resolveDepth();
         }

         // apply ambient occlusion
         if(hasAO())
         {
            ao(); if(_ao)
            {
               set(_col(), _ds(), true); // restore rendering RT's after calculating AO
               D.alpha(ALPHA_MUL);
               D.depth2DOn();
               Sh.h_Color[0]->set(Vec4(1, 1, 1, 0));
               Sh.h_Color[1]->set(Vec4(0, 0, 0, 1));
               Sh.GPU_API(h_DrawTexWC, h_DrawTexXC, h_DrawTexXC)->draw(_ao); // DX9 uses A8 while others use R8 RT
               D.depth2DOff();
            }
         }

         // draw rest of the lights
         if(Lights.elms()-(start_light>=0)>0) // if there are any lights left, excluding the one already drawn as 'start_light'
         {
           _first_pass=false;
            Bool clip=D._clip, clip_allow=D._clip_allow; T._clip=(clip ? D._clip_rect : D.rect()); // remember clipping because 'drawForward' may change it
            ambient_color->set(VecZero); Sh.h_AmbientMaterial->set(0); // disable ambient lighting
            D.depthFunc(FUNC_LESS_EQUAL); // need to make sure we can apply lights on existing depth
            REPA(Lights)if(i!=start_light)Lights[i].drawForward(_col(), ALPHA_ADD_KEEP); // draw 0-th at the end to setup shadow maps (needed for BLEND_LIGHT), keep alpha which is glow
            D.clip(clip ? &T._clip : null); D.clipAllow(clip_allow);
            D.depthFunc(FUNC_LESS);
           _first_pass=true;

            // restore settings
            ambient_color->set(D.ambientColor()); Sh.h_AmbientMaterial->set(1); // restore ambient lighting
            Frustum.set();                                                      // restore frustum after it being potentially changed when drawing shadow maps or setting frustum for visible objects for lights
         }

       //resolveDepth(); was already called for the main light
      }break;

      case RT_SIMPLE:
      {
         // Light + Solid
         SortLights();

         // set light
         if(Lights.elms() && Lights[0].type==LIGHT_DIR)Lights[0].dir.set();else LightDir(Vec(0, -1, 0), VecZero).set();

         // solid
         D.alpha  (fastCombine() ? ALPHA_NONE_ADD : ALPHA_NONE);
         D.stencil(STENCIL_ALWAYS_SET, 0); D.set3D(); mode(RM_SOLID);
         REPS(_eye, _eye_num)
         {
            setEyeViewport();
            DrawSolidInstances(); _render();
         }
         ClearSolidInstances();
         D.stencil(STENCIL_NONE); D.set2D();

         resolveDepth();
      }break;
   }
}
void RendererClass::resolveDepth()
{
   // this resolves the entire '_ds' into '_ds_1s' (by choosing Min of depth samples), and sets 'STENCIL_REF_MSAA' if needed
   if(_ds->multiSample() && _ds->depthTexture())
   {
      D.alpha(ALPHA_NONE);

      // set multi-sampled '_ds' MSAA
      if(_cur_type==RT_DEFERRED      // for     deferred set it always (needed for lighting)
      || Fog.draw || Sky.isActual()) // for non-deferred it will be used only for fog and sky
      {
         D.stencil(STENCIL_MSAA_SET, STENCIL_REF_MSAA);
         set(null, _ds(), true);
       //if(_nrm)Sh.h_DetectMSNrm->draw(_nrm);else 'DetectMSNrm' generates too many MS pixels, making rendering slower, so don't use
                 Sh.h_DetectMSCol->draw(_col);
      }

      // always resolve '_ds' into '_ds_1s'
      set(null, _ds_1s(), true);
      D.stencil(STENCIL_ALWAYS_SET, 0); // use 'STENCIL_ALWAYS_SET' here so when down-sampling depth, we clear the entire stencil mask for '_ds_1s'
      D.depthFunc(FUNC_ALWAYS); D.depthLock  (true); Sh.h_ResolveDepth->draw(_ds);
      D.depthFunc(FUNC_LESS  ); D.depthUnlock(    );

      // set 1-sampled '_ds_1s' MSAA
      if(_cur_type==RT_DEFERRED // for     deferred set it always (needed for lighting)
      || slowCombine())         // for non-deferred it will be used only for slow combine
      {
         D.stencilRef(STENCIL_REF_MSAA);
       //if(_nrm)Sh.h_DetectMSNrm->draw(_nrm);else 'DetectMSNrm' generates too many MS pixels, making rendering slower, so don't use
                 Sh.h_DetectMSCol->draw(_col);
      }
      D.stencil(STENCIL_NONE);
   }
}
void RendererClass::overlay()
{
   D.stencilRef(STENCIL_REF_TERRAIN); // set in case draw codes will use stencil

   if(_cur_type==RT_DEFERRED && D._mrt_post_process && D.bumpMode()!=BUMP_FLAT){set(_col(), _nrm(), null, null, _ds(), true, WANT_DEPTH_READ); D.colWrite(COL_WRITE_RGB, 1);} // if we can blend normals
   else                                                                         set(_col(),                     _ds(), true, WANT_DEPTH_READ);
   setDSLookup(); // 'setDSLookup' after 'set'
   D.alpha(ALPHA_BLEND_FACTOR);
   D.set3D(); D.depthWrite(false); D.bias(BIAS_OVERLAY); D.depthFunc(FUNC_LESS_EQUAL); D.depth(true); mode(RM_OVERLAY); // overlay requires BIAS because we may use 'MeshOverlay' which generates triangles by clipping existing ones
   REPS(_eye, _eye_num)
   {
      setEyeViewport();
      DrawOverlayObjects(); _render();
   }
   D.set2D(); D.depthWrite(true); D.bias(BIAS_ZERO); D.depthFunc(FUNC_LESS);
   D.colWrite(COL_WRITE_RGBA, 1);

   D.stencil(STENCIL_NONE); // disable any stencil that might have been enabled
   OverlayObjects.clear();
}
void RendererClass::waterPreLight()
{
   Water._use_secondary_rt=(!Water.max1Light()
                          && canReadDepth()
                          && D._max_rt>=2 // col+nrm
                          && _cur_type!=RT_FORWARD && _cur_type!=RT_SIMPLE); // for forward/simple for the moment we can't do it, because all lights have already been applied, but in current mode we expect solids to be drawn (so we have depth set because we copy it, and stencil set because we swap DS to preserve it and restore later)
   if(Water._use_secondary_rt)Water.drawSurfaces(); // if we use secondary RT's then we need to draw water surfaces before we calculate lights (otherwise setup lights first and then draw surfaces having shadow-maps known)
}
// Returns the Ambient Occlusion shader for given 'quality', 'jitter' and 'normal' settings.
// Shaders are cached in 'h_AO' and requested from the "Ambient Occlusion" shader file only on first use.
inline Shader* AmbientOcclusion::get(Int quality, Bool jitter, Bool normal)
{
   Shader* &cached=h_AO[quality][jitter][normal];
   if(cached)return cached; // already available

   if(!shader)shader=ShaderFiles("Ambient Occlusion"); // get the shader file on first use
   cached=shader->get(S8+"AO"+quality+(jitter?'J':'\0')+(normal?'N':'\0')); // name = "AO" + quality + optional 'J' + optional 'N'
   return cached;
}
// Calculates the Ambient Occlusion buffer '_ao':
//    -selects the occlusion shader according to 'D.ambientMode', 'D.ambientJitter' and 'D.ambientNormal' (normals are used only if '_nrm' is available)
//    -allocates '_ao' at resolution obtained from 'ByteScaleRes(fx(), D._amb_res)'
//    -linearizes '_ds_1s' into a temporary F32 render target of the same size as '_ao'
//    -draws occlusion for every eye
//    -optionally blurs the result according to 'D.ambientSoft'
void RendererClass::ao()
{
   D.alpha(ALPHA_NONE);
   Shader *tech_occl=AO.get(D.ambientMode()-1, D.ambientJitter(), D.ambientNormal() && _nrm);
   VecI2 res=ByteScaleRes(fx(), D._amb_res); _ao.get(ImageRTDesc(res.x, res.y, IMAGERT_ONE));

   // always downsample and linearize at the same time
   ImageRTPtr ao_depth;
   ao_depth.get(ImageRTDesc(_ao->w(), _ao->h(), IMAGERT_F32)); // don't try to reduce to IMAGERT_F16 because it can create artifacts on big view ranges under certain angles (especially when we don't use normal maps, like in forward renderer)
   linearizeDepth(*ao_depth, *_ds_1s);

   Sh.h_ImageNrm[0]->set(_nrm);
   Sh.h_ImageDepth ->set(ao_depth);
   // 'foreground' enables the 'D.depth2D' optimization, which requires '_ds_1s' to be bound together with '_ao', so they must be compatible
   Bool foreground=_ao->compatible(*_ds_1s);
   if(_col->multiSample())foreground&=Sky.isActual(); // when having multi-sampling, then allow this optimization only if we're rendering Sky, this is related to smooth edges between solid and sky pixels
   if(stage)if(stage==RS_AO || stage==RS_LIGHT_AO)foreground=false; // if we will display AO then set fully
   if(foreground)D.depth2DOn();
   set(_ao(), foreground ? _ds_1s() : null, true, NEED_DEPTH_READ); // use DS for 'D.depth2D'
   REPS(_eye, _eye_num)tech_occl->draw(ao_depth, setEyeParams()); // calculate occlusion
   ao_depth.clear(); // this one is no longer needed
   Sh.h_ImageDepth->set(_ds_1s); // restore full resolution depth

   if(D.ambientSoft()) // this needs to be in sync with 'D.shadowSoft'
   {
      ImageRTDesc rt_desc(_ao->w(), _ao->h(), IMAGERT_ONE);
      if(D.ambientSoft()>=5)
      {
         // 2-pass blur: X pass from '_ao' into 'temp', then Y pass from 'temp' back into '_ao'
         ImageRTPtr temp; temp.get(rt_desc);
         set(temp(), foreground ? _ds_1s() : null, true, NEED_DEPTH_READ);                 Sh.h_ShdBlurX->draw( _ao); // use DS for 'D.depth2D'
         set( _ao(), foreground ? _ds_1s() : null, true, NEED_DEPTH_READ); _ao->discard(); Sh.h_ShdBlurY->draw(temp); // use DS for 'D.depth2D'
      }else
      {
         // 1-pass blur: from the old '_ao' (kept in 'src') into a newly acquired '_ao', shader is selected by 'D.ambientSoft()-1'
         ImageRTPtr src=_ao; _ao.get(rt_desc);
         set(_ao(), foreground ? _ds_1s() : null, true, NEED_DEPTH_READ); Sh.h_ShdBlur[D.ambientSoft()-1]->draw(src); // use DS for 'D.depth2D'
      }
   }
   if(foreground)D.depth2DOff();
}
// Returns the "ColLight" shader for given parameters, taken from the 'Sh.h_ColLight' cache.
// If 'SLOW_SHADER_LOAD' is enabled and the shader is not yet available, it is requested through 'Sh.getColLight' and stored in the cache.
INLINE Shader* GetColLight(Int multi_sample, Bool ao, Bool cel_shade, Bool night_shade)
{
   Shader* &cached=Sh.h_ColLight[multi_sample][ao][cel_shade][night_shade];
   if(SLOW_SHADER_LOAD && !cached)
   {
      cached=Sh.getColLight(multi_sample, ao, cel_shade, night_shade);
   }
   return cached;
}
// Lighting pass of the deferred renderer (does nothing for other renderer types):
//    -calculates Ambient Occlusion if 'hasAO'
//    -limits, sorts and draws lights
//    -adds ambient light from meshes into '_lum'
//    -combines the light buffers with color ('src') into a newly acquired '_col'
//    -releases '_nrm' and '_lum' ('_lum_1s' is kept, as it may be needed for rendering fur)
void RendererClass::light()
{
   if(_cur_type==RT_DEFERRED) // on other renderers light is applied when rendering solid
   {
      /*
      -set '_ao' as Ambient Occlusion (one channel, without D.ambientColor)
      -clear '_lum' and '_lum_1s'
      -add ambient light from meshes
      -calculate screen space light (on MSAA and non-MSAA)
      -final light = sum of all buffers together
     _ao    = AO;
     _lum   = 0 ; _lum+=mesh_ambient;     MSAA of _lum   +=light;
     _lum_1s= 0 ;                     non-MSAA of _lum_1s+=light;
      LIGHT =_lum + _lum_1s + _ao*D.ambientColor
   OR LIGHT =_lum + _lum_1s +     D.ambientColor (if "_ao==null")
      */

      // Ambient Occlusion
      if(hasAO())ao();

      // add dynamic lights
      LimitLights();
       SortLights();
       DrawLights();

           _nrm.clear();
   //_water_nrm.clear(); we may still need it for refraction

      getLumRT();

      // add ambient light from meshes
      set(_lum(), _ds(), true);
      D.alpha(ALPHA_ADD);
      D.set3D(); mode(RM_AMBIENT); D.depth(true);
      SortAmbientInstances();
      REPS(_eye, _eye_num)
      {
         setEyeViewport();
         DrawAmbientInstances(); _render();
      }
      ClearAmbientInstances();
      D.set2D();

      // light buffer is ready so we can combine it with color
      Bool ao=(_ao!=null), cel_shade=(cel_shade_palette!=null), night_shade=(D.nightShadeColor().max()>EPS_COL);
      Sh.h_ImageLum   ->set(_lum_1s);
      Sh.h_ImageDet[0]->set(_ao    );
      Sh.h_ImageDet[1]->set( cel_shade_palette());
      D .alpha(ALPHA_NONE);
      ImageRTPtr src=_col; // can't read and write to the same RT
      // 'has_last_frag_color' is currently hard-coded to false, so 'use_last_frag_color' is always false and a new '_col' is always acquired below
      Bool has_last_frag_color=false, // TODO: there would be no need to write to a new RT if we would use gl_LastFragColor/gl_LastFragData[0] using extensions - https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_shader_framebuffer_fetch.txt and https://www.khronos.org/registry/OpenGL/extensions/ARM/ARM_shader_framebuffer_fetch.txt
           use_last_frag_color=(has_last_frag_color && (D.highPrecColRT() ? IMAGE_PRECISION_10 : IMAGE_PRECISION_8)==D.litColRTPrecision());
      if(!use_last_frag_color)_col.get(ImageRTDesc(_col->w(), _col->h(), GetImageRTType(D.glowAllow(), D.litColRTPrecision()), _col->samples())); // glow requires alpha

      set(_col(), _ds(), true, NEED_DEPTH_READ); // use DS because it may be used for 'D.depth2D' optimization and stencil tests
      if((_col==src || Sky.isActual()) && stage!=RS_LIT_COLOR)D.depth2DOn(); // we can skip background only if we're applying to the same RT or if the background will be later overwritten by Sky
      if(!_col->multiSample())GetColLight(0, ao, cel_shade, night_shade)->draw(src);else
      {
         // multi-sampled: with stencil available draw 1-sample and n-sample pixels separately, otherwise process everything as n-sample
         Sh.h_ImageLumMS->set(_lum);
         if(hasStencilAttached())
         {
            D.stencil   (STENCIL_MSAA_TEST, 0); GetColLight(1, ao, cel_shade, night_shade)->draw(src); // 1 sample
            if(Sky.isActual())D.depth2DOff();                                                          // multi-sampled always fill fully when sky will be rendered
            D.stencilRef(STENCIL_REF_MSAA    ); GetColLight(2, ao, cel_shade, night_shade)->draw(src); // n samples
            D.stencil   (STENCIL_NONE        );
         }else
         {
            if(Sky.isActual())D.depth2DOff();                      // multi-sampled always fill fully when sky will be rendered
            GetColLight(2, ao, cel_shade, night_shade)->draw(src); // n samples
         }
      }
      D.depth2DOff();
      src.clear();
      if(_lum!=_lum_1s && (_fur_is || stage==RS_LIGHT || stage==RS_LIGHT_AO)){set(_lum_1s(), null, true); D.alpha(ALPHA_ADD); Sh.draw(*_lum);} // need to apply multi-sampled lum to 1-sample for fur and light stage
     _lum.clear(); // '_lum' will not be used after this point, however '_lum_1s' may be for rendering fur

      MaterialClear();
   }
}
// Finishes water rendering after lights have been processed:
//    -if secondary RT's are not used, then water surfaces are drawn directly here
//    -otherwise (and only if '_water_col' is available) the water buffers are applied onto '_col', and water depth '_water_ds' is merged into the depth buffers
// Returns true only when one of the water 'stage's was requested and its 'set' call returned true (in this case the water RT's are not released here).
// In all other cases the water RT's and '_mirror_rt' are released and false is returned.
Bool RendererClass::waterPostLight()
{
   if(!Water._use_secondary_rt)Water.drawSurfaces();else // if we don't want to use secondary RT's
   if(_water_col) // only if we've got some water
   {
   /* -we can always do soft, because '_use_secondary_rt' is enabled only if we can read from depth buffer
      -we need to read from both depth buffers to perform softing, so in order to modify depth, we need to do this after in another operation
      -when doing refraction, we need to have a copy of '_col' and apply to '_col' (this is better for multi-sampling because copy can be 1-sampled)
          or alternatively apply to a new RT using existing '_col' (however the new RT would have to be multi-sampled for MS case)
      -when not doing refraction, we don't need any copy or separate RT, we can just apply to existing '_col' using alpha blending without reading from it, however doing this would prevent from applying glow, so don't do it
      -we can't use stencil optimizations, because:
         -when applying to MS RT the DS is MS and does not have any information about water
         -otherwise we apply to another RT and we have to write all pixels
   */
      getWaterLumRT(); // get in case we still haven't initialized it
      Bool refract=(Water.refract>EPS_MATERIAL_BUMP);

      ImageRTPtr src=_col;
      Bool depth_test;
      // the assignment inside the condition is intentional: 'depth_test' is set to whether 'src' is multi-sampled
      if(  depth_test=src->multiSample()) // multi-sampled textures can't be sampled smoothly as there will be artifacts, so resolve them, also in this case we render back to '_col' (which is not set to a new RT because we have a copy of it in 'src') but only to pixels with depth FUNC_LESS, this solves the problem of AA with water
      {  // convert to 1 sample
         ImageRTPtr temp(ImageRTDesc(src->w(), src->h(), GetImageRTType(src->type())));
      #if DX11
         src->copyMs(*temp, false, false, D.viewRect()); Swap(src, temp);
      #endif
         D.depthLock (true); // we need depth testing
         D.depthWrite(false); // disable depth writing because we can't read and write to same DS
         D.depthFunc (FUNC_LESS); // process only pixels that are closer (which means water on top of existing solid)
      }
      if(_col==src)_col.get(ImageRTDesc(_col->w(), _col->h(), GetImageRTType(_col->type()), _col->samples())); // can't read and write to same RT, in multi-sampling we're writing back to '_col' as it's not changed

      SetOneMatrix(); // needed for refraction
      set(_col(), _ds(), true, NEED_DEPTH_READ); // we need depth read because we always need to read depth buffer for softing, but still use '_ds' in case we apply to existing '_col'
      D.alpha(ALPHA_NONE);

      Water.set();
      Water.setImages(src(), _water_ds());
      Sh.h_ImageCol[3]->set(_water_col());
      Sh.h_ImageNrm[0]->set(_water_nrm());
      Sh.h_ImageLum   ->set(_water_lum());
      REPS(_eye, _eye_num)
      {
         Water.setEyeViewport();
         WS.h_Apply[refract][depth_test]->draw(src()); // we need to output depth only if we need it for depth testing
      }
      if(depth_test)
      {
         // restore depth states changed above for the multi-sampled case
         D.depthUnlock();
         D.depthWrite(true);
      }
      Water.endImages();

      // now we have to modify the depth buffer
      if((!Water._swapped_ds || !swapDS1S(_water_ds)) && Sh.h_SetDepth) // if we haven't swapped before, or swap back failed, then we have to apply '_water_ds' on top of existing '_ds_1s', otherwise we just swap back '_water_ds' because it had the stencil values
      {
         if(!DX9)set(null, _ds_1s(), true);else{set(_col(), _ds_1s(), true); D.colWrite(0);} // DX9 always requires RT
         D.depthLock(true); Sh.h_SetDepth->draw(_water_ds); // keep FUNC_LESS to modify only those that are closer
         D.depthUnlock();
         if(DX9)D.colWrite(COL_WRITE_RGBA);
      }
      if(_ds!=_ds_1s && Sh.h_SetDepth) // multi-sample
      {
         if(!DX9)set(null, _ds(), true);else{set(_col(), _ds(), true); D.colWrite(0);} // DX9 always requires RT
         D.depthLock(true); Sh.h_SetDepth->draw(_water_ds); // keep FUNC_LESS to modify only those that are closer
         D.depthUnlock();
         if(DX9)D.colWrite(COL_WRITE_RGBA);
      }

      // if a water stage was requested, then try to output it, and on success return without releasing the water RT's
      if(stage)switch(stage)
      {
         case RS_WATER_COLOR : if(set(_water_col))return true; break;
         case RS_WATER_NORMAL: if(set(_water_nrm))return true; break;
         case RS_WATER_LIGHT : if(set(_water_lum))return true; break;
      }
   }
   // release RT's that are no longer needed
  _water_col.clear();
  _water_nrm.clear();
  _water_ds .clear();
  _water_lum.clear();
  _mirror_rt.clear();
   return false;
}
// Applies the edge detection effect onto '_col' according to 'D.edgeDetect' mode.
// Skipped when edge detection is disabled, when rendering a mirror, or when depth can't be read.
void RendererClass::edgeDetect()
{
   if(!D.edgeDetect() || mirror() || !canReadDepth())return;

   switch(D.edgeDetect())
   {
      case EDGE_DETECT_THIN:
      {
         // detect and multiply directly onto '_col'
         D.depth2DOn();
         D.alpha(ALPHA_MUL);
         set(_col(), _ds(), true, NEED_DEPTH_READ);
         Sh.h_EdgeDetect->draw(_ds_1s);
         D.depth2DOff();
      }break;

      case EDGE_DETECT_FAT:
      {
         ImageRTPtr edge(ImageRTDesc(fxW(), fxH(), IMAGERT_ONE));

         // detect edges into a temporary RT
         D.alpha(ALPHA_NONE);
         set(edge(), null, true, NEED_DEPTH_READ);
         Sh.h_EdgeDetect->draw(_ds_1s); // we need to fill the entire buffer because below we're using blurring (which takes nearby texels)

         // apply the temporary RT onto '_col'
         D.depth2DOn();
         D.alpha(ALPHA_MUL);
         set(_col(), _ds(), true, NO_DEPTH_READ);
         Sh.h_EdgeDetectApply->draw(edge());
         D.depth2DOff();
      }break;
   }
}
// Draws the background elements in fixed order: fog, sky, astronomical objects (skipped for mirror), clouds, and fog again.
// NOTE(review): the Bool passed to 'Fog.Draw' presumably selects which of the two fog passes (before/after sky) is processed - confirm against 'Fog.Draw'
void RendererClass::sky()
{
   Fog.Draw(false);
   Sky.draw();
   if(!mirror())
   {
      AstroDraw(); // not drawn when rendering a mirror
   }
   Clouds.drawAll();
   Fog.Draw(true);
}
// Draws blend (transparent) instances onto '_col' (and '_vel'):
//    -sets main light parameters (or a dummy directional light if the first light is not directional)
//    -if fur is going to be drawn, then adds ambient (scaled by '_ao') into '_lum_1s' and prepares fur
//    -draws blend instances for every eye
//    -releases '_ao' and '_lum_1s', as they're not used after this point
void RendererClass::blend()
{
   Sky.setFracMulAdd();

   // set main light parameters for *BLEND_LIGHT* and 'Mesh.drawBlend'
   if(Lights.elms() && Lights[0].type==LIGHT_DIR) // use 0 index as it has already been set in 'SortLights'
   {
      Lights[0].dir.set();
     _blst_light_offset=OFFSET(BLST, dir[Lights[0].shadow ? D.shadowMapNumActual() : 0]);
   }else
   {
      LightDir(Vec(0, -1, 0), VecZero).set(); // set dummy light
     _blst_light_offset=OFFSET(BLST, dir[0]);
   }

   // apply light in case of drawing fur, which samples the light buffer
   if(_fur_is)
   {
      if(_ao)
      {
         // add "_ao * D.ambientColor" into '_lum_1s'
         set(_lum_1s(), null, true);
         D.alpha(ALPHA_ADD);
         Sh.h_Color[0]->set(Vec4(D.ambientColor(), 0));
         Sh.h_Color[1]->set(Vec4Zero                 );
         Sh.GPU_API(h_DrawTexWC, h_DrawTexXC, h_DrawTexXC)->draw(_ao); // DX9 uses A8 while others use R8 RT
      }
      PrepareFur();
   }
  _ao.clear(); // '_ao' will not be used after this point

   D.stencilRef(STENCIL_REF_TERRAIN); // set in case draw codes will use stencil

   const Bool blend_affect_vel=true; // #BlendRT
   set(_col(), blend_affect_vel ? _vel() : null, null, null, _ds(), true); setDSLookup(); // 'setDSLookup' after 'set'
   D.alpha(ALPHA_BLEND_FACTOR);
   D.set3D(); D.depthWrite(false); D.depthFunc(FUNC_LESS_EQUAL); D.depth(true); mode(RM_BLEND); // use less equal for blend because we may want to draw blend graphics on top of existing pixels (for example world editor terrain highlight)
   SortBlendInstances();
   REPS(_eye, _eye_num)
   {
      setEyeViewport();
   #if 1
     _render(); DrawBlendInstances(); // first call '_render' to for example get 'getBackBuffer' and then draw objects in 'DrawBlendInstances'
   #else
      DrawBlendInstances(); _render();
   #endif
   }
   ClearBlendInstances();
  _SetHighlight(TRANSPARENT);
   // restore states
   D.set2D(); D.depthWrite(true); D.depthFunc(FUNC_LESS);

   D.stencil(STENCIL_NONE); // disable any stencil that might have been enabled

  _lum_1s.clear(); // '_lum_1s' will not be used after this point
}
// Draws palette based objects for given palette 'index' (0 uses 'RM_PALETTE' mode and 'PaletteObjects', any other value uses 'RM_PALETTE1' and 'Palette1Objects').
// Objects are first rendered additively into a temporary 'intensity' RT, which is then applied onto '_col' using the palette image 'D._color_palette[index]'.
// Drawing is performed only if 'D.colorPaletteAllow' and the palette image is available, however the object/area containers for 'index' are always cleared.
void RendererClass::palette(Int index)
{
   if(D.colorPaletteAllow())
      if(C ImagePtr &palette=D._color_palette[index])
   {
      Image &ds=(_ds_1s ? *_ds_1s : *_ds); // Warning: this will disable applying palette only on terrain using STENCIL_REF_TERRAIN for multisampling
      Sky.setFracMulAdd();

      ImageRTPtr intensity(ImageRTDesc(_col->w(), _col->h(), IMAGERT_RGBA, ds.samples())); // we need to match depth multi-sampling, here Alpha is used for 4th palette channel
      D.stencilRef(STENCIL_REF_TERRAIN); // set in case draw codes will use stencil

      set(intensity(), &ds, true, WANT_DEPTH_READ); setDSLookup(); // we need depth-testing, but want depth-read for particle softing, 'setDSLookup' after 'set'
      D.clearCol();
      D.alpha(ALPHA_ADD);
      D.set3D(); D.depthWrite(false); mode(index ? RM_PALETTE1 : RM_PALETTE);
      REPS(_eye, _eye_num)
      {
         setEyeViewport();
        _render(); if(index)DrawPalette1Objects();else DrawPaletteObjects();
      }
      D.set2D(); D.depthWrite(true);

      D.stencil(STENCIL_NONE); // disable any stencil that might have been enabled

   #if !DX11
      if(intensity->multiSample()) // we need to resolve the multi-sampled surface first
      {
         ImageRTPtr src=intensity; intensity.get(ImageRTDesc(src->w(), src->h(), IMAGERT_RGBA)); src->copyHw(*intensity, false, D.viewRect()); // here Alpha is used for 4th palette channel
      }
   #endif
      // apply 'intensity' onto '_col' using the palette image
      set(_col(), null, true);
      D .alpha(ALPHA_BLEND_DEC);
      Sh.h_ImageCol[1]->set ( palette()); MaterialClear();
      Sh.h_PaletteDraw->draw(*intensity);
   }
   // always clear the containers, even if nothing was drawn
   if(index)
   {
      Palette1Objects.clear();
      Palette1Areas  .clear();
   }else
   {
      PaletteObjects.clear();
      PaletteAreas  .clear();
   }
}
// Draws "behind" objects onto '_col' using 'FUNC_GREATER' depth test, then clears 'BehindObjects'.
// Drawing is performed only if depth can be read, however 'BehindObjects' is always cleared.
void RendererClass::behind()
{
   if(canReadDepth())
   {
      Sky.setFracMulAdd();

      // setup
      set(_col(), _ds(), true, NEED_DEPTH_READ); // we will read from the depth buffer
      D.alpha     (ALPHA_BLEND_DEC);
      D.set3D     ();
      D.depthWrite(false);
      D.depthFunc (FUNC_GREATER);
      D.depth     (true);
      mode(RM_BEHIND);

      // draw for every eye
      REPS(_eye, _eye_num)
      {
         setEyeViewport();
        _render();
         DrawBehindObjects();
      }

      // restore
      D.set2D     ();
      D.depthWrite(true);
      D.depthFunc (FUNC_LESS);
   }
   BehindObjects.clear();
}
// Prepares rendering of an outlined object using given highlight 'color'.
// On first use ('_outline' is zero) the outline RT is created, bound and cleared, and outline render states are set.
// Additionally makes sure that outline is enabled for the current eye, setting its viewport when enabling.
void RendererClass::setOutline(C Color &color)
{
  _SetHighlight(color);

   if(!_outline) // not initialized at all
   {
     _outline_rt.get(ImageRTDesc(_col->w(), _col->h(), IMAGERT_RGBA, _col->samples())); // here Alpha is used for outline opacity
      set(_outline_rt(), _ds(), true);
      D.clearCol();
      D.alpha(ALPHA_NONE);
      D.sampler3D();
      D.depthFunc(FUNC_LESS_EQUAL);
      D.depthWrite(false);
      if(D.outlineMode()==EDGE_DETECT_THIN)
      {
         D.stencil(STENCIL_OUTLINE_SET, STENCIL_REF_OUTLINE);
      }
   }

   Int eye_bit=(1<<_eye);
   if((_outline&eye_bit)==0) // not yet enabled for this eye
   {
     _outline|=eye_bit; // enable
      setEyeViewport(); // set viewport if needed
   }
}
// Applies the outline RT (if it was created by 'setOutline') onto '_col' according to 'D.outlineMode', and releases it.
// Restores states changed in 'setOutline', resolves multi-sampling of '_col', and loads the outline shaders on first use.
void RendererClass::applyOutline()
{
   if(_outline_rt)
   {
     _SetHighlight(TRANSPARENT); // disable 'SetHighlight' which was called during mesh drawing
      D.sampler2D ();
      D.depthFunc (FUNC_LESS); // restore default
      D.depthWrite(true);

      resolveMultiSample(); // don't do 'downSample' here because 'edgeSoften' will be called later and it requires to operate on full-sampled data
   #if !DX11
      if(_outline_rt->multiSample()) // we need to resolve the multi-sampled surface first
      {
         ImageRTPtr src=_outline_rt; _outline_rt.get(ImageRTDesc(src->w(), src->h(), IMAGERT_RGBA)); src->copyHw(*_outline_rt, false, D.viewRect()); // here Alpha is used for outline opacity
      }
   #endif
      Image *ds=_ds_1s(); // we've resolved multi-sample so have to use 1-sample

      // load shaders on first use
      if(!Sh.h_Outline)
      {
         Sh.h_Outline     =Sh.get("Outline");
         Sh.h_OutlineDS   =Sh.get("OutlineDS");
         Sh.h_OutlineClip =Sh.get("OutlineClip");
         Sh.h_OutlineApply=Sh.get("OutlineApply");
      }

      switch(D.outlineMode())
      {
         // thin: draw the outline RT directly onto '_col'
         case EDGE_DETECT_THIN: if(Sh.h_OutlineClip)
         {
            set(_col(), (ds && ds->compatible(*_col)) ? ds : null, true);
            D.depth2DOn ();
            D.stencil   ((_cur_ds==_ds()) ? STENCIL_OUTLINE_TEST : STENCIL_NONE); // we can use the stencil optimization only if we will use the DS to which we've written stencil to
            D.alpha     (ALPHA_BLEND_DEC);
            Sh.h_OutlineClip->draw(_outline_rt);
            D.stencil   (STENCIL_NONE);
            D.depth2DOff();
         }break;

         // fat: first process the outline RT into 'temp' (using 'h_OutlineDS' shader if 'temp' is narrower than the outline RT), then apply 'temp' onto '_col'
         case EDGE_DETECT_FAT: if(Sh.h_Outline && Sh.h_OutlineDS && Sh.h_OutlineApply)
         {
            ImageRTPtr temp(ImageRTDesc(fxW(), fxH(), IMAGERT_RGBA)); // here Alpha is used for outline opacity
            set(temp(), null, true);
            D.alpha(ALPHA_NONE);
            ((temp->w()<_outline_rt->w()) ? Sh.h_OutlineDS : Sh.h_Outline)->draw(_outline_rt);
            set(_col(), (ds && ds->compatible(*_col)) ? ds : null, true);
            D .alpha    (ALPHA_BLEND_DEC);
            if(!D.outlineAffectSky())D.depth2DOn();
            Sh.h_OutlineApply->draw(temp);
            D.depth2DOff();
         }break;
      }
     _outline_rt.clear();
   }
}
// Draws outline objects for every eye (only if 'D.outlineMode' is enabled), then clears 'OutlineObjects'.
void RendererClass::outline()
{
   // start outline
   if(D.outlineMode())
   {
      mode(RM_OUTLINE); // 'sampler3D/2D' is called in 'setOutline' and 'applyOutline'
      REPS(_eye, _eye_num)
      {
         // 'setEyeViewport' is not called here, because viewport is set in 'setOutline' method
         DrawOutlineObjects();
        _render();
      }
   }
   OutlineObjects.clear(); // always clear, even if outline is disabled
}
void RendererClass::resolveMultiSample() // !! assumes that 'finalizeGlow' was called !! this should be called before 'downSample'
{
   if(_col->multiSample())
   {
      ImageRTPtr src=_col; _col.get(ImageRTDesc(_col->w(), _col->h(), GetImageRTType(_has_glow, D.litColRTPrecision())));
   #if DX11
      src->copyMs(*_col, false, true, D.viewRect());
   #else
      src->copyHw(*_col, false, D.viewRect());
   #endif
   }
}
void RendererClass::downSample() // !! assumes that 'finalizeGlow' was called !!
{
   resolveMultiSample();
   if(_col->w()>_final->w()) // if down-sample is needed
   {
      ImageRTPtr src=_col; _col.get(ImageRTDesc(_final->w(), _final->h(), GetImageRTType(_has_glow, D.litColRTPrecision())));
      src->copyHw(*_col, false, D.viewRect());
   }
}
// Applies edge softening (anti-aliasing post process) selected by 'D.edgeSoften' onto '_col'.
// Result is rendered into a new RT 'dest', which at the end is swapped with '_col'.
// Does nothing if 'hasEdgeSoften' is false.
void RendererClass::edgeSoften() // !! assumes that 'finalizeGlow' was called !!
{
   if(hasEdgeSoften())
   {
      resolveMultiSample();
      D.alpha(ALPHA_NONE);
      if(!D._view_main.full)
      {
         const Int pixel_range=6; // currently FXAA/SMAA shaders use this range
         set(_col(), null, false); // need full viewport
         D.viewRect().drawBorder(TRANSPARENT, Renderer.pixelToScreenSize(-pixel_range)); // draw black border around the viewport to clear and prevent from potential artifacts on viewport edges
      }
      ImageRTPtr dest(ImageRTDesc(_col->w(), _col->h(), GetImageRTType(_has_glow, D.litColRTPrecision())));
      // D.depth2DOn/depth2DOff can't be applied here, this was tested and resulted in loss of softening at object/sky edges
      switch(D.edgeSoften())
      {
         // FXAA: single pass from '_col' into 'dest'
         case EDGE_SOFTEN_FXAA:
         {
            set(dest(), null, true); Sh.h_FXAA->draw(_col());
         }break;

      #if SUPPORT_MLAA
         // MLAA: copy '_col' into 'dest', then 3 passes (edge, blend, final), stencil is set in the edge pass and tested in the next ones
         case EDGE_SOFTEN_MLAA:
         {
           _col->copyHw(*dest, false, D.viewRect());
            D.stencil(STENCIL_EDGE_SOFT_SET, STENCIL_REF_EDGE_SOFT); // have to use '_ds_1s' in write mode to be able to use stencil
            ImageRTPtr edge (ImageRTDesc(_col->w(), _col->h(), IMAGERT_TWO )); set(edge (), _ds_1s(), true); D.clearCol(); Sh.h_MLAAEdge ->draw(_col ()); Sh.h_ImageCol[1]->set(_mlaa_area()); D.stencil(STENCIL_EDGE_SOFT_TEST);
            ImageRTPtr blend(ImageRTDesc(_col->w(), _col->h(), IMAGERT_RGBA)); set(blend(), _ds_1s(), true); D.clearCol(); Sh.h_MLAABlend->draw( edge()); Sh.h_ImageCol[1]->set( blend    ()); edge.clear();
                                                                               set(dest (), _ds_1s(), true);               Sh.h_MLAA     ->draw(_col ());                                      D.stencil(STENCIL_NONE          );
            MaterialClear();
         }break;
      #endif

         // SMAA: 3 passes (edge, blend, final), stencil is set in the edge pass and tested in the blend pass, the final pass is drawn without depth-stencil
         case EDGE_SOFTEN_SMAA:
         {
         #if GL // in GL 'ShaderImage.Sampler' does not affect filtering, so modify it manually
            D.texBind(GL_TEXTURE_2D, _smaa_search->_txtr);
            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
         #endif
            D.stencil(STENCIL_EDGE_SOFT_SET, STENCIL_REF_EDGE_SOFT); // have to use '_ds_1s' in write mode to be able to use stencil
            ImageRTPtr edge (ImageRTDesc(_col->w(), _col->h(), IMAGERT_TWO )); set(edge (), _ds_1s(), true); D.clearCol(); Sh.h_SMAAEdge ->draw(_col ()); Sh.h_ImageCol[1]->set(_smaa_area()); Sh.h_ImageCol[2]->set(_smaa_search()); Sh.h_ImageCol[2]->_sampler=&SamplerPoint; D.stencil(STENCIL_EDGE_SOFT_TEST);
            ImageRTPtr blend(ImageRTDesc(_col->w(), _col->h(), IMAGERT_RGBA)); set(blend(), _ds_1s(), true); D.clearCol(); Sh.h_SMAABlend->draw( edge()); Sh.h_ImageCol[1]->set( blend()    ); edge.clear();                          Sh.h_ImageCol[2]->_sampler=         null; D.stencil(STENCIL_NONE          );
                                                                               set(dest (),  null   , true);               Sh.h_SMAA     ->draw(_col ());
            MaterialClear();
         }break;
      }
      Swap(dest, _col); // make the softened result the new '_col', old '_col' is released when 'dest' goes out of scope
   }
}
// Applies the volumetric RT '_vol' (if available) onto '_col', using additive or blend mode depending on 'D.volAdd', and releases '_vol'.
void RendererClass::volumetric()
{
   if(!_vol)return; // nothing to apply

   downSample(); // we're modifying existing RT, so downSample if needed
   set(_col(), null, true);
   SPSet("VolMax", Vec(D.volMax()));

   if(D.volAdd())D.alpha(ALPHA_ADD      );
   else          D.alpha(ALPHA_BLEND_DEC);
   (D.volAdd() ? VL.h_VolumetricA : VL.h_Volumetric)->draw(_vol());

  _vol.clear();
}
void RendererClass::refract() // !! assumes that 'finalizeGlow' was called !!
{
   if(Water._under_mtrl && canReadDepth() && !fastCombine())
   {
      WS.load();
    C WaterMtrl &under=*Water._under_mtrl;

      Flt    under_step =Sat(Water._under_step),
             refract_val=under_step*under.refract_underwater;
      Bool   refract=(refract_val>EPS_MATERIAL_BUMP);
      if(   !refract)downSample        (); // we're modifying existing RT, so downSample if needed
      else           resolveMultiSample(); // we're writing to new RT so resolve the old first
      ImageRTPtr src=_col;
      if(    refract)_col.get(ImageRTDesc(Min(_col->w(), _final->w()), Min(_col->h(), _final->h()), GetImageRTType(_has_glow, D.litColRTPrecision())));

      set(_col(), null, true);
      D .alpha(refract ? ALPHA_NONE : ALPHA_BLEND_DEC);
      Sh.h_Step->set(Time.time());
      SPSet("WaterPlnPos"   , Water._under_plane.pos   *CamMatrixInv      );
      SPSet("WaterPlnNrm"   , Water._under_plane.normal*CamMatrixInv.orn());
      SPSet("WaterUnder"    ,        under_step);
      SPSet("WaterUnderRfr" ,       refract_val);
      SPSet("WaterDns"      , Vec2(Mid(under.density_underwater , 0.0f, 1-EPS_GPU), under.density_underwater_add)); // avoid 1 in case "Pow(1-density, ..)" in shader would cause NaN or slow-downs
      SPSet("WaterUnderCol0",          under.  color_underwater0                                                 );
      SPSet("WaterUnderCol1",          under.  color_underwater1                                                 );
      REPS(_eye, _eye_num)WS.h_Under[refract]->draw(*src, setEyeParams());
   }
}
// Applies the enabled post process effects to '_col' in fixed order:
//    Eye Adaptation -> Bloom -> Motion Blur -> Depth of Field -> Combine
// Each of the first 4 effects reads from '_col' and writes to 'dest', after which the two are swapped, so '_col' always holds the latest result.
// Afterwards, unless '_get_target' is set, the result is transferred to '_final' (by copy, resolve, or a draw that may upscale/dither), and '_col' is released.
// When '_get_target' is set, the result is left in '_col' and nothing is written to '_final' here.
void RendererClass::postProcess()
{
   Bool  eye_adapt= hasEyeAdapt(),
         bloom    =(hasBloom   () || _has_glow),
         motion   = hasMotion  (),
         dof      = hasDof     (),
         combine  = slowCombine(), // shader combine
         upscale  =(_final->w()>_col->w() || _final->h()>_col->h()), // we're going to upscale at the end
         fx_dither=(D.dither() && !upscale), // allow post process dither only if we're not going to upscale the image (because it would look bad)
         alpha_set=fastCombine(); // if alpha channel is set properly in the RT, skip this if we're doing 'fastCombine' because we're rendering to existing RT which has its Alpha already set
   VecI2 size     =_col->size(); MIN(size.x, _final->w()); MIN(size.y, _final->h()); // don't do post-process at higher res than needed

   D.alpha(ALPHA_NONE);

   ImageRTPtr dest; // destination of the current effect, swapped with '_col' once the effect is done

   if(eye_adapt || bloom || motion || dof || combine || _get_target)resolveMultiSample(); // we need to resolve the MS Image so it's smooth for the effects

   // 'fxs' is decremented before every effect below, the effect for which it reaches 0 renders directly into '_final'
   // with -1 it never reaches 0, so all effects use temporary render targets
   // 'combine' is included in the sum but nothing below decrements for it, so when 'combine' is enabled none of the effects render directly into '_final'
   Int fxs=((upscale || _get_target) ? -1 : eye_adapt+bloom+motion+dof+combine); // this counter specifies how many effects are still left in the queue, and if we can render directly to '_final', when up sampling then don't render to '_final'
   if( D._view_main.full && !_get_target && !combine && _col!=_final)_final->discard();
   if(!D._view_main.full)Sh.h_ColClamp->setConditional(colClamp(size)); // set ColClamp that may be needed for Bloom, DoF, MotionBlur, this is the viewport rect within texture, so reading will be clamped to what was rendered inside the viewport

   if(eye_adapt)
   {
      if(!--fxs)dest=_final;else dest.get(ImageRTDesc(size.x, size.y, GetImageRTType(_has_glow, D.litColRTPrecision()))); // can't read and write to the same RT, glow requires Alpha channel
      T.adaptEye(*_col.rc(), *dest); Swap(_col, dest); // Eye Adaptation keeps Alpha
   }
   if(bloom) // bloom needs to be done before motion/dof especially because of per-pixel glow
   {
      if(!--fxs)dest=_final;else dest.get(ImageRTDesc(size.x, size.y, IMAGERT_RGB)); // can't read and write to the same RT
      T.bloom(*_col, *dest, fx_dither); alpha_set=true; Swap(_col, dest); // Bloom sets Alpha
   }
   if(motion) // tests have shown that it's better to do Motion Blur before Depth of Field
   {
      if(!--fxs)dest=_final;else dest.get(ImageRTDesc(size.x, size.y, IMAGERT_RGB)); // can't read and write to the same RT
      // NOTE(review): when 'motionBlur' returns true, the whole function returns immediately (no swap, no further effects, no transfer to '_final') - what 'true' signals is not visible here, confirm against 'motionBlur'
      if(T.motionBlur(*_col, *dest, fx_dither))return; alpha_set=true; Swap(_col, dest); // Motion Blur sets Alpha
   }
   if(dof) // after Motion Blur
   {
      if(!--fxs)dest=_final;else dest.get(ImageRTDesc(size.x, size.y, IMAGERT_RGB)); // can't read and write to the same RT
      T.dof(*_col, *dest, fx_dither); alpha_set=true; Swap(_col, dest); // DoF sets Alpha
   }

   // 'upscale' will happen somewhere below

   if(combine)
   {
      T.Combine(); alpha_set=true; // Combine sets Alpha
   }

   if(!_get_target) // for '_get_target' leave the '_col' result for further processing
   {
      if(_col!=_final) // result is not yet in '_final', so it has to be transferred there
      {
         // the API specific code below intentionally ends with an unfinished 'if(..)' (DX11) or 'if(..)..;else' (DX9, GL),
         // which controls the '{..}' draw block located after '#endif', if none of these APIs is defined then the draw block is always executed
      #if DX11
         if(_col->multiSample())
         {
            if(_col->size()==_final->size()){_col->copyMs(*_final, false, true, D.viewRect()); _col=_final;}else resolveMultiSample(); // if the size is the same then we can resolve directly into the '_final', otherwise resolve first to temp RT and copy will be done below
         }
         if(_col!=_final) // if after resolve this is still not equal, then
      #elif DX9 || GL // in DX9, GL we can't read from '_main'
         if(_col==&_main || _col->multiSample())_col->copyHw(*_final, false, D.viewRect());else
      #endif
         {
            // draw '_col' onto '_final' using a shader, this handles upscaling, dithering and setting the Alpha channel
            D.alpha(ALPHA_NONE);
            set(_final(), null, true);
            Bool    dither=(D.dither() && !_final->highPrecision()); // disable dithering if destination has high precision
            Shader *shader=null; // remains null if no cubic shader was selected (or it's not available), in which case a fallback is chosen below
            if(upscale)switch(D.densityFilter()) // remember that cubic shaders are optional and can be null if failed to load
            {
               case FILTER_NONE: // temporarily switch to point/nearest sampling for the draw, this is reverted after the draw below
               {
               #if DX9
                  Sh.h_ImageCol[0]->_sampler=&SamplerPoint;
               #elif DX11
                  SamplerPoint.setPS(SSI_DEFAULT);
               #elif GL // in GL 'ShaderImage.Sampler' does not affect filtering, so modify it manually
                  D.texBind(GL_TEXTURE_2D, _col->_txtr); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
               #endif
               }break;

               case FILTER_CUBIC_FAST       :
               case FILTER_CUBIC_FAST_SMOOTH:
               case FILTER_CUBIC_FAST_SHARP : shader=(dither ? Sh.h_DrawTexCubicFastRGBD : Sh.h_DrawTexCubicFastRGB); break; // this doesn't need to check for "_col->highPrecision" because resizing and cubic filtering generates smooth values

               case FILTER_BEST       :
               case FILTER_CUBIC      :
               case FILTER_CUBIC_SHARP: Sh.loadCubicShaders(); shader=(dither ? Sh.h_DrawTexCubicRGBD : Sh.h_DrawTexCubicRGB); break; // this doesn't need to check for "_col->highPrecision" because resizing and cubic filtering generates smooth values
            }
            if(!shader) // fallback used when not upscaling, for filters not listed above, and when the cubic shader is null
            {
               if(dither && (_col->size()!=_final->size() || _col->highPrecision()))shader=Sh.h_Dither;  // allow dithering only if we're resizing (because that generates high precision too) or if the source has high precision
               else                                             {Sh.h_Step->set(1); shader=Sh.h_DrawA ;} // use 'DrawA' to set Alpha Channel
            }
            shader->draw(_col); alpha_set=true;
            if(upscale && D.densityFilter()==FILTER_NONE) // revert the sampling override made in 'FILTER_NONE' case above
            {
            #if DX9
               Sh.h_ImageCol[0]->_sampler=null;
            #elif DX11
               SamplerLinearClamp.setPS(SSI_DEFAULT);
            #elif GL
               if(!GL_ES || ImageTI[_col->hwType()].precision<IMAGE_PRECISION_32) // GLES2/3 don't support filtering F32 textures, without this check reading from F32 textures will fail - https://www.khronos.org/registry/OpenGL-Refpages/es3.0/html/glTexImage2D.xhtml
                  {D.texBind(GL_TEXTURE_2D, _col->_txtr); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);}
            #endif
            }
         }
      }
     _col.clear(); // release as it's no longer needed
      if(!alpha_set && _back==_final) // if we need to have alpha channel set for back buffer effect
      {
         // additively draw (0, 0, 0, 1), which leaves RGB unchanged and raises Alpha
         set(_final(), null, true);
         D.alpha(ALPHA_ADD);
         Sh.clear(Vec4(0, 0, 0, 1)); // force full alpha so back buffer effects can work ok
      }
   }
}
/******************************************************************************/
}
/******************************************************************************/