Procházet zdrojové kódy

embree: Update to 4.4.0

Rémi Verschelde před 3 měsíci
rodič
revize
57640dd806
53 změnil soubory, kde provedl 1446 přidání a 751 odebrání
  1. 1 1
      modules/raycast/godot_update_embree.py
  2. 1 2
      thirdparty/README.md
  3. 1 4
      thirdparty/embree/common/lexers/stringstream.cpp
  4. 2 2
      thirdparty/embree/common/math/vec2fa_sycl.h
  5. 10 2
      thirdparty/embree/common/math/vec3fa_sycl.h
  6. 56 140
      thirdparty/embree/common/sys/alloc.cpp
  7. 16 46
      thirdparty/embree/common/sys/alloc.h
  8. 0 12
      thirdparty/embree/common/sys/filename.cpp
  9. 0 3
      thirdparty/embree/common/sys/filename.h
  10. 2 5
      thirdparty/embree/common/sys/platform.h
  11. 8 13
      thirdparty/embree/common/sys/sycl.h
  12. 13 0
      thirdparty/embree/common/sys/sysinfo.cpp
  13. 1 1
      thirdparty/embree/common/sys/sysinfo.h
  14. 3 1
      thirdparty/embree/common/sys/vector.h
  15. 4 10
      thirdparty/embree/common/tasking/taskschedulerinternal.h
  16. 20 0
      thirdparty/embree/include/embree4/rtcore_buffer.h
  17. 26 4
      thirdparty/embree/include/embree4/rtcore_config.h
  18. 30 10
      thirdparty/embree/include/embree4/rtcore_device.h
  19. 11 1
      thirdparty/embree/include/embree4/rtcore_geometry.h
  20. 4 4
      thirdparty/embree/include/embree4/rtcore_ray.h
  21. 116 13
      thirdparty/embree/include/embree4/rtcore_scene.h
  22. 1 2
      thirdparty/embree/kernels/builders/bvh_builder_sah.h
  23. 3 5
      thirdparty/embree/kernels/builders/heuristic_spatial_array.h
  24. 3 3
      thirdparty/embree/kernels/bvh/bvh_builder_sah_spatial.cpp
  25. 1 2
      thirdparty/embree/kernels/bvh/bvh_statistics.cpp
  26. 4 10
      thirdparty/embree/kernels/common/alloc.h
  27. 220 79
      thirdparty/embree/kernels/common/buffer.h
  28. 99 45
      thirdparty/embree/kernels/common/device.cpp
  29. 48 4
      thirdparty/embree/kernels/common/device.h
  30. 1 1
      thirdparty/embree/kernels/common/geometry.cpp
  31. 14 5
      thirdparty/embree/kernels/common/geometry.h
  32. 1 1
      thirdparty/embree/kernels/common/ray.h
  33. 383 31
      thirdparty/embree/kernels/common/rtcore.cpp
  34. 80 10
      thirdparty/embree/kernels/common/scene.cpp
  35. 76 10
      thirdparty/embree/kernels/common/scene.h
  36. 3 1
      thirdparty/embree/kernels/common/scene_curves.h
  37. 8 1
      thirdparty/embree/kernels/common/scene_grid_mesh.h
  38. 2 1
      thirdparty/embree/kernels/common/scene_instance.h
  39. 3 2
      thirdparty/embree/kernels/common/scene_instance_array.h
  40. 3 1
      thirdparty/embree/kernels/common/scene_line_segments.h
  41. 3 1
      thirdparty/embree/kernels/common/scene_points.h
  42. 3 1
      thirdparty/embree/kernels/common/scene_quad_mesh.h
  43. 1 1
      thirdparty/embree/kernels/common/scene_subdiv_mesh.h
  44. 32 8
      thirdparty/embree/kernels/common/scene_triangle_mesh.cpp
  45. 17 13
      thirdparty/embree/kernels/common/scene_triangle_mesh.h
  46. 8 6
      thirdparty/embree/kernels/common/scene_user_geometry.h
  47. 13 5
      thirdparty/embree/kernels/common/state.cpp
  48. 11 2
      thirdparty/embree/kernels/common/state.h
  49. 1 1
      thirdparty/embree/kernels/hash.h
  50. 71 197
      thirdparty/embree/patches/0001-disable-exceptions.patch
  51. 4 4
      thirdparty/embree/patches/0002-godot-config.patch
  52. 4 4
      thirdparty/embree/patches/0003-emscripten-nthreads.patch
  53. 0 20
      thirdparty/embree/patches/0006-include-order-dllexport.patch

+ 1 - 1
modules/raycast/godot_update_embree.py

@@ -7,7 +7,7 @@ import subprocess
 import sys
 from typing import Any, Callable
 
-git_tag = "v4.3.1"
+git_tag = "v4.4.0"
 
 include_dirs = [
     "common/tasking",

+ 1 - 2
thirdparty/README.md

@@ -199,7 +199,7 @@ Files extracted from upstream source:
 ## embree
 
 - Upstream: https://github.com/embree/embree
-- Version: 4.3.1 (daa8de0e714e18ad5e5c9841b67c1950d9c91c51, 2024)
+- Version: 4.4.0 (ff9381774dc99fea81a932ad276677aad6a3d4dd, 2025)
 - License: Apache 2.0
 
 Files extracted from upstream:
@@ -216,7 +216,6 @@ Patches:
 - `0003-emscripten-nthreads.patch` (GH-69799)
 - `0004-mingw-no-cpuidex.patch` (GH-92488)
 - `0005-mingw-llvm-arm64.patch` (GH-93364)
-- `0006-include-order-dllexport.patch` (GH-94256)
 
 The `modules/raycast/godot_update_embree.py` script can be used to pull the
 relevant files from the latest Embree release and apply patches automatically.

+ 1 - 4
thirdparty/embree/common/lexers/stringstream.cpp

@@ -39,10 +39,7 @@ namespace embree
     std::vector<char> str; str.reserve(64);
     while (cin->peek() != EOF && !isSeparator(cin->peek())) {
       int c = cin->get();
-      //if (!isValidChar(c)) throw std::runtime_error("invalid character "+std::string(1,c)+" in input");
-      if (!isValidChar(c)) {
-        abort();
-      }
+      if (!isValidChar(c)) abort(); //throw std::runtime_error("invalid character "+std::string(1,c)+" in input");
       str.push_back((char)c);
     }
     str.push_back(0);

+ 2 - 2
thirdparty/embree/common/math/vec2fa_sycl.h

@@ -95,8 +95,8 @@ namespace embree
   __forceinline Vec2fa abs  ( const Vec2fa& a ) { return Vec2fa(sycl::fabs(a.x),sycl::fabs(a.y)); }
   __forceinline Vec2fa sign ( const Vec2fa& a ) { return Vec2fa(sycl::sign(a.x),sycl::sign(a.y)); }
 
-   //__forceinline Vec2fa rcp  ( const Vec2fa& a ) { return Vec2fa(sycl::recip(a.x),sycl::recip(a.y)); }
-  __forceinline Vec2fa rcp  ( const Vec2fa& a ) { return Vec2fa(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y)); }
+   //__forceinline Vec2fa rcp  ( const Vec2fa& a ) { return Vec2fa(sycl::native::recip(a.x),sycl::native::recip(a.y)); }
+  __forceinline Vec2fa rcp  ( const Vec2fa& a ) { return Vec2fa(sycl::native::recip(a.x),sycl::native::recip(a.y)); }
   __forceinline Vec2fa sqrt ( const Vec2fa& a ) { return Vec2fa(sycl::sqrt(a.x),sycl::sqrt(a.y)); }
   __forceinline Vec2fa sqr  ( const Vec2fa& a ) { return Vec2fa(a.x*a.x,a.y*a.y); }
   

+ 10 - 2
thirdparty/embree/common/math/vec3fa_sycl.h

@@ -92,7 +92,7 @@ namespace embree
   __forceinline Vec3fa sign ( const Vec3fa& a ) { return Vec3fa(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z)); }
 
   //__forceinline Vec3fa rcp  ( const Vec3fa& a ) { return Vec3fa(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); }
-  __forceinline Vec3fa rcp  ( const Vec3fa& a ) { return Vec3fa(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y),__sycl_std::__invoke_native_recip<float>(a.z)); }
+  __forceinline Vec3fa rcp  ( const Vec3fa& a ) { return Vec3fa(sycl::native::recip(a.x),sycl::native::recip(a.y),sycl::native::recip(a.z)); }
   __forceinline Vec3fa sqrt ( const Vec3fa& a ) { return Vec3fa(sycl::sqrt(a.x),sycl::sqrt(a.y),sycl::sqrt(a.z)); }
   __forceinline Vec3fa sqr  ( const Vec3fa& a ) { return Vec3fa(a.x*a.x,a.y*a.y,a.z*a.z); }
 
@@ -393,7 +393,7 @@ namespace embree
   __forceinline Vec3fx sign ( const Vec3fx& a ) { return Vec3fx(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z),sycl::sign(a.z)); }
 
   //__forceinline Vec3fx rcp  ( const Vec3fx& a ) { return Vec3fx(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); }
-  __forceinline Vec3fx rcp  ( const Vec3fx& a ) { return Vec3fx(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y),__sycl_std::__invoke_native_recip<float>(a.z),__sycl_std::__invoke_native_recip<float>(a.w)); }
+  __forceinline Vec3fx rcp  ( const Vec3fx& a ) { return Vec3fx(sycl::native::recip(a.x),sycl::native::recip(a.y),sycl::native::recip(a.z),sycl::native::recip(a.w)); }
   __forceinline Vec3fx sqrt ( const Vec3fx& a ) { return Vec3fx(sycl::sqrt(a.x),sycl::sqrt(a.y),sycl::sqrt(a.z),sycl::sqrt(a.w)); }
   __forceinline Vec3fx sqr  ( const Vec3fx& a ) { return Vec3fx(a.x*a.x,a.y*a.y,a.z*a.z,a.w*a.w); }
 
@@ -614,4 +614,12 @@ namespace embree
 
   //__forceinline Vec3ia::Vec3ia( const Vec3fx& a )
   //  : x((int)a.x), y((int)a.y), z((int)a.z) {}
+
+}
+
+#if __SYCL_COMPILER_VERSION >= 20210801
+namespace sycl {
+  template<> struct is_device_copyable<embree::Vec3fa> : std::true_type {};
+  template<> struct is_device_copyable<const embree::Vec3fa> : std::true_type {};
 }
+#endif

+ 56 - 140
thirdparty/embree/common/sys/alloc.cpp

@@ -12,69 +12,6 @@
   
 namespace embree
 {
-  size_t total_allocations = 0;
-
-#if defined(EMBREE_SYCL_SUPPORT)
-  
-  __thread sycl::context* tls_context_tutorial = nullptr;
-  __thread sycl::device* tls_device_tutorial = nullptr;
-  
-  __thread sycl::context* tls_context_embree = nullptr;
-  __thread sycl::device* tls_device_embree = nullptr;
-  
-  void enableUSMAllocEmbree(sycl::context* context, sycl::device* device)
-  {
-    //if (tls_context_embree != nullptr) throw std::runtime_error("USM allocation already enabled");
-    //if (tls_device_embree != nullptr) throw std::runtime_error("USM allocation already enabled");
-    if (tls_context_embree != nullptr) {
-      abort();
-    }
-    if (tls_device_embree != nullptr) {
-      abort();
-    }
-    tls_context_embree = context;
-    tls_device_embree = device;
-  }
-
-  void disableUSMAllocEmbree()
-  {
-    //if (tls_context_embree  == nullptr) throw std::runtime_error("USM allocation not enabled");
-    //if (tls_device_embree  == nullptr) throw std::runtime_error("USM allocation not enabled");
-    if (tls_context_embree  == nullptr) {
-      abort();
-    }
-    if (tls_device_embree  == nullptr) {
-      abort();
-    }
-    tls_context_embree = nullptr;
-    tls_device_embree = nullptr;
-  }
-
-  void enableUSMAllocTutorial(sycl::context* context, sycl::device* device)
-  {
-    //if (tls_context_tutorial != nullptr) throw std::runtime_error("USM allocation already enabled");
-    //if (tls_device_tutorial != nullptr) throw std::runtime_error("USM allocation already enabled");
-    tls_context_tutorial = context;
-    tls_device_tutorial = device;
-  }
-
-  void disableUSMAllocTutorial()
-  {
-    //if (tls_context_tutorial  == nullptr) throw std::runtime_error("USM allocation not enabled");
-    //if (tls_device_tutorial  == nullptr) throw std::runtime_error("USM allocation not enabled");
-    if (tls_context_tutorial  == nullptr) {
-      abort();
-    }
-    if (tls_device_tutorial  == nullptr) {
-      abort();
-    }
-    
-    tls_context_tutorial = nullptr;
-    tls_device_tutorial = nullptr;
-  }
-
-#endif
-  
   void* alignedMalloc(size_t size, size_t align)
   {
     if (size == 0)
@@ -82,18 +19,16 @@ namespace embree
 
     assert((align & (align-1)) == 0);
     void* ptr = _mm_malloc(size,align);
-    //if (size != 0 && ptr == nullptr)
-    //  throw std::bad_alloc();
-    if (size != 0 && ptr == nullptr) {
-      abort();
-    }
+    if (size != 0 && ptr == nullptr)
+      abort(); //throw std::bad_alloc();
     return ptr;
   }
 
   void alignedFree(void* ptr)
   {
-    if (ptr)
+    if (ptr) {
       _mm_free(ptr);
+    }
   }
 
 #if defined(EMBREE_SYCL_SUPPORT)
@@ -107,67 +42,66 @@ namespace embree
       return nullptr;
 
     assert((align & (align-1)) == 0);
-    total_allocations++;    
 
     void* ptr = nullptr;
-    if (mode == EMBREE_USM_SHARED_DEVICE_READ_ONLY)
+    if (mode == EmbreeUSMMode::DEVICE_READ_ONLY)
       ptr = sycl::aligned_alloc_shared(align,size,*device,*context,sycl::ext::oneapi::property::usm::device_read_only());
     else
       ptr = sycl::aligned_alloc_shared(align,size,*device,*context);
-      
-    //if (size != 0 && ptr == nullptr)
-    //  throw std::bad_alloc();
-    if (size != 0 && ptr == nullptr) {
-      abort();
-    }
+
+    if (size != 0 && ptr == nullptr)
+      abort(); //throw std::bad_alloc();
 
     return ptr;
   }
-  
-  static MutexSys g_alloc_mutex;
-  
-  void* alignedSYCLMalloc(size_t size, size_t align, EmbreeUSMMode mode)
-  {
-    if (tls_context_tutorial) return alignedSYCLMalloc(tls_context_tutorial, tls_device_tutorial, size, align, mode);
-    if (tls_context_embree  ) return alignedSYCLMalloc(tls_context_embree,   tls_device_embree,   size, align, mode);
-    return nullptr;
-  }
 
-  void alignedSYCLFree(sycl::context* context, void* ptr)
+  void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode, EmbreeMemoryType type)
   {
     assert(context);
-    if (ptr) {
-      sycl::free(ptr,*context);
-    }
-  }
+    assert(device);
+    
+    if (size == 0)
+      return nullptr;
 
-  void alignedSYCLFree(void* ptr)
-  {
-    if (tls_context_tutorial) return alignedSYCLFree(tls_context_tutorial, ptr);
-    if (tls_context_embree  ) return alignedSYCLFree(tls_context_embree, ptr);
-  }
+    assert((align & (align-1)) == 0);
 
-#endif
+    void* ptr = nullptr;
+    if (type == EmbreeMemoryType::USM_SHARED) {
+      if (mode == EmbreeUSMMode::DEVICE_READ_ONLY)
+        ptr = sycl::aligned_alloc_shared(align,size,*device,*context,sycl::ext::oneapi::property::usm::device_read_only());
+      else
+        ptr = sycl::aligned_alloc_shared(align,size,*device,*context);
+    }
+    else if (type == EmbreeMemoryType::USM_HOST) {
+      ptr = sycl::aligned_alloc_host(align,size,*context);
+    }
+    else if (type == EmbreeMemoryType::USM_DEVICE) {
+      ptr = sycl::aligned_alloc_device(align,size,*device,*context);
+    }
+    else {
+      ptr = alignedMalloc(size,align);
+    }
 
-  void* alignedUSMMalloc(size_t size, size_t align, EmbreeUSMMode mode)
+    if (size != 0 && ptr == nullptr)
+      abort(); //throw std::bad_alloc();
+
+    return ptr;
+  }
+  
+  void alignedSYCLFree(sycl::context* context, void* ptr)
   {
-#if defined(EMBREE_SYCL_SUPPORT)
-    if (tls_context_embree || tls_context_tutorial)
-      return alignedSYCLMalloc(size,align,mode);
-    else
-#endif
-      return alignedMalloc(size,align);
+    assert(context);
+    if (ptr) {
+      sycl::usm::alloc type = sycl::get_pointer_type(ptr, *context);
+      if (type == sycl::usm::alloc::host || type == sycl::usm::alloc::device || type == sycl::usm::alloc::shared)
+        sycl::free(ptr,*context);
+      else {
+        alignedFree(ptr);
+      }
+    }
   }
 
-  void alignedUSMFree(void* ptr)
-  {
-#if defined(EMBREE_SYCL_SUPPORT)
-    if (tls_context_embree || tls_context_tutorial)
-      return alignedSYCLFree(ptr);
-    else
 #endif
-      return alignedFree(ptr);
-  }
 
   static bool huge_pages_enabled = false;
   static MutexSys os_init_mutex;
@@ -265,10 +199,7 @@ namespace embree
     /* fall back to 4k pages */
     int flags = MEM_COMMIT | MEM_RESERVE;
     char* ptr = (char*) VirtualAlloc(nullptr,bytes,flags,PAGE_READWRITE);
-    //if (ptr == nullptr) throw std::bad_alloc();
-    if (ptr == nullptr) {
-      abort();
-    }
+    if (ptr == nullptr) abort(); //throw std::bad_alloc();
     hugepages = false;
     return ptr;
   }
@@ -284,11 +215,8 @@ namespace embree
     if (bytesNew >= bytesOld)
       return bytesOld;
 
-    //if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT))
-    //  throw std::bad_alloc();
-    if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT)) {
-      abort();
-    }
+    if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT))
+      abort(); //throw std::bad_alloc();
 
     return bytesNew;
   }
@@ -298,11 +226,8 @@ namespace embree
     if (bytes == 0) 
       return;
 
-    //if (!VirtualFree(ptr,0,MEM_RELEASE))
-    //  throw std::bad_alloc();
-    if (!VirtualFree(ptr,0,MEM_RELEASE)) {
-      abort();
-    }
+    if (!VirtualFree(ptr,0,MEM_RELEASE))
+      abort(); //throw std::bad_alloc();
   }
 
   void os_advise(void *ptr, size_t bytes)
@@ -406,10 +331,7 @@ namespace embree
 
     /* fallback to 4k pages */
     void* ptr = (char*) mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
-    //if (ptr == MAP_FAILED) throw std::bad_alloc();
-    if (ptr == MAP_FAILED) {
-      abort();
-    }
+    if (ptr == MAP_FAILED) abort(); //throw std::bad_alloc();
     hugepages = false;
 
     /* advise huge page hint for THP */
@@ -425,11 +347,8 @@ namespace embree
     if (bytesNew >= bytesOld)
       return bytesOld;
 
-    //if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1)
-    //  throw std::bad_alloc();
-    if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1) {
-      abort();
-    }
+    if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1)
+      abort(); //throw std::bad_alloc();
 
     return bytesNew;
   }
@@ -442,11 +361,8 @@ namespace embree
     /* for hugepages we need to also align the size */
     const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K;
     bytes = (bytes+pageSize-1) & ~(pageSize-1);
-    //if (munmap(ptr,bytes) == -1)
-    //  throw std::bad_alloc();
-    if (munmap(ptr,bytes) == -1) {
-      abort();
-    }
+    if (munmap(ptr,bytes) == -1)
+      abort(); //throw std::bad_alloc();
   }
 
   /* hint for transparent huge pages (THP) */

+ 16 - 46
thirdparty/embree/common/sys/alloc.h

@@ -9,71 +9,42 @@
 
 namespace embree
 {
-#if defined(EMBREE_SYCL_SUPPORT)
-
-  /* enables SYCL USM allocation */
-  void enableUSMAllocEmbree(sycl::context* context, sycl::device* device);
-  void enableUSMAllocTutorial(sycl::context* context, sycl::device* device);
-
-  /* disables SYCL USM allocation */
-  void disableUSMAllocEmbree();
-  void disableUSMAllocTutorial();
-
-#endif
-  
 #define ALIGNED_STRUCT_(align)                                            \
   void* operator new(size_t size) { return alignedMalloc(size,align); }   \
   void operator delete(void* ptr) { alignedFree(ptr); }                   \
   void* operator new[](size_t size) { return alignedMalloc(size,align); } \
   void operator delete[](void* ptr) { alignedFree(ptr); }
   
-#define ALIGNED_STRUCT_USM_(align)                                          \
-  void* operator new(size_t size) { return alignedUSMMalloc(size,align); }   \
-  void operator delete(void* ptr) { alignedUSMFree(ptr); }                   \
-  void* operator new[](size_t size) { return alignedUSMMalloc(size,align); } \
-  void operator delete[](void* ptr) { alignedUSMFree(ptr); }
-  
 #define ALIGNED_CLASS_(align)                                          \
  public:                                                               \
     ALIGNED_STRUCT_(align)                                             \
  private:
 
-#define ALIGNED_CLASS_USM_(align)                                          \
- public:                                                               \
-    ALIGNED_STRUCT_USM_(align)                                             \
- private:
-
-  enum EmbreeUSMMode {
-    EMBREE_USM_SHARED = 0,
-    EMBREE_USM_SHARED_DEVICE_READ_WRITE = 0,
-    EMBREE_USM_SHARED_DEVICE_READ_ONLY = 1
-  };
-  
   /*! aligned allocation */
   void* alignedMalloc(size_t size, size_t align);
   void alignedFree(void* ptr);
 
-  /*! aligned allocation using SYCL USM */
-  void* alignedUSMMalloc(size_t size, size_t align = 16, EmbreeUSMMode mode = EMBREE_USM_SHARED_DEVICE_READ_ONLY);
-  void alignedUSMFree(void* ptr);
+
+  enum class EmbreeUSMMode {
+    DEFAULT = 0,
+    DEVICE_READ_WRITE = 0,
+    DEVICE_READ_ONLY = 1
+  };
+
+  enum class EmbreeMemoryType {
+    USM_HOST = 0,
+    USM_DEVICE = 1,
+    USM_SHARED = 2,
+    MALLOC = 3
+  };
 
 #if defined(EMBREE_SYCL_SUPPORT)
-  
+
   /*! aligned allocation using SYCL USM */
   void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode);
+  void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode, EmbreeMemoryType type);
   void alignedSYCLFree(sycl::context* context, void* ptr);
 
-  // deleter functor to use as deleter in std unique or shared pointers that
-  // capture raw pointers created by sycl::malloc and it's variants
-  template<typename T>
-  struct sycl_deleter
-  {
-    void operator()(T const* ptr)
-    {
-      alignedUSMFree((void*)ptr);
-    }
-  };
-
 #endif
   
   /*! allocator that performs aligned allocations */
@@ -160,8 +131,7 @@ namespace embree
       typedef std::ptrdiff_t difference_type;
 
       __forceinline pointer allocate( size_type n ) {
-        //throw std::runtime_error("no allocation supported");
-        abort();
+        abort(); //throw std::runtime_error("no allocation supported");
       }
 
       __forceinline void deallocate( pointer p, size_type n ) {

+ 0 - 12
thirdparty/embree/common/sys/filename.cpp

@@ -35,18 +35,6 @@ namespace embree
       filename.resize(filename.size()-1);
   }
   
-  /*! returns path to home folder */
-  FileName FileName::homeFolder() 
-  {
-#ifdef __WIN32__
-    const char* home = getenv("UserProfile");
-#else
-    const char* home = getenv("HOME");
-#endif
-    if (home) return home;
-    return "";
-  }
-
   /*! returns path to executable */
   FileName FileName::executableFolder() {
     return FileName(getExecutableFileName()).path();

+ 0 - 3
thirdparty/embree/common/sys/filename.h

@@ -20,9 +20,6 @@ namespace embree
 
     /*! create a valid filename from a string */
     FileName (const std::string& filename);
-    
-    /*! returns path to home folder */
-    static FileName homeFolder();
 
     /*! returns path to executable */
     static FileName executableFolder();

+ 2 - 5
thirdparty/embree/common/sys/platform.h

@@ -213,15 +213,12 @@
 #define UPRINT4(x,y,z,w) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << ", " << STRING(w) << " = " << (w) << embree_endl
 
 #if defined(DEBUG) // only report file and line in debug mode
-  //#define THROW_RUNTIME_ERROR(str) \
-  //  throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
   #define THROW_RUNTIME_ERROR(str) \
     printf("%s (%d): %s", __FILE__, __LINE__, std::string(str).c_str()), abort();
+    //throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
 #else
-  //#define THROW_RUNTIME_ERROR(str) \
-  //  throw std::runtime_error(str);
   #define THROW_RUNTIME_ERROR(str) \
-    abort();
+    abort(); //throw std::runtime_error(str);
 #endif
 
 #define FATAL(x)   THROW_RUNTIME_ERROR(x)

+ 8 - 13
thirdparty/embree/common/sys/sycl.h

@@ -181,8 +181,15 @@ namespace embree
     return sycl::select(b,a,mask);
   }
   
+#define XSTR(x) STR(x)
+#define STR(x) #x
+
   __forceinline const sycl::sub_group this_sub_group() {
-    return sycl::ext::oneapi::experimental::this_sub_group(); 
+#if __LIBSYCL_MAJOR_VERSION >= 8
+    return sycl::ext::oneapi::this_work_item::get_sub_group();
+#else
+    return sycl::ext::oneapi::experimental::this_sub_group();
+#endif
   }
   
   __forceinline const uint32_t get_sub_group_local_id() {
@@ -275,18 +282,6 @@ namespace embree
     return sycl::inclusive_scan_over_group(this_sub_group(),x,binary_op,init);
   }
 
-  template <typename T> __forceinline T sub_group_shuffle(T x, sycl::id<1> local_id) {
-    return this_sub_group().shuffle(x, local_id);
-  }
-
-  template <typename T> __forceinline T sub_group_shuffle_down(T x, uint32_t delta) {
-    return this_sub_group().shuffle_down(x, delta);
-  }
-  
-  template <typename T> __forceinline T sub_group_shuffle_up(T x, uint32_t delta) {
-    return this_sub_group().shuffle_up(x, delta);
-  }
-
   template <typename T> __forceinline T sub_group_load(const void* src) {
     return this_sub_group().load(sycl::multi_ptr<T,sycl::access::address_space::global_space>((T*)src));
   }

+ 13 - 0
thirdparty/embree/common/sys/sysinfo.cpp

@@ -344,6 +344,19 @@ namespace embree
     if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512VL  ) cpu_features |= CPU_FEATURE_AVX512VL;
     if (cpuid_leaf_7[ECX] & CPU_FEATURE_BIT_AVX512VBMI) cpu_features |= CPU_FEATURE_AVX512VBMI;
 
+#if defined(__MACOSX__)
+    if (   (cpu_features & CPU_FEATURE_AVX512F)
+        || (cpu_features & CPU_FEATURE_AVX512DQ)
+        || (cpu_features & CPU_FEATURE_AVX512CD)
+        || (cpu_features & CPU_FEATURE_AVX512BW)
+        || (cpu_features & CPU_FEATURE_AVX512VL) )
+      {
+        // on macOS AVX512 will be enabled automatically by the kernel when the first AVX512 instruction is called
+        // see https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/osfmk/i386/fpu.c#L176
+        // therefore we ignore the state of XCR0
+        cpu_features |= CPU_FEATURE_ZMM_ENABLED;
+      }
+#endif
     return cpu_features;
 
 #elif defined(__ARM_NEON) || defined(__EMSCRIPTEN__)

+ 1 - 1
thirdparty/embree/common/sys/sysinfo.h

@@ -158,7 +158,7 @@ namespace embree
   static const int SSE41  = SSSE3 | CPU_FEATURE_SSE41;
   static const int SSE42  = SSE41 | CPU_FEATURE_SSE42 | CPU_FEATURE_POPCNT;
   static const int AVX    = SSE42 | CPU_FEATURE_AVX | CPU_FEATURE_YMM_ENABLED;
-  static const int AVXI   = AVX | CPU_FEATURE_F16C | CPU_FEATURE_RDRAND;
+  static const int AVXI   = AVX | CPU_FEATURE_F16C;
   static const int AVX2   = AVXI | CPU_FEATURE_AVX2 | CPU_FEATURE_FMA3 | CPU_FEATURE_BMI1 | CPU_FEATURE_BMI2 | CPU_FEATURE_LZCNT;
   static const int AVX512 = AVX2 | CPU_FEATURE_AVX512F | CPU_FEATURE_AVX512DQ | CPU_FEATURE_AVX512CD | CPU_FEATURE_AVX512BW | CPU_FEATURE_AVX512VL | CPU_FEATURE_ZMM_ENABLED;
   static const int NEON = CPU_FEATURE_NEON | CPU_FEATURE_SSE | CPU_FEATURE_SSE2;

+ 3 - 1
thirdparty/embree/common/sys/vector.h

@@ -124,8 +124,10 @@ namespace embree
 
       __forceinline       T* data()       { return items; };
       __forceinline const T* data() const { return items; };
+      
+      /* dangerous only use if you know what you're doing */
+      __forceinline void setDataPtr(T* data) { items = data; }
 
-     
       /******************** Modifiers **************************/
 
       __forceinline void push_back(const T& nt) 

+ 4 - 10
thirdparty/embree/common/tasking/taskschedulerinternal.h

@@ -130,11 +130,8 @@ namespace embree
       __forceinline void* alloc(size_t bytes, size_t align = 64)
       {
         size_t ofs = bytes + ((align - stackPtr) & (align-1));
-        //if (stackPtr + ofs > CLOSURE_STACK_SIZE)
-        //  throw std::runtime_error("closure stack overflow");
-        if (stackPtr + ofs > CLOSURE_STACK_SIZE) {
-          abort();
-        }
+        if (stackPtr + ofs > CLOSURE_STACK_SIZE)
+          abort(); //throw std::runtime_error("closure stack overflow");
         stackPtr += ofs;
         return &stack[stackPtr-bytes];
       }
@@ -142,11 +139,8 @@ namespace embree
       template<typename Closure>
       __forceinline void push_right(Thread& thread, const size_t size, const Closure& closure, TaskGroupContext* context)
       {
-        //if (right >= TASK_STACK_SIZE)
-        //  throw std::runtime_error("task stack overflow");
-        if (right >= TASK_STACK_SIZE) {
-          abort();
-        }
+        if (right >= TASK_STACK_SIZE)
+          abort(); //throw std::runtime_error("task stack overflow");
 
 	/* allocate new task on right side of stack */
         size_t oldStackPtr = stackPtr;

+ 20 - 0
thirdparty/embree/include/embree4/rtcore_buffer.h

@@ -38,12 +38,32 @@ typedef struct RTCBufferTy* RTCBuffer;
 /* Creates a new buffer. */
 RTC_API RTCBuffer rtcNewBuffer(RTCDevice device, size_t byteSize);
 
+/* Creates a new buffer using explicit host device memory. */
+RTC_API RTCBuffer rtcNewBufferHostDevice(RTCDevice device, size_t byteSize);
+
 /* Creates a new shared buffer. */
 RTC_API RTCBuffer rtcNewSharedBuffer(RTCDevice device, void* ptr, size_t byteSize);
 
+/* Creates a new shared buffer using explicit host device memory. */
+RTC_API RTCBuffer rtcNewSharedBufferHostDevice(RTCDevice device, void* ptr, size_t byteSize);
+
+/* Synchronize host and device memory by copying data from host to device. */
+RTC_API void rtcCommitBuffer(RTCBuffer buffer);
+
+#if defined(EMBREE_SYCL_SUPPORT) && defined(SYCL_LANGUAGE_VERSION)
+
+RTC_API_CPP sycl::event rtcCommitBufferWithQueue(RTCBuffer buffer, sycl::queue queue);
+
+#endif
+
 /* Returns a pointer to the buffer data. */
 RTC_API void* rtcGetBufferData(RTCBuffer buffer);
 
+/* Returns a pointer to the buffer data on the device. Returns the same pointer as
+  rtcGetBufferData if the device is no SYCL device or if Embree is executed on a
+  system with unified memory (e.g., iGPUs). */
+RTC_API void* rtcGetBufferDataDevice(RTCBuffer buffer);
+
 /* Retains the buffer (increments the reference count). */
 RTC_API void rtcRetainBuffer(RTCBuffer buffer);
 

+ 26 - 4
thirdparty/embree/include/embree4/rtcore_config.h

@@ -8,10 +8,10 @@
 #endif
 
 #define RTC_VERSION_MAJOR 4
-#define RTC_VERSION_MINOR 3
-#define RTC_VERSION_PATCH 1
-#define RTC_VERSION 40301
-#define RTC_VERSION_STRING "4.3.1"
+#define RTC_VERSION_MINOR 4
+#define RTC_VERSION_PATCH 0
+#define RTC_VERSION 40400
+#define RTC_VERSION_STRING "4.4.0"
 
 #define RTC_MAX_INSTANCE_LEVEL_COUNT 1
 
@@ -36,6 +36,7 @@
 #  define RTC_NAMESPACE_END }
 #  define RTC_NAMESPACE_USE using namespace;
 #  define RTC_API_EXTERN_C
+#  define RTC_API_EXTERN_CPP
 #  undef EMBREE_API_NAMESPACE
 #else
 #  define RTC_NAMESPACE_BEGIN
@@ -43,6 +44,7 @@
 #  define RTC_NAMESPACE_USE
 #  if defined(__cplusplus)
 #    define RTC_API_EXTERN_C extern "C"
+#    define RTC_API_EXTERN_CPP extern "C++"
 #  else
 #    define RTC_API_EXTERN_C
 #  endif
@@ -62,12 +64,32 @@
 #  define RTC_API_EXPORT RTC_API_EXTERN_C __attribute__ ((visibility ("default")))
 #endif
 
+#if defined(ISPC)
+#  define RTC_API_IMPORT_CPP extern "C++" unmasked
+#  define RTC_API_EXPORT_CPP extern "C++" unmasked
+#elif defined(EMBREE_STATIC_LIB)
+#  define RTC_API_IMPORT_CPP RTC_API_EXTERN_CPP
+#  define RTC_API_EXPORT_CPP RTC_API_EXTERN_CPP
+#elif defined(_WIN32)
+#  define RTC_API_IMPORT_CPP RTC_API_EXTERN_CPP __declspec(dllimport)
+#  define RTC_API_EXPORT_CPP RTC_API_EXTERN_CPP __declspec(dllexport)
+#else
+#  define RTC_API_IMPORT_CPP RTC_API_EXTERN_CPP
+#  define RTC_API_EXPORT_CPP RTC_API_EXTERN_CPP __attribute__ ((visibility ("default")))
+#endif
+
 #if defined(RTC_EXPORT_API)
 #  define RTC_API RTC_API_EXPORT
 #else
 #  define RTC_API RTC_API_IMPORT
 #endif
 
+#if defined(RTC_EXPORT_API)
+#  define RTC_API_CPP RTC_API_EXPORT_CPP
+#else
+#  define RTC_API_CPP RTC_API_IMPORT_CPP
+#endif
+
 #if defined(ISPC)
 #  define RTC_SYCL_INDIRECTLY_CALLABLE
 #elif defined(__SYCL_DEVICE_ONLY__)

+ 30 - 10
thirdparty/embree/include/embree4/rtcore_device.h

@@ -9,14 +9,18 @@ RTC_NAMESPACE_BEGIN
 
 /* Opaque device type */
 typedef struct RTCDeviceTy* RTCDevice;
+typedef struct RTCSceneTy* RTCScene;
 
 /* Creates a new Embree device. */
 RTC_API RTCDevice rtcNewDevice(const char* config);
 
 #if defined(EMBREE_SYCL_SUPPORT) && defined(SYCL_LANGUAGE_VERSION)
 
-
-/* Creates a new Embree SYCL device. */
+/*
+  Creates a new Embree SYCL device. It will internally select the first SYCL device of
+  the SYCL context as the default device for memory allocations. You can set a specific
+  SYCL device that's part of the SYCL context by calling rtcSetDeviceSYCLDevice.
+*/
 RTC_API_EXTERN_C RTCDevice rtcNewSYCLDevice(sycl::context context, const char* config);
 
 /* Checks if SYCL device is supported by Embree. */
@@ -28,6 +32,10 @@ RTC_API int rtcSYCLDeviceSelector(const sycl::device sycl_device);
 /* Set the SYCL device to be used to allocate data */
 RTC_API void rtcSetDeviceSYCLDevice(RTCDevice device, const sycl::device sycl_device);
 
+/* rtcCommitSceneWithQueue is asynchronous, user has to call queue.wait()
+   for synchronization. rtcCommitScene is blocking. */
+RTC_API_CPP sycl::event rtcCommitSceneWithQueue(RTCScene scene, sycl::queue queue);
+
 #endif
 
 
@@ -66,7 +74,10 @@ enum RTCDeviceProperty
 
   RTC_DEVICE_PROPERTY_TASKING_SYSTEM        = 128,
   RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED = 129,
-  RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED = 130
+  RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED = 130,
+
+  RTC_DEVICE_PROPERTY_CPU_DEVICE  = 140,
+  RTC_DEVICE_PROPERTY_SYCL_DEVICE = 141
 };
 
 /* Gets a device property. */
@@ -78,18 +89,27 @@ RTC_API void rtcSetDeviceProperty(RTCDevice device, const enum RTCDeviceProperty
 /* Error codes */
 enum RTCError
 {
-  RTC_ERROR_NONE              = 0,
-  RTC_ERROR_UNKNOWN           = 1,
-  RTC_ERROR_INVALID_ARGUMENT  = 2,
-  RTC_ERROR_INVALID_OPERATION = 3,
-  RTC_ERROR_OUT_OF_MEMORY     = 4,
-  RTC_ERROR_UNSUPPORTED_CPU   = 5,
-  RTC_ERROR_CANCELLED         = 6,
+  RTC_ERROR_NONE                                  = 0,
+  RTC_ERROR_UNKNOWN                               = 1,
+  RTC_ERROR_INVALID_ARGUMENT                      = 2,
+  RTC_ERROR_INVALID_OPERATION                     = 3,
+  RTC_ERROR_OUT_OF_MEMORY                         = 4,
+  RTC_ERROR_UNSUPPORTED_CPU                       = 5,
+  RTC_ERROR_CANCELLED                             = 6,
+  RTC_ERROR_LEVEL_ZERO_RAYTRACING_SUPPORT_MISSING = 7,
 };
 
+/* Returns the string representation for the error code. For example, for RTC_ERROR_UNKNOWN the string "RTC_ERROR_UNKNOWN" will be returned. */
+RTC_API const char* rtcGetErrorString(enum RTCError error);
+
 /* Returns the error code. */
 RTC_API enum RTCError rtcGetDeviceError(RTCDevice device);
 
+/* Returns a message corresponding to the last error code (returned by rtcGetDeviceError) which provides details about the error that happened.
+   The same message will be written to console when verbosity is > 0 or when an error callback function is set for the device.
+   However, when device creation itself fails this is the only way to get additional information about the error. */
+RTC_API const char* rtcGetDeviceLastErrorMessage(RTCDevice device);
+
 /* Error callback function */
 typedef void (*RTCErrorFunction)(void* userPtr, enum RTCError code, const char* str);
 

+ 11 - 1
thirdparty/embree/include/embree4/rtcore_geometry.h

@@ -171,16 +171,26 @@ RTC_API void rtcSetGeometryBuffer(RTCGeometry geometry, enum RTCBufferType type,
 /* Sets a shared geometry buffer. */
 RTC_API void rtcSetSharedGeometryBuffer(RTCGeometry geometry, enum RTCBufferType type, unsigned int slot, enum RTCFormat format, const void* ptr, size_t byteOffset, size_t byteStride, size_t itemCount);
 
+/* Sets a shared host/device geometry buffer pair. */
+RTC_API void rtcSetSharedGeometryBufferHostDevice(RTCGeometry geometry, enum RTCBufferType bufferType, unsigned int slot, enum RTCFormat format, const void* ptr, const void* dptr, size_t byteOffset, size_t byteStride, size_t itemCount);
+
 /* Creates and sets a new geometry buffer. */
 RTC_API void* rtcSetNewGeometryBuffer(RTCGeometry geometry, enum RTCBufferType type, unsigned int slot, enum RTCFormat format, size_t byteStride, size_t itemCount);
 
+/* Creates and sets a new host/device geometry buffer pair. */
+RTC_API void rtcSetNewGeometryBufferHostDevice(RTCGeometry geometry, enum RTCBufferType bufferType, unsigned int slot, enum RTCFormat format, size_t byteStride, size_t itemCount, void** ptr, void** dptr);
+
 /* Returns the pointer to the data of a buffer. */
 RTC_API void* rtcGetGeometryBufferData(RTCGeometry geometry, enum RTCBufferType type, unsigned int slot);
 
+/* Returns a pointer to the buffer data on the device. Returns the same pointer as
+  rtcGetGeometryBufferData if the device is no SYCL device or if Embree is executed on a
+  system with unified memory (e.g., iGPUs). */
+RTC_API void* rtcGetGeometryBufferDataDevice(RTCGeometry geometry, enum RTCBufferType type, unsigned int slot);
+
 /* Updates a geometry buffer. */
 RTC_API void rtcUpdateGeometryBuffer(RTCGeometry geometry, enum RTCBufferType type, unsigned int slot);
 
-
 /* Sets the intersection filter callback function of the geometry. */
 RTC_API void rtcSetGeometryIntersectFilterFunction(RTCGeometry geometry, RTCFilterFunctionN filter);
 

+ 4 - 4
thirdparty/embree/include/embree4/rtcore_ray.h

@@ -225,8 +225,8 @@ RTC_FORCEINLINE RTCRayN* RTCRayHitN_RayN(RTCRayHitN* rayhit, unsigned int N) { r
 RTC_FORCEINLINE RTCHitN* RTCRayHitN_HitN(RTCRayHitN* rayhit, unsigned int N) { return (RTCHitN*)&((float*)rayhit)[12*N]; }
 
 /* Helper structure for a ray packet of compile-time size N */
-template<int N>
-struct RTCRayNt
+template<unsigned int N>
+struct RTC_ALIGN((N && !(N & (N - 1)) ? (N * 4 > 16 ? N * 4 : 16) : 16)) RTCRayNt
 {
   float org_x[N];
   float org_y[N];
@@ -245,8 +245,8 @@ struct RTCRayNt
 };
 
 /* Helper structure for a hit packet of compile-time size N */
-template<int N>
-struct RTCHitNt
+template<unsigned int N>
+struct RTC_ALIGN((N && !(N & (N - 1)) ? (N * 4 > 16 ? N * 4 : 16) : 16)) RTCHitNt
 {
   float Ng_x[N];
   float Ng_y[N];

+ 116 - 13
thirdparty/embree/include/embree4/rtcore_scene.h

@@ -6,7 +6,10 @@
 #include "rtcore_device.h"
 
 RTC_NAMESPACE_BEGIN
-  
+
+/* Opaque traversable type */
+typedef struct RTCTraversableTy* RTCTraversable;
+
 /* Forward declarations for ray structures */
 struct RTCRayHit;
 struct RTCRayHit4;
@@ -16,11 +19,12 @@ struct RTCRayHit16;
 /* Scene flags */
 enum RTCSceneFlags
 {
-  RTC_SCENE_FLAG_NONE                    = 0,
-  RTC_SCENE_FLAG_DYNAMIC                 = (1 << 0),
-  RTC_SCENE_FLAG_COMPACT                 = (1 << 1),
-  RTC_SCENE_FLAG_ROBUST                  = (1 << 2),
-  RTC_SCENE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS = (1 << 3)
+  RTC_SCENE_FLAG_NONE                         = 0,
+  RTC_SCENE_FLAG_DYNAMIC                      = (1 << 0),
+  RTC_SCENE_FLAG_COMPACT                      = (1 << 1),
+  RTC_SCENE_FLAG_ROBUST                       = (1 << 2),
+  RTC_SCENE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS = (1 << 3),
+  RTC_SCENE_FLAG_PREFETCH_USM_SHARED_ON_GPU   = (1 << 4),
 };
 
 /* Additional arguments for rtcIntersect1/4/8/16 calls */
@@ -91,6 +95,8 @@ RTC_API void rtcRetainScene(RTCScene scene);
 /* Releases the scene (decrements the reference count). */
 RTC_API void rtcReleaseScene(RTCScene scene);
 
+/* Returns the traversable object of the scene which can be passed to ray queries. */
+RTC_API RTCTraversable rtcGetSceneTraversable(RTCScene scene);
 
 /* Attaches the geometry to a scene. */
 RTC_API unsigned int rtcAttachGeometry(RTCScene scene, RTCGeometry geometry);
@@ -107,12 +113,6 @@ RTC_API RTCGeometry rtcGetGeometry(RTCScene scene, unsigned int geomID);
 /* Gets a geometry handle from the scene. This function is thread safe and should NOT get used during rendering. */
 RTC_API RTCGeometry rtcGetGeometryThreadSafe(RTCScene scene, unsigned int geomID);
 
-/* Gets the user-defined data pointer of the geometry. This function is not thread safe and should get used during rendering. */
-RTC_SYCL_API void* rtcGetGeometryUserDataFromScene(RTCScene scene, unsigned int geomID);
-
-/* Returns the interpolated transformation of an instance for the specified time. */
-RTC_SYCL_API void rtcGetGeometryTransformFromScene(RTCScene scene, unsigned int geomID, float time, enum RTCFormat format, void* xfm);
-
 
 /* Commits the scene. */
 RTC_API void rtcCommitScene(RTCScene scene);
@@ -142,6 +142,13 @@ RTC_API void rtcGetSceneBounds(RTCScene scene, struct RTCBounds* bounds_o);
 /* Returns the linear axis-aligned bounds of the scene. */
 RTC_API void rtcGetSceneLinearBounds(RTCScene scene, struct RTCLinearBounds* bounds_o);
 
+#if !defined(__SYCL_DEVICE_ONLY__)
+
+/* Gets the user-defined data pointer of the geometry. This function is not thread safe and should get used during rendering. */
+RTC_SYCL_API void* rtcGetGeometryUserDataFromScene(RTCScene scene, unsigned int geomID);
+
+/* Returns the interpolated transformation of an instance for the specified time. */
+RTC_SYCL_API void rtcGetGeometryTransformFromScene(RTCScene scene, unsigned int geomID, float time, enum RTCFormat format, void* xfm);
 
 /* Perform a closest point query of the scene. */
 RTC_API bool rtcPointQuery(RTCScene scene, struct RTCPointQuery* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void* userPtr);
@@ -231,6 +238,102 @@ RTC_API void rtcForwardOccluded16(const int* valid, const struct RTCOccludedFunc
 /* Forwards occlusion ray packet of size 16 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
 RTC_API void rtcForwardOccluded16Ex(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCScene scene, struct RTCRay16* ray, unsigned int instID, unsigned int instPrimID);
 
+#endif
+
+/* Gets the user-defined data pointer of the geometry. This function is not thread safe and should get used during rendering. */
+RTC_SYCL_API void* rtcGetGeometryUserDataFromTraversable(RTCTraversable traversable, unsigned int geomID);
+
+/* Returns the interpolated transformation of an instance for the specified time. */
+RTC_SYCL_API void rtcGetGeometryTransformFromTraversable(RTCTraversable traversable, unsigned int geomID, float time, enum RTCFormat format, void* xfm);
+
+/* Perform a closest point query of the scene. */
+RTC_API bool rtcTraversablePointQuery(RTCTraversable traversable, struct RTCPointQuery* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void* userPtr);
+
+/* Perform a closest point query with a packet of 4 points with the scene. */
+RTC_API bool rtcTraversablePointQuery4(const int* valid, RTCTraversable traversable, struct RTCPointQuery4* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void** userPtr);
+
+/* Perform a closest point query with a packet of 4 points with the scene. */
+RTC_API bool rtcTraversablePointQuery8(const int* valid, RTCTraversable traversable, struct RTCPointQuery8* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void** userPtr);
+
+/* Perform a closest point query with a packet of 4 points with the scene. */
+RTC_API bool rtcTraversablePointQuery16(const int* valid, RTCTraversable traversable, struct RTCPointQuery16* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void** userPtr);
+
+
+/* Intersects a single ray with the scene. */
+RTC_SYCL_API void rtcTraversableIntersect1(RTCTraversable traversable, struct RTCRayHit* rayhit, struct RTCIntersectArguments* args RTC_OPTIONAL_ARGUMENT);
+
+/* Intersects a packet of 4 rays with the scene. */
+RTC_API void rtcTraversableIntersect4(const int* valid, RTCTraversable traversable, struct RTCRayHit4* rayhit, struct RTCIntersectArguments* args RTC_OPTIONAL_ARGUMENT);
+
+/* Intersects a packet of 8 rays with the scene. */
+RTC_API void rtcTraversableIntersect8(const int* valid, RTCTraversable traversable, struct RTCRayHit8* rayhit, struct RTCIntersectArguments* args RTC_OPTIONAL_ARGUMENT);
+
+/* Intersects a packet of 16 rays with the scene. */
+RTC_API void rtcTraversableIntersect16(const int* valid, RTCTraversable traversable, struct RTCRayHit16* rayhit, struct RTCIntersectArguments* args RTC_OPTIONAL_ARGUMENT);
+
+
+/* Forwards ray inside user geometry callback. */
+RTC_SYCL_API void rtcTraversableForwardIntersect1(const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay* ray, unsigned int instID);
+
+/* Forwards ray inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
+RTC_SYCL_API void rtcTraversableForwardIntersect1Ex(const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay* ray, unsigned int instID, unsigned int instPrimID);
+
+/* Forwards ray packet of size 4 inside user geometry callback. */
+RTC_API void rtcTraversableForwardIntersect4(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay4* ray, unsigned int instID);
+
+/* Forwards ray packet of size 4 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
+RTC_API void rtcTraversableForwardIntersect4Ex(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay4* ray, unsigned int instID, unsigned int primInstID);
+
+/* Forwards ray packet of size 8 inside user geometry callback. */
+RTC_API void rtcTraversableForwardIntersect8(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay8* ray, unsigned int instID);
+
+/* Forwards ray packet of size 4 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
+RTC_API void rtcTraversableForwardIntersect8Ex(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay8* ray, unsigned int instID, unsigned int primInstID);
+
+/* Forwards ray packet of size 16 inside user geometry callback. */
+RTC_API void rtcTraversableForwardIntersect16(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay16* ray, unsigned int instID);
+
+/* Forwards ray packet of size 4 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
+RTC_API void rtcTraversableForwardIntersect16Ex(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay16* ray, unsigned int instID, unsigned int primInstID);
+
+
+/* Tests a single ray for occlusion with the scene. */
+RTC_SYCL_API void rtcTraversableOccluded1(RTCTraversable traversable, struct RTCRay* ray, struct RTCOccludedArguments* args RTC_OPTIONAL_ARGUMENT);
+
+/* Tests a packet of 4 rays for occlusion occluded with the scene. */
+RTC_API void rtcTraversableOccluded4(const int* valid, RTCTraversable traversable, struct RTCRay4* ray, struct RTCOccludedArguments* args RTC_OPTIONAL_ARGUMENT);
+
+/* Tests a packet of 8 rays for occlusion with the scene. */
+RTC_API void rtcTraversableOccluded8(const int* valid, RTCTraversable traversable, struct RTCRay8* ray, struct RTCOccludedArguments* args RTC_OPTIONAL_ARGUMENT);
+
+/* Tests a packet of 16 rays for occlusion with the scene. */
+RTC_API void rtcTraversableOccluded16(const int* valid, RTCTraversable traversable, struct RTCRay16* ray, struct RTCOccludedArguments* args RTC_OPTIONAL_ARGUMENT);
+
+
+/* Forwards single occlusion ray inside user geometry callback. */
+RTC_SYCL_API void rtcTraversableForwardOccluded1(const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay* ray, unsigned int instID);
+
+/* Forwards single occlusion ray inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
+RTC_SYCL_API void rtcTraversableForwardOccluded1Ex(const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay* ray, unsigned int instID, unsigned int instPrimID);
+
+/* Forwards occlusion ray packet of size 4 inside user geometry callback. */
+RTC_API void rtcTraversableForwardOccluded4(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay4* ray, unsigned int instID);
+
+/* Forwards occlusion ray packet of size 4 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
+RTC_API void rtcTraversableForwardOccluded4Ex(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay4* ray, unsigned int instID, unsigned int instPrimID);
+
+/* Forwards occlusion ray packet of size 8 inside user geometry callback. */
+RTC_API void rtcTraversableForwardOccluded8(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay8* ray, unsigned int instID);
+
+/* Forwards occlusion ray packet of size 8 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
+RTC_API void rtcTraversableForwardOccluded8Ex(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay8* ray, unsigned int instID, unsigned int instPrimID);
+
+/* Forwards occlusion ray packet of size 16 inside user geometry callback. */
+RTC_API void rtcTraversableForwardOccluded16(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay16* ray, unsigned int instID);
+
+/* Forwards occlusion ray packet of size 16 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
+RTC_API void rtcTraversableForwardOccluded16Ex(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay16* ray, unsigned int instID, unsigned int instPrimID);
+
 
 /*! collision callback */
 struct RTCCollision { unsigned int geomID0; unsigned int primID0; unsigned int geomID1; unsigned int primID1; };
@@ -238,7 +341,7 @@ typedef void (*RTCCollideFunc) (void* userPtr, struct RTCCollision* collisions,
 
 /*! Performs collision detection of two scenes */
 RTC_API void rtcCollide (RTCScene scene0, RTCScene scene1, RTCCollideFunc callback, void* userPtr);
- 
+
 #if defined(__cplusplus)
 
 /* Helper for easily combining scene flags */

+ 1 - 2
thirdparty/embree/kernels/builders/bvh_builder_sah.h

@@ -48,10 +48,9 @@ namespace embree
         }
 
         Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold, size_t primrefarrayalloc = inf)
-        : branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize),
+        : branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(min(minLeafSize,maxLeafSize)), maxLeafSize(maxLeafSize),
           travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold), primrefarrayalloc(primrefarrayalloc)
         {
-          minLeafSize = min(minLeafSize,maxLeafSize);
         }
 
       public:

+ 3 - 5
thirdparty/embree/kernels/builders/heuristic_spatial_array.h

@@ -283,11 +283,9 @@ namespace embree
 
                 if (likely(splits <= 1)) continue; /* todo: does this ever happen ? */
 
-                //int bin0 = split.mapping.bin(prims0[i].lower)[split.dim];
-                //int bin1 = split.mapping.bin(prims0[i].upper)[split.dim];
-                //if (unlikely(bin0 < split.pos && bin1 >= split.pos))
-
-                if (unlikely(prims0[i].lower[split.dim] < fpos && prims0[i].upper[split.dim] > fpos))
+                const int bin0 = split.mapping.bin(prims0[i].lower)[split.dim];
+                const int bin1 = split.mapping.bin(prims0[i].upper)[split.dim];
+                if (unlikely(bin0 < split.pos && bin1 >= split.pos))
                 {
                   assert(splits > 1);
 

+ 3 - 3
thirdparty/embree/kernels/bvh/bvh_builder_sah_spatial.cpp

@@ -179,9 +179,9 @@ namespace embree
 
 #if defined(EMBREE_GEOMETRY_TRIANGLE)
 
-    Builder* BVH4Triangle4SceneBuilderFastSpatialSAH  (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); }
-    Builder* BVH4Triangle4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4v,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); }
-    Builder* BVH4Triangle4iSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4i,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); }
+    Builder* BVH4Triangle4SceneBuilderFastSpatialSAH  (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,scene->device->max_triangles_per_leaf,mode); }
+    Builder* BVH4Triangle4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4v,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,scene->device->max_triangles_per_leaf,mode); }
+    Builder* BVH4Triangle4iSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4i,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,scene->device->max_triangles_per_leaf,mode); }
 
 #if defined(__AVX__)
     Builder* BVH8Triangle4SceneBuilderFastSpatialSAH  (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<8,TriangleMesh,Triangle4,TriangleSplitterFactory>((BVH8*)bvh,scene,4,1.0f,4,inf,mode); }

+ 1 - 2
thirdparty/embree/kernels/bvh/bvh_statistics.cpp

@@ -150,8 +150,7 @@ namespace embree
       }
     }
     else {
-      //throw std::runtime_error("not supported node type in bvh_statistics");
-      abort();
+      abort(); //throw std::runtime_error("not supported node type in bvh_statistics");
     }
     return s;
   } 

+ 4 - 10
thirdparty/embree/kernels/common/alloc.h

@@ -189,11 +189,8 @@ namespace embree
       , atype(osAllocation ? EMBREE_OS_MALLOC : ALIGNED_MALLOC)
       , primrefarray(device,0)
     {
-      //if (osAllocation && useUSM)
-      //  throw std::runtime_error("USM allocation cannot be combined with OS allocation.");
-      if (osAllocation && useUSM) {
-        abort();
-      }
+      if (osAllocation && useUSM)
+        abort(); //throw std::runtime_error("USM allocation cannot be combined with OS allocation.");
 
       for (size_t i=0; i<MAX_THREAD_USED_BLOCK_SLOTS; i++)
       {
@@ -505,11 +502,8 @@ namespace embree
         Block* myUsedBlocks = threadUsedBlocks[slot];
         if (myUsedBlocks) {
           void* ptr = myUsedBlocks->malloc(device,bytes,align,partial);
-          //if (ptr == nullptr && !blockAllocation)
-          //  throw std::bad_alloc();
-          if (ptr == nullptr && !blockAllocation) {
-            abort();
-          }
+          if (ptr == nullptr && !blockAllocation)
+            abort(); //throw std::bad_alloc();
           if (ptr) return ptr;
         }
 

+ 220 - 79
thirdparty/embree/kernels/common/buffer.h

@@ -8,120 +8,232 @@
 
 namespace embree
 {
+  enum class BufferDataPointerType {
+    HOST = 0,
+    DEVICE = 1,
+    UNKNOWN = 2
+  };
+
   /*! Implements an API data buffer object. This class may or may not own the data. */
   class Buffer : public RefCount
   {
+  private:
+    char* alloc(void* ptr_in, bool &shared, EmbreeMemoryType memoryType)
+    {
+      if (ptr_in)
+      {
+        shared = true;
+        return (char*)ptr_in;
+      }
+      else
+      {
+        shared = false;
+        device->memoryMonitor(this->bytes(), false);
+        size_t b = (this->bytes()+15) & ssize_t(-16);
+        return (char*)device->malloc(b,16,memoryType);
+      }
+    }
+
   public:
-    /*! Buffer construction */
-    //Buffer() 
-    //: device(nullptr), ptr(nullptr), numBytes(0), shared(false) {}
+    Buffer(Device* device, size_t numBytes_in, void* ptr_in)
+      : device(device), numBytes(numBytes_in)
+    {
+      device->refInc();
 
-    /*! Buffer construction */
-    Buffer(Device* device, size_t numBytes_in, void* ptr_in = nullptr)
+      ptr = alloc(ptr_in, shared, EmbreeMemoryType::USM_SHARED);
+#if defined(EMBREE_SYCL_SUPPORT)
+      dshared = true;
+      dptr = ptr;
+      modified = true;
+#endif
+    }
+
+    Buffer(Device* device, size_t numBytes_in, void* ptr_in, void* dptr_in)
       : device(device), numBytes(numBytes_in)
     {
       device->refInc();
-      
-      if (ptr_in)
+
+#if defined(EMBREE_SYCL_SUPPORT)
+      modified = true;
+      if (device->is_gpu() && !device->has_unified_memory())
       {
-        shared = true;
-        ptr = (char*)ptr_in;
+        ptr  = alloc( ptr_in,  shared, EmbreeMemoryType::MALLOC);
+        dptr = alloc(dptr_in, dshared, EmbreeMemoryType::USM_DEVICE);
+      }
+      else if (device->is_gpu() && device->has_unified_memory())
+      {
+        ptr = alloc(ptr_in, shared, EmbreeMemoryType::USM_SHARED);
+
+        if (device->get_memory_type(ptr) != EmbreeMemoryType::USM_SHARED)
+        {
+          dptr = alloc(dptr_in, dshared, EmbreeMemoryType::USM_DEVICE);
+        }
+        else
+        {
+          dshared = true;
+          dptr = ptr;
+        }
       }
       else
+#endif
       {
-        shared = false;
-        alloc();
+        ptr = alloc(ptr_in, shared, EmbreeMemoryType::MALLOC);
+#if defined(EMBREE_SYCL_SUPPORT)
+        dshared = true;
+        dptr = ptr;
+#endif
       }
     }
-    
+
     /*! Buffer destruction */
-    ~Buffer() {
+    virtual ~Buffer() {
       free();
       device->refDec();
     }
-    
+
     /*! this class is not copyable */
   private:
     Buffer(const Buffer& other) DELETED; // do not implement
     Buffer& operator =(const Buffer& other) DELETED; // do not implement
-    
+
   public:
-    /* inits and allocates the buffer */
-    void create(Device* device_in, size_t numBytes_in)
-    {
-      init(device_in, numBytes_in);
-      alloc();
-    }
-    
-    /* inits the buffer */
-    void init(Device* device_in, size_t numBytes_in)
-    {
-      free();
-      device = device_in;
-      ptr = nullptr;
-      numBytes = numBytes_in;
-      shared = false;
-    }
 
-    /*! sets shared buffer */
-    void set(Device* device_in, void* ptr_in, size_t numBytes_in)
-    {
-      free();
-      device = device_in;
-      ptr = (char*)ptr_in;
-      if (numBytes_in != (size_t)-1)
-        numBytes = numBytes_in;
-      shared = true;
-    }
-    
-    /*! allocated buffer */
-    void alloc()
-    {
-      device->memoryMonitor(this->bytes(), false);
-      size_t b = (this->bytes()+15) & ssize_t(-16);
-      ptr = (char*)device->malloc(b,16);
-    }
-    
     /*! frees the buffer */
-    void free()
+    virtual void free()
     {
-      if (shared) return;
-      device->free(ptr); 
-      device->memoryMonitor(-ssize_t(this->bytes()), true);
-      ptr = nullptr;
+      if (!shared && ptr) {
+#if defined(EMBREE_SYCL_SUPPORT)
+        if (dptr == ptr) {
+          dptr = nullptr;
+        }
+#endif
+        device->free(ptr);
+        device->memoryMonitor(-ssize_t(this->bytes()), true);
+        ptr = nullptr;
+      }
+#if defined(EMBREE_SYCL_SUPPORT)
+      if (!dshared && dptr) {
+        device->free(dptr);
+        device->memoryMonitor(-ssize_t(this->bytes()), true);
+        dptr = nullptr;
+      }
+#endif
     }
-    
+
     /*! gets buffer pointer */
     void* data()
     {
       /* report error if buffer is not existing */
       if (!device)
         throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer specified");
-      
+
+      /* return buffer */
+      return ptr;
+    }
+
+    /*! gets buffer pointer */
+    void* dataDevice()
+    {
+      /* report error if buffer is not existing */
+      if (!device)
+        throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer specified");
+
       /* return buffer */
+#if defined(EMBREE_SYCL_SUPPORT)
+      return dptr;
+#else
       return ptr;
+#endif
     }
 
     /*! returns pointer to first element */
-    __forceinline char* getPtr() const {
+    __forceinline char* getPtr(BufferDataPointerType type) const
+    {
+      if (type == BufferDataPointerType::HOST) return getHostPtr();
+      else if (type == BufferDataPointerType::DEVICE) return getDevicePtr();
+
+      throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer data pointer type specified");
+      return nullptr;
+    }
+
+    /*! returns pointer to first element */
+    __forceinline virtual char* getHostPtr() const {
       return ptr;
     }
 
+    /*! returns pointer to first element */
+    __forceinline virtual char* getDevicePtr() const {
+#if defined(EMBREE_SYCL_SUPPORT)
+      return dptr;
+#else
+      return ptr;
+#endif
+    }
+
     /*! returns the number of bytes of the buffer */
-    __forceinline size_t bytes() const { 
+    __forceinline size_t bytes() const {
       return numBytes;
     }
-    
+
     /*! returns true of the buffer is not empty */
-    __forceinline operator bool() const { 
-      return ptr; 
+    __forceinline operator bool() const {
+      return ptr;
+    }
+
+    __forceinline void commit() {
+#if defined(EMBREE_SYCL_SUPPORT)
+      DeviceGPU* gpu_device = dynamic_cast<DeviceGPU*>(device);
+      if (gpu_device) {
+        sycl::queue queue(gpu_device->getGPUDevice());
+        commit(queue);
+        queue.wait_and_throw();
+      }
+      modified = false;
+#endif
+    }
+
+#if defined(EMBREE_SYCL_SUPPORT)
+    __forceinline sycl::event commit(sycl::queue queue) {
+      if (dptr == ptr)
+        return sycl::event();
+
+      modified = false;
+      return queue.memcpy(dptr, ptr, numBytes);
+    }
+#endif
+
+    __forceinline bool needsCommit() const {
+#if defined(EMBREE_SYCL_SUPPORT)
+     return (dptr == ptr) ? false : modified;
+#else
+      return false;
+#endif
+    }
+
+    __forceinline void setNeedsCommit(bool isModified = true) {
+#if defined(EMBREE_SYCL_SUPPORT)
+      modified = isModified;
+#endif
+    }
+
+    __forceinline void commitIfNeeded() {
+      if (needsCommit()) {
+        commit();
+      }
     }
 
   public:
-    Device* device;  //!< device to report memory usage to
-    char* ptr;       //!< pointer to buffer data
-    size_t numBytes; //!< number of bytes in the buffer
-    bool shared;     //!< set if memory is shared with application
+    Device* device;      //!< device to report memory usage to
+    size_t numBytes;     //!< number of bytes in the buffer
+    char* ptr;           //!< pointer to buffer data
+#if defined(EMBREE_SYCL_SUPPORT)
+    char* dptr;          //!< pointer to buffer data on device
+#endif
+    bool shared;         //!< set if memory is shared with application
+#if defined(EMBREE_SYCL_SUPPORT)
+    bool dshared;        //!< set if device memory is shared with application
+    bool modified;       //!< to be set when host memory has been modified and dev needs update
+#endif
   };
 
   /*! An untyped contiguous range of a buffer. This class does not own the buffer content. */
@@ -130,7 +242,7 @@ namespace embree
   public:
     /*! Buffer construction */
     RawBufferView()
-      : ptr_ofs(nullptr), stride(0), num(0), format(RTC_FORMAT_UNDEFINED), modCounter(1), modified(true), userData(0) {}
+      : ptr_ofs(nullptr), dptr_ofs(nullptr), stride(0), num(0), format(RTC_FORMAT_UNDEFINED), modCounter(1), modified(true), userData(0) {}
 
   public:
     /*! sets the buffer view */
@@ -139,7 +251,8 @@ namespace embree
       if ((offset_in + stride_in * num_in) > (stride_in * buffer_in->numBytes))
         throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "buffer range out of bounds");
 
-      ptr_ofs = buffer_in->ptr + offset_in;
+      ptr_ofs = buffer_in->getHostPtr() + offset_in;
+      dptr_ofs = buffer_in->getDevicePtr() + offset_in;
       stride = stride_in;
       num = num_in;
       format = format_in;
@@ -148,28 +261,48 @@ namespace embree
       buffer = buffer_in;
     }
 
+    /*! returns pointer to the i'th element */
+    __forceinline char* getPtr(BufferDataPointerType pointerType) const
+    {
+      if (pointerType == BufferDataPointerType::HOST)
+        return ptr_ofs;
+      else if (pointerType == BufferDataPointerType::DEVICE)
+        return dptr_ofs;
+
+      throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer data pointer type specified");
+      return nullptr;
+    }
+
     /*! returns pointer to the first element */
     __forceinline char* getPtr() const {
-      return ptr_ofs;
+      #if defined(__SYCL_DEVICE_ONLY__)
+        return dptr_ofs;
+      #else
+        return ptr_ofs;
+      #endif
     }
 
     /*! returns pointer to the i'th element */
     __forceinline char* getPtr(size_t i) const
     {
-      assert(i<num);
-      return ptr_ofs + i*stride;
+      #if defined(__SYCL_DEVICE_ONLY__)
+        assert(i<num);
+        return dptr_ofs + i*stride;
+      #else
+        return ptr_ofs + i*stride;
+      #endif
     }
 
     /*! returns the number of elements of the buffer */
-    __forceinline size_t size() const { 
-      return num; 
+    __forceinline size_t size() const {
+      return num;
     }
 
     /*! returns the number of bytes of the buffer */
-    __forceinline size_t bytes() const { 
-      return num*stride; 
+    __forceinline size_t bytes() const {
+      return num*stride;
     }
-    
+
     /*! returns the buffer stride */
     __forceinline unsigned getStride() const
     {
@@ -186,6 +319,7 @@ namespace embree
     __forceinline void setModified() {
       modCounter++;
       modified = true;
+      if (buffer) buffer->setNeedsCommit();
     }
 
     /*! mark buffer as modified or unmodified */
@@ -205,7 +339,7 @@ namespace embree
 
     /*! returns true of the buffer is not empty */
     __forceinline operator bool() const { 
-      return ptr_ofs; 
+      return ptr_ofs;
     }
 
     /*! checks padding to 16 byte check, fails hard */
@@ -217,6 +351,7 @@ namespace embree
 
   public:
     char* ptr_ofs;      //!< base pointer plus offset
+    char* dptr_ofs;     //!< base pointer plus offset in device memory
     size_t stride;      //!< stride of the buffer in bytes
     size_t num;         //!< number of elements in the buffer
     RTCFormat format;   //!< format of the buffer
@@ -233,9 +368,15 @@ namespace embree
   public:
     typedef T value_type;
 
+#if defined(__SYCL_DEVICE_ONLY__)
+    /*! access to the ith element of the buffer */
+    __forceinline       T& operator [](size_t i)       { assert(i<num); return *(T*)(dptr_ofs + i*stride); }
+    __forceinline const T& operator [](size_t i) const { assert(i<num); return *(T*)(dptr_ofs + i*stride); }
+#else
     /*! access to the ith element of the buffer */
     __forceinline       T& operator [](size_t i)       { assert(i<num); return *(T*)(ptr_ofs + i*stride); }
     __forceinline const T& operator [](size_t i) const { assert(i<num); return *(T*)(ptr_ofs + i*stride); }
+#endif
   };
 
   template<>
@@ -250,14 +391,14 @@ namespace embree
     __forceinline const Vec3fa operator [](size_t i) const
     {
       assert(i<num);
-      return Vec3fa::loadu(ptr_ofs + i*stride);
+      return Vec3fa::loadu(dptr_ofs + i*stride);
     }
     
     /*! writes the i'th element */
     __forceinline void store(size_t i, const Vec3fa& v)
     {
       assert(i<num);
-      Vec3fa::storeu(ptr_ofs + i*stride, v);
+      Vec3fa::storeu(dptr_ofs + i*stride, v);
     }
     
 #else

+ 99 - 45
thirdparty/embree/kernels/common/device.cpp

@@ -229,6 +229,7 @@ namespace embree
 #endif
     std::cout << std::endl;
 
+#if defined(__X86_64__)
     /* check of FTZ and DAZ flags are set in CSR */
     if (!hasFTZ || !hasDAZ) 
     {
@@ -252,57 +253,68 @@ namespace embree
         std::cout << std::endl;
       }
     }
+#endif
     std::cout << std::endl;
   }
 
-  void Device::setDeviceErrorCode(RTCError error)
+  void Device::setDeviceErrorCode(RTCError error, std::string const& msg)
   {
-    RTCError* stored_error = errorHandler.error();
-    if (*stored_error == RTC_ERROR_NONE)
-      *stored_error = error;
+    RTCErrorMessage* stored_error = errorHandler.error();
+    if (stored_error->error == RTC_ERROR_NONE) {
+      stored_error->error = error;
+      if (msg != "")
+        stored_error->msg = msg;
+    }
   }
 
   RTCError Device::getDeviceErrorCode()
   {
-    RTCError* stored_error = errorHandler.error();
-    RTCError error = *stored_error;
-    *stored_error = RTC_ERROR_NONE;
-    return error;
+    RTCErrorMessage* stored_error = errorHandler.error();
+    RTCErrorMessage error = *stored_error;
+    stored_error->error = RTC_ERROR_NONE;
+    return error.error;
   }
 
-  void Device::setThreadErrorCode(RTCError error)
+  const char* Device::getDeviceLastErrorMessage()
   {
-    RTCError* stored_error = g_errorHandler.error();
-    if (*stored_error == RTC_ERROR_NONE)
-      *stored_error = error;
+    RTCErrorMessage* stored_error = errorHandler.error();
+    return stored_error->msg.c_str();
+  }
+
+  void Device::setThreadErrorCode(RTCError error, std::string const& msg)
+  {
+    RTCErrorMessage* stored_error = g_errorHandler.error();
+    if (stored_error->error == RTC_ERROR_NONE) {
+      stored_error->error = error;
+      if (msg != "")
+        stored_error->msg = msg;
+    }
   }
 
   RTCError Device::getThreadErrorCode()
   {
-    RTCError* stored_error = g_errorHandler.error();
-    RTCError error = *stored_error;
-    *stored_error = RTC_ERROR_NONE;
-    return error;
+    RTCErrorMessage* stored_error = g_errorHandler.error();
+    RTCErrorMessage error = *stored_error;
+    stored_error->error = RTC_ERROR_NONE;
+    return error.error;
+  }
+
+  const char* Device::getThreadLastErrorMessage()
+  {
+    RTCErrorMessage* stored_error = g_errorHandler.error();
+    return stored_error->msg.c_str();
   }
 
   void Device::process_error(Device* device, RTCError error, const char* str)
-  { 
+  {
     /* store global error code when device construction failed */
     if (!device)
-      return setThreadErrorCode(error);
+      return setThreadErrorCode(error, str ? std::string(str) : std::string());
 
     /* print error when in verbose mode */
-    if (device->verbosity(1)) 
+    if (device->verbosity(1))
     {
-      switch (error) {
-      case RTC_ERROR_NONE         : std::cerr << "Embree: No error"; break;
-      case RTC_ERROR_UNKNOWN    : std::cerr << "Embree: Unknown error"; break;
-      case RTC_ERROR_INVALID_ARGUMENT : std::cerr << "Embree: Invalid argument"; break;
-      case RTC_ERROR_INVALID_OPERATION: std::cerr << "Embree: Invalid operation"; break;
-      case RTC_ERROR_OUT_OF_MEMORY    : std::cerr << "Embree: Out of memory"; break;
-      case RTC_ERROR_UNSUPPORTED_CPU  : std::cerr << "Embree: Unsupported CPU"; break;
-      default                   : std::cerr << "Embree: Invalid error code"; break;                   
-      };
+      std::cerr << "Embree: " << getErrorString(error);
       if (str) std::cerr << ", (" << str << ")";
       std::cerr << std::endl;
     }
@@ -312,7 +324,7 @@ namespace embree
       device->error_function(device->error_function_userptr,error,str); 
 
     /* record error code */
-    device->setDeviceErrorCode(error);
+    device->setDeviceErrorCode(error, str ? std::string(str) : std::string());
   }
 
   void Device::memoryMonitor(ssize_t bytes, bool post)
@@ -570,6 +582,22 @@ namespace embree
     case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED: return 0;
 #endif
 
+#if defined(EMBREE_SYCL_SUPPORT)
+    case RTC_DEVICE_PROPERTY_CPU_DEVICE:  {
+      if (!dynamic_cast<DeviceGPU*>(this))
+        return 1;
+      return 0;
+    };
+    case RTC_DEVICE_PROPERTY_SYCL_DEVICE: {
+      if (!dynamic_cast<DeviceGPU*>(this))
+        return 0;
+      return 1;
+    };
+#else
+    case RTC_DEVICE_PROPERTY_CPU_DEVICE:  return 1;
+    case RTC_DEVICE_PROPERTY_SYCL_DEVICE: return 0;
+#endif
+
     default: throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown readable property"); break;
     };
   }
@@ -578,10 +606,31 @@ namespace embree
     return alignedMalloc(size,align);
   }
 
+  void* Device::malloc(size_t size, size_t align, EmbreeMemoryType type) {
+    return alignedMalloc(size,align);
+  }
+
   void Device::free(void* ptr) {
     alignedFree(ptr);
   }
 
+  const std::vector<std::string> Device::error_strings = {
+    "No Error",
+    "Unknown error",
+    "Invalid argument",
+    "Invalid operation",
+    "Out of Memory",
+    "Unsupported CPU",
+    "Build cancelled",
+    "Level Zero raytracing support missing"
+  };
+
+  const char* Device::getErrorString(RTCError error) {
+    if (error >= 0 && error < error_strings.size()) {
+      return error_strings.at(error).c_str();
+    }
+    return "Invalid error code";
+  }
 
 #if defined(EMBREE_SYCL_SUPPORT)
 
@@ -613,7 +662,6 @@ namespace embree
     if (result != ZE_RESULT_SUCCESS)
       throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");
 
-#if defined(EMBREE_SYCL_L0_RTAS_BUILDER)
     bool ze_rtas_builder = false;
     for (uint32_t i=0; i<extensions.size(); i++)
     {
@@ -621,23 +669,18 @@ namespace embree
         ze_rtas_builder = true;
     }
     if (!ze_rtas_builder)
-      throw_RTCError(RTC_ERROR_UNKNOWN, "ZE_experimental_rtas_builder extension not found");
+      throw_RTCError(RTC_ERROR_LEVEL_ZERO_RAYTRACING_SUPPORT_MISSING, "ZE_experimental_rtas_builder extension not found. Please install a recent driver. On Linux, make sure that the package intel-level-zero-gpu-raytracing is installed");
 
-    result = ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::LEVEL_ZERO);
-    if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)
-      throw_RTCError(RTC_ERROR_UNKNOWN, "cannot load ZE_experimental_rtas_builder extension");
+    result = ZeWrapper::initRTASBuilder(hDriver);
+    if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE) {
+      throw_RTCError(RTC_ERROR_LEVEL_ZERO_RAYTRACING_SUPPORT_MISSING, "cannot load ZE_experimental_rtas_builder extension. Please install a recent driver. On Linux, make sure that the package intel-level-zero-gpu-raytracing is installed");
+    }
     if (result != ZE_RESULT_SUCCESS)
       throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZE_experimental_rtas_builder extension");
-#else
-    ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::INTERNAL);
-#endif
 
     if (State::verbosity(1))
     {
-      if (ZeWrapper::rtas_builder == ZeWrapper::INTERNAL)
-        std::cout << "  Internal RTAS Builder" << std::endl;
-      else
-        std::cout << "  Level Zero RTAS Builder" << std::endl;
+      std::cout << "  Level Zero RTAS Builder" << std::endl;
     }
 
     /* check if extension library can get loaded */
@@ -670,15 +713,17 @@ namespace embree
   }
 
   void DeviceGPU::enter() {
-    enableUSMAllocEmbree(&gpu_context,&gpu_device);
   }
 
   void DeviceGPU::leave() {
-    disableUSMAllocEmbree();
   }
 
   void* DeviceGPU::malloc(size_t size, size_t align) {
-    return alignedSYCLMalloc(&gpu_context,&gpu_device,size,align,EMBREE_USM_SHARED_DEVICE_READ_ONLY);
+    return alignedSYCLMalloc(&gpu_context,&gpu_device,size,align,EmbreeUSMMode::DEVICE_READ_ONLY);
+  }
+
+  void* DeviceGPU::malloc(size_t size, size_t align, EmbreeMemoryType type) {
+    return alignedSYCLMalloc(&gpu_context,&gpu_device,size,align,EmbreeUSMMode::DEVICE_READ_ONLY,type);
   }
 
   void DeviceGPU::free(void* ptr) {
@@ -688,7 +733,16 @@ namespace embree
   void DeviceGPU::setSYCLDevice(const sycl::device sycl_device_in) {
     gpu_device = sycl_device_in;
   }
-  
+
+  // turn off deprecation warning for host_unified_memory property usage.
+  // there is currently no equivalent SYCL aspect that replaces this property.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+  bool DeviceGPU::has_unified_memory() const {
+    return gpu_device.get_info<sycl::info::device::host_unified_memory>();
+  }
+#pragma GCC diagnostic pop
+
 #endif
 
   DeviceEnterLeave::DeviceEnterLeave (RTCDevice hdevice)

+ 48 - 4
thirdparty/embree/kernels/common/device.h

@@ -38,7 +38,7 @@ namespace embree
       
       __forceinline pointer allocate( size_type n ) {
         assert(device);
-        return (pointer) device->malloc(n*sizeof(T),alignment);
+        return (pointer) device->malloc(n*sizeof(T),alignment,EmbreeMemoryType::MALLOC);
       }
       
       __forceinline void deallocate( pointer p, size_type n ) {
@@ -75,17 +75,27 @@ namespace embree
     void print();
 
     /*! sets the error code */
-    void setDeviceErrorCode(RTCError error);
+    void setDeviceErrorCode(RTCError error, std::string const& msg = "");
 
     /*! returns and clears the error code */
     RTCError getDeviceErrorCode();
 
+    /*! Returns the string representation for the error code. For example, for RTC_ERROR_UNKNOWN the string "RTC_ERROR_UNKNOWN" will be returned. */
+    static char* getDeviceErrorString();
+
+    /*! returns the last error message */
+    const char* getDeviceLastErrorMessage();
+
     /*! sets the error code */
-    static void setThreadErrorCode(RTCError error);
+    static void setThreadErrorCode(RTCError error, std::string const& msg = "");
 
     /*! returns and clears the error code */
     static RTCError getThreadErrorCode();
 
+
+    /*! returns the last error message */
+    static const char* getThreadLastErrorMessage();
+
     /*! processes error codes, do not call directly */
     static void process_error(Device* device, RTCError error, const char* str);
 
@@ -107,12 +117,23 @@ namespace embree
     /*! leave device by setting up some global state */
     virtual void leave() {}
 
-    /*! buffer allocation */
+    /*! buffer allocation - using USM shared */
     virtual void* malloc(size_t size, size_t align);
 
+    /*! buffer allocation */
+    virtual void* malloc(size_t size, size_t align, EmbreeMemoryType type);
+
     /*! buffer deallocation */
     virtual void free(void* ptr);
 
+    /*! returns true if device is of type DeviceGPU */
+    virtual bool is_gpu() const { return false; }
+
+    /*! returns true if device and host have shared memory system (e.g., integrated GPU) */
+    virtual bool has_unified_memory() const { return true; }
+
+    virtual EmbreeMemoryType get_memory_type(void* ptr) const { return EmbreeMemoryType::MALLOC; }
+
   private:
 
     /*! initializes the tasking system */
@@ -140,6 +161,13 @@ namespace embree
 #if defined(EMBREE_TARGET_SIMD8)
     std::unique_ptr<BVH8Factory> bvh8_factory;
 #endif
+
+  private:
+    static const std::vector<std::string> error_strings;
+
+  public:
+    static const char* getErrorString(RTCError error);
+
   };
 
 #if defined(EMBREE_SYCL_SUPPORT)
@@ -154,11 +182,27 @@ namespace embree
     virtual void enter() override;
     virtual void leave() override;
     virtual void* malloc(size_t size, size_t align) override;
+    virtual void* malloc(size_t size, size_t align, EmbreeMemoryType type) override;
     virtual void free(void* ptr) override;
 
     /* set SYCL device */
     void setSYCLDevice(const sycl::device sycl_device);
 
+    /*! returns true if device is of type DeviceGPU */
+    virtual bool is_gpu() const override { return true; }
+
+    /*! returns true if device and host have shared memory system (e.g., integrated GPU) */
+    virtual bool has_unified_memory() const override;
+
+    virtual EmbreeMemoryType get_memory_type(void* ptr) const override {
+      switch(sycl::get_pointer_type(ptr, gpu_context)) {
+        case sycl::usm::alloc::host: return EmbreeMemoryType::USM_HOST;
+        case sycl::usm::alloc::device: return EmbreeMemoryType::USM_DEVICE;
+        case sycl::usm::alloc::shared: return EmbreeMemoryType::USM_SHARED;
+        default: return EmbreeMemoryType::MALLOC;
+      }
+    }
+
   private:
     sycl::context gpu_context;
     sycl::device  gpu_device;

+ 1 - 1
thirdparty/embree/kernels/common/geometry.cpp

@@ -116,7 +116,7 @@ namespace embree
   {
   }
 
-  void Geometry::enable () 
+  void Geometry::enable ()
   {
     if (isEnabled()) 
       return;

+ 14 - 5
thirdparty/embree/kernels/common/geometry.h

@@ -126,10 +126,8 @@ namespace embree
   };
 
   /*! Base class all geometries are derived from */
-  class Geometry : public RefCount
+  class __aligned(16) Geometry : public RefCount
   {
-    ALIGNED_CLASS_USM_(16);
-    
     friend class Scene;
   public:
 
@@ -372,7 +370,7 @@ namespace embree
 
     /*! called before every build */
     virtual void preCommit();
-  
+
     /*! called after every build */
     virtual void postCommit();
 
@@ -469,7 +467,7 @@ namespace embree
     }
 
     /*! Gets specified buffer. */
-    virtual void* getBuffer(RTCBufferType type, unsigned int slot) {
+    virtual void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType) {
       throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
     }
 
@@ -543,6 +541,17 @@ namespace embree
       return numTimeSteps-1;
     }
 
+  public:
+
+    /*! methods for converting host geometry data to device geometry data */
+    virtual size_t getGeometryDataDeviceByteSize() const {
+      throw_RTCError(RTC_ERROR_INVALID_OPERATION,"getGeometryDataDeviceByteSize not implemented for this geometry");
+    }
+
+    virtual void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const {
+      throw_RTCError(RTC_ERROR_INVALID_OPERATION,"convertToDeviceRepresentation not implemented for this geometry");
+    }
+
   public:
 
     virtual PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const {

+ 1 - 1
thirdparty/embree/kernels/common/ray.h

@@ -143,7 +143,7 @@ namespace embree
       vbool<K> valid = valid0 & geomID != vuint<K>(RTC_INVALID_GEOMETRY_ID);
       const vbool<K> vt = (abs(tfar) <= vfloat<K>(FLT_LARGE)) | (tfar == vfloat<K>(neg_inf));
       const vbool<K> vu = (abs(u) <= vfloat<K>(FLT_LARGE));
-      const vbool<K> vv = (abs(u) <= vfloat<K>(FLT_LARGE));
+      const vbool<K> vv = (abs(v) <= vfloat<K>(FLT_LARGE));
       const vbool<K> vnx = abs(Ng.x) <= vfloat<K>(FLT_LARGE);
       const vbool<K> vny = abs(Ng.y) <= vfloat<K>(FLT_LARGE);
       const vbool<K> vnz = abs(Ng.z) <= vfloat<K>(FLT_LARGE);

+ 383 - 31
thirdparty/embree/kernels/common/rtcore.cpp

@@ -46,19 +46,23 @@ RTC_NAMESPACE_BEGIN;
 
   RTC_API bool rtcIsSYCLDeviceSupported(const sycl::device device)
   {
-    RTC_CATCH_BEGIN;
-    RTC_TRACE(rtcIsSYCLDeviceSupported);
-    return rthwifIsSYCLDeviceSupported(device) > 0;
-    RTC_CATCH_END(nullptr);
+    try {
+      RTC_TRACE(rtcIsSYCLDeviceSupported);
+      return rthwifIsSYCLDeviceSupported(device) > 0;
+    } catch (...) {
+      return false;
+    }
     return false;
   }
 
   RTC_API int rtcSYCLDeviceSelector(const sycl::device device)
   {
-    RTC_CATCH_BEGIN;
-    RTC_TRACE(rtcSYCLDeviceSelector);
-    return rthwifIsSYCLDeviceSupported(device);
-    RTC_CATCH_END(nullptr);
+    try {
+      RTC_TRACE(rtcSYCLDeviceSelector);
+      return rthwifIsSYCLDeviceSupported(device);
+    } catch (...) {
+      return -1;
+    }
     return -1;
   }
 
@@ -79,6 +83,29 @@ RTC_NAMESPACE_BEGIN;
     RTC_CATCH_END(nullptr);
   }
 
+  RTC_API_CPP sycl::event rtcCommitSceneWithQueue (RTCScene hscene, sycl::queue queue)
+  {
+    Scene* scene = (Scene*) hscene;
+    RTC_CATCH_BEGIN;
+    RTC_TRACE(rtcCommitSceneWithQueue);
+    RTC_VERIFY_HANDLE(hscene);
+    RTC_ENTER_DEVICE(hscene);
+    return scene->commit(false, queue);
+    RTC_CATCH_END2(scene);
+    return sycl::event();
+  }
+
+  RTC_API_CPP sycl::event rtcCommitBufferWithQueue(RTCBuffer hbuffer, sycl::queue queue) {
+    Buffer* buffer = (Buffer*)hbuffer;
+    RTC_CATCH_BEGIN;
+    RTC_TRACE(rtcCommitBufferWithQueue);
+    RTC_VERIFY_HANDLE(hbuffer);
+    RTC_ENTER_DEVICE(hbuffer);
+    return buffer->commit(queue);
+    RTC_CATCH_END2(buffer);
+    return sycl::event();
+  }
+
 #endif
 
   RTC_API void rtcRetainDevice(RTCDevice hdevice) 
@@ -138,6 +165,17 @@ RTC_NAMESPACE_BEGIN;
     return RTC_ERROR_UNKNOWN;
   }
 
+  RTC_API const char* rtcGetDeviceLastErrorMessage(RTCDevice hdevice)
+  {
+    Device* device = (Device*) hdevice;
+    RTC_CATCH_BEGIN;
+    RTC_TRACE(rtcGetDeviceLastErrorMessage);
+    if (device == nullptr) return Device::getThreadLastErrorMessage();
+    else                   return device->getDeviceLastErrorMessage();
+    RTC_CATCH_END(device);
+    return "";
+  }
+
   RTC_API void rtcSetDeviceErrorFunction(RTCDevice hdevice, RTCErrorFunction error, void* userPtr)
   {
     Device* device = (Device*) hdevice;
@@ -163,7 +201,19 @@ RTC_NAMESPACE_BEGIN;
     RTC_TRACE(rtcNewBuffer);
     RTC_VERIFY_HANDLE(hdevice);
     RTC_ENTER_DEVICE(hdevice);
-    Buffer* buffer = new Buffer((Device*)hdevice, byteSize);
+    Buffer* buffer = new Buffer((Device*)hdevice, byteSize, nullptr);
+    return (RTCBuffer)buffer->refInc();
+    RTC_CATCH_END((Device*)hdevice);
+    return nullptr;
+  }
+
+  RTC_API RTCBuffer rtcNewBufferHostDevice(RTCDevice hdevice, size_t byteSize)
+  {
+    RTC_CATCH_BEGIN;
+    RTC_TRACE(rtcNewBufferHostDevice);
+    RTC_VERIFY_HANDLE(hdevice);
+    RTC_ENTER_DEVICE(hdevice);
+    Buffer* buffer = new Buffer((Device*)hdevice, byteSize, nullptr, nullptr);
     return (RTCBuffer)buffer->refInc();
     RTC_CATCH_END((Device*)hdevice);
     return nullptr;
@@ -181,6 +231,30 @@ RTC_NAMESPACE_BEGIN;
     return nullptr;
   }
 
+  RTC_API RTCBuffer rtcNewSharedBufferHostDevice(RTCDevice hdevice, void* ptr, size_t byteSize)
+  {
+    RTC_CATCH_BEGIN;
+    RTC_TRACE(rtcNewSharedBufferHostDevice);
+    RTC_VERIFY_HANDLE(hdevice);
+    RTC_ENTER_DEVICE(hdevice);
+    Buffer* buffer = new Buffer((Device*)hdevice, byteSize, ptr, nullptr);
+    return (RTCBuffer)buffer->refInc();
+    RTC_CATCH_END((Device*)hdevice);
+    return nullptr;
+  }
+
+  RTC_API void* rtcGetBufferDataDevice(RTCBuffer hbuffer)
+  {
+    Buffer* buffer = (Buffer*)hbuffer;
+    RTC_CATCH_BEGIN;
+    RTC_TRACE(rtcGetBufferDataDevice);
+    RTC_VERIFY_HANDLE(hbuffer);
+    RTC_ENTER_DEVICE(hbuffer);
+    return buffer->dataDevice();
+    RTC_CATCH_END2(buffer);
+    return nullptr;
+  }
+
   RTC_API void* rtcGetBufferData(RTCBuffer hbuffer)
   {
     Buffer* buffer = (Buffer*)hbuffer;
@@ -215,6 +289,16 @@ RTC_NAMESPACE_BEGIN;
     RTC_CATCH_END2(buffer);
   }
 
+  RTC_API void rtcCommitBuffer(RTCBuffer hbuffer) {
+    Buffer* buffer = (Buffer*)hbuffer;
+    RTC_CATCH_BEGIN;
+    RTC_TRACE(rtcCommitBuffer);
+    RTC_VERIFY_HANDLE(hbuffer);
+    RTC_ENTER_DEVICE(hbuffer);
+    buffer->commit();
+    RTC_CATCH_END2(buffer);
+  }
+
   RTC_API RTCScene rtcNewScene (RTCDevice hdevice) 
   {
     RTC_CATCH_BEGIN;
@@ -238,6 +322,20 @@ RTC_NAMESPACE_BEGIN;
     return (RTCDevice)nullptr;
   }
 
+  RTC_API RTCTraversable rtcGetSceneTraversable(RTCScene hscene)
+  {
+    Scene* scene = (Scene*) hscene;
+    RTC_CATCH_BEGIN;
+    RTC_TRACE(rtcGetSceneTraversable);
+    RTC_VERIFY_HANDLE(hscene);
+    RTCTraversable traversable = (RTCTraversable)scene->getTraversable();
+    if (!traversable)
+      throw_RTCError(RTC_ERROR_INVALID_OPERATION,"Traversable is NULL. The scene has to be committed first.");
+    return traversable;
+    RTC_CATCH_END2(scene);
+    return (RTCTraversable)nullptr;
+  }
+
   RTC_API void rtcSetSceneProgressMonitorFunction(RTCScene hscene, RTCProgressMonitorFunction progress, void* ptr) 
   {
     Scene* scene = (Scene*) hscene;
@@ -257,15 +355,10 @@ RTC_NAMESPACE_BEGIN;
     RTC_TRACE(rtcSetSceneBuildQuality);
     RTC_VERIFY_HANDLE(hscene);
     RTC_ENTER_DEVICE(hscene);
-    //if (quality != RTC_BUILD_QUALITY_LOW &&
-    //    quality != RTC_BUILD_QUALITY_MEDIUM &&
-    //    quality != RTC_BUILD_QUALITY_HIGH)
-    //  throw std::runtime_error("invalid build quality");
     if (quality != RTC_BUILD_QUALITY_LOW &&
         quality != RTC_BUILD_QUALITY_MEDIUM &&
-        quality != RTC_BUILD_QUALITY_HIGH) {
-      abort();
-    }
+        quality != RTC_BUILD_QUALITY_HIGH)
+      abort(); //throw std::runtime_error("invalid build quality");
     scene->setBuildQuality(quality);
     RTC_CATCH_END2(scene);
   }
@@ -287,20 +380,28 @@ RTC_NAMESPACE_BEGIN;
     RTC_CATCH_BEGIN;
     RTC_TRACE(rtcGetSceneFlags);
     RTC_VERIFY_HANDLE(hscene);
-    RTC_ENTER_DEVICE(hscene);
+    //RTC_ENTER_DEVICE(hscene);
     return scene->getSceneFlags();
     RTC_CATCH_END2(scene);
     return RTC_SCENE_FLAG_NONE;
   }
-  
-  RTC_API void rtcCommitScene (RTCScene hscene) 
+
+  RTC_API_EXTERN_C bool prefetchUSMSharedOnGPU(RTCScene scene);
+
+  RTC_API void rtcCommitScene (RTCScene hscene)
   {
     Scene* scene = (Scene*) hscene;
     RTC_CATCH_BEGIN;
     RTC_TRACE(rtcCommitScene);
     RTC_VERIFY_HANDLE(hscene);
     RTC_ENTER_DEVICE(hscene);
+    
     scene->commit(false);
+
+#if defined(EMBREE_SYCL_SUPPORT)
+    //prefetchUSMSharedOnGPU(hscene);
+#endif
+
     RTC_CATCH_END2(scene);
   }
 
@@ -311,6 +412,7 @@ RTC_NAMESPACE_BEGIN;
     RTC_TRACE(rtcJoinCommitScene);
     RTC_VERIFY_HANDLE(hscene);
     RTC_ENTER_DEVICE(hscene);
+    
     scene->commit(true);
     RTC_CATCH_END2(scene);
   }
@@ -1130,7 +1232,159 @@ RTC_NAMESPACE_BEGIN;
     rtcForwardOccludedN<RTCRay16,16>(valid, args, hscene, iray, instID, instPrimID);
     RTC_CATCH_END2(scene);
   }
-  
+
+  RTC_API bool rtcTraversablePointQuery(RTCTraversable htraversable, RTCPointQuery* query, RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void* userPtr)
+  {
+    return rtcPointQuery((RTCScene)htraversable, query, userContext, queryFunc, userPtr);
+  }
+
+  RTC_API bool rtcTraversablePointQuery4 (const int* valid, RTCTraversable htraversable, RTCPointQuery4* query, struct RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void** userPtrN)
+  {
+    return rtcPointQuery4(valid, (RTCScene)htraversable, query, userContext, queryFunc, userPtrN);
+  }
+
+  RTC_API bool rtcTraversablePointQuery8 (const int* valid, RTCTraversable htraversable, RTCPointQuery8* query, struct RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void** userPtrN)
+  {
+    return rtcPointQuery8(valid, (RTCScene)htraversable, query, userContext, queryFunc, userPtrN);
+  }
+
+  RTC_API bool rtcTraversablePointQuery16 (const int* valid, RTCTraversable htraversable, RTCPointQuery16* query, struct RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void** userPtrN)
+  {
+    return rtcPointQuery16(valid, (RTCScene)htraversable, query, userContext, queryFunc, userPtrN);
+  }
+
+  RTC_API void rtcTraversableIntersect1 (RTCTraversable htraversable, RTCRayHit* rayhit, RTCIntersectArguments* args)
+  {
+    rtcIntersect1((RTCScene)htraversable, rayhit, args);
+  }
+
+  RTC_API void rtcTraversableForwardIntersect1 (const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay* iray_, unsigned int instID)
+  {
+    rtcForwardIntersect1(args, (RTCScene)htraversable, iray_, instID);
+  }
+
+  RTC_API void rtcTraversableForwardIntersect1Ex(const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay* iray_, unsigned int instID, unsigned int instPrimID)
+  {
+    rtcForwardIntersect1Ex(args, (RTCScene)htraversable, iray_, instID, instPrimID);
+  }
+
+  RTC_API void rtcTraversableIntersect4 (const int* valid, RTCTraversable htraversable, RTCRayHit4* rayhit, RTCIntersectArguments* args)
+  {
+    rtcIntersect4(valid, (RTCScene)htraversable, rayhit, args);
+  }
+
+  template<typename RTCRay, typename RTCRayHit, int N>
+  __forceinline void rtcTraversableForwardIntersectN(const int* valid, const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay* iray, unsigned int instID, unsigned int instPrimID)
+  {
+    rtcForwardIntersetN(valid, args, (RTCScene)htraversable, iray, instID, instPrimID);
+  }
+
+  RTC_API void rtcTraversableForwardIntersect4(const int* valid, const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay4* iray, unsigned int instID)
+  {
+    rtcForwardIntersect4(valid, args, (RTCScene)htraversable, iray, instID);
+  }
+
+  RTC_API void rtcTraversableForwardIntersect4Ex(const int* valid, const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay4* iray, unsigned int instID, unsigned int instPrimID)
+  {
+    rtcForwardIntersect4Ex(valid, args, (RTCScene)htraversable, iray, instID, instPrimID);
+  }
+
+  RTC_API void rtcTraversableIntersect8 (const int* valid, RTCTraversable htraversable, RTCRayHit8* rayhit, RTCIntersectArguments* args)
+  {
+    rtcIntersect8(valid, (RTCScene)htraversable, rayhit, args);
+  }
+
+  RTC_API void rtcTraversableForwardIntersect8(const int* valid, const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay8* iray, unsigned int instID)
+  {
+    rtcForwardIntersect8(valid, args, (RTCScene)htraversable, iray, instID);
+  }
+
+  RTC_API void rtcTraversableForwardIntersect8Ex(const int* valid, const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay8* iray, unsigned int instID, unsigned int instPrimID)
+  {
+    rtcForwardIntersect8Ex(valid, args, (RTCScene)htraversable, iray, instID, instPrimID);
+  }
+
+  RTC_API void rtcTraversableIntersect16 (const int* valid, RTCTraversable htraversable, RTCRayHit16* rayhit, RTCIntersectArguments* args)
+  {
+    rtcIntersect16(valid, (RTCScene)htraversable, rayhit, args);
+  }
+
+  RTC_API void rtcTraversableForwardIntersect16(const int* valid, const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay16* iray, unsigned int instID)
+  {
+    rtcForwardIntersect16(valid, args, (RTCScene)htraversable, iray, instID);
+  }
+
+  RTC_API void rtcTraversableForwardIntersect16Ex(const int* valid, const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay16* iray, unsigned int instID, unsigned int instPrimID)
+  {
+    rtcForwardIntersect16Ex(valid, args, (RTCScene)htraversable, iray, instID, instPrimID);
+  }
+
+  RTC_API void rtcTraversableOccluded1 (RTCTraversable htraversable, RTCRay* ray, RTCOccludedArguments* args)
+  {
+    rtcOccluded1((RTCScene)htraversable, ray, args);
+  }
+
+  RTC_API void rtcTraversableForwardOccluded1 (const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay* iray_, unsigned int instID)
+  {
+    rtcForwardOccluded1(args, (RTCScene)htraversable, iray_, instID);
+  }
+
+  RTC_API void rtcTraversableForwardOccluded1Ex(const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay* iray_, unsigned int instID, unsigned int instPrimID)
+  {
+    rtcForwardOccluded1Ex(args, (RTCScene)htraversable, iray_, instID, instPrimID);
+  }
+
+  RTC_API void rtcTraversableOccluded4 (const int* valid, RTCTraversable htraversable, RTCRay4* ray, RTCOccludedArguments* args)
+  {
+    rtcOccluded4(valid, (RTCScene)htraversable, ray, args);
+  }
+
+  template<typename RTCRay, int N>
+  __forceinline void rtcTraversableForwardOccludedN (const int* valid, const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay* iray, unsigned int instID, unsigned int instPrimID)
+  {
+    rtcForwardOccludedN(valid, args, (RTCScene)htraversable, iray, instID, instPrimID);
+  }
+
+  RTC_API void rtcTraversableForwardOccluded4(const int* valid, const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay4* iray, unsigned int instID)
+  {
+    rtcForwardOccluded4(valid, args, (RTCScene)htraversable, iray, instID);
+  }
+
+  RTC_API void rtcTraversableForwardOccluded4Ex(const int* valid, const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay4* iray, unsigned int instID, unsigned int instPrimID)
+  {
+    rtcForwardOccluded4Ex(valid, args, (RTCScene)htraversable, iray, instID, instPrimID);
+  }
+
+  RTC_API void rtcTraversableOccluded8 (const int* valid, RTCTraversable htraversable, RTCRay8* ray, RTCOccludedArguments* args)
+  {
+    rtcOccluded8(valid, (RTCScene)htraversable, ray, args);
+  }
+
+  RTC_API void rtcTraversableForwardOccluded8(const int* valid, const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay8* iray, unsigned int instID)
+  {
+    rtcForwardOccluded8(valid, args, (RTCScene)htraversable, iray, instID);
+  }
+
+  RTC_API void rtcTraversableForwardOccluded8Ex(const int* valid, const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay8* iray, unsigned int instID, unsigned int instPrimID)
+  {
+    rtcForwardOccluded8Ex(valid, args, (RTCScene)htraversable, iray, instID, instPrimID);
+  }
+
+  RTC_API void rtcTraversableOccluded16 (const int* valid, RTCTraversable htraversable, RTCRay16* ray, RTCOccludedArguments* args)
+  {
+    rtcOccluded16(valid, (RTCScene)htraversable, ray, args);
+  }
+
+  RTC_API void rtcTraversableForwardOccluded16(const int* valid, const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay16* iray, unsigned int instID)
+  {
+    rtcForwardOccluded16(valid, args, (RTCScene)htraversable, iray, instID);
+  }
+
+  RTC_API void rtcTraversableForwardOccluded16Ex(const int* valid, const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay16* iray, unsigned int instID, unsigned int instPrimID)
+  {
+    rtcForwardOccluded16Ex(valid, args, (RTCScene)htraversable, iray, instID, instPrimID);
+  }
+
   RTC_API void rtcRetainScene (RTCScene hscene) 
   {
     Scene* scene = (Scene*) hscene;
@@ -1292,6 +1546,11 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
     RTC_CATCH_END2(scene);
   }
 
+  RTC_API void rtcGetGeometryTransformFromTraversable(RTCTraversable htraversable, unsigned int geomID, float time, RTCFormat format, void* xfm)
+  {
+    rtcGetGeometryTransformFromScene((RTCScene)htraversable, geomID, time, format, xfm);
+  }
+
   RTC_API void rtcInvokeIntersectFilterFromGeometry(const struct RTCIntersectFunctionNArguments* const args_i, const struct RTCFilterFunctionNArguments* filter_args)
   {
     IntersectFunctionNArguments* args = (IntersectFunctionNArguments*) args_i;
@@ -1568,17 +1827,11 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
     RTC_TRACE(rtcSetGeometryBuildQuality);
     RTC_VERIFY_HANDLE(hgeometry);
     RTC_ENTER_DEVICE(hgeometry);
-    //if (quality != RTC_BUILD_QUALITY_LOW &&
-    //    quality != RTC_BUILD_QUALITY_MEDIUM &&
-    //    quality != RTC_BUILD_QUALITY_HIGH &&
-    //    quality != RTC_BUILD_QUALITY_REFIT)
-    //  throw std::runtime_error("invalid build quality");
     if (quality != RTC_BUILD_QUALITY_LOW &&
         quality != RTC_BUILD_QUALITY_MEDIUM &&
         quality != RTC_BUILD_QUALITY_HIGH &&
-        quality != RTC_BUILD_QUALITY_REFIT) {
-      abort();
-    }
+        quality != RTC_BUILD_QUALITY_REFIT)
+      abort(); //throw std::runtime_error("invalid build quality");
     geometry->setBuildQuality(quality);
     RTC_CATCH_END2(geometry);
   }
@@ -1667,6 +1920,37 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
     RTC_CATCH_END2(geometry);
   }
 
+  RTC_API void rtcSetSharedGeometryBufferHostDevice(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot, RTCFormat format, const void* ptr, const void* dptr, size_t byteOffset, size_t byteStride, size_t itemCount)
+  {
+    Geometry* geometry = (Geometry*) hgeometry;
+    RTC_CATCH_BEGIN;
+    RTC_TRACE(rtcSetSharedGeometryBufferHostDevice);
+    RTC_VERIFY_HANDLE(hgeometry);
+    RTC_ENTER_DEVICE(hgeometry);
+
+#if defined(EMBREE_SYCL_SUPPORT)
+    if (geometry->device->is_gpu())
+    {
+      if (itemCount > 0xFFFFFFFFu)
+        throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"buffer too large");
+
+      if ((ptr == nullptr) || (dptr == nullptr))
+        throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"host and device pointer may not be NULL pointers when using SYCL devices");
+
+      Ref<Buffer> buffer = new Buffer(geometry->device, itemCount*byteStride, (char*)ptr + byteOffset, (char*)dptr + byteOffset);
+      geometry->setBuffer(type, slot, format, buffer, 0, byteStride, (unsigned int)itemCount);
+    }
+    else
+#endif
+    {
+      if (dptr != nullptr)
+        throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"Embree device is no SYCL device. Device pointer argument must not be valid, pass NULL instead");
+      rtcSetSharedGeometryBuffer(hgeometry, type, slot, format, ptr, byteOffset, byteStride, itemCount);
+    }
+
+    RTC_CATCH_END2(geometry);
+  }
+
   RTC_API void* rtcSetNewGeometryBuffer(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot, RTCFormat format, size_t byteStride, size_t itemCount)
   {
     Geometry* geometry = (Geometry*) hgeometry;
@@ -1683,13 +1967,54 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
     if (type == RTC_BUFFER_TYPE_VERTEX || type == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE)
       bytes += (16 - (byteStride%16))%16;
       
-    Ref<Buffer> buffer = new Buffer(geometry->device, bytes);
+    Ref<Buffer> buffer = new Buffer(geometry->device, bytes, nullptr);
     geometry->setBuffer(type, slot, format, buffer, 0, byteStride, (unsigned int)itemCount);
     return buffer->data();
     RTC_CATCH_END2(geometry);
     return nullptr;
   }
 
+  RTC_API void rtcSetNewGeometryBufferHostDevice(RTCGeometry hgeometry, RTCBufferType bufferType, unsigned int slot, RTCFormat format, size_t byteStride, size_t itemCount, void** ptr, void** dptr)
+  {
+    Geometry* geometry = (Geometry*) hgeometry;
+    RTC_CATCH_BEGIN;
+    RTC_TRACE(rtcSetNewGeometryBufferHostDevice);
+    RTC_VERIFY_HANDLE(hgeometry);
+    RTC_ENTER_DEVICE(hgeometry);
+
+#if defined(EMBREE_SYCL_SUPPORT)
+    if (geometry->device->is_gpu())
+    {
+      if (itemCount > 0xFFFFFFFFu)
+        throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"buffer too large");
+
+      /* vertex buffers need to get overallocated slightly as elements are accessed using SSE loads */
+      size_t bytes = itemCount*byteStride;
+      if (bufferType == RTC_BUFFER_TYPE_VERTEX || bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE)
+        bytes += (16 - (byteStride%16))%16;
+
+      Ref<Buffer> buffer = new Buffer(geometry->device, bytes, nullptr, nullptr);
+      geometry->setBuffer(bufferType, slot, format, buffer, 0, byteStride, (unsigned int)itemCount);
+
+      if(ptr)
+        *ptr = buffer->getHostPtr();
+      if (dptr)
+        *dptr = buffer->getDevicePtr();
+    }
+    else
+#endif
+    {
+      void* tmp = rtcSetNewGeometryBuffer(hgeometry, bufferType, slot, format, byteStride, itemCount);
+      if(ptr)
+        *ptr = tmp;
+      if (dptr) {
+        *dptr = tmp;
+      }
+    }
+
+    RTC_CATCH_END2(geometry);
+  }
+
   RTC_API void* rtcGetGeometryBufferData(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot)
   {
     Geometry* geometry = (Geometry*) hgeometry;
@@ -1697,7 +2022,19 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
     RTC_TRACE(rtcGetGeometryBufferData);
     RTC_VERIFY_HANDLE(hgeometry);
     RTC_ENTER_DEVICE(hgeometry);
-    return geometry->getBuffer(type, slot);
+    return geometry->getBufferData(type, slot, BufferDataPointerType::HOST);
+    RTC_CATCH_END2(geometry);
+    return nullptr;
+  }
+
+  RTC_API void* rtcGetGeometryBufferDataDevice(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot)
+  {
+    Geometry* geometry = (Geometry*) hgeometry;
+    RTC_CATCH_BEGIN;
+    RTC_TRACE(rtcGetGeometryBufferDataDevice);
+    RTC_VERIFY_HANDLE(hgeometry);
+    RTC_ENTER_DEVICE(hgeometry);
+    return geometry->getBufferData(type, slot, BufferDataPointerType::DEVICE);
     RTC_CATCH_END2(geometry);
     return nullptr;
   }
@@ -1713,7 +2050,7 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
     RTC_CATCH_END2(geometry);
   }
 
-  RTC_API void rtcUpdateGeometryBuffer (RTCGeometry hgeometry, RTCBufferType type, unsigned int slot) 
+  RTC_API void rtcUpdateGeometryBuffer (RTCGeometry hgeometry, RTCBufferType type, unsigned int slot)
   {
     Geometry* geometry = (Geometry*) hgeometry;
     RTC_CATCH_BEGIN;
@@ -1784,6 +2121,11 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
     return nullptr;
   }
 
+  RTC_API void* rtcGetGeometryUserDataFromTraversable (RTCTraversable htraversable, unsigned int geomID)
+  {
+    return rtcGetGeometryUserDataFromScene((RTCScene)htraversable, geomID);
+  }
+
   RTC_API void rtcSetGeometryBoundsFunction (RTCGeometry hgeometry, RTCBoundsFunction bounds, void* userPtr)
   {
     Geometry* geometry = (Geometry*) hgeometry;
@@ -2060,4 +2402,14 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
     return nullptr;
   }
 
+  RTC_API const char* rtcGetErrorString(RTCError error)
+  {
+    RTC_CATCH_BEGIN;
+    RTC_TRACE(rtcGetErrorString);
+    return Device::getErrorString(error);
+    RTC_CATCH_END(nullptr);
+    return nullptr;
+  }
+
+
 RTC_NAMESPACE_END

+ 80 - 10
thirdparty/embree/kernels/common/scene.cpp

@@ -42,10 +42,21 @@ namespace embree
 
   Scene::Scene (Device* device)
     : device(device),
+      scene_device(nullptr),
       flags_modified(true), enabled_geometry_types(0),
       scene_flags(RTC_SCENE_FLAG_NONE),
       quality_flags(RTC_BUILD_QUALITY_MEDIUM),
       modified(true),
+      maxTimeSegments(0),
+#if defined(EMBREE_SYCL_SUPPORT)
+      geometries_device(nullptr),
+      geometry_data_device(nullptr),
+      num_geometries(0),
+      geometry_data_byte_size(0),
+      offsets(nullptr),
+      geometries_host(nullptr),
+      geometry_data_host(nullptr),
+#endif
       taskGroup(new TaskGroup()),
       progressInterface(this), progress_monitor_function(nullptr), progress_monitor_ptr(nullptr), progress_monitor_counter(0)
   {
@@ -55,8 +66,8 @@ namespace embree
 
     /* use proper device and context for SYCL allocations */
 #if defined(EMBREE_SYCL_SUPPORT)
-    if (DeviceGPU* gpu_device = dynamic_cast<DeviceGPU*>(device))
-      hwaccel = AccelBuffer(AccelAllocator<char>(device,gpu_device->getGPUDevice(),gpu_device->getGPUContext()),0);
+    if (dynamic_cast<DeviceGPU*>(device))
+      accelBuffer = AccelBuffer(device);
 #endif
        
     /* one can overwrite flags through device for debugging */
@@ -68,6 +79,27 @@ namespace embree
 
   Scene::~Scene() noexcept
   {
+#if defined(EMBREE_SYCL_SUPPORT)
+    if (geometry_data_device) {
+      device->free(geometry_data_device);
+    }
+    if (geometries_device) {
+      device->free(geometries_device);
+    }
+    if (scene_device) {
+      device->free(scene_device);
+    }
+    if (offsets) {
+      device->free(offsets);
+    }
+    if (geometries_host) {
+      device->free(geometries_host);
+    }
+    if (geometry_data_host) {
+      device->free(geometry_data_host);
+    }
+#endif
+
     device->refDec();
   }
   
@@ -788,9 +820,8 @@ namespace embree
   void Scene::build_gpu_accels()
   {
 #if defined(EMBREE_SYCL_SUPPORT)
-    const BBox3f aabb = rthwifBuild(this,hwaccel);
-    bounds = LBBox<embree::Vec3fa>(aabb);
-    hwaccel_bounds = aabb;
+    accelBuffer.build(this);
+    bounds = LBBox<embree::Vec3fa>(accelBuffer.getBounds());
 #endif
   }
 
@@ -799,6 +830,7 @@ namespace embree
     checkIfModifiedAndSet();
     if (!isModified()) return;
     
+
     /* print scene statistics */
     if (device->verbosity(2))
       printStatistics();
@@ -825,8 +857,18 @@ namespace embree
       std::plus<GeometryCounts>()
     );
 
+    /* calculate maximal number of motion blur time segments in scene */
+    maxTimeSegments = 1;
+    for (size_t geomID=0; geomID<size(); geomID++)
+    {
+      Geometry* geom = get(geomID);
+      if (geom == nullptr) continue;
+      maxTimeSegments = std::max(maxTimeSegments, geom->numTimeSegments());
+    }
+
 #if defined(EMBREE_SYCL_SUPPORT)
-    if (DeviceGPU* gpu_device = dynamic_cast<DeviceGPU*>(device))
+    DeviceGPU* gpu_device = dynamic_cast<DeviceGPU*>(device);
+    if (gpu_device)
       build_gpu_accels();
     else
 #endif
@@ -865,10 +907,36 @@ namespace embree
   RTCSceneFlags Scene::getSceneFlags() const {
     return scene_flags;
   }
-                   
+
+#if defined(EMBREE_SYCL_SUPPORT)
+  sycl::event Scene::commit (bool join, sycl::queue queue)
+  {
+    commit_internal(join);
+    return syncWithDevice(queue);
+  }
+#endif
+
+  void Scene::commit (bool join)
+  {
+    commit_internal(join);
+
+#if defined(EMBREE_SYCL_SUPPORT)
+    syncWithDevice();
+#endif
+  }
+
+  Scene* Scene::getTraversable() {
+#if defined(EMBREE_SYCL_SUPPORT)
+    if(device->is_gpu()) {
+      return scene_device;
+    }
+#endif
+    return this;
+  }
+
 #if defined(TASKING_INTERNAL)
 
-  void Scene::commit (bool join) 
+  void Scene::commit_internal (bool join)
   {
     Lock<MutexSys> buildLock(buildMutex,false);
 
@@ -910,7 +978,7 @@ namespace embree
 
 #if defined(TASKING_TBB)
 
-  void Scene::commit (bool join) 
+  void Scene::commit_internal (bool join) 
   {    
 #if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR < 8)
     if (join)
@@ -974,7 +1042,7 @@ namespace embree
 
 #if defined(TASKING_PPL)
 
-  void Scene::commit (bool join) 
+  void Scene::commit_internal (bool join)
   {
 #if defined(TASKING_PPL)
     if (join)
@@ -1011,6 +1079,7 @@ namespace embree
       accels_clear();
       throw;
     }
+
   }
 #endif
 
@@ -1029,4 +1098,5 @@ namespace embree
       }
     }
   }
+  
 }

+ 76 - 10
thirdparty/embree/kernels/common/scene.h

@@ -25,15 +25,18 @@
 #include "../sycl/rthwif_embree_builder.h"
 #endif
 
+#if !defined(EMBREE_SYCL_SUPPORT)
+namespace sycl {
+  struct queue;
+}
+#endif
 namespace embree
 {
   struct TaskGroup;
 
   /*! Base class all scenes are derived from */
-  class Scene : public AccelN
+  class __aligned(16) Scene : public AccelN
   {
-    ALIGNED_CLASS_USM_(std::alignment_of<Scene>::value);
-
   public:
     template<typename Ty, bool mblur = false>
       class Iterator
@@ -59,10 +62,6 @@ namespace embree
         return at(i);
       }
 
-      __forceinline size_t size() const {
-        return scene->size();
-      }
-      
       __forceinline size_t numPrimitives() const {
         return scene->getNumPrimitives(Ty::geom_type,mblur);
       }
@@ -99,6 +98,11 @@ namespace embree
         }
         return ret;
       }
+
+      __forceinline size_t size() const {
+        return scene->size();
+      }
+      
       
     private:
       Scene* scene;
@@ -188,12 +192,22 @@ namespace embree
 
     void build_cpu_accels();
     void build_gpu_accels();
+    void commit_internal (bool join);
+#if defined(EMBREE_SYCL_SUPPORT)
+    sycl::event commit (bool join, sycl::queue queue);
+#endif
     void commit (bool join);
     void commit_task ();
     void build () {}
 
+    Scene* getTraversable();
+
     /* return number of geometries */
+#if defined(__SYCL_DEVICE_ONLY__)
+    __forceinline size_t size() const { return num_geometries; }
+#else
     __forceinline size_t size() const { return geometries.size(); }
+#endif
     
     /* bind geometry to the scene */
     unsigned int bind (unsigned geomID, Ref<Geometry> geometry);
@@ -206,12 +220,18 @@ namespace embree
       modified = f; 
     }
 
+    __forceinline bool hasMotionBlur() const { return maxTimeSegments > 1; };
+
+    __forceinline uint32_t getMaxTimeSegments() const { return maxTimeSegments; };
+
+    #if !defined(__SYCL_DEVICE_ONLY__)
     __forceinline bool isGeometryModified(size_t geomID)
     {
       Ref<Geometry>& g = geometries[geomID];
       if (!g) return false;
       return g->getModCounter() > geometryModCounters_[geomID];
     }
+    #endif
 
   protected:
 
@@ -219,6 +239,27 @@ namespace embree
 
   public:
 
+#if defined(__SYCL_DEVICE_ONLY__)
+    /* get mesh by ID */
+    __forceinline       Geometry* get(size_t i)       { return geometries_device[i]; }
+    __forceinline const Geometry* get(size_t i) const { return geometries_device[i]; }
+
+    template<typename Mesh>
+      __forceinline       Mesh* get(size_t i)       { 
+      return (Mesh*)geometries_device[i]; 
+    }
+    template<typename Mesh>
+      __forceinline const Mesh* get(size_t i) const { 
+      return (Mesh*)geometries_device[i]; 
+    }
+
+    template<typename Mesh>
+    __forceinline Mesh* getSafe(size_t i) {
+      if (geometries_device[i] == nullptr) return nullptr;
+      if (!(geometries_device[i]->getTypeMask() & Mesh::geom_type)) return nullptr;
+      else return (Mesh*) geometries_device[i];
+    }
+#else
     /* get mesh by ID */
     __forceinline       Geometry* get(size_t i)       { assert(i < geometries.size()); return geometries[i].ptr; }
     __forceinline const Geometry* get(size_t i) const { assert(i < geometries.size()); return geometries[i].ptr; }
@@ -243,12 +284,16 @@ namespace embree
       if (!(geometries[i]->getTypeMask() & Mesh::geom_type)) return nullptr;
       else return (Mesh*) geometries[i].ptr;
     }
+#endif
+
 
+    #if !defined(__SYCL_DEVICE_ONLY__)
     __forceinline Ref<Geometry> get_locked(size_t i)  {
       Lock<MutexSys> lock(geometriesMutex);
       assert(i < geometries.size()); 
       return geometries[i]; 
     }
+    #endif
 
     /* flag decoding */
     __forceinline bool isFastAccel() const { return !isCompactAccel() && !isRobustAccel(); }
@@ -270,9 +315,16 @@ namespace embree
     }
     
     void* createQBVH6Accel();
+    
+#if defined(EMBREE_SYCL_SUPPORT)
+  private:
+    void syncWithDevice();
+    sycl::event syncWithDevice(sycl::queue queue);
+#endif
 
   public:
     Device* device;
+    Scene* scene_device;
 
   public:
     IDPool<unsigned,0xFFFFFFFE> id_pool;
@@ -292,12 +344,26 @@ namespace embree
 
 #if defined(EMBREE_SYCL_SUPPORT)
   public:
-    BBox3f hwaccel_bounds = empty;
-    AccelBuffer hwaccel;
+    AccelBuffer accelBuffer;
 #endif
     
   private:
-    bool modified;                   //!< true if scene got modified
+    bool modified;            //!< true if scene got modified
+    uint32_t maxTimeSegments; //!< maximal number of motion blur time segments in scene
+
+#if defined(EMBREE_SYCL_SUPPORT)
+    Geometry** geometries_device; //!< list of all geometries on device
+    char* geometry_data_device; //!< data buffer of all geometries on device
+    size_t num_geometries;
+    size_t geometry_data_byte_size;
+
+    // host buffers used for creating representation of scene/geometry for device
+    // will be freed after scene commit if the scene is static, otherwise the
+    // buffers will stay for quicker rebuild.
+    size_t *offsets;
+    Geometry **geometries_host;
+    char *geometry_data_host;
+#endif
 
   public:
 

+ 3 - 1
thirdparty/embree/kernels/common/scene_curves.h

@@ -31,13 +31,15 @@ namespace embree
     void setNumTimeSteps (unsigned int numTimeSteps);
     void setVertexAttributeCount (unsigned int N);
     void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
-    void* getBuffer(RTCBufferType type, unsigned int slot);
+    void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType);
     void updateBuffer(RTCBufferType type, unsigned int slot);
     void commit();
     bool verify();
     void setTessellationRate(float N);
     void setMaxRadiusScale(float s);
     void addElementsToCount (GeometryCounts & counts) const;
+    size_t getGeometryDataDeviceByteSize() const;
+    void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const;
 
   public:
     

+ 8 - 1
thirdparty/embree/kernels/common/scene_grid_mesh.h

@@ -50,12 +50,19 @@ namespace embree
     void setNumTimeSteps (unsigned int numTimeSteps);
     void setVertexAttributeCount (unsigned int N);
     void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
-    void* getBuffer(RTCBufferType type, unsigned int slot);
+    void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType);
     void updateBuffer(RTCBufferType type, unsigned int slot);
     void commit();
     bool verify();
     void interpolate(const RTCInterpolateArguments* const args);
 
+#if defined(EMBREE_SYCL_SUPPORT)
+
+    size_t getGeometryDataDeviceByteSize() const;
+    void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const;
+
+#endif
+
     template<int N>
     void interpolate_impl(const RTCInterpolateArguments* const args)
     {

+ 2 - 1
thirdparty/embree/kernels/common/scene_instance.h

@@ -13,7 +13,6 @@ namespace embree
   /*! Instanced acceleration structure */
   struct Instance : public Geometry
   {
-    //ALIGNED_STRUCT_(16);
     static const Geometry::GTypeMask geom_type = Geometry::MTY_INSTANCE;
 
   public:
@@ -55,6 +54,8 @@ namespace embree
     virtual void build() {}
     virtual void addElementsToCount (GeometryCounts & counts) const override;
     virtual void commit() override;
+    virtual size_t getGeometryDataDeviceByteSize() const override;
+    virtual void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const override;
 
   public:
 

+ 3 - 2
thirdparty/embree/kernels/common/scene_instance_array.h

@@ -13,7 +13,6 @@ namespace embree
   /*! Instanced acceleration structure */
   struct InstanceArray : public Geometry
   {
-    //ALIGNED_STRUCT_(16);
     static const Geometry::GTypeMask geom_type = Geometry::MTY_INSTANCE_ARRAY;
 
   public:
@@ -48,7 +47,7 @@ namespace embree
   public:
 
     virtual void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num) override;
-    virtual void* getBuffer(RTCBufferType type, unsigned int slot) override;
+    virtual void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType) override;
     virtual void updateBuffer(RTCBufferType type, unsigned int slot) override;
 
     virtual void setNumTimeSteps (unsigned int numTimeSteps) override;
@@ -59,6 +58,8 @@ namespace embree
     virtual void build() {}
     virtual void addElementsToCount (GeometryCounts & counts) const override;
     virtual void commit() override;
+    size_t getGeometryDataDeviceByteSize() const override;
+    void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const override;
 
   public:
 

+ 3 - 1
thirdparty/embree/kernels/common/scene_line_segments.h

@@ -25,7 +25,7 @@ namespace embree
     void setNumTimeSteps (unsigned int numTimeSteps);
     void setVertexAttributeCount (unsigned int N);
     void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
-    void* getBuffer(RTCBufferType type, unsigned int slot);
+    void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType);
     void updateBuffer(RTCBufferType type, unsigned int slot);
     void commit();
     bool verify ();
@@ -33,6 +33,8 @@ namespace embree
     void setTessellationRate(float N);
     void setMaxRadiusScale(float s);
     void addElementsToCount (GeometryCounts & counts) const;
+    size_t getGeometryDataDeviceByteSize() const;
+    void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const;
 
     template<int N>
     void interpolate_impl(const RTCInterpolateArguments* const args)

+ 3 - 1
thirdparty/embree/kernels/common/scene_points.h

@@ -30,12 +30,14 @@ namespace embree
                    size_t offset,
                    size_t stride,
                    unsigned int num);
-    void* getBuffer(RTCBufferType type, unsigned int slot);
+    void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType);
     void updateBuffer(RTCBufferType type, unsigned int slot);
     void commit();
     bool verify();
     void setMaxRadiusScale(float s);
     void addElementsToCount (GeometryCounts & counts) const;
+    size_t getGeometryDataDeviceByteSize() const;
+    void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const;
 
    public:
     /*! returns the number of vertices */

+ 3 - 1
thirdparty/embree/kernels/common/scene_quad_mesh.h

@@ -42,12 +42,14 @@ namespace embree
     void setNumTimeSteps (unsigned int numTimeSteps);
     void setVertexAttributeCount (unsigned int N);
     void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
-    void* getBuffer(RTCBufferType type, unsigned int slot);
+    void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType);
     void updateBuffer(RTCBufferType type, unsigned int slot);
     void commit();
     bool verify();
     void interpolate(const RTCInterpolateArguments* const args);
     void addElementsToCount (GeometryCounts & counts) const;
+    size_t getGeometryDataDeviceByteSize() const;
+    void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const;
 
     template<int N>
       void interpolate_impl(const RTCInterpolateArguments* const args)

+ 1 - 1
thirdparty/embree/kernels/common/scene_subdiv_mesh.h

@@ -61,7 +61,7 @@ namespace embree
     void setVertexAttributeCount (unsigned int N);
     void setTopologyCount (unsigned int N);
     void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
-    void* getBuffer(RTCBufferType type, unsigned int slot);
+    void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType);
     void updateBuffer(RTCBufferType type, unsigned int slot);
     void setTessellationRate(float N);
     bool verify();

+ 32 - 8
thirdparty/embree/kernels/common/scene_triangle_mesh.cpp

@@ -35,7 +35,7 @@ namespace embree
   void TriangleMesh::setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num)
   {
     /* verify that all accesses are 4 bytes aligned */
-    if (((size_t(buffer->getPtr()) + offset) & 0x3) || (stride & 0x3)) 
+    if (((size_t(buffer->getHostPtr()) + offset) & 0x3) || (stride & 0x3))
       throw_RTCError(RTC_ERROR_INVALID_OPERATION, "data must be 4 bytes aligned");
 
     if (type == RTC_BUFFER_TYPE_VERTEX)
@@ -79,25 +79,25 @@ namespace embree
       throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown buffer type");
   }
 
-  void* TriangleMesh::getBuffer(RTCBufferType type, unsigned int slot)
+  void* TriangleMesh::getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType)
   {
     if (type == RTC_BUFFER_TYPE_INDEX)
     {
       if (slot != 0)
         throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
-      return triangles.getPtr();
+      return triangles.getPtr(pointerType);
     }
     else if (type == RTC_BUFFER_TYPE_VERTEX)
     {
       if (slot >= vertices.size())
         throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
-      return vertices[slot].getPtr();
+      return vertices[slot].getPtr(pointerType);
     }
     else if (type == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE)
     {
       if (slot >= vertexAttribs.size())
         throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
-      return vertexAttribs[slot].getPtr();
+      return vertexAttribs[slot].getPtr(pointerType);
     }
     else
     {
@@ -137,10 +137,12 @@ namespace embree
   void TriangleMesh::commit()
   {
     /* verify that stride of all time steps are identical */
-    for (unsigned int t=0; t<numTimeSteps; t++)
+    for (unsigned int t=0; t<numTimeSteps; t++) {
       if (vertices[t].getStride() != vertices[0].getStride())
         throw_RTCError(RTC_ERROR_INVALID_OPERATION,"stride of vertex buffers have to be identical for each time step");
-
+      if (vertices[t]) vertices[t].buffer->commitIfNeeded();
+    }
+    if (triangles) triangles.buffer->commitIfNeeded();
     Geometry::commit();
   }
 
@@ -182,7 +184,29 @@ namespace embree
   void TriangleMesh::interpolate(const RTCInterpolateArguments* const args) {
     interpolate_impl<4>(args);
   }
- 
+
+  size_t TriangleMesh::getGeometryDataDeviceByteSize() const {
+    size_t byte_size = sizeof(TriangleMesh);
+    byte_size += numTimeSteps * sizeof(BufferView<Vec3fa>);
+    return 16 * ((byte_size + 15) / 16);
+  }
+
+  void TriangleMesh::convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const {
+    TriangleMesh* mesh = (TriangleMesh*)(data_host + offset);
+    std::memcpy(data_host + offset, (void*)this, sizeof(TriangleMesh));
+    offset += sizeof(TriangleMesh);
+
+    // store offset for overriding vertices pointer with device pointer after copying
+    const size_t offsetVertices = offset;
+    // copy vertices BufferViews for each time step
+    for (size_t t = 0; t < numTimeSteps; ++t) {
+      std::memcpy(data_host + offset, &(vertices[t]), sizeof(BufferView<Vec3fa>));
+      offset += sizeof(BufferView<Vec3fa>);
+    }
+    // override vertices pointer with device ptr
+    mesh->vertices.setDataPtr((BufferView<Vec3fa>*)(data_device + offsetVertices));
+  }
+
 #endif
 
   namespace isa

+ 17 - 13
thirdparty/embree/kernels/common/scene_triangle_mesh.h

@@ -32,16 +32,18 @@ namespace embree
 
     /* geometry interface */
   public:
-    void setMask(unsigned mask);
-    void setNumTimeSteps (unsigned int numTimeSteps);
-    void setVertexAttributeCount (unsigned int N);
-    void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
-    void* getBuffer(RTCBufferType type, unsigned int slot);
-    void updateBuffer(RTCBufferType type, unsigned int slot);
-    void commit();
-    bool verify();
-    void interpolate(const RTCInterpolateArguments* const args);
-    void addElementsToCount (GeometryCounts & counts) const;
+    virtual void setMask(unsigned mask) override;
+    virtual void setNumTimeSteps (unsigned int numTimeSteps) override;
+    virtual void setVertexAttributeCount (unsigned int N) override;
+    virtual void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num) override;
+    virtual void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType) override;
+    virtual void updateBuffer(RTCBufferType type, unsigned int slot) override;
+    virtual void commit() override;
+    virtual bool verify() override;
+    virtual void interpolate(const RTCInterpolateArguments* const args) override;
+    virtual void addElementsToCount (GeometryCounts & counts) const override;
+    virtual size_t getGeometryDataDeviceByteSize() const override;
+    virtual void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const override;
 
     template<int N>
     void interpolate_impl(const RTCInterpolateArguments* const args)
@@ -98,12 +100,12 @@ namespace embree
     }
     
   public:
-    
+
     /*! returns number of vertices */
     __forceinline size_t numVertices() const {
       return vertices[0].size();
     }
-    
+
     /*! returns i'th triangle*/
     __forceinline const Triangle& triangle(size_t i) const {
       return triangles[i];
@@ -246,7 +248,7 @@ namespace embree
     }
 
     /*! get fast access to first vertex buffer */
-    __forceinline float * getCompactVertexArray () const {
+    __forceinline float * getCompactVertexArray () const override {
       return (float*) vertices0.getPtr();
     }
 
@@ -283,6 +285,7 @@ namespace embree
       TriangleMeshISA (Device* device)
         : TriangleMesh(device) {}
 
+#if !defined(__SYCL_DEVICE_ONLY__)
       LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const {
         return linearBounds(primID,time_range);
       }
@@ -344,6 +347,7 @@ namespace embree
         }
         return pinfo;
       }
+#endif
     };
   }
 

+ 8 - 6
thirdparty/embree/kernels/common/scene_user_geometry.h

@@ -15,12 +15,14 @@ namespace embree
 
   public:
     UserGeometry (Device* device, unsigned int items = 0, unsigned int numTimeSteps = 1);
-    virtual void setMask (unsigned mask);
-    virtual void setBoundsFunction (RTCBoundsFunction bounds, void* userPtr);
-    virtual void setIntersectFunctionN (RTCIntersectFunctionN intersect);
-    virtual void setOccludedFunctionN (RTCOccludedFunctionN occluded);
-    virtual void build() {}
-    virtual void addElementsToCount (GeometryCounts & counts) const;
+    virtual void setMask (unsigned mask) override;
+    virtual void setBoundsFunction (RTCBoundsFunction bounds, void* userPtr) override;
+    virtual void setIntersectFunctionN (RTCIntersectFunctionN intersect) override;
+    virtual void setOccludedFunctionN (RTCOccludedFunctionN occluded) override;
+    virtual void build() override {}
+    virtual void addElementsToCount (GeometryCounts & counts) const override;
+    virtual size_t getGeometryDataDeviceByteSize() const override;
+    virtual void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const override;
 
     __forceinline float projectedPrimitiveArea(const size_t i) const { return 0.0f; }
   };

+ 13 - 5
thirdparty/embree/kernels/common/state.cpp

@@ -16,19 +16,22 @@ namespace embree
   State::ErrorHandler::~ErrorHandler()
   {
     Lock<MutexSys> lock(errors_mutex);
-    for (size_t i=0; i<thread_errors.size(); i++)
+    for (size_t i=0; i<thread_errors.size(); i++) {
       delete thread_errors[i];
+    }
     destroyTls(thread_error);
     thread_errors.clear();
   }
 
-  RTCError* State::ErrorHandler::error() 
+  RTCErrorMessage* State::ErrorHandler::error()
   {
-    RTCError* stored_error = (RTCError*) getTls(thread_error);
-    if (stored_error) return stored_error;
+    RTCErrorMessage* stored_error = (RTCErrorMessage*) getTls(thread_error);
+    if (stored_error) {
+      return stored_error;
+    }
 
     Lock<MutexSys> lock(errors_mutex);
-    stored_error = new RTCError(RTC_ERROR_NONE);
+    stored_error = new RTCErrorMessage(RTC_ERROR_NONE, "");
     thread_errors.push_back(stored_error);
     setTls(thread_error,stored_error);
     return stored_error;
@@ -84,6 +87,8 @@ namespace embree
     max_spatial_split_replications = 1.2f;
     useSpatialPreSplits = false;
 
+    max_triangles_per_leaf = inf;
+
     tessellation_cache_size = 128*1024*1024;
 
     subdiv_accel = "default";
@@ -428,6 +433,9 @@ namespace embree
       else if (tok == Token::Id("max_spatial_split_replications") && cin->trySymbol("="))
         max_spatial_split_replications = cin->get().Float();
 
+      else if (tok == Token::Id("max_triangles_per_leaf") && cin->trySymbol("="))
+        max_triangles_per_leaf = cin->get().Float();
+
       else if (tok == Token::Id("presplits") && cin->trySymbol("="))
         useSpatialPreSplits = cin->get().Int() != 0 ? true : false;
 

+ 11 - 2
thirdparty/embree/kernels/common/state.h

@@ -9,6 +9,14 @@ namespace embree
 {
   /* mutex to make printing to cout thread safe */
   extern MutexSys g_printMutex;
+  struct RTCErrorMessage
+  {
+    RTCErrorMessage(RTCError error, std::string const& msg)
+      : error(error), msg(msg) {}
+
+    RTCError error;
+    std::string msg;
+  };
 
   struct State : public RefCount
   {
@@ -109,6 +117,7 @@ namespace embree
     float max_spatial_split_replications;  //!< maximally replications*N many primitives in accel for spatial splits
     bool useSpatialPreSplits;              //!< use spatial pre-splits instead of the full spatial split builder
     size_t tessellation_cache_size;        //!< size of the shared tessellation cache 
+    size_t max_triangles_per_leaf;
 
   public:
     size_t instancing_open_min;            //!< instancing opens tree to minimally that number of subtrees
@@ -163,11 +172,11 @@ namespace embree
     public:
       ErrorHandler();
       ~ErrorHandler();
-      RTCError* error();
+      RTCErrorMessage* error();
 
     public:
       tls_t thread_error;
-      std::vector<RTCError*> thread_errors;
+      std::vector<RTCErrorMessage*> thread_errors;
       MutexSys errors_mutex;
     };
     ErrorHandler errorHandler;

+ 1 - 1
thirdparty/embree/kernels/hash.h

@@ -1,4 +1,4 @@
 // Copyright 2009-2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 
-#define RTC_HASH "daa8de0e714e18ad5e5c9841b67c1950d9c91c51"
+#define RTC_HASH "ff9381774dc99fea81a932ad276677aad6a3d4dd"

+ 71 - 197
thirdparty/embree/patches/0001-disable-exceptions.patch

@@ -23,216 +23,131 @@ index b52b1e2e13..fbff38f660 100644
    #endif
  #else // TASKING_PPL
 diff --git a/thirdparty/embree/common/lexers/stringstream.cpp b/thirdparty/embree/common/lexers/stringstream.cpp
-index 42ffb10176..fa4266d0b9 100644
+index 42ffb10176..9779fc74c7 100644
 --- a/thirdparty/embree/common/lexers/stringstream.cpp
 +++ b/thirdparty/embree/common/lexers/stringstream.cpp
-@@ -39,7 +39,10 @@ namespace embree
+@@ -39,7 +39,7 @@ namespace embree
      std::vector<char> str; str.reserve(64);
      while (cin->peek() != EOF && !isSeparator(cin->peek())) {
        int c = cin->get();
 -      if (!isValidChar(c)) throw std::runtime_error("invalid character "+std::string(1,c)+" in input");
-+      //if (!isValidChar(c)) throw std::runtime_error("invalid character "+std::string(1,c)+" in input");
-+      if (!isValidChar(c)) {
-+        abort();
-+      }
++      if (!isValidChar(c)) abort(); //throw std::runtime_error("invalid character "+std::string(1,c)+" in input");
        str.push_back((char)c);
      }
      str.push_back(0);
 diff --git a/thirdparty/embree/common/sys/alloc.cpp b/thirdparty/embree/common/sys/alloc.cpp
-index de225fafc6..8e83646031 100644
+index c92bb99ece..2288df76ef 100644
 --- a/thirdparty/embree/common/sys/alloc.cpp
 +++ b/thirdparty/embree/common/sys/alloc.cpp
-@@ -24,16 +24,28 @@ namespace embree
-   
-   void enableUSMAllocEmbree(sycl::context* context, sycl::device* device)
-   {
--    if (tls_context_embree != nullptr) throw std::runtime_error("USM allocation already enabled");
--    if (tls_device_embree != nullptr) throw std::runtime_error("USM allocation already enabled");
-+    //if (tls_context_embree != nullptr) throw std::runtime_error("USM allocation already enabled");
-+    //if (tls_device_embree != nullptr) throw std::runtime_error("USM allocation already enabled");
-+    if (tls_context_embree != nullptr) {
-+      abort();
-+    }
-+    if (tls_device_embree != nullptr) {
-+      abort();
-+    }
-     tls_context_embree = context;
-     tls_device_embree = device;
-   }
- 
-   void disableUSMAllocEmbree()
-   {
--    if (tls_context_embree  == nullptr) throw std::runtime_error("USM allocation not enabled");
--    if (tls_device_embree  == nullptr) throw std::runtime_error("USM allocation not enabled");
-+    //if (tls_context_embree  == nullptr) throw std::runtime_error("USM allocation not enabled");
-+    //if (tls_device_embree  == nullptr) throw std::runtime_error("USM allocation not enabled");
-+    if (tls_context_embree  == nullptr) {
-+      abort();
-+    }
-+    if (tls_device_embree  == nullptr) {
-+      abort();
-+    }
-     tls_context_embree = nullptr;
-     tls_device_embree = nullptr;
-   }
-@@ -48,8 +60,14 @@ namespace embree
- 
-   void disableUSMAllocTutorial()
-   {
--    if (tls_context_tutorial  == nullptr) throw std::runtime_error("USM allocation not enabled");
--    if (tls_device_tutorial  == nullptr) throw std::runtime_error("USM allocation not enabled");
-+    //if (tls_context_tutorial  == nullptr) throw std::runtime_error("USM allocation not enabled");
-+    //if (tls_device_tutorial  == nullptr) throw std::runtime_error("USM allocation not enabled");
-+    if (tls_context_tutorial  == nullptr) {
-+      abort();
-+    }
-+    if (tls_device_tutorial  == nullptr) {
-+      abort();
-+    }
-     
-     tls_context_tutorial = nullptr;
-     tls_device_tutorial = nullptr;
-@@ -64,8 +82,11 @@ namespace embree
- 
+@@ -20,7 +20,7 @@ namespace embree
      assert((align & (align-1)) == 0);
      void* ptr = _mm_malloc(size,align);
--    if (size != 0 && ptr == nullptr)
+     if (size != 0 && ptr == nullptr)
 -      throw std::bad_alloc();
-+    //if (size != 0 && ptr == nullptr)
-+    //  throw std::bad_alloc();
-+    if (size != 0 && ptr == nullptr) {
-+      abort();
-+    }
++      abort(); //throw std::bad_alloc();
      return ptr;
    }
  
-@@ -94,8 +115,11 @@ namespace embree
-     else
+@@ -50,7 +50,7 @@ namespace embree
        ptr = sycl::aligned_alloc_shared(align,size,*device,*context);
-       
--    if (size != 0 && ptr == nullptr)
+ 
+     if (size != 0 && ptr == nullptr)
 -      throw std::bad_alloc();
-+    //if (size != 0 && ptr == nullptr)
-+    //  throw std::bad_alloc();
-+    if (size != 0 && ptr == nullptr) {
-+      abort();
-+    }
++      abort(); //throw std::bad_alloc();
  
      return ptr;
    }
-@@ -241,7 +265,10 @@ namespace embree
+@@ -83,7 +83,7 @@ namespace embree
+     }
+ 
+     if (size != 0 && ptr == nullptr)
+-      throw std::bad_alloc();
++      abort(); //throw std::bad_alloc();
+ 
+     return ptr;
+   }
+@@ -199,7 +199,7 @@ namespace embree
      /* fall back to 4k pages */
      int flags = MEM_COMMIT | MEM_RESERVE;
      char* ptr = (char*) VirtualAlloc(nullptr,bytes,flags,PAGE_READWRITE);
 -    if (ptr == nullptr) throw std::bad_alloc();
-+    //if (ptr == nullptr) throw std::bad_alloc();
-+    if (ptr == nullptr) {
-+      abort();
-+    }
++    if (ptr == nullptr) abort(); //throw std::bad_alloc();
      hugepages = false;
      return ptr;
    }
-@@ -257,8 +284,11 @@ namespace embree
-     if (bytesNew >= bytesOld)
+@@ -216,7 +216,7 @@ namespace embree
        return bytesOld;
  
--    if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT))
+     if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT))
 -      throw std::bad_alloc();
-+    //if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT))
-+    //  throw std::bad_alloc();
-+    if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT)) {
-+      abort();
-+    }
++      abort(); //throw std::bad_alloc();
  
      return bytesNew;
    }
-@@ -268,8 +298,11 @@ namespace embree
-     if (bytes == 0) 
+@@ -227,7 +227,7 @@ namespace embree
        return;
  
--    if (!VirtualFree(ptr,0,MEM_RELEASE))
+     if (!VirtualFree(ptr,0,MEM_RELEASE))
 -      throw std::bad_alloc();
-+    //if (!VirtualFree(ptr,0,MEM_RELEASE))
-+    //  throw std::bad_alloc();
-+    if (!VirtualFree(ptr,0,MEM_RELEASE)) {
-+      abort();
-+    }
++      abort(); //throw std::bad_alloc();
    }
  
    void os_advise(void *ptr, size_t bytes)
-@@ -373,7 +406,10 @@ namespace embree
+@@ -331,7 +331,7 @@ namespace embree
  
      /* fallback to 4k pages */
      void* ptr = (char*) mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
 -    if (ptr == MAP_FAILED) throw std::bad_alloc();
-+    //if (ptr == MAP_FAILED) throw std::bad_alloc();
-+    if (ptr == MAP_FAILED) {
-+      abort();
-+    }
++    if (ptr == MAP_FAILED) abort(); //throw std::bad_alloc();
      hugepages = false;
  
      /* advise huge page hint for THP */
-@@ -389,8 +425,11 @@ namespace embree
-     if (bytesNew >= bytesOld)
+@@ -348,7 +348,7 @@ namespace embree
        return bytesOld;
  
--    if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1)
+     if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1)
 -      throw std::bad_alloc();
-+    //if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1)
-+    //  throw std::bad_alloc();
-+    if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1) {
-+      abort();
-+    }
++      abort(); //throw std::bad_alloc();
  
      return bytesNew;
    }
-@@ -403,8 +442,11 @@ namespace embree
-     /* for hugepages we need to also align the size */
+@@ -362,7 +362,7 @@ namespace embree
      const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K;
      bytes = (bytes+pageSize-1) & ~(pageSize-1);
--    if (munmap(ptr,bytes) == -1)
+     if (munmap(ptr,bytes) == -1)
 -      throw std::bad_alloc();
-+    //if (munmap(ptr,bytes) == -1)
-+    //  throw std::bad_alloc();
-+    if (munmap(ptr,bytes) == -1) {
-+      abort();
-+    }
++      abort(); //throw std::bad_alloc();
    }
  
    /* hint for transparent huge pages (THP) */
 diff --git a/thirdparty/embree/common/sys/alloc.h b/thirdparty/embree/common/sys/alloc.h
-index e19c2c221a..e2c942049a 100644
+index 5c63d0bfaf..1f6f230ed3 100644
 --- a/thirdparty/embree/common/sys/alloc.h
 +++ b/thirdparty/embree/common/sys/alloc.h
-@@ -160,7 +160,8 @@ namespace embree
+@@ -131,7 +131,7 @@ namespace embree
        typedef std::ptrdiff_t difference_type;
  
        __forceinline pointer allocate( size_type n ) {
 -        throw std::runtime_error("no allocation supported");
-+        //throw std::runtime_error("no allocation supported");
-+        abort();
++        abort(); //throw std::runtime_error("no allocation supported");
        }
  
        __forceinline void deallocate( pointer p, size_type n ) {
 diff --git a/thirdparty/embree/common/sys/platform.h b/thirdparty/embree/common/sys/platform.h
-index 6dc0cf3318..9f08cd1516 100644
+index 6dc0cf3318..1e5b02550e 100644
 --- a/thirdparty/embree/common/sys/platform.h
 +++ b/thirdparty/embree/common/sys/platform.h
-@@ -213,11 +213,15 @@
- #define UPRINT4(x,y,z,w) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << ", " << STRING(w) << " = " << (w) << embree_endl
+@@ -214,10 +214,11 @@
  
  #if defined(DEBUG) // only report file and line in debug mode
-+  //#define THROW_RUNTIME_ERROR(str) \
-+  //  throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
    #define THROW_RUNTIME_ERROR(str) \
 -    throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
 +    printf("%s (%d): %s", __FILE__, __LINE__, std::string(str).c_str()), abort();
++    //throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
  #else
-+  //#define THROW_RUNTIME_ERROR(str) \
-+  //  throw std::runtime_error(str);
    #define THROW_RUNTIME_ERROR(str) \
 -    throw std::runtime_error(str);
-+    abort();
++    abort(); //throw std::runtime_error(str);
  #endif
  
  #define FATAL(x)   THROW_RUNTIME_ERROR(x)
@@ -261,122 +176,81 @@ index 83ead95122..e89ae04f8b 100644
        add_dependencies(-1);
      }
 diff --git a/thirdparty/embree/common/tasking/taskschedulerinternal.h b/thirdparty/embree/common/tasking/taskschedulerinternal.h
-index b01bebf7c3..4a04323b80 100644
+index b01bebf7c3..d4e0c7386b 100644
 --- a/thirdparty/embree/common/tasking/taskschedulerinternal.h
 +++ b/thirdparty/embree/common/tasking/taskschedulerinternal.h
-@@ -130,8 +130,11 @@ namespace embree
-       __forceinline void* alloc(size_t bytes, size_t align = 64)
+@@ -131,7 +131,7 @@ namespace embree
        {
          size_t ofs = bytes + ((align - stackPtr) & (align-1));
--        if (stackPtr + ofs > CLOSURE_STACK_SIZE)
+         if (stackPtr + ofs > CLOSURE_STACK_SIZE)
 -          throw std::runtime_error("closure stack overflow");
-+        //if (stackPtr + ofs > CLOSURE_STACK_SIZE)
-+        //  throw std::runtime_error("closure stack overflow");
-+        if (stackPtr + ofs > CLOSURE_STACK_SIZE) {
-+          abort();
-+        }
++          abort(); //throw std::runtime_error("closure stack overflow");
          stackPtr += ofs;
          return &stack[stackPtr-bytes];
        }
-@@ -139,8 +142,11 @@ namespace embree
-       template<typename Closure>
+@@ -140,7 +140,7 @@ namespace embree
        __forceinline void push_right(Thread& thread, const size_t size, const Closure& closure, TaskGroupContext* context)
        {
--        if (right >= TASK_STACK_SIZE)
+         if (right >= TASK_STACK_SIZE)
 -          throw std::runtime_error("task stack overflow");
-+        //if (right >= TASK_STACK_SIZE)
-+        //  throw std::runtime_error("task stack overflow");
-+        if (right >= TASK_STACK_SIZE) {
-+          abort();
-+        }
++          abort(); //throw std::runtime_error("task stack overflow");
  
  	/* allocate new task on right side of stack */
          size_t oldStackPtr = stackPtr;
 diff --git a/thirdparty/embree/kernels/bvh/bvh_statistics.cpp b/thirdparty/embree/kernels/bvh/bvh_statistics.cpp
-index 40f9043736..7ea9736c5c 100644
+index 40f9043736..f3b93e5925 100644
 --- a/thirdparty/embree/kernels/bvh/bvh_statistics.cpp
 +++ b/thirdparty/embree/kernels/bvh/bvh_statistics.cpp
-@@ -150,7 +150,8 @@ namespace embree
+@@ -150,7 +150,7 @@ namespace embree
        }
      }
      else {
 -      throw std::runtime_error("not supported node type in bvh_statistics");
-+      //throw std::runtime_error("not supported node type in bvh_statistics");
-+      abort();
++      abort(); //throw std::runtime_error("not supported node type in bvh_statistics");
      }
      return s;
    } 
 diff --git a/thirdparty/embree/kernels/common/alloc.h b/thirdparty/embree/kernels/common/alloc.h
-index 2bd292de4d..8ac22e53ec 100644
+index 2bd292de4d..10f629a244 100644
 --- a/thirdparty/embree/kernels/common/alloc.h
 +++ b/thirdparty/embree/kernels/common/alloc.h
-@@ -189,8 +189,11 @@ namespace embree
-       , atype(osAllocation ? EMBREE_OS_MALLOC : ALIGNED_MALLOC)
+@@ -190,7 +190,7 @@ namespace embree
        , primrefarray(device,0)
      {
--      if (osAllocation && useUSM)
+       if (osAllocation && useUSM)
 -        throw std::runtime_error("USM allocation cannot be combined with OS allocation.");
-+      //if (osAllocation && useUSM)
-+      //  throw std::runtime_error("USM allocation cannot be combined with OS allocation.");
-+      if (osAllocation && useUSM) {
-+        abort();
-+      }
++        abort(); //throw std::runtime_error("USM allocation cannot be combined with OS allocation.");
  
        for (size_t i=0; i<MAX_THREAD_USED_BLOCK_SLOTS; i++)
        {
-@@ -502,8 +505,11 @@ namespace embree
-         Block* myUsedBlocks = threadUsedBlocks[slot];
+@@ -503,7 +503,7 @@ namespace embree
          if (myUsedBlocks) {
            void* ptr = myUsedBlocks->malloc(device,bytes,align,partial);
--          if (ptr == nullptr && !blockAllocation)
+           if (ptr == nullptr && !blockAllocation)
 -            throw std::bad_alloc();
-+          //if (ptr == nullptr && !blockAllocation)
-+          //  throw std::bad_alloc();
-+          if (ptr == nullptr && !blockAllocation) {
-+            abort();
-+          }
++            abort(); //throw std::bad_alloc();
            if (ptr) return ptr;
          }
  
 diff --git a/thirdparty/embree/kernels/common/rtcore.cpp b/thirdparty/embree/kernels/common/rtcore.cpp
-index 8dc5d7045b..e19c243bf6 100644
+index 8da5c9d86c..a34eb2a0c4 100644
 --- a/thirdparty/embree/kernels/common/rtcore.cpp
 +++ b/thirdparty/embree/kernels/common/rtcore.cpp
-@@ -257,10 +257,15 @@ RTC_NAMESPACE_BEGIN;
-     RTC_TRACE(rtcSetSceneBuildQuality);
-     RTC_VERIFY_HANDLE(hscene);
-     RTC_ENTER_DEVICE(hscene);
-+    //if (quality != RTC_BUILD_QUALITY_LOW &&
-+    //    quality != RTC_BUILD_QUALITY_MEDIUM &&
-+    //    quality != RTC_BUILD_QUALITY_HIGH)
-+    //  throw std::runtime_error("invalid build quality");
+@@ -358,7 +358,7 @@ RTC_NAMESPACE_BEGIN;
      if (quality != RTC_BUILD_QUALITY_LOW &&
          quality != RTC_BUILD_QUALITY_MEDIUM &&
--        quality != RTC_BUILD_QUALITY_HIGH)
+         quality != RTC_BUILD_QUALITY_HIGH)
 -      throw std::runtime_error("invalid build quality");
-+        quality != RTC_BUILD_QUALITY_HIGH) {
-+      abort();
-+    }
++      abort(); //throw std::runtime_error("invalid build quality");
      scene->setBuildQuality(quality);
      RTC_CATCH_END2(scene);
    }
-@@ -1563,11 +1568,17 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
-     RTC_TRACE(rtcSetGeometryBuildQuality);
-     RTC_VERIFY_HANDLE(hgeometry);
-     RTC_ENTER_DEVICE(hgeometry);
-+    //if (quality != RTC_BUILD_QUALITY_LOW &&
-+    //    quality != RTC_BUILD_QUALITY_MEDIUM &&
-+    //    quality != RTC_BUILD_QUALITY_HIGH &&
-+    //    quality != RTC_BUILD_QUALITY_REFIT)
-+    //  throw std::runtime_error("invalid build quality");
-     if (quality != RTC_BUILD_QUALITY_LOW &&
+@@ -1831,7 +1831,7 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
          quality != RTC_BUILD_QUALITY_MEDIUM &&
          quality != RTC_BUILD_QUALITY_HIGH &&
--        quality != RTC_BUILD_QUALITY_REFIT)
+         quality != RTC_BUILD_QUALITY_REFIT)
 -      throw std::runtime_error("invalid build quality");
-+        quality != RTC_BUILD_QUALITY_REFIT) {
-+      abort();
-+    }
++      abort(); //throw std::runtime_error("invalid build quality");
      geometry->setBuildQuality(quality);
      RTC_CATCH_END2(geometry);
    }
@@ -421,10 +295,10 @@ index 73a061de11..cd7a6f4395 100644
  
  #define RTC_BUILD_ARGUMENTS_HAS(settings,member) \
 diff --git a/thirdparty/embree/kernels/common/scene.cpp b/thirdparty/embree/kernels/common/scene.cpp
-index fda8dd938a..706cc512df 100644
+index 84a84f8c69..3bfcebd298 100644
 --- a/thirdparty/embree/kernels/common/scene.cpp
 +++ b/thirdparty/embree/kernels/common/scene.cpp
-@@ -894,16 +894,16 @@ namespace embree
+@@ -962,16 +962,16 @@ namespace embree
      }
  
      /* initiate build */
@@ -450,10 +324,10 @@ index fda8dd938a..706cc512df 100644
  
  #endif
 diff --git a/thirdparty/embree/kernels/common/state.cpp b/thirdparty/embree/kernels/common/state.cpp
-index 4e3ab6ddfb..8e83c95bd7 100644
+index cce5eafce1..509dbc7120 100644
 --- a/thirdparty/embree/kernels/common/state.cpp
 +++ b/thirdparty/embree/kernels/common/state.cpp
-@@ -194,13 +194,13 @@ namespace embree
+@@ -199,13 +199,13 @@ namespace embree
    bool State::parseFile(const FileName& fileName)
    { 
      Ref<Stream<int> > file;

+ 4 - 4
thirdparty/embree/patches/0002-godot-config.patch

@@ -1,5 +1,5 @@
 diff --git a/thirdparty/embree/include/embree4/rtcore_config.h b/thirdparty/embree/include/embree4/rtcore_config.h
-index 8abd6954c3..cb3a8678a7 100644
+index eba966989e..91cf129dc6 100644
 --- a/thirdparty/embree/include/embree4/rtcore_config.h
 +++ b/thirdparty/embree/include/embree4/rtcore_config.h
 @@ -4,7 +4,7 @@
@@ -12,8 +12,8 @@ index 8abd6954c3..cb3a8678a7 100644
  
  #define RTC_VERSION_MAJOR 4
 @@ -13,28 +13,28 @@
- #define RTC_VERSION 40301
- #define RTC_VERSION_STRING "4.3.1"
+ #define RTC_VERSION 40400
+ #define RTC_VERSION_STRING "4.4.0"
  
 -#define RTC_MAX_INSTANCE_LEVEL_COUNT @EMBREE_MAX_INSTANCE_LEVEL_COUNT@
 +#define RTC_MAX_INSTANCE_LEVEL_COUNT 1
@@ -47,8 +47,8 @@ index 8abd6954c3..cb3a8678a7 100644
 -#  define RTC_NAMESPACE_USE using namespace @EMBREE_API_NAMESPACE@;
 +#  define RTC_NAMESPACE_USE using namespace;
  #  define RTC_API_EXTERN_C
+ #  define RTC_API_EXTERN_CPP
  #  undef EMBREE_API_NAMESPACE
- #else
 diff --git a/thirdparty/embree/kernels/config.h b/thirdparty/embree/kernels/config.h
 index 1669c4af72..5979b543c9 100644
 --- a/thirdparty/embree/kernels/config.h

+ 4 - 4
thirdparty/embree/patches/0003-emscripten-nthreads.patch

@@ -1,8 +1,8 @@
 diff --git a/thirdparty/embree/common/sys/sysinfo.cpp b/thirdparty/embree/common/sys/sysinfo.cpp
-index 61256b34f2..4583e49b1c 100644
+index 5f375cd95c..3c23fb1610 100644
 --- a/thirdparty/embree/common/sys/sysinfo.cpp
 +++ b/thirdparty/embree/common/sys/sysinfo.cpp
-@@ -646,6 +646,10 @@ namespace embree
+@@ -659,6 +659,10 @@ namespace embree
  
  #if defined(__EMSCRIPTEN__)
  #include <emscripten.h>
@@ -13,7 +13,7 @@ index 61256b34f2..4583e49b1c 100644
  #endif
  
  namespace embree
-@@ -659,6 +663,8 @@ namespace embree
+@@ -672,6 +676,8 @@ namespace embree
      nThreads = sysconf(_SC_NPROCESSORS_ONLN); // does not work in Linux LXC container
      assert(nThreads);
  #elif defined(__EMSCRIPTEN__)
@@ -22,7 +22,7 @@ index 61256b34f2..4583e49b1c 100644
      // WebAssembly supports pthreads, but not pthread_getaffinity_np. Get the number of logical
      // threads from the browser or Node.js using JavaScript.
      nThreads = MAIN_THREAD_EM_ASM_INT({
-@@ -674,6 +680,7 @@ namespace embree
+@@ -687,6 +693,7 @@ namespace embree
              return 1;
          }
      });

+ 0 - 20
thirdparty/embree/patches/0006-include-order-dllexport.patch

@@ -1,20 +0,0 @@
-diff --git a/thirdparty/embree/common/tasking/taskschedulerinternal.h b/thirdparty/embree/common/tasking/taskschedulerinternal.h
-index e72d3b72ba..8e3befb739 100644
---- a/thirdparty/embree/common/tasking/taskschedulerinternal.h
-+++ b/thirdparty/embree/common/tasking/taskschedulerinternal.h
-@@ -3,6 +3,7 @@
- 
- #pragma once
- 
-+#include "../../include/embree4/rtcore.h"
- #include "../sys/platform.h"
- #include "../sys/alloc.h"
- #include "../sys/barrier.h"
-@@ -12,7 +13,6 @@
- #include "../sys/ref.h"
- #include "../sys/atomic.h"
- #include "../math/range.h"
--#include "../../include/embree4/rtcore.h"
- 
- #include <list>
-