Преглед на файлове

added alternative versions for kernel launches and resource acquiring that do not return an event. This improves the performance.

shamanDevel преди 9 години
родител
ревизия
54113f35e0

+ 37 - 1
jme3-core/src/main/java/com/jme3/opencl/Buffer.java

@@ -377,6 +377,29 @@ public abstract class Buffer extends AbstractOpenCLObject {
      * @return the event object
      */
     public abstract Event acquireBufferForSharingAsync(CommandQueue queue);
+    
+    /**
+     * Aquires this buffer object for using. Only call this method if this buffer
+     * represents a shared object from OpenGL, created with e.g.
+     * {@link Context#bindVertexBuffer(com.jme3.scene.VertexBuffer, com.jme3.opencl.MemoryAccess) }.
+     * This method must be called before the buffer is used. After the work is
+     * done, the buffer must be released by calling
+     * {@link #releaseBufferForSharingAsync(com.jme3.opencl.CommandQueue) }
+     * so that OpenGL can use the VertexBuffer again.
+     * 
+     * The generated event object is directly released.
+     * This brings a performance improvement when the resource is e.g. directly
+     * used by a kernel afterwards on the same queue (this implicitly waits for
+     * this action). If you need the event, use 
+     * {@link #acquireBufferForSharingAsync(com.jme3.opencl.CommandQueue) } instead.
+     * 
+     * @param queue the command queue
+     */
+    public void acquireBufferForSharingNoEvent(CommandQueue queue) {
+        //default implementation, overwrite for better performance
+        acquireBufferForSharingAsync(queue).release();
+    }
+    
     /**
      * Releases a shared buffer object.
      * Call this method after the buffer object was acquired by
@@ -387,5 +410,18 @@ public abstract class Buffer extends AbstractOpenCLObject {
      */
     public abstract Event releaseBufferForSharingAsync(CommandQueue queue);
     
-    //TODO: add variants of the above two methods that don't create the event object, but release the event immediately
+    /**
+     * Releases a shared buffer object.
+     * Call this method after the buffer object was acquired by
+     * {@link #acquireBufferForSharingAsync(com.jme3.opencl.CommandQueue) }
+     * to hand the control back to OpenGL.
+     * The generated event object is directly released, resulting in 
+     * performance improvements.
+     * @param queue the command queue
+     */
+    public void releaseBufferForSharingNoEvent(CommandQueue queue) {
+        //default implementation, overwrite for better performance
+        releaseBufferForSharingAsync(queue).release();
+    }
+    
 }

+ 38 - 0
jme3-core/src/main/java/com/jme3/opencl/Image.java

@@ -485,6 +485,30 @@ memory layout in which channels are stored in the image.
      * @return the event object
      */
     public abstract Event acquireImageForSharingAsync(CommandQueue queue);
+    
+    /**
+     * Aquires this image object for using. Only call this method if this image
+     * represents a shared object from OpenGL, created with e.g.
+     * {@link Context#bindImage(com.jme3.texture.Image, com.jme3.texture.Texture.Type, int, com.jme3.opencl.MemoryAccess) }
+     * or variations.
+     * This method must be called before the image is used. After the work is
+     * done, the image must be released by calling
+     * {@link #releaseImageForSharingAsync(com.jme3.opencl.CommandQueue)  }
+     * so that OpenGL can use the image/texture/renderbuffer again.
+     * 
+     * The generated event object is directly released.
+     * This brings a performance improvement when the resource is e.g. directly
+     * used by a kernel afterwards on the same queue (this implicitly waits for
+     * this action). If you need the event, use 
+     * {@link #acquireImageForSharingAsync(com.jme3.opencl.CommandQueue) }.
+     * 
+     * @param queue the command queue
+     */
+    public void acquireImageForSharingNoEvent(CommandQueue queue) {
+        //Default implementation, overwrite for performance
+        acquireImageForSharingAsync(queue).release();
+    }
+    
     /**
      * Releases a shared image object.
      * Call this method after the image object was acquired by
@@ -495,5 +519,19 @@ memory layout in which channels are stored in the image.
      */
     public abstract Event releaseImageForSharingAsync(CommandQueue queue);
     
+    /**
+     * Releases a shared image object.
+     * Call this method after the image object was acquired by
+     * {@link #acquireImageForSharingAsync(com.jme3.opencl.CommandQueue) }
+     * to hand the control back to OpenGL.
+     * The generated event object is directly released, resulting in 
+     * performance improvements.
+     * @param queue the command queue
+     */
+    public void releaseImageForSharingNoEvent(CommandQueue queue) {
+        //default implementation, overwrite it for performance improvements
+        releaseImageForSharingAsync(queue).release();
+    }
+    
     //TODO: add variants of the above two methods that don't create the event object, but release the event immediately
 }

+ 60 - 2
jme3-core/src/main/java/com/jme3/opencl/Kernel.java

@@ -299,6 +299,9 @@ public abstract class Kernel extends AbstractOpenCLObject {
     /**
      * Launches the kernel with the current global work size, work group size
      * and arguments.
+     * If the returned event object is not needed and would otherwise be
+     * released immediately, {@link #RunNoEvent(com.jme3.opencl.CommandQueue) }
+     * might bring a better performance.
      * @param queue the command queue
      * @return an event object indicating when the kernel is finished
      * @see #setGlobalWorkSize(com.jme3.opencl.Kernel.WorkSize) 
@@ -306,6 +309,22 @@ public abstract class Kernel extends AbstractOpenCLObject {
      * @see #setArg(int, java.lang.Object) 
      */
     public abstract Event Run(CommandQueue queue);
+    
+    /**
+     * Launches the kernel with the current global work size, work group size
+     * and arguments without returning an event object.
+     * The generated event is directly released. Therefore, the performance
+     * is better, but there is no way to detect when the kernel execution
+     * has finished. For this purpose, use {@link #Run(com.jme3.opencl.CommandQueue) }.
+     * @param queue the command queue
+     * @see #setGlobalWorkSize(com.jme3.opencl.Kernel.WorkSize) 
+     * @see #setWorkGroupSize(com.jme3.opencl.Kernel.WorkSize) 
+     * @see #setArg(int, java.lang.Object) 
+     */
+    public void RunNoEvent(CommandQueue queue) {
+        //Default implementation, overwrite to not allocate the event object
+        Run(queue).release();
+    }
 
     /**
      * Sets the work sizes and arguments in one call and launches the kernel.
@@ -325,6 +344,28 @@ public abstract class Kernel extends AbstractOpenCLObject {
         setArgs(args);
         return Run(queue);
     }
+    
+    /**
+     * Sets the work sizes and arguments in one call and launches the kernel
+     * through {@link #RunNoEvent(com.jme3.opencl.CommandQueue) }.
+     * The global work size is set to the specified size. The work group
+     * size is automatically determined by the driver.
+     * Each object in the argument array is sent to the kernel by
+     * {@link #setArg(int, java.lang.Object) }.
+     * No event object is returned, so there is no way to detect when the
+     * kernel execution has finished. If the event is needed, use
+     * {@link #Run1(com.jme3.opencl.CommandQueue, com.jme3.opencl.Kernel.WorkSize, java.lang.Object...) }.
+     * @param queue the command queue
+     * @param globalWorkSize the global work size
+     * @param args the kernel arguments
+     * @see #Run2NoEvent(com.jme3.opencl.CommandQueue, com.jme3.opencl.Kernel.WorkSize, com.jme3.opencl.Kernel.WorkSize, java.lang.Object...) 
+     */
+    public void Run1NoEvent(CommandQueue queue, WorkSize globalWorkSize, Object... args) {
+        setGlobalWorkSize(globalWorkSize);
+        setWorkGroupSizeToNull();
+        setArgs(args);
+        RunNoEvent(queue);
+    }
 
     /**
      * Sets the work sizes and arguments in one call and launches the kernel.
@@ -342,8 +383,25 @@ public abstract class Kernel extends AbstractOpenCLObject {
         return Run(queue);
     }
 
-    //TODO: add variants of the above three methods that don't create the event object, but release the event immediately
-
+    /**
+     * Sets the work sizes and arguments in one call and launches the kernel
+     * through {@link #RunNoEvent(com.jme3.opencl.CommandQueue) }.
+     * Both the global work size and the work group size are set to the
+     * specified values before the launch.
+     * No event object is returned, so there is no way to detect when the
+     * kernel execution has finished. If the event is needed, use
+     * {@link #Run2(com.jme3.opencl.CommandQueue, com.jme3.opencl.Kernel.WorkSize, com.jme3.opencl.Kernel.WorkSize, java.lang.Object...) }.
+     * @param queue the command queue
+     * @param globalWorkSize the global work size
+     * @param workGroupSize the work group size
+     * @param args the kernel arguments
+     * @see #Run1NoEvent(com.jme3.opencl.CommandQueue, com.jme3.opencl.Kernel.WorkSize, java.lang.Object...) 
+     */
+    public void Run2NoEvent(CommandQueue queue, WorkSize globalWorkSize,
+            WorkSize workGroupSize, Object... args) {
+        setGlobalWorkSize(globalWorkSize);
+        setWorkGroupSize(workGroupSize);
+        setArgs(args);
+        RunNoEvent(queue);
+    }
+    
     /**
      * A placeholder for kernel arguments representing local kernel memory.
      * This defines the size of available shared memory of a {@code __shared} kernel

+ 14 - 0
jme3-core/src/main/java/com/jme3/opencl/package-info.java

@@ -89,6 +89,16 @@
  * These async calls all return {@link com.jme3.opencl.Event} objects.
  * These events can be used to check (non-blocking) if the action has completed, e.g. a memory copy
  * is finished, or to block the execution until the action has finished.
+ * <br>
+ * Some methods have the suffix {@code -NoEvent}. This means that these methods
+ * don't return an event object even if the OpenCL function would return an event.
+ * There is always an alternative version that does return an event.
+ * These methods exist to increase the performance: since all actions (like multiple kernel calls)
+ * that are sent to the same command queue are executed in order, there is no
+ * need for intermediate events. (These intermediate events would be released
+ * immediately.) Therefore, the no-event alternatives increase the performance
+ * because no additional event object has to be allocated and fewer system calls
+ * are necessary.
  * 
  * <p>
  * <b>Interoperability between OpenCL and jME3:</b><br>
@@ -142,6 +152,10 @@
  * thrown. The exception always records the error code and error name and the 
  * OpenCL function call where the error was detected. Please check the official
  * OpenCL specification for the meanings of these errors for that particular function.</li>
+ * <li>{@code UnsupportedOperationException}: the OpenCL implementation does not
+ * support some operations. This is currently only an issue for Jogamp's Jogl
+ * renderer, since Jocl only supports OpenCL 1.1. LWJGL has full support for
+ * OpenCL 1.2 and 2.0.</li>
  * </ul>
  */
 package com.jme3.opencl;

+ 3 - 3
jme3-examples/src/main/java/jme3test/opencl/TestVertexBufferSharing.java

@@ -135,15 +135,15 @@ public class TestVertexBufferSharing extends SimpleApplication {
         time += tpf;
         
         //acquire resource
-        buffer.acquireBufferForSharingAsync(clQueue).release();
+        buffer.acquireBufferForSharingNoEvent(clQueue);
         //no need to wait for the returned event, since the kernel implicitly waits for it (same command queue)
         
         //execute kernel
         float scale = (float) Math.pow(1.1, (1.0 - time%2) / 16.0);
-        kernel.Run1(clQueue, ws, buffer, scale).release();
+        kernel.Run1NoEvent(clQueue, ws, buffer, scale);
         
         //release resource
-        buffer.releaseBufferForSharingAsync(clQueue).release();
+        buffer.releaseBufferForSharingNoEvent(clQueue);
     }
 
 }

+ 4 - 4
jme3-examples/src/main/java/jme3test/opencl/TestWriteToTexture.java

@@ -136,15 +136,15 @@ public class TestWriteToTexture extends SimpleApplication implements AnalogListe
     }
     private void updateOpenCL(float tpf) {
         //acquire resource
-        texCL.acquireImageForSharingAsync(clQueue).release();
+        texCL.acquireImageForSharingNoEvent(clQueue);
         //no need to wait for the returned event, since the kernel implicitly waits for it (same command queue)
         
         //execute kernel
-        kernel.Run1(clQueue, new com.jme3.opencl.Kernel.WorkSize(settings.getWidth(), settings.getHeight()), texCL, C, 16)
-                .release();
+        Kernel.WorkSize ws = new Kernel.WorkSize(settings.getWidth(), settings.getHeight());
+        kernel.Run1NoEvent(clQueue, ws, texCL, C, 16);
         
         //release resource
-        texCL.releaseImageForSharingAsync(clQueue).release();
+        texCL.releaseImageForSharingNoEvent(clQueue);
     }
 
     @Override

+ 14 - 0
jme3-jogl/src/main/java/com/jme3/opencl/jocl/JoclBuffer.java

@@ -191,6 +191,13 @@ public class JoclBuffer extends Buffer {
         long event = Utils.pointers[0].get(0);
         return new JoclEvent(event);
     }
+    @Override
+    public void acquireBufferForSharingNoEvent(CommandQueue queue) {
+        Utils.pointers[1].rewind();
+        Utils.pointers[1].put(0, id);
+        long q = ((JoclCommandQueue)queue).id;
+        ((CLGL) cl).clEnqueueAcquireGLObjects(q, 1, Utils.pointers[1], 0, null, null);
+    }
 
     @Override
     public Event releaseBufferForSharingAsync(CommandQueue queue) {
@@ -202,6 +209,13 @@ public class JoclBuffer extends Buffer {
         long event = Utils.pointers[0].get(0);
         return new JoclEvent(event);
     }
+    @Override
+    public void releaseBufferForSharingNoEvent(CommandQueue queue) {
+        Utils.pointers[1].rewind();
+        Utils.pointers[1].put(0, id);
+        long q = ((JoclCommandQueue)queue).id;
+        ((CLGL) cl).clEnqueueReleaseGLObjects(q, 1, Utils.pointers[1], 0, null, null);
+    }
 
     private static class ReleaserImpl implements ObjectReleaser {
         private long mem;

+ 14 - 0
jme3-jogl/src/main/java/com/jme3/opencl/jocl/JoclImage.java

@@ -502,6 +502,13 @@ public class JoclImage extends Image {
         return new JoclEvent(event);
     }
     @Override
+    public void acquireImageForSharingNoEvent(CommandQueue queue) {
+        Utils.pointers[1].rewind();
+        Utils.pointers[1].put(0, id);
+        long q = ((JoclCommandQueue)queue).id;
+        ((CLGL) cl).clEnqueueAcquireGLObjects(q, 1, Utils.pointers[1], 0, null, null);
+    }
+    @Override
     public Event releaseImageForSharingAsync(CommandQueue queue) {
         Utils.pointers[0].rewind();
         Utils.pointers[1].rewind();
@@ -511,6 +518,13 @@ public class JoclImage extends Image {
         long event = Utils.pointers[0].get(0);
         return new JoclEvent(event);
     }
+    @Override
+    public void releaseImageForSharingNoEvent(CommandQueue queue) {
+        Utils.pointers[1].rewind();
+        Utils.pointers[1].put(0, id);
+        long q = ((JoclCommandQueue)queue).id;
+        ((CLGL) cl).clEnqueueReleaseGLObjects(q, 1, Utils.pointers[1], 0, null, null);
+    }
     
     private static class ReleaserImpl implements ObjectReleaser {
         private long mem;

+ 18 - 0
jme3-jogl/src/main/java/com/jme3/opencl/jocl/JoclKernel.java

@@ -239,6 +239,24 @@ public class JoclKernel extends Kernel {
         Utils.checkError(ret, "clEnqueueNDRangeKernel");
         return new JoclEvent(Utils.pointers[0].get(0));
     }
+    
+    /**
+     * Enqueues this kernel on the given queue with the currently stored
+     * global work size and work group size. The last argument of the native
+     * call (the event output) is {@code null}, so no event is read back.
+     * @param queue the command queue, cast to {@link JoclCommandQueue}
+     */
+    @Override
+    public void RunNoEvent(CommandQueue queue) {
+        //copy the global work sizes into scratch buffer 1 and reset its position
+        Utils.pointers[1].rewind();
+        Utils.pointers[1].put(globalWorkSize.getSizes(), 0, globalWorkSize.getSizes().length);
+        Utils.pointers[1].position(0);
+        //p2 stays null unless the first work group size entry is positive
+        //NOTE(review): presumably null lets the driver pick the work group size - confirm
+        PointerBuffer p2 = null;
+        if (workGroupSize.getSizes()[0] > 0) {
+            p2 = Utils.pointers[2].rewind();
+            p2.put(workGroupSize.getSizes(), 0, workGroupSize.getSizes().length);
+            p2.position(0);
+        }
+        long q = ((JoclCommandQueue) queue).id;
+        //no global offset, no wait list, no event output
+        int ret = cl.clEnqueueNDRangeKernel(q, kernel,
+			globalWorkSize.getDimension(), null, Utils.pointers[1],
+			p2, 0, null, null);
+        Utils.checkError(ret, "clEnqueueNDRangeKernel");
+    }
 
     private static class ReleaserImpl implements ObjectReleaser {
         private long kernel;

+ 12 - 0
jme3-lwjgl/src/main/java/com/jme3/opencl/lwjgl/LwjglBuffer.java

@@ -194,6 +194,12 @@ public class LwjglBuffer extends Buffer {
         long event = Utils.pointerBuffers[0].get(0);
         return new LwjglEvent(q.getCLEvent(event));
     }
+    /**
+     * Enqueues the acquire operation for this shared buffer on the given
+     * queue. Both trailing arguments of the native call (wait list and event
+     * output) are {@code null}, so no event is requested. The status code is
+     * passed to {@code Utils.checkError}.
+     * @param queue the command queue, cast to {@link LwjglCommandQueue}
+     */
+    @Override
+    public void acquireBufferForSharingNoEvent(CommandQueue queue) {
+        CLCommandQueue clQueue = ((LwjglCommandQueue) queue).getQueue();
+        Utils.checkError(
+                CL10GL.clEnqueueAcquireGLObjects(clQueue, buffer, null, null),
+                "clEnqueueAcquireGLObjects");
+    }
 
     @Override
     public Event releaseBufferForSharingAsync(CommandQueue queue) {
@@ -204,6 +210,12 @@ public class LwjglBuffer extends Buffer {
         long event = Utils.pointerBuffers[0].get(0);
         return new LwjglEvent(q.getCLEvent(event));
     }
+    /**
+     * Enqueues the release operation for this shared buffer on the given
+     * queue. Both trailing arguments of the native call (wait list and event
+     * output) are {@code null}, so no event is requested. The status code is
+     * passed to {@code Utils.checkError}.
+     * @param queue the command queue, cast to {@link LwjglCommandQueue}
+     */
+    @Override
+    public void releaseBufferForSharingNoEvent(CommandQueue queue) {
+        CLCommandQueue clQueue = ((LwjglCommandQueue) queue).getQueue();
+        Utils.checkError(
+                CL10GL.clEnqueueReleaseGLObjects(clQueue, buffer, null, null),
+                "clEnqueueReleaseGLObjects");
+    }
 
     private static class ReleaserImpl implements ObjectReleaser {
         private CLMem mem;

+ 13 - 0
jme3-lwjgl/src/main/java/com/jme3/opencl/lwjgl/LwjglImage.java

@@ -534,6 +534,13 @@ public class LwjglImage extends Image {
         long event = Utils.pointerBuffers[0].get(0);
         return new LwjglEvent(q.getCLEvent(event));
     }
+    /**
+     * Enqueues the acquire operation for this shared image on the given
+     * queue. Both trailing arguments of the native call (wait list and event
+     * output) are {@code null}, so no event is requested. The status code is
+     * passed to {@code Utils.checkError}.
+     * @param queue the command queue, cast to {@link LwjglCommandQueue}
+     */
+    @Override
+    public void acquireImageForSharingNoEvent(CommandQueue queue) {
+        CLCommandQueue clQueue = ((LwjglCommandQueue) queue).getQueue();
+        Utils.checkError(
+                CL10GL.clEnqueueAcquireGLObjects(clQueue, image, null, null),
+                "clEnqueueAcquireGLObjects");
+    }
+    @Override
     public Event releaseImageForSharingAsync(CommandQueue queue) {
         Utils.pointerBuffers[0].rewind();
         CLCommandQueue q = ((LwjglCommandQueue) queue).getQueue();
@@ -542,6 +549,12 @@ public class LwjglImage extends Image {
         long event = Utils.pointerBuffers[0].get(0);
         return new LwjglEvent(q.getCLEvent(event));
     }
+    /**
+     * Enqueues the release operation for this shared image on the given
+     * queue. Both trailing arguments of the native call (wait list and event
+     * output) are {@code null}, so no event is requested. The status code is
+     * passed to {@code Utils.checkError}.
+     * @param queue the command queue, cast to {@link LwjglCommandQueue}
+     */
+    @Override
+    public void releaseImageForSharingNoEvent(CommandQueue queue) {
+        CLCommandQueue clQueue = ((LwjglCommandQueue) queue).getQueue();
+        Utils.checkError(
+                CL10GL.clEnqueueReleaseGLObjects(clQueue, image, null, null),
+                "clEnqueueReleaseGLObjects");
+    }
     
     private static class ReleaserImpl implements ObjectReleaser {
         private CLMem mem;

+ 17 - 0
jme3-lwjgl/src/main/java/com/jme3/opencl/lwjgl/LwjglKernel.java

@@ -223,6 +223,23 @@ public class LwjglKernel extends Kernel {
         Utils.checkError(ret, "clEnqueueNDRangeKernel");
         return new LwjglEvent(q.getCLEvent(Utils.pointerBuffers[0].get(0)));
     }
+    /**
+     * Enqueues this kernel on the given queue with the currently stored
+     * global work size and work group size. The two trailing arguments of
+     * the native call (wait list and event output) are {@code null}, so no
+     * event is read back.
+     * @param queue the command queue, cast to {@link LwjglCommandQueue}
+     */
+    @Override
+    public void RunNoEvent(CommandQueue queue) {
+        //copy the global work sizes into scratch buffer 1 and reset its position
+        Utils.pointerBuffers[1].rewind();
+        Utils.pointerBuffers[1].put(globalWorkSize.getSizes());
+        Utils.pointerBuffers[1].position(0);
+        //p2 stays null unless the first work group size entry is positive
+        //NOTE(review): presumably null lets the driver pick the work group size - confirm
+        PointerBuffer p2 = null;
+        if (workGroupSize.getSizes()[0] > 0) {
+            p2 = Utils.pointerBuffers[2].rewind();
+            p2.put(workGroupSize.getSizes());
+            p2.position(0);
+        }
+        CLCommandQueue q = ((LwjglCommandQueue) queue).getQueue();
+        //no global offset, no wait list, no event output
+        int ret = CL10.clEnqueueNDRangeKernel(q, kernel,
+			globalWorkSize.getDimension(), null, Utils.pointerBuffers[1],
+			p2, null, null);
+        Utils.checkError(ret, "clEnqueueNDRangeKernel");
+    }
 
     @Override
     public ObjectReleaser getReleaser() {