diff options
-rw-r--r-- | src/com/jogamp/opencl/CLKernel.java | 22 | ++++++++++++++++++++++
-rw-r--r-- | test/com/jogamp/opencl/CLProgramTest.java | 46 | ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 68 insertions, 0 deletions
diff --git a/src/com/jogamp/opencl/CLKernel.java b/src/com/jogamp/opencl/CLKernel.java index c3031ae..4eeb5af 100644 --- a/src/com/jogamp/opencl/CLKernel.java +++ b/src/com/jogamp/opencl/CLKernel.java @@ -315,6 +315,7 @@ public class CLKernel extends CLObjectResource implements Cloneable { * qualifier and whose size is specified with clSetKernelArg. * If the local memory size, for any pointer argument to the kernel declared with * the <code>__local</code> address qualifier, is not specified, its size is assumed to be 0. + * @version 1.0 */ public long getLocalMemorySize(final CLDevice device) { return getWorkGroupInfo(device, CL_KERNEL_LOCAL_MEM_SIZE); @@ -326,6 +327,7 @@ public class CLKernel extends CLObjectResource implements Cloneable { * that can be used to execute a kernel on a specific device given by device. * The OpenCL implementation uses the resource requirements of the kernel * (register usage etc.) to determine what this work-group size should be. + * @version 1.0 */ public long getWorkGroupSize(final CLDevice device) { return getWorkGroupInfo(device, CL_KERNEL_WORK_GROUP_SIZE); @@ -335,6 +337,7 @@ public class CLKernel extends CLObjectResource implements Cloneable { * Returns the work-group size specified by the <code>__attribute__((reqd_work_group_size(X, Y, Z)))</code> qualifier in kernel sources. * If the work-group size is not specified using the above attribute qualifier <code>new long[]{(0, 0, 0)}</code> is returned. * The returned array has always three elements. + * @version 1.0 */ public long[] getCompileWorkGroupSize(final CLDevice device) { final int ret = binding.clGetKernelWorkGroupInfo(ID, device.ID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, (is32Bit()?4:8)*3, buffer, null); @@ -349,6 +352,25 @@ public class CLKernel extends CLObjectResource implements Cloneable { } } + /** + * Returns the preferred multiple of workgroup size to use for kernel launch. 
This is only a performance hint; enqueueing + * with other sizes will still work, unless the size is more than the maximum allowed. + * @version 1.1 + */ + public long getPreferredWorkGroupSizeMultiple(final CLDevice device) { + return getWorkGroupInfo(device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE); + } + + /** + * Returns the number of bytes of private memory used by each work item in the kernel. + * This includes private memory declared with the <code>__private</code> qualifier, as + * well as other private memory used by the implementation. + * @version 1.1 + */ + public long getPrivateMemSize(final CLDevice device) { + return getWorkGroupInfo(device, CL_KERNEL_PRIVATE_MEM_SIZE); + } + private long getWorkGroupInfo(final CLDevice device, final int flag) { final int ret = binding.clGetKernelWorkGroupInfo(ID, device.ID, flag, 8, buffer, null); if(ret != CL_SUCCESS) { diff --git a/test/com/jogamp/opencl/CLProgramTest.java b/test/com/jogamp/opencl/CLProgramTest.java index ae09d2c..56289e5 100644 --- a/test/com/jogamp/opencl/CLProgramTest.java +++ b/test/com/jogamp/opencl/CLProgramTest.java @@ -31,6 +31,7 @@ package com.jogamp.opencl; import com.jogamp.opencl.test.util.UITestCase; import com.jogamp.opencl.util.CLBuildConfiguration; import com.jogamp.opencl.util.CLProgramConfiguration; +import com.jogamp.common.nio.Buffers; import com.jogamp.opencl.CLProgram.Status; import com.jogamp.opencl.util.CLBuildListener; import com.jogamp.opencl.llb.CL; @@ -41,6 +42,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; +import java.nio.ByteBuffer; import java.util.Map; import java.util.concurrent.CountDownLatch; @@ -52,7 +54,11 @@ import org.junit.runners.MethodSorters; import static org.junit.Assert.*; import static java.lang.System.*; +import static com.jogamp.common.os.Platform.is32Bit; +import static com.jogamp.opencl.CLException.newException; import static 
com.jogamp.opencl.CLProgram.CompilerOptions.*; +import static com.jogamp.opencl.llb.CL12.CL_KERNEL_GLOBAL_WORK_SIZE; +import static com.jogamp.opencl.llb.CL.CL_SUCCESS; /** * @@ -370,6 +376,46 @@ public class CLProgramTest extends UITestCase { } + /** + * Test of getting new kernel work group information, including those from OpenCL versions newer than 1.1. + */ + @Test + public void test22KerneWorkGrouplInfo() { + final CLContext context = CLContext.create(); + + try{ + final CLProgram program = context.createProgram(test20KernelSource).build(); + assertTrue(program.isExecutable()); + + final CLKernel kernel = program.createCLKernel("foo"); + assertNotNull(kernel); + + final long pwgsm = kernel.getPreferredWorkGroupSizeMultiple(context.getDevices()[0]); + out.println("preferred workgroup size multiple: " + pwgsm); + + final long pms = kernel.getPrivateMemSize(context.getDevices()[0]); + out.println("private mem size: " + pms); + + if( context.getDevices()[0].getCVersion().isAtLeast(1, 2) ) { + CL deviceInterface = CLPlatform.getLowLevelCLInterfaceForDevice(context.getDevices()[0].ID); + + ByteBuffer buffer = Buffers.newDirectByteBuffer((is32Bit()?4:8)*3); + final int ret = deviceInterface.clGetKernelWorkGroupInfo(kernel.ID, context.getDevices()[0].ID, CL_KERNEL_GLOBAL_WORK_SIZE, (is32Bit()?4:8)*3, buffer, null); + if(ret != CL_SUCCESS) { + throw newException(ret, "Error while asking for CL_KERNEL_GLOBAL_WORK_SIZE of " + kernel + " on "+ context.getDevices()[0]); + } + + if(is32Bit()) { + out.println("kernel global work size: " + buffer.getInt(0) + ", " + buffer.getInt(4) + ", " + buffer.getInt(8) ); + }else { + out.println("kernel global work size: " + buffer.getLong(0) + ", " + buffer.getLong(8) + ", " + buffer.getLong(16) ); + } + } + }finally{ + context.release(); + } + } + // @Test public void test60Load() throws IOException, ClassNotFoundException, InterruptedException { for(int i = 0; i < 100; i++) { |