aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/com/jogamp/opencl/CLKernel.java22
-rw-r--r--test/com/jogamp/opencl/CLProgramTest.java46
2 files changed, 68 insertions, 0 deletions
diff --git a/src/com/jogamp/opencl/CLKernel.java b/src/com/jogamp/opencl/CLKernel.java
index c3031ae..4eeb5af 100644
--- a/src/com/jogamp/opencl/CLKernel.java
+++ b/src/com/jogamp/opencl/CLKernel.java
@@ -315,6 +315,7 @@ public class CLKernel extends CLObjectResource implements Cloneable {
* qualifier and whose size is specified with clSetKernelArg.
* If the local memory size, for any pointer argument to the kernel declared with
* the <code>__local</code> address qualifier, is not specified, its size is assumed to be 0.
+ * @version 1.0
*/
public long getLocalMemorySize(final CLDevice device) {
return getWorkGroupInfo(device, CL_KERNEL_LOCAL_MEM_SIZE);
@@ -326,6 +327,7 @@ public class CLKernel extends CLObjectResource implements Cloneable {
* that can be used to execute a kernel on a specific device given by device.
* The OpenCL implementation uses the resource requirements of the kernel
* (register usage etc.) to determine what this work-group size should be.
+ * @version 1.0
*/
public long getWorkGroupSize(final CLDevice device) {
return getWorkGroupInfo(device, CL_KERNEL_WORK_GROUP_SIZE);
@@ -335,6 +337,7 @@ public class CLKernel extends CLObjectResource implements Cloneable {
* Returns the work-group size specified by the <code>__attribute__((reqd_work_group_size(X, Y, Z)))</code> qualifier in kernel sources.
* If the work-group size is not specified using the above attribute qualifier <code>new long[]{(0, 0, 0)}</code> is returned.
* The returned array has always three elements.
+ * @version 1.0
*/
public long[] getCompileWorkGroupSize(final CLDevice device) {
final int ret = binding.clGetKernelWorkGroupInfo(ID, device.ID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, (is32Bit()?4:8)*3, buffer, null);
@@ -349,6 +352,25 @@ public class CLKernel extends CLObjectResource implements Cloneable {
}
}
+ /**
+  * Queries <code>CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE</code> for this kernel on the given device.
+  * The result is the multiple of the work-group size that is preferred for launching the kernel.
+  * It is a performance hint only: enqueueing with other sizes still works, as long as the
+  * size does not exceed the maximum allowed.
+  * @param device the device to query the kernel information for.
+  * @return the preferred work-group size multiple.
+  * @version 1.1
+  */
+ public long getPreferredWorkGroupSizeMultiple(final CLDevice device) {
+     final long preferredMultiple = getWorkGroupInfo(device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE);
+     return preferredMultiple;
+ }
+
+ /**
+  * Queries <code>CL_KERNEL_PRIVATE_MEM_SIZE</code> for this kernel on the given device.
+  * The result is the amount of private memory, in bytes, used by each work item of the kernel.
+  * It covers memory declared with the <code>__private</code> qualifier as well as any
+  * other private memory used by the implementation.
+  * @param device the device to query the kernel information for.
+  * @return the private memory size per work item in bytes.
+  * @version 1.1
+  */
+ public long getPrivateMemSize(final CLDevice device) {
+     final long privateBytes = getWorkGroupInfo(device, CL_KERNEL_PRIVATE_MEM_SIZE);
+     return privateBytes;
+ }
+
private long getWorkGroupInfo(final CLDevice device, final int flag) {
final int ret = binding.clGetKernelWorkGroupInfo(ID, device.ID, flag, 8, buffer, null);
if(ret != CL_SUCCESS) {
diff --git a/test/com/jogamp/opencl/CLProgramTest.java b/test/com/jogamp/opencl/CLProgramTest.java
index ae09d2c..56289e5 100644
--- a/test/com/jogamp/opencl/CLProgramTest.java
+++ b/test/com/jogamp/opencl/CLProgramTest.java
@@ -31,6 +31,7 @@ package com.jogamp.opencl;
import com.jogamp.opencl.test.util.UITestCase;
import com.jogamp.opencl.util.CLBuildConfiguration;
import com.jogamp.opencl.util.CLProgramConfiguration;
+import com.jogamp.common.nio.Buffers;
import com.jogamp.opencl.CLProgram.Status;
import com.jogamp.opencl.util.CLBuildListener;
import com.jogamp.opencl.llb.CL;
@@ -41,6 +42,7 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
+import java.nio.ByteBuffer;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
@@ -52,7 +54,11 @@ import org.junit.runners.MethodSorters;
import static org.junit.Assert.*;
import static java.lang.System.*;
+import static com.jogamp.common.os.Platform.is32Bit;
+import static com.jogamp.opencl.CLException.newException;
import static com.jogamp.opencl.CLProgram.CompilerOptions.*;
+import static com.jogamp.opencl.llb.CL12.CL_KERNEL_GLOBAL_WORK_SIZE;
+import static com.jogamp.opencl.llb.CL.CL_SUCCESS;
/**
*
@@ -370,6 +376,46 @@ public class CLProgramTest extends UITestCase {
}
+ /**
+  * Test of getting the newer kernel work group information: the preferred work-group size
+  * multiple and the private memory size exposed by {@link CLKernel}, and, if the first device
+  * of the context reports a C version of at least 1.2, <code>CL_KERNEL_GLOBAL_WORK_SIZE</code>
+  * queried through the low-level binding.
+  * <p>
+  * The OpenCL 1.2 specification defines <code>CL_KERNEL_GLOBAL_WORK_SIZE</code> only for custom
+  * devices or built-in kernels; for every other combination clGetKernelWorkGroupInfo returns
+  * <code>CL_INVALID_VALUE</code>. That result is therefore reported and tolerated, while any
+  * other error code still fails the test.
+  */
+ @Test
+ public void test22KerneWorkGrouplInfo() {
+     final CLContext context = CLContext.create();
+
+     try{
+         final CLProgram program = context.createProgram(test20KernelSource).build();
+         assertTrue(program.isExecutable());
+
+         final CLKernel kernel = program.createCLKernel("foo");
+         assertNotNull(kernel);
+
+         // All queries below target the same (first) device of the context.
+         final CLDevice device = context.getDevices()[0];
+
+         final long pwgsm = kernel.getPreferredWorkGroupSizeMultiple(device);
+         out.println("preferred workgroup size multiple: " + pwgsm);
+
+         final long pms = kernel.getPrivateMemSize(device);
+         out.println("private mem size: " + pms);
+
+         // NOTE(review): this gates on the OpenCL C version of the device - confirm that this is
+         // the intended check rather than the API version supported by the device.
+         if( device.getCVersion().isAtLeast(1, 2) ) {
+             final CL deviceInterface = CLPlatform.getLowLevelCLInterfaceForDevice(device.ID);
+
+             // The result is a size_t[3]; the element width depends on the platform.
+             final int sizeOfSizeT = is32Bit() ? 4 : 8;
+             final ByteBuffer buffer = Buffers.newDirectByteBuffer(sizeOfSizeT*3);
+             final int ret = deviceInterface.clGetKernelWorkGroupInfo(kernel.ID, device.ID, CL_KERNEL_GLOBAL_WORK_SIZE, sizeOfSizeT*3, buffer, null);
+
+             if(ret == CL.CL_INVALID_VALUE) {
+                 // Mandated result for a regular kernel on a non-custom device; not a failure.
+                 out.println("kernel global work size: not available for this kernel/device (CL_INVALID_VALUE)");
+             }else if(ret != CL_SUCCESS) {
+                 throw newException(ret, "Error while asking for CL_KERNEL_GLOBAL_WORK_SIZE of " + kernel + " on "+ device);
+             }else if(is32Bit()) {
+                 out.println("kernel global work size: " + buffer.getInt(0) + ", " + buffer.getInt(4) + ", " + buffer.getInt(8) );
+             }else {
+                 out.println("kernel global work size: " + buffer.getLong(0) + ", " + buffer.getLong(8) + ", " + buffer.getLong(16) );
+             }
+         }
+     }finally{
+         context.release();
+     }
+ }
+
// @Test
public void test60Load() throws IOException, ClassNotFoundException, InterruptedException {
for(int i = 0; i < 100; i++) {