From ea8f4a6b13b94eb060bff4ccc6c13efc01d2b682 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Thu, 17 Mar 2016 14:15:39 +0100 Subject: gallium: add compute shader IR type Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Ilia Mirkin Reviewed-by: Dave Airlie --- src/mesa/state_tracker/st_program.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa') diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 94dc48971ec..d2d68ac05bf 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -1463,6 +1463,7 @@ st_translate_compute_program(struct st_context *st, st_translate_program_common(st, &stcp->Base.Base, stcp->glsl_to_tgsi, ureg, TGSI_PROCESSOR_COMPUTE, &prog); + stcp->tgsi.ir_type = PIPE_SHADER_IR_TGSI; stcp->tgsi.prog = prog.tokens; stcp->tgsi.req_local_mem = stcp->Base.SharedSize; stcp->tgsi.req_private_mem = 0; -- cgit v1.2.3 From 01f993a21f859d372d68c2818d845ebf47d70492 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Mon, 28 Mar 2016 02:40:03 +0200 Subject: gallium: add threads per block TGSI property The value 0 for unknown has been chosen so that drivers using tgsi_scan_shader do not need to detect missing properties if they zero-initialize the struct. 
Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Ilia Mirkin Reviewed-by: Dave Airlie --- src/gallium/auxiliary/tgsi/tgsi_strings.c | 3 +++ src/gallium/docs/source/tgsi.rst | 6 ++++++ src/gallium/include/pipe/p_shader_tokens.h | 5 ++++- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 18 ++++++++++++++++++ 4 files changed, 31 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c index ae779a8320a..d613f5e8cfb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c @@ -146,6 +146,9 @@ const char *tgsi_property_names[TGSI_PROPERTY_COUNT] = "NUM_CULLDIST_ENABLED", "FS_EARLY_DEPTH_STENCIL", "NEXT_SHADER", + "CS_FIXED_BLOCK_WIDTH", + "CS_FIXED_BLOCK_HEIGHT", + "CS_FIXED_BLOCK_DEPTH" }; const char *tgsi_return_type_names[TGSI_RETURN_TYPE_COUNT] = diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 3ac6ba3c25a..ac6052a244a 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -3220,6 +3220,12 @@ Which shader stage will MOST LIKELY follow after this shader when the shader is bound. This is only a hint to the driver and doesn't have to be precise. Only set for VS and TES. +TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH / HEIGHT / DEPTH +""""""""""""""""""""""""""""""""""""""""""""""""""" + +Threads per block in each dimension, if known at compile time. If the block size +is known all three should be at least 1. If it is unknown they should all be set +to 0 or not set. 
Texture Sampling and Texture Formats ------------------------------------ diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 5cc18a293d3..c25786e871e 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -276,7 +276,10 @@ union tgsi_immediate_data #define TGSI_PROPERTY_NUM_CULLDIST_ENABLED 16 #define TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL 17 #define TGSI_PROPERTY_NEXT_SHADER 18 -#define TGSI_PROPERTY_COUNT 19 +#define TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH 19 +#define TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT 20 +#define TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH 21 +#define TGSI_PROPERTY_COUNT 22 struct tgsi_property { unsigned Type : 4; /**< TGSI_TOKEN_TYPE_PROPERTY */ diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 23786b85529..cd481c166e7 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -5935,6 +5935,20 @@ find_array(unsigned attr, struct array_decl *arrays, unsigned count, return false; } +static void +emit_compute_block_size(const struct gl_program *program, + struct ureg_program *ureg) { + const struct gl_compute_program *cp = + (const struct gl_compute_program *)program; + + ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH, + cp->LocalSize[0]); + ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT, + cp->LocalSize[1]); + ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH, + cp->LocalSize[2]); +} + /** * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. * \param program the program to translate @@ -6180,6 +6194,10 @@ st_translate_program( } } + if (procType == TGSI_PROCESSOR_COMPUTE) { + emit_compute_block_size(proginfo, ureg); + } + /* Declare address register. 
*/ if (program->num_address_regs > 0) { -- cgit v1.2.3 From 1a5c8c24b5791efa02a7beefa4ba1c49ae033c73 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Fri, 25 Mar 2016 02:06:50 +0100 Subject: gallium: distinguish between shader IR in get_compute_param For radeonsi, native and TGSI use different compilers and this results in different limits for different IR's. The set we strictly need for radeonsi is only the MAX_BLOCK_SIZE and MAX_THREADS_PER_BLOCK params, but I added a few others as shader related that seemed like they would also typically depend on the compiler. Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Dave Airlie --- src/gallium/docs/source/screen.rst | 18 ++++++------- src/gallium/drivers/ilo/ilo_screen.c | 1 + src/gallium/drivers/nouveau/nv50/nv50_screen.c | 1 + src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 1 + src/gallium/drivers/r600/r600_pipe.c | 2 +- src/gallium/drivers/radeon/r600_pipe_common.c | 3 ++- src/gallium/drivers/radeonsi/si_pipe.c | 2 +- src/gallium/drivers/trace/tr_screen.c | 4 ++- src/gallium/include/pipe/p_screen.h | 13 ++++++---- src/gallium/state_trackers/clover/core/device.cpp | 31 +++++++++++++---------- src/gallium/tests/trivial/compute.c | 4 ++- src/mesa/state_tracker/st_extensions.c | 13 +++++----- 12 files changed, 54 insertions(+), 39 deletions(-) (limited to 'src/mesa') diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 46ec3815412..47a19de6ea9 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -436,26 +436,26 @@ pipe_screen::get_compute_param. ``processor-arch-manufacturer-os`` that will be passed on to the compiler. This CAP is only relevant for drivers that specify PIPE_SHADER_IR_LLVM or PIPE_SHADER_IR_NATIVE for their preferred IR. - Value type: null-terminated string. + Value type: null-terminated string. Shader IR type dependent. * ``PIPE_COMPUTE_CAP_GRID_DIMENSION``: Number of supported dimensions - for grid and block coordinates. 
Value type: ``uint64_t``. + for grid and block coordinates. Value type: ``uint64_t``. Shader IR type dependent. * ``PIPE_COMPUTE_CAP_MAX_GRID_SIZE``: Maximum grid size in block - units. Value type: ``uint64_t []``. + units. Value type: ``uint64_t []``. Shader IR type dependent. * ``PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE``: Maximum block size in thread - units. Value type: ``uint64_t []``. + units. Value type: ``uint64_t []``. Shader IR type dependent. * ``PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK``: Maximum number of threads that - a single block can contain. Value type: ``uint64_t``. + a single block can contain. Value type: ``uint64_t``. Shader IR type dependent. This may be less than the product of the components of MAX_BLOCK_SIZE and is usually limited by the number of threads that can be resident simultaneously on a compute unit. * ``PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE``: Maximum size of the GLOBAL - resource. Value type: ``uint64_t``. + resource. Value type: ``uint64_t``. Shader IR type dependent. * ``PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE``: Maximum size of the LOCAL - resource. Value type: ``uint64_t``. + resource. Value type: ``uint64_t``. Shader IR type dependent. * ``PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE``: Maximum size of the PRIVATE - resource. Value type: ``uint64_t``. + resource. Value type: ``uint64_t``. Shader IR type dependent. * ``PIPE_COMPUTE_CAP_MAX_INPUT_SIZE``: Maximum size of the INPUT - resource. Value type: ``uint64_t``. + resource. Value type: ``uint64_t``. Shader IR type dependent. * ``PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE``: Maximum size of a memory object allocation in bytes. Value type: ``uint64_t``. 
* ``PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY``: Maximum frequency of the GPU diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index 548d215c718..7812c826250 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -179,6 +179,7 @@ ilo_get_video_param(struct pipe_screen *screen, static int ilo_get_compute_param(struct pipe_screen *screen, + enum pipe_shader_ir ir_type, enum pipe_compute_cap param, void *ret) { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 57e28992727..ba5e5003b69 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -368,6 +368,7 @@ nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) static int nv50_screen_get_compute_param(struct pipe_screen *pscreen, + enum pipe_shader_ir ir_type, enum pipe_compute_cap param, void *data) { struct nv50_screen *screen = nv50_screen(pscreen); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 590dac972a7..14438ced7a3 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -395,6 +395,7 @@ nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param) static int nvc0_screen_get_compute_param(struct pipe_screen *pscreen, + enum pipe_shader_ir ir_type, enum pipe_compute_cap param, void *data) { struct nvc0_screen *screen = nvc0_screen(pscreen); diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index b8011917907..c97e34121e3 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -499,7 +499,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: if (shader == PIPE_SHADER_COMPUTE) { uint64_t max_const_buffer_size; 
- pscreen->get_compute_param(pscreen, + pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI, PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, &max_const_buffer_size); return max_const_buffer_size; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 720fc06ece2..32bd6e40d32 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -612,6 +612,7 @@ const char *r600_get_llvm_processor_name(enum radeon_family family) } static int r600_get_compute_param(struct pipe_screen *screen, + enum pipe_shader_ir ir_type, enum pipe_compute_cap param, void *ret) { @@ -678,7 +679,7 @@ static int r600_get_compute_param(struct pipe_screen *screen, uint64_t *max_global_size = ret; uint64_t max_mem_alloc_size; - r600_get_compute_param(screen, + r600_get_compute_param(screen, ir_type, PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, &max_mem_alloc_size); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index ed84dc224ff..407b9e19cc4 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -467,7 +467,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: { uint64_t max_const_buffer_size; - pscreen->get_compute_param(pscreen, + pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI, PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, &max_const_buffer_size); return max_const_buffer_size; diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index b24e1856aca..260f1df5ce7 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -175,6 +175,7 @@ trace_screen_get_paramf(struct pipe_screen *_screen, static int trace_screen_get_compute_param(struct pipe_screen *_screen, + enum pipe_shader_ir ir_type, enum pipe_compute_cap param, void *data) { struct trace_screen *tr_scr = 
trace_screen(_screen); @@ -184,10 +185,11 @@ trace_screen_get_compute_param(struct pipe_screen *_screen, trace_dump_call_begin("pipe_screen", "get_compute_param"); trace_dump_arg(ptr, screen); + trace_dump_arg(int, ir_type); trace_dump_arg(int, param); trace_dump_arg(ptr, data); - result = screen->get_compute_param(screen, param, data); + result = screen->get_compute_param(screen, ir_type, param, data); trace_dump_ret(int, result); diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index 4f30e75ab49..3ac5f3cc9ff 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -109,13 +109,16 @@ struct pipe_screen { /** * Query a compute-specific capability/parameter/limit. - * \param param one of PIPE_COMPUTE_CAP_x - * \param ret pointer to a preallocated buffer that will be - * initialized to the parameter value, or NULL. - * \return size in bytes of the parameter value that would be - * returned. + * \param ir_type shader IR type for which the param applies, or don't care + * if the param is not shader related + * \param param one of PIPE_COMPUTE_CAP_x + * \param ret pointer to a preallocated buffer that will be + * initialized to the parameter value, or NULL. + * \return size in bytes of the parameter value that would be + * returned. 
*/ int (*get_compute_param)(struct pipe_screen *, + enum pipe_shader_ir ir_type, enum pipe_compute_cap param, void *ret); diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp index 1be2f6413f4..39f39f436c6 100644 --- a/src/gallium/state_trackers/clover/core/device.cpp +++ b/src/gallium/state_trackers/clover/core/device.cpp @@ -30,11 +30,12 @@ using namespace clover; namespace { template std::vector - get_compute_param(pipe_screen *pipe, pipe_compute_cap cap) { - int sz = pipe->get_compute_param(pipe, cap, NULL); + get_compute_param(pipe_screen *pipe, pipe_shader_ir ir_format, + pipe_compute_cap cap) { + int sz = pipe->get_compute_param(pipe, ir_format, cap, NULL); std::vector v(sz / sizeof(T)); - pipe->get_compute_param(pipe, cap, &v.front()); + pipe->get_compute_param(pipe, ir_format, cap, &v.front()); return v; } } @@ -115,19 +116,19 @@ device::max_samplers() const { cl_ulong device::max_mem_global() const { - return get_compute_param(pipe, + return get_compute_param(pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)[0]; } cl_ulong device::max_mem_local() const { - return get_compute_param(pipe, + return get_compute_param(pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)[0]; } cl_ulong device::max_mem_input() const { - return get_compute_param(pipe, + return get_compute_param(pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_INPUT_SIZE)[0]; } @@ -146,30 +147,30 @@ device::max_const_buffers() const { size_t device::max_threads_per_block() const { return get_compute_param( - pipe, PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0]; + pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0]; } cl_ulong device::max_mem_alloc_size() const { - return get_compute_param(pipe, + return get_compute_param(pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE)[0]; } cl_uint device::max_clock_frequency() const { - return get_compute_param(pipe, + return get_compute_param(pipe, ir_format(), 
PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)[0]; } cl_uint device::max_compute_units() const { - return get_compute_param(pipe, + return get_compute_param(pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0]; } bool device::image_support() const { - return get_compute_param(pipe, + return get_compute_param(pipe, ir_format(), PIPE_COMPUTE_CAP_IMAGES_SUPPORTED)[0]; } @@ -181,13 +182,15 @@ device::has_doubles() const { std::vector device::max_block_size() const { - auto v = get_compute_param(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); + auto v = get_compute_param(pipe, ir_format(), + PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); return { v.begin(), v.end() }; } cl_uint device::subgroup_size() const { - return get_compute_param(pipe, PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0]; + return get_compute_param(pipe, ir_format(), + PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0]; } std::string @@ -209,7 +212,7 @@ device::ir_format() const { std::string device::ir_target() const { std::vector target = get_compute_param( - pipe, PIPE_COMPUTE_CAP_IR_TARGET); + pipe, ir_format(), PIPE_COMPUTE_CAP_IR_TARGET); return { target.data() }; } diff --git a/src/gallium/tests/trivial/compute.c b/src/gallium/tests/trivial/compute.c index 2ddfc42e9cd..5d012ac3838 100644 --- a/src/gallium/tests/trivial/compute.c +++ b/src/gallium/tests/trivial/compute.c @@ -58,7 +58,9 @@ struct context { uint64_t __v[4]; \ int __i, __n; \ \ - __n = ctx->screen->get_compute_param(ctx->screen, c, __v); \ + __n = ctx->screen->get_compute_param(ctx->screen, \ + PIPE_SHADER_IR_TGSI, \ + c, __v); \ printf("%s: {", #c); \ \ for (__i = 0; __i < __n / sizeof(*__v); ++__i) \ diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 8748ab5c876..6c0df8d2a98 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -1124,14 +1124,15 @@ void st_init_extensions(struct pipe_screen *screen, if (compute_supported_irs & (1 << PIPE_SHADER_IR_TGSI)) { uint64_t grid_size[3], 
block_size[3]; - screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_GRID_SIZE, - grid_size); - screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE, - block_size); - screen->get_compute_param(screen, + screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, + PIPE_COMPUTE_CAP_MAX_GRID_SIZE, grid_size); + screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, + PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE, block_size); + screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK, &consts->MaxComputeWorkGroupInvocations); - screen->get_compute_param(screen, PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE, + screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, + PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE, &consts->MaxComputeSharedMemorySize); for (i = 0; i < 3; i++) { -- cgit v1.2.3 From 15cd3ebede62a0c73bfa1513a0c9ab942906cd5a Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 21 Mar 2016 14:01:24 -0700 Subject: mesa: Make _mesa_choose_tex_format() handle stencil textures. This is necessary for ARB_texture_stencil8 support on classic drivers. Presumably Gallium works because it implements its own ChooseTexFormat. 
Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/mesa/main/texformat.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c index 419fd78e893..be2581b004f 100644 --- a/src/mesa/main/texformat.c +++ b/src/mesa/main/texformat.c @@ -765,6 +765,11 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum target, RETURN_IF_SUPPORTED(MESA_FORMAT_B8G8R8A8_UNORM); break; + case GL_STENCIL_INDEX: + case GL_STENCIL_INDEX8: + RETURN_IF_SUPPORTED(MESA_FORMAT_S_UINT8); + break; + default: /* For non-generic compressed format we assert two things: * -- cgit v1.2.3 From 98c22c04036da4f0a7fcc396dc6c5e0bfe5890e5 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 29 Mar 2016 12:07:37 -0700 Subject: glsl: Add all system variables to the input resource list. System values are just built-in input variables that we've opted to special-case out of convenience. We need to consider all inputs, regardless of how we've classified them. Unfortunately, there's one exception: we shouldn't add gl_BaseVertex unless ARB_shader_draw_parameters is enabled, because it doesn't actually exist in the language, and shouldn't be counted in the GL_ACTIVE_RESOURCES query. Fixes dEQP-GLES31.functional.program_interface_query.program_input. resource_list.compute.empty, which expects gl_NumWorkGroups to appear in the resource list. 
v2: Delete more code Signed-off-by: Kenneth Graunke Reviewed-by: Timothy Arceri --- src/compiler/glsl/linker.cpp | 10 ---------- src/mesa/main/shader_query.cpp | 9 +-------- 2 files changed, 1 insertion(+), 18 deletions(-) (limited to 'src/mesa') diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp index 0005d4965e1..19f4641c724 100644 --- a/src/compiler/glsl/linker.cpp +++ b/src/compiler/glsl/linker.cpp @@ -3531,17 +3531,7 @@ add_interface_variables(struct gl_shader_program *shProg, continue; switch (var->data.mode) { - /* From GL 4.3 core spec, section 11.1.1 (Vertex Attributes): - * "For GetActiveAttrib, all active vertex shader input variables - * are enumerated, including the special built-in inputs gl_VertexID - * and gl_InstanceID." - */ case ir_var_system_value: - if (var->data.location != SYSTEM_VALUE_VERTEX_ID && - var->data.location != SYSTEM_VALUE_VERTEX_ID_ZERO_BASE && - var->data.location != SYSTEM_VALUE_INSTANCE_ID) - continue; - /* FALLTHROUGH */ case ir_var_shader_in: if (programInterface != GL_PROGRAM_INPUT) continue; diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index 993dc863220..e85e81d897c 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -112,14 +112,7 @@ is_active_attrib(const gl_shader_variable *var) return var->location != -1; case ir_var_system_value: - /* From GL 4.3 core spec, section 11.1.1 (Vertex Attributes): - * "For GetActiveAttrib, all active vertex shader input variables - * are enumerated, including the special built-in inputs gl_VertexID - * and gl_InstanceID." 
- */ - return var->location == SYSTEM_VALUE_VERTEX_ID || - var->location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE || - var->location == SYSTEM_VALUE_INSTANCE_ID; + return true; default: return false; -- cgit v1.2.3 From 9fe211bec4533bd5cebeb61b38343ae9c174abb7 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 29 Mar 2016 13:32:32 -0700 Subject: glsl: Consolidate gl_VertexIDMESA -> gl_VertexID query hacks. A program will either have gl_VertexID or gl_VertexIDMESA (the lowered zero-based version), not both. Just spoof it in the resource list so the hacks are done in a single place. Signed-off-by: Kenneth Graunke Reviewed-by: Timothy Arceri --- src/compiler/glsl/linker.cpp | 12 ++++++++++-- src/mesa/main/shader_query.cpp | 17 ----------------- 2 files changed, 10 insertions(+), 19 deletions(-) (limited to 'src/mesa') diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp index 064e7f5ca21..c0d310746f3 100644 --- a/src/compiler/glsl/linker.cpp +++ b/src/compiler/glsl/linker.cpp @@ -3503,12 +3503,20 @@ create_shader_variable(struct gl_shader_program *shProg, const ir_variable *in) if (!out) return NULL; - out->type = in->type; - out->name = ralloc_strdup(shProg, in->name); + /* Since gl_VertexID may be lowered to gl_VertexIDMESA, but applications + * expect to see gl_VertexID in the program resource list. Pretend. 
+ */ + if (in->data.mode == ir_var_system_value && + in->data.location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) { + out->name = ralloc_strdup(shProg, "gl_VertexID"); + } else { + out->name = ralloc_strdup(shProg, in->name); + } if (!out->name) return NULL; + out->type = in->type; out->location = in->data.location; out->index = in->data.index; out->patch = in->data.patch; diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index e85e81d897c..caff79f39af 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -164,15 +164,6 @@ _mesa_GetActiveAttrib(GLuint program, GLuint desired_index, const char *var_name = var->name; - /* Since gl_VertexID may be lowered to gl_VertexIDMESA, we need to - * consider gl_VertexIDMESA as gl_VertexID for purposes of checking - * active attributes. - */ - if (var->mode == ir_var_system_value && - var->location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) { - var_name = "gl_VertexID"; - } - _mesa_copy_string(name, maxLength, length, var_name); if (size) @@ -421,7 +412,6 @@ _mesa_GetFragDataLocation(GLuint program, const GLchar *name) const char* _mesa_program_resource_name(struct gl_program_resource *res) { - const gl_shader_variable *var; switch (res->Type) { case GL_UNIFORM_BLOCK: case GL_SHADER_STORAGE_BLOCK: @@ -429,13 +419,6 @@ _mesa_program_resource_name(struct gl_program_resource *res) case GL_TRANSFORM_FEEDBACK_VARYING: return RESOURCE_XFV(res)->Name; case GL_PROGRAM_INPUT: - var = RESOURCE_VAR(res); - /* Special case gl_VertexIDMESA -> gl_VertexID. */ - if (var->mode == ir_var_system_value && - var->location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) { - return "gl_VertexID"; - } - /* fallthrough */ case GL_PROGRAM_OUTPUT: return RESOURCE_VAR(res)->name; case GL_UNIFORM: -- cgit v1.2.3 From c123294dfe2e52443f2eff2723ef922f22972ef5 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 29 Mar 2016 13:21:48 -0700 Subject: glsl: Return -1 for program interface query locations in many cases. 
We were recording locations for all variables, even ones without an explicit location set. Implement the rules from the spec, and record -1 in the resource list accordingly. Make program_resource_location stop doing math on negative values. Remove hacks that are no longer necessary now that we've stopped doing that. Fixes 4 dEQP-GLES31.functional.program_interface_query tests: - program_input.location.separable_fragment.var - program_input.location.separable_fragment.var_array - program_output.location.separable_vertex.var_array - program_output.location.separable_vertex.var_array v2: Delete more code Signed-off-by: Kenneth Graunke Reviewed-by: Timothy Arceri --- src/compiler/glsl/linker.cpp | 38 ++++++++++++++++++++++---- src/mesa/main/shader_query.cpp | 62 ++++++------------------------------------ 2 files changed, 42 insertions(+), 58 deletions(-) (limited to 'src/mesa') diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp index c0d310746f3..71dbddd8763 100644 --- a/src/compiler/glsl/linker.cpp +++ b/src/compiler/glsl/linker.cpp @@ -3497,7 +3497,8 @@ build_stageref(struct gl_shader_program *shProg, const char *name, * Create gl_shader_variable from ir_variable class. 
*/ static gl_shader_variable * -create_shader_variable(struct gl_shader_program *shProg, const ir_variable *in) +create_shader_variable(struct gl_shader_program *shProg, + const ir_variable *in, bool use_implicit_location) { gl_shader_variable *out = ralloc(shProg, struct gl_shader_variable); if (!out) @@ -3516,8 +3517,29 @@ create_shader_variable(struct gl_shader_program *shProg, const ir_variable *in) if (!out->name) return NULL; + /* From the ARB_program_interface_query specification: + * + * "Not all active variables are assigned valid locations; the + * following variables will have an effective location of -1: + * + * * uniforms declared as atomic counters; + * + * * members of a uniform block; + * + * * built-in inputs, outputs, and uniforms (starting with "gl_"); and + * + * * inputs or outputs not declared with a "location" layout qualifier, + * except for vertex shader inputs and fragment shader outputs." + */ + if (in->type->base_type == GLSL_TYPE_ATOMIC_UINT || + is_gl_identifier(in->name) || + !(in->data.explicit_location || use_implicit_location)) { + out->location = -1; + } else { + out->location = in->data.location; + } + out->type = in->type; - out->location = in->data.location; out->index = in->data.index; out->patch = in->data.patch; out->mode = in->data.mode; @@ -3563,7 +3585,12 @@ add_interface_variables(struct gl_shader_program *shProg, if (strncmp(var->name, "gl_out_FragData", 15) == 0) continue; - gl_shader_variable *sha_v = create_shader_variable(shProg, var); + const bool vs_input_or_fs_output = + (stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in) || + (stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out); + + gl_shader_variable *sha_v = + create_shader_variable(shProg, var, vs_input_or_fs_output); if (!sha_v) return false; @@ -3597,7 +3624,8 @@ add_packed_varyings(struct gl_shader_program *shProg, int stage, GLenum type) } if (type == iface) { - gl_shader_variable *sha_v = create_shader_variable(shProg, 
var); + gl_shader_variable *sha_v = + create_shader_variable(shProg, var, false); if (!sha_v) return false; if (!add_program_resource(shProg, iface, sha_v, @@ -3622,7 +3650,7 @@ add_fragdata_arrays(struct gl_shader_program *shProg) ir_variable *var = node->as_variable(); if (var) { assert(var->data.mode == ir_var_shader_out); - gl_shader_variable *sha_v = create_shader_variable(shProg, var); + gl_shader_variable *sha_v = create_shader_variable(shProg, var, true); if (!sha_v) return false; if (!add_program_resource(shProg, GL_PROGRAM_OUTPUT, sha_v, diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index caff79f39af..190f6387987 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -101,24 +101,6 @@ _mesa_BindAttribLocation(GLuint program, GLuint index, */ } -static bool -is_active_attrib(const gl_shader_variable *var) -{ - if (!var) - return false; - - switch (var->mode) { - case ir_var_shader_in: - return var->location != -1; - - case ir_var_system_value: - return true; - - default: - return false; - } -} - void GLAPIENTRY _mesa_GetActiveAttrib(GLuint program, GLuint desired_index, GLsizei maxLength, GLsizei * length, GLint * size, @@ -159,9 +141,6 @@ _mesa_GetActiveAttrib(GLuint program, GLuint desired_index, const gl_shader_variable *const var = RESOURCE_VAR(res); - if (!is_active_attrib(var)) - return; - const char *var_name = var->name; _mesa_copy_string(name, maxLength, length, var_name); @@ -208,19 +187,7 @@ _mesa_GetAttribLocation(GLuint program, const GLchar * name) if (!res) return -1; - GLint loc = program_resource_location(shProg, res, name, array_index); - - /* The extra check against against 0 is made because of builtin-attribute - * locations that have offset applied. Function program_resource_location - * can return built-in attribute locations < 0 and glGetAttribLocation - * cannot be used on "conventional" attributes. 
- * - * From page 95 of the OpenGL 3.0 spec: - * - * "If name is not an active attribute, if name is a conventional - * attribute, or if an error occurs, -1 will be returned." - */ - return (loc >= 0) ? loc : -1; + return program_resource_location(shProg, res, name, array_index); } unsigned @@ -235,8 +202,7 @@ _mesa_count_active_attribs(struct gl_shader_program *shProg) unsigned count = 0; for (unsigned j = 0; j < shProg->NumProgramResourceList; j++, res++) { if (res->Type == GL_PROGRAM_INPUT && - res->StageReferences & (1 << MESA_SHADER_VERTEX) && - is_active_attrib(RESOURCE_VAR(res))) + res->StageReferences & (1 << MESA_SHADER_VERTEX)) count++; } return count; @@ -394,19 +360,7 @@ _mesa_GetFragDataLocation(GLuint program, const GLchar *name) if (!res) return -1; - GLint loc = program_resource_location(shProg, res, name, array_index); - - /* The extra check against against 0 is made because of builtin-attribute - * locations that have offset applied. Function program_resource_location - * can return built-in attribute locations < 0 and glGetFragDataLocation - * cannot be used on "conventional" attributes. - * - * From page 95 of the OpenGL 3.0 spec: - * - * "If name is not an active attribute, if name is a conventional - * attribute, or if an error occurs, -1 will be returned." - */ - return (loc >= 0) ? loc : -1; + return program_resource_location(shProg, res, name, array_index); } const char* @@ -826,10 +780,6 @@ program_resource_location(struct gl_shader_program *shProg, struct gl_program_resource *res, const char *name, unsigned array_index) { - /* Built-in locations should report GL_INVALID_INDEX. */ - if (is_gl_identifier(name)) - return GL_INVALID_INDEX; - /* VERT_ATTRIB_GENERIC0 and FRAG_RESULT_DATA0 are decremented as these * offsets are used internally to differentiate between built-in attributes * and user-defined attributes. 
@@ -838,6 +788,9 @@ program_resource_location(struct gl_shader_program *shProg, case GL_PROGRAM_INPUT: { const gl_shader_variable *var = RESOURCE_VAR(res); + if (var->location == -1) + return -1; + /* If the input is an array, fail if the index is out of bounds. */ if (array_index > 0 && array_index >= var->type->length) { @@ -848,6 +801,9 @@ program_resource_location(struct gl_shader_program *shProg, VERT_ATTRIB_GENERIC0); } case GL_PROGRAM_OUTPUT: + if (RESOURCE_VAR(res)->location == -1) + return -1; + /* If the output is an array, fail if the index is out of bounds. */ if (array_index > 0 && array_index >= RESOURCE_VAR(res)->type->length) { -- cgit v1.2.3 From 94ed482c19916ddede91c3c2ea3a538039ddb489 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 29 Mar 2016 14:15:14 -0700 Subject: glsl: Fix program interface query locations biasing for SSO. With SSO, the GL_PROGRAM_INPUT and GL_PROGRAM_OUTPUT interfaces refer to the first and last shader stage linked into a program. This may not be the vertex and fragment shader stages. So, subtracting VERT_ATTRIB_GENERIC0 and FRAG_RESULT_DATA0 is bogus. We need to subtract VERT_ATTRIB_GENERIC0 for VS inputs, FRAG_RESULT_DATA0 for FS outputs, and VARYING_SLOT_VAR0 for other cases. Note that built-in variables get a location of -1. 
Fixes 4 dEQP-GLES31.functional.program_interface_query tests: - program_input.location.separable_fragment.var_explicit_location - program_input.location.separable_fragment.var_array_explicit_location - program_output.location.separable_vertex.var_explicit_location - program_output.location.separable_vertex.var_array_explicit_location Signed-off-by: Kenneth Graunke Reviewed-by: Timothy Arceri --- src/compiler/glsl/linker.cpp | 18 +++++++++++++----- src/mesa/main/shader_query.cpp | 11 +++-------- 2 files changed, 16 insertions(+), 13 deletions(-) (limited to 'src/mesa') diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp index 71dbddd8763..736852e7809 100644 --- a/src/compiler/glsl/linker.cpp +++ b/src/compiler/glsl/linker.cpp @@ -3498,7 +3498,8 @@ build_stageref(struct gl_shader_program *shProg, const char *name, */ static gl_shader_variable * create_shader_variable(struct gl_shader_program *shProg, - const ir_variable *in, bool use_implicit_location) + const ir_variable *in, bool use_implicit_location, + int location_bias) { gl_shader_variable *out = ralloc(shProg, struct gl_shader_variable); if (!out) @@ -3536,7 +3537,7 @@ create_shader_variable(struct gl_shader_program *shProg, !(in->data.explicit_location || use_implicit_location)) { out->location = -1; } else { - out->location = in->data.location; + out->location = in->data.location - location_bias; } out->type = in->type; @@ -3559,15 +3560,21 @@ add_interface_variables(struct gl_shader_program *shProg, if (!var || var->data.how_declared == ir_var_hidden) continue; + int loc_bias; + switch (var->data.mode) { case ir_var_system_value: case ir_var_shader_in: if (programInterface != GL_PROGRAM_INPUT) continue; + loc_bias = (stage == MESA_SHADER_VERTEX) ? int(VERT_ATTRIB_GENERIC0) + : int(VARYING_SLOT_VAR0); break; case ir_var_shader_out: if (programInterface != GL_PROGRAM_OUTPUT) continue; + loc_bias = (stage == MESA_SHADER_FRAGMENT) ? 
int(FRAG_RESULT_DATA0) + : int(VARYING_SLOT_VAR0); break; default: continue; @@ -3590,7 +3597,7 @@ add_interface_variables(struct gl_shader_program *shProg, (stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out); gl_shader_variable *sha_v = - create_shader_variable(shProg, var, vs_input_or_fs_output); + create_shader_variable(shProg, var, vs_input_or_fs_output, loc_bias); if (!sha_v) return false; @@ -3625,7 +3632,7 @@ add_packed_varyings(struct gl_shader_program *shProg, int stage, GLenum type) if (type == iface) { gl_shader_variable *sha_v = - create_shader_variable(shProg, var, false); + create_shader_variable(shProg, var, false, VARYING_SLOT_VAR0); if (!sha_v) return false; if (!add_program_resource(shProg, iface, sha_v, @@ -3650,7 +3657,8 @@ add_fragdata_arrays(struct gl_shader_program *shProg) ir_variable *var = node->as_variable(); if (var) { assert(var->data.mode == ir_var_shader_out); - gl_shader_variable *sha_v = create_shader_variable(shProg, var, true); + gl_shader_variable *sha_v = + create_shader_variable(shProg, var, true, FRAG_RESULT_DATA0); if (!sha_v) return false; if (!add_program_resource(shProg, GL_PROGRAM_OUTPUT, sha_v, diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index 190f6387987..4650a5c9ef1 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -780,10 +780,6 @@ program_resource_location(struct gl_shader_program *shProg, struct gl_program_resource *res, const char *name, unsigned array_index) { - /* VERT_ATTRIB_GENERIC0 and FRAG_RESULT_DATA0 are decremented as these - * offsets are used internally to differentiate between built-in attributes - * and user-defined attributes. 
- */ switch (res->Type) { case GL_PROGRAM_INPUT: { const gl_shader_variable *var = RESOURCE_VAR(res); @@ -796,9 +792,8 @@ program_resource_location(struct gl_shader_program *shProg, && array_index >= var->type->length) { return -1; } - return (var->location + - (array_index * var->type->without_array()->matrix_columns) - - VERT_ATTRIB_GENERIC0); + return var->location + + (array_index * var->type->without_array()->matrix_columns); } case GL_PROGRAM_OUTPUT: if (RESOURCE_VAR(res)->location == -1) @@ -809,7 +804,7 @@ program_resource_location(struct gl_shader_program *shProg, && array_index >= RESOURCE_VAR(res)->type->length) { return -1; } - return RESOURCE_VAR(res)->location + array_index - FRAG_RESULT_DATA0; + return RESOURCE_VAR(res)->location + array_index; case GL_UNIFORM: /* If the uniform is built-in, fail. */ if (RESOURCE_UNI(res)->builtin) -- cgit v1.2.3 From 1265e1c4e17dec5c9931fda8b6d44a4006ed1a4c Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Sat, 2 Apr 2016 12:51:12 +1100 Subject: glsl: store stage reference in gl_uniform_block This allows us to simplify the code and drop InterfaceBlockStageIndex which is a per stage array of integers the size of all blocks in the program combined including duplicates across stages. Adding a stage ref per block will use less memory. 
Reviewed-by: Kenneth Graunke --- src/compiler/glsl/linker.cpp | 37 +++++++++++++++++----------- src/compiler/glsl/standalone_scaffolding.cpp | 7 ------ src/mesa/main/mtypes.h | 13 +++------- src/mesa/main/shader_query.cpp | 2 +- src/mesa/main/shaderobj.c | 4 --- 5 files changed, 26 insertions(+), 37 deletions(-) (limited to 'src/mesa') diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp index f440dbce888..f750f5b7722 100644 --- a/src/compiler/glsl/linker.cpp +++ b/src/compiler/glsl/linker.cpp @@ -1171,6 +1171,8 @@ cross_validate_uniforms(struct gl_shader_program *prog) static bool interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog) { + int *InterfaceBlockStageIndex[MESA_SHADER_STAGES]; + unsigned max_num_uniform_blocks = 0; for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { if (prog->_LinkedShaders[i]) @@ -1180,10 +1182,9 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog) for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { struct gl_shader *sh = prog->_LinkedShaders[i]; - prog->InterfaceBlockStageIndex[i] = ralloc_array(prog, int, - max_num_uniform_blocks); + InterfaceBlockStageIndex[i] = new int[max_num_uniform_blocks]; for (unsigned int j = 0; j < max_num_uniform_blocks; j++) - prog->InterfaceBlockStageIndex[i][j] = -1; + InterfaceBlockStageIndex[i][j] = -1; if (sh == NULL) continue; @@ -1194,13 +1195,17 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog) &prog->NumBufferInterfaceBlocks, sh->BufferInterfaceBlocks[j]); - if (index == -1) { - linker_error(prog, "uniform block `%s' has mismatching definitions\n", - sh->BufferInterfaceBlocks[j]->Name); - return false; - } + if (index == -1) { + linker_error(prog, "uniform block `%s' has mismatching definitions\n", + sh->BufferInterfaceBlocks[j]->Name); + + for (unsigned k = 0; k <= i; k++) { + delete[] InterfaceBlockStageIndex[k]; + } + return false; + } - prog->InterfaceBlockStageIndex[i][index] = j; + 
InterfaceBlockStageIndex[i][index] = j; } } @@ -1209,18 +1214,23 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog) */ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { for (unsigned j = 0; j < prog->NumBufferInterfaceBlocks; j++) { - int stage_index = - prog->InterfaceBlockStageIndex[i][j]; + int stage_index = InterfaceBlockStageIndex[i][j]; if (stage_index != -1) { struct gl_shader *sh = prog->_LinkedShaders[i]; + prog->BufferInterfaceBlocks[j].stageref |= (1 << i); + sh->BufferInterfaceBlocks[stage_index] = &prog->BufferInterfaceBlocks[j]; } } } + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + delete[] InterfaceBlockStageIndex[i]; + } + return true; } @@ -3933,10 +3943,7 @@ build_program_resource_list(struct gl_context *ctx, /* Add stagereferences for uniforms in a uniform block. */ int block_index = shProg->UniformStorage[i].block_index; if (block_index != -1) { - for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) { - if (shProg->InterfaceBlockStageIndex[j][block_index] != -1) - stageref |= (1 << j); - } + stageref |= shProg->BufferInterfaceBlocks[block_index].stageref; } bool is_shader_storage = shProg->UniformStorage[i].is_shader_storage; diff --git a/src/compiler/glsl/standalone_scaffolding.cpp b/src/compiler/glsl/standalone_scaffolding.cpp index e350f702099..49b4a26dc12 100644 --- a/src/compiler/glsl/standalone_scaffolding.cpp +++ b/src/compiler/glsl/standalone_scaffolding.cpp @@ -96,8 +96,6 @@ _mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh) void _mesa_clear_shader_program_data(struct gl_shader_program *shProg) { - unsigned i; - shProg->NumUniformStorage = 0; shProg->UniformStorage = NULL; shProg->NumUniformRemapTable = 0; @@ -119,11 +117,6 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg) shProg->ShaderStorageBlocks = NULL; shProg->NumShaderStorageBlocks = 0; - for (i = 0; i < MESA_SHADER_STAGES; i++) { - ralloc_free(shProg->InterfaceBlockStageIndex[i]); - 
shProg->InterfaceBlockStageIndex[i] = NULL; - } - ralloc_free(shProg->AtomicBuffers); shProg->AtomicBuffers = NULL; shProg->NumAtomicBuffers = 0; diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index f2cb4cb107b..e579794de3c 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2529,6 +2529,9 @@ struct gl_uniform_block */ bool IsShaderStorage; + /** Stages that reference this block */ + uint8_t stageref; + /** * Layout specified in the shader * @@ -2829,16 +2832,6 @@ struct gl_shader_program unsigned NumShaderStorageBlocks; struct gl_uniform_block **ShaderStorageBlocks; - /** - * Indices into the BufferInterfaceBlocks[] array for each stage they're - * used in, or -1. - * - * This is used to maintain the Binding values of the stage's - * BufferInterfaceBlocks[] and to answer the - * GL_UNIFORM_BLOCK_REFERENCED_BY_*_SHADER queries. - */ - int *InterfaceBlockStageIndex[MESA_SHADER_STAGES]; - /** * Map of active uniform names to locations * diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index 4650a5c9ef1..4ef6a81204e 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -926,7 +926,7 @@ is_resource_referenced(struct gl_shader_program *shProg, return RESOURCE_ATC(res)->StageReferences[stage]; if (res->Type == GL_UNIFORM_BLOCK || res->Type == GL_SHADER_STORAGE_BLOCK) - return shProg->InterfaceBlockStageIndex[stage][index] != -1; + return shProg->BufferInterfaceBlocks[index].stageref & (1 << stage); return res->StageReferences & (1 << stage); } diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c index 9a4eb6b56fd..8b9166ceecb 100644 --- a/src/mesa/main/shaderobj.c +++ b/src/mesa/main/shaderobj.c @@ -295,10 +295,6 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg) ralloc_free(shProg->BufferInterfaceBlocks); shProg->BufferInterfaceBlocks = NULL; shProg->NumBufferInterfaceBlocks = 0; - for (i = 0; i < MESA_SHADER_STAGES; i++) { - 
ralloc_free(shProg->InterfaceBlockStageIndex[i]); - shProg->InterfaceBlockStageIndex[i] = NULL; - } ralloc_free(shProg->AtomicBuffers); shProg->AtomicBuffers = NULL; -- cgit v1.2.3 From 6d54096fa6cde0ebc7da29468071fe2c34aec0cf Mon Sep 17 00:00:00 2001 From: Timothy Arceri Date: Sat, 2 Apr 2016 21:07:50 +1100 Subject: mesa: remove unrequired else The if always returns so no need for an else. Reviewed-by: Brian Paul --- src/mesa/program/prog_parameter.c | 81 +++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 42 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/program/prog_parameter.c b/src/mesa/program/prog_parameter.c index 02d84f20cd8..c294b005b85 100644 --- a/src/mesa/program/prog_parameter.c +++ b/src/mesa/program/prog_parameter.c @@ -248,54 +248,51 @@ _mesa_add_parameter(struct gl_program_parameter_list *paramList, paramList->Size = 0; return -1; } - else { - GLuint i, j; - - paramList->NumParameters = oldNum + sz4; - - memset(&paramList->Parameters[oldNum], 0, - sz4 * sizeof(struct gl_program_parameter)); - - for (i = 0; i < sz4; i++) { - struct gl_program_parameter *p = paramList->Parameters + oldNum + i; - p->Name = name ? strdup(name) : NULL; - p->Type = type; - p->Size = size; - p->DataType = datatype; - if (values) { - if (size >= 4) { - COPY_4V(paramList->ParameterValues[oldNum + i], values); + + GLuint i, j; + + paramList->NumParameters = oldNum + sz4; + + memset(&paramList->Parameters[oldNum], 0, + sz4 * sizeof(struct gl_program_parameter)); + + for (i = 0; i < sz4; i++) { + struct gl_program_parameter *p = paramList->Parameters + oldNum + i; + p->Name = name ? 
strdup(name) : NULL; + p->Type = type; + p->Size = size; + p->DataType = datatype; + if (values) { + if (size >= 4) { + COPY_4V(paramList->ParameterValues[oldNum + i], values); + } else { + /* copy 1, 2 or 3 values */ + GLuint remaining = size % 4; + assert(remaining < 4); + for (j = 0; j < remaining; j++) { + paramList->ParameterValues[oldNum + i][j].f = values[j].f; } - else { - /* copy 1, 2 or 3 values */ - GLuint remaining = size % 4; - assert(remaining < 4); - for (j = 0; j < remaining; j++) { - paramList->ParameterValues[oldNum + i][j].f = values[j].f; - } - /* fill in remaining positions with zeros */ - for (; j < 4; j++) { - paramList->ParameterValues[oldNum + i][j].f = 0.0f; - } + /* fill in remaining positions with zeros */ + for (; j < 4; j++) { + paramList->ParameterValues[oldNum + i][j].f = 0.0f; } - values += 4; - p->Initialized = GL_TRUE; } - else { - /* silence valgrind */ - for (j = 0; j < 4; j++) - paramList->ParameterValues[oldNum + i][j].f = 0; - } - size -= 4; - } - - if (state) { - for (i = 0; i < STATE_LENGTH; i++) - paramList->Parameters[oldNum].StateIndexes[i] = state[i]; + values += 4; + p->Initialized = GL_TRUE; + } else { + /* silence valgrind */ + for (j = 0; j < 4; j++) + paramList->ParameterValues[oldNum + i][j].f = 0; } + size -= 4; + } - return (GLint) oldNum; + if (state) { + for (i = 0; i < STATE_LENGTH; i++) + paramList->Parameters[oldNum].StateIndexes[i] = state[i]; } + + return (GLint) oldNum; } -- cgit v1.2.3 From 1708e24f654706565633c4deacf83f7422e5b2a7 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Fri, 19 Feb 2016 13:26:43 -0500 Subject: mesa: add ES3_1_compatibility extension enable Signed-off-by: Ilia Mirkin Reviewed-by: Dave Airlie --- src/mesa/main/extensions_table.h | 1 + src/mesa/main/mtypes.h | 1 + 2 files changed, 2 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h index 7c36b1e1d0b..119039083f6 100644 --- a/src/mesa/main/extensions_table.h +++ 
b/src/mesa/main/extensions_table.h @@ -27,6 +27,7 @@ EXT(APPLE_texture_max_level , dummy_true EXT(APPLE_vertex_array_object , dummy_true , GLL, x , x , x , 2002) EXT(ARB_ES2_compatibility , ARB_ES2_compatibility , GLL, GLC, x , x , 2009) +EXT(ARB_ES3_1_compatibility , ARB_ES3_1_compatibility , x , GLC, x , x , 2014) EXT(ARB_ES3_compatibility , ARB_ES3_compatibility , GLL, GLC, x , x , 2012) EXT(ARB_arrays_of_arrays , ARB_arrays_of_arrays , GLL, GLC, x , x , 2012) EXT(ARB_base_instance , ARB_base_instance , GLL, GLC, x , x , 2011) diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index e579794de3c..b2060c282f4 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -3774,6 +3774,7 @@ struct gl_extensions GLboolean ANGLE_texture_compression_dxt; GLboolean ARB_ES2_compatibility; GLboolean ARB_ES3_compatibility; + GLboolean ARB_ES3_1_compatibility; GLboolean ARB_arrays_of_arrays; GLboolean ARB_base_instance; GLboolean ARB_blend_func_extended; -- cgit v1.2.3 From d50ffb5e464e18894eaf8cb779df3a74f5836ef9 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Fri, 8 Jan 2016 15:48:44 -0500 Subject: mesa: add always-false-for-now enables for GL 4.3, 4.4, 4.5. As the relevant extensions get implemented, the lines should be uncommented. I believe this is (almost) everything needed for those GL versions though. 
Signed-off-by: Ilia Mirkin Reviewed-by: Dave Airlie --- src/mesa/main/version.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/version.c b/src/mesa/main/version.c index 112a73dc0e7..2af3653f7bb 100644 --- a/src/mesa/main/version.c +++ b/src/mesa/main/version.c @@ -351,8 +351,55 @@ compute_version(const struct gl_extensions *extensions, extensions->ARB_shading_language_packing && extensions->ARB_texture_compression_bptc && extensions->ARB_transform_feedback_instanced); - - if (ver_4_2) { + const bool ver_4_3 = (ver_4_2 && + consts->GLSLVersion >= 430 && + extensions->ARB_ES3_compatibility && + extensions->ARB_arrays_of_arrays && + extensions->ARB_compute_shader && + extensions->ARB_copy_image && + extensions->ARB_explicit_uniform_location && + extensions->ARB_fragment_layer_viewport && + extensions->ARB_framebuffer_no_attachments && + extensions->ARB_internalformat_query2 && + /* extensions->ARB_robust_buffer_access_behavior */ 0 && + extensions->ARB_shader_image_size && + extensions->ARB_shader_storage_buffer_object && + extensions->ARB_stencil_texturing && + extensions->ARB_texture_buffer_range && + extensions->ARB_texture_query_levels && + extensions->ARB_texture_view); + const bool ver_4_4 = (ver_4_3 && + consts->GLSLVersion >= 440 && + extensions->ARB_buffer_storage && + extensions->ARB_clear_texture && + extensions->ARB_enhanced_layouts && + extensions->ARB_query_buffer_object && + extensions->ARB_texture_mirror_clamp_to_edge && + extensions->ARB_texture_stencil8 && + extensions->ARB_vertex_type_10f_11f_11f_rev); + const bool ver_4_5 = (ver_4_4 && + consts->GLSLVersion >= 450 && + extensions->ARB_ES3_1_compatibility && + extensions->ARB_clip_control && + extensions->ARB_conditional_render_inverted && + /* extensions->ARB_cull_distance */ 0 && + extensions->ARB_derivative_control && + extensions->ARB_shader_texture_image_samples && + 
extensions->NV_texture_barrier); + + if (ver_4_5) { + major = 4; + minor = 5; + } + else if (ver_4_4) { + major = 4; + minor = 4; + } + else if (ver_4_3) { + major = 4; + minor = 3; + } + else if (ver_4_2) { major = 4; minor = 2; } -- cgit v1.2.3 From 807e2c27ac03718df8f53c2f28686dfed4c12677 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 3 Apr 2016 02:28:53 -0400 Subject: mesa: expose EXT_polygon_offset_clamp in ES contexts The extension spec was extended to also support ES. This functionality is provided all the way back to ES 1.0. Signed-off-by: Ilia Mirkin Reviewed-by: Dave Airlie --- src/mapi/glapi/gen/gl_API.xml | 2 +- src/mesa/main/extensions_table.h | 2 +- src/mesa/main/get_hash_params.py | 6 +++--- src/mesa/main/tests/dispatch_sanity.cpp | 6 ++++++ 4 files changed, 11 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml index 8b49f915169..cdd680c70ee 100644 --- a/src/mapi/glapi/gen/gl_API.xml +++ b/src/mapi/glapi/gen/gl_API.xml @@ -12741,7 +12741,7 @@ - + diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h index 119039083f6..a31a9a1dc51 100644 --- a/src/mesa/main/extensions_table.h +++ b/src/mesa/main/extensions_table.h @@ -214,7 +214,7 @@ EXT(EXT_packed_pixels , dummy_true EXT(EXT_pixel_buffer_object , EXT_pixel_buffer_object , GLL, GLC, x , x , 2004) EXT(EXT_point_parameters , EXT_point_parameters , GLL, x , x , x , 1997) EXT(EXT_polygon_offset , dummy_true , GLL, x , x , x , 1995) -EXT(EXT_polygon_offset_clamp , EXT_polygon_offset_clamp , GLL, GLC, x , x , 2014) +EXT(EXT_polygon_offset_clamp , EXT_polygon_offset_clamp , GLL, GLC, ES1, ES2, 2014) EXT(EXT_provoking_vertex , EXT_provoking_vertex , GLL, GLC, x , x , 2009) EXT(EXT_read_format_bgra , dummy_true , x , x , ES1, ES2, 2009) EXT(EXT_rescale_normal , dummy_true , GLL, x , x , x , 1997) diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index 
a0cc4f8e842..59d6557eb4a 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -135,6 +135,9 @@ descriptor=[ [ "MAX_LABEL_LENGTH", "CONST(MAX_LABEL_LENGTH), NO_EXTRA" ], [ "MAX_DEBUG_GROUP_STACK_DEPTH", "CONST(MAX_DEBUG_GROUP_STACK_DEPTH), NO_EXTRA" ], [ "DEBUG_GROUP_STACK_DEPTH", "LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA" ], + +# GL_EXT_polygon_offset_clamp + [ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), extra_EXT_polygon_offset_clamp" ], ]}, # Enums in OpenGL and GLES1 @@ -857,9 +860,6 @@ descriptor=[ # GL_ARB_shader_image_load_store [ "MAX_IMAGE_SAMPLES", "CONTEXT_INT(Const.MaxImageSamples), extra_ARB_shader_image_load_store" ], -# GL_EXT_polygon_offset_clamp - [ "POLYGON_OFFSET_CLAMP_EXT", "CONTEXT_FLOAT(Polygon.OffsetClamp), extra_EXT_polygon_offset_clamp" ], - # GL_ARB_shader_storage_buffer_object [ "MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ], [ "MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object" ], diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index 9f278be47ca..c6599fd5cea 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -2064,6 +2064,9 @@ const struct function gles11_functions_possible[] = { { "glObjectLabelKHR", 11, -1 }, { "glObjectPtrLabelKHR", 11, -1 }, + /* GL_EXT_polygon_offset_clamp */ + { "glPolygonOffsetClampEXT", 11, -1 }, + { NULL, 0, -1 } }; @@ -2300,6 +2303,9 @@ const struct function gles2_functions_possible[] = { { "glObjectLabelKHR", 20, -1 }, { "glObjectPtrLabelKHR", 20, -1 }, + /* GL_EXT_polygon_offset_clamp */ + { "glPolygonOffsetClampEXT", 11, -1 }, + { NULL, 0, -1 } }; -- cgit v1.2.3 From d76e1cd2dd24d0807a8db78fb6f7ebfeb50f22b4 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin 
Date: Sun, 3 Apr 2016 02:44:50 -0400 Subject: mesa: expose EXT_base_instance in ES3 contexts This extension is identical to ARB_base_instance. Reuse the same entrypoints. Signed-off-by: Ilia Mirkin Reviewed-by: Dave Airlie --- src/mapi/glapi/gen/es_EXT.xml | 34 +++++++++++++++++++++++++++++++++ src/mesa/main/extensions_table.h | 1 + src/mesa/main/tests/dispatch_sanity.cpp | 5 +++++ src/mesa/vbo/vbo_exec_array.c | 2 +- 4 files changed, 41 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml index 3b2c15ebf5c..bf67eae40d4 100644 --- a/src/mapi/glapi/gen/es_EXT.xml +++ b/src/mapi/glapi/gen/es_EXT.xml @@ -924,6 +924,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Date: Mon, 4 Apr 2016 07:54:28 +0100 Subject: mesa/get: fix MAX_GEOMETRY_SHADER_STORAGE_BLOCKS this was returning the fragment shader value. Reviewed-by: Kenneth Graunke Signed-off-by: Dave Airlie --- src/mesa/main/get_hash_params.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index 59d6557eb4a..33ec60d9ce0 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -535,7 +535,7 @@ descriptor=[ [ "MAX_GEOMETRY_ATOMIC_COUNTERS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters), extra_ARB_shader_atomic_counters_and_geometry_shader" ], # GL_ARB_shader_storage_buffer_object / geometry shader - [ "MAX_GEOMETRY_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_and_geometry_shader" ], + [ "MAX_GEOMETRY_SHADER_STORAGE_BLOCKS", "CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks), extra_ARB_shader_storage_buffer_object_and_geometry_shader" ], # GL_ARB_uniform_buffer_object / geometry shader [ "MAX_GEOMETRY_UNIFORM_BLOCKS", 
"CONTEXT_INT(Const.Program[MESA_SHADER_GEOMETRY].MaxUniformBlocks), extra_ARB_uniform_buffer_object_and_geometry_shader" ], -- cgit v1.2.3 From 3aa51e02d6659c8f2e34a4153ae140a4f90cc51b Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 4 Apr 2016 00:45:25 -0700 Subject: i965: Allow 8x MSAA on >= 64bpp formats on Gen8+. See commit 3b0279a69 - this restriction is documented in the "Surface Format" field of RENDER_SURFACE_STATE. Looking at newer documentation, this restriction appears to exist on Haswell, but no longer applies on Gen8+. Signed-off-by: Kenneth Graunke Reviewed-by: Ben Widawsky --- src/mesa/drivers/dri/i965/brw_surface_formats.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c b/src/mesa/drivers/dri/i965/brw_surface_formats.c index 3c0b23b4a42..ba9b5b90b63 100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@ -806,7 +806,8 @@ brw_render_target_supported(struct brw_context *brw, /* Under some conditions, MSAA is not supported for formats whose width is * more than 64 bits. */ - if (rb->NumSamples > 0 && _mesa_get_format_bytes(format) > 8) { + if (brw->gen < 8 && + rb->NumSamples > 0 && _mesa_get_format_bytes(format) > 8) { /* Gen6: MSAA on >64 bit formats is unsupported. */ if (brw->gen <= 6) return false; -- cgit v1.2.3 From 65fbc43d54403905e3eaea02372b5a364dc1d773 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 27 Jan 2016 12:21:04 -0800 Subject: i965: Add an INTEL_PRECISE_TRIG=1 option to fix SIN/COS output range. The SIN and COS instructions on Intel hardware can produce values slightly outside of the [-1.0, 1.0] range for a small set of values. Obviously, this can break everyone's expectations about trig functions. According to an internal presentation, the COS instruction can produce a value up to 1.000027 for inputs in the range (0.08296, 0.09888). 
One suggested workaround is to multiply by 0.99997, scaling down the amplitude slightly. Apparently this also minimizes the error function, reducing the maximum error from 0.00006 to about 0.00003. When enabled, fixes 16 dEQP precision tests dEQP-GLES31.functional.shaders.builtin_functions.precision. {cos,sin}.{highp,mediump}_compute.{scalar,vec2,vec3,vec4}. at the cost of making every sin and cos call more expensive (about twice the number of cycles on recent hardware). Enabling this option has been shown to reduce GPUTest Volplosion performance by about 10%. Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_compiler.c | 2 ++ src/mesa/drivers/dri/i965/brw_compiler.h | 6 ++++++ src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 16 ++++++++++++++-- src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 16 ++++++++++++++-- 4 files changed, 36 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c index 3da6aac2cbf..6509267a52e 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.c +++ b/src/mesa/drivers/dri/i965/brw_compiler.c @@ -147,6 +147,8 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) brw_fs_alloc_reg_sets(compiler); brw_vec4_alloc_reg_set(compiler); + compiler->precise_trig = env_var_as_boolean("INTEL_PRECISE_TRIG", false); + compiler->scalar_stage[MESA_SHADER_VERTEX] = devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS); compiler->scalar_stage[MESA_SHADER_TESS_CTRL] = false; diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index 27a95a3c661..231e0001d54 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -92,6 +92,12 @@ struct brw_compiler { bool scalar_stage[MESA_SHADER_STAGES]; struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES]; + + /** + * Apply workarounds for 
SIN and COS output range problems. + * This can negatively impact performance. + */ + bool precise_trig; }; diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 7839428c52e..5cca91ec5b4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -775,12 +775,24 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) break; case nir_op_fsin: - inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]); + if (!compiler->precise_trig) { + inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]); + } else { + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F); + inst = bld.emit(SHADER_OPCODE_SIN, tmp, op[0]); + inst = bld.MUL(result, tmp, brw_imm_f(0.99997)); + } inst->saturate = instr->dest.saturate; break; case nir_op_fcos: - inst = bld.emit(SHADER_OPCODE_COS, result, op[0]); + if (!compiler->precise_trig) { + inst = bld.emit(SHADER_OPCODE_COS, result, op[0]); + } else { + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F); + inst = bld.emit(SHADER_OPCODE_COS, tmp, op[0]); + inst = bld.MUL(result, tmp, brw_imm_f(0.99997)); + } inst->saturate = instr->dest.saturate; break; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index ee6929b16a2..6c8fd06fb5e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1101,12 +1101,24 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) break; case nir_op_fsin: - inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]); + if (!compiler->precise_trig) { + inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]); + } else { + src_reg tmp = src_reg(this, glsl_type::vec4_type); + inst = emit_math(SHADER_OPCODE_SIN, dst_reg(tmp), op[0]); + inst = emit(MUL(dst, tmp, brw_imm_f(0.99997))); + } inst->saturate = instr->dest.saturate; break; case nir_op_fcos: - inst = emit_math(SHADER_OPCODE_COS, dst, op[0]); + if (!compiler->precise_trig) { + inst = emit_math(SHADER_OPCODE_COS, 
dst, op[0]); + } else { + src_reg tmp = src_reg(this, glsl_type::vec4_type); + inst = emit_math(SHADER_OPCODE_COS, dst_reg(tmp), op[0]); + inst = emit(MUL(dst, tmp, brw_imm_f(0.99997))); + } inst->saturate = instr->dest.saturate; break; -- cgit v1.2.3 From 88ef2476dcdd61000cbae7ded9c8fa52927429d8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 28 Mar 2016 11:47:27 -0700 Subject: i965/peephole_ffma: Only match a mul+add if none of the ops are exact Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c index 6e8b1f99505..22ff2e3c9f8 100644 --- a/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c +++ b/src/mesa/drivers/dri/i965/brw_nir_opt_peephole_ffma.c @@ -84,6 +84,17 @@ get_mul_for_src(nir_alu_src *src, int num_components, return NULL; nir_alu_instr *alu = nir_instr_as_alu(instr); + + /* We want to bail if any of the other ALU operations involved is labeled + * exact. One reason for this is that, while the value that is changing is + * actually the result of the add and not the multiply, the intention of + * the user when they specify an exact multiply is that they want *that* + * value and what they don't care about is the add. Another reason is that + * SPIR-V explicitly requires this behaviour. + */ + if (alu->exact) + return NULL; + switch (alu->op) { case nir_op_imov: case nir_op_fmov: -- cgit v1.2.3