summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2015-10-08 17:09:54 -0700
committerJason Ekstrand <[email protected]>2015-10-19 08:47:03 -0700
commit6980372010ad5929c0b4b0a0370d281cbd6f8b2e (patch)
treef9199c4e5a5481db70e324b374abddf198f9a27a /src/mesa/drivers
parent4467344c829f1dccdf74e27bef2c5fda72552be6 (diff)
i965: Move the entire compiler API into a single file
At this point, the compiler API has been substantially simplified. In the spirit of Kristian's making a compiler library, this commit makes a single header file that contains, more-or-less, the entire compiler API. There's still a bit of cleanup to do particularly in the area of geometry shaders. However, this gets us much closer to having a separate compiler. Reviewed-by: Topi Pohjolainen <[email protected]>
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r--src/mesa/drivers/dri/i965/Makefile.sources1
-rw-r--r--src/mesa/drivers/dri/i965/brw_compiler.h661
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h356
-rw-r--r--src/mesa/drivers/dri/i965/brw_cs.h17
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.h124
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.h58
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h33
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.h12
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.h20
10 files changed, 665 insertions, 619 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 81ef6283fa1..c2438bda356 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -1,6 +1,7 @@
i965_compiler_FILES = \
brw_cfg.cpp \
brw_cfg.h \
+ brw_compiler.h \
brw_cubemap_normalize.cpp \
brw_dead_control_flow.cpp \
brw_dead_control_flow.h \
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
new file mode 100644
index 00000000000..11c485d2f08
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -0,0 +1,661 @@
+/*
+ * Copyright © 2010 - 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include "brw_device_info.h"
+#include "main/mtypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ra_regs;
+struct nir_shader;
+struct brw_geometry_program;
+union gl_constant_value;
+
+struct brw_compiler {
+ const struct brw_device_info *devinfo;
+
+ struct {
+ struct ra_regs *regs;
+
+ /**
+ * Array of the ra classes for the unaligned contiguous register
+ * block sizes used.
+ */
+ int *classes;
+
+ /**
+ * Mapping for register-allocated objects in *regs to the first
+ * GRF for that object.
+ */
+ uint8_t *ra_reg_to_grf;
+ } vec4_reg_set;
+
+ struct {
+ struct ra_regs *regs;
+
+ /**
+ * Array of the ra classes for the unaligned contiguous register
+ * block sizes used, indexed by register size.
+ */
+ int classes[16];
+
+ /**
+ * Mapping from classes to ra_reg ranges. Each of the per-size
+ * classes corresponds to a range of ra_reg nodes. This array stores
+ * those ranges in the form of first ra_reg in each class and the
+ * total number of ra_reg elements in the last array element. This
+ * way the range of the i'th class is given by:
+ * [ class_to_ra_reg_range[i], class_to_ra_reg_range[i+1] )
+ */
+ int class_to_ra_reg_range[17];
+
+ /**
+ * Mapping for register-allocated objects in *regs to the first
+ * GRF for that object.
+ */
+ uint8_t *ra_reg_to_grf;
+
+ /**
+ * ra class for the aligned pairs we use for PLN, which doesn't
+ * appear in *classes.
+ */
+ int aligned_pairs_class;
+ } fs_reg_sets[2];
+
+ void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
+ void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
+
+ bool scalar_vs;
+ struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES];
+};
+
+
+/**
+ * Program key structures.
+ *
+ * When drawing, we look for the currently bound shaders in the program
+ * cache. This is essentially a hash table lookup, and these are the keys.
+ *
+ * Sometimes OpenGL features specified as state need to be simulated via
+ * shader code, due to a mismatch between the API and the hardware. This
+ * is often referred to as "non-orthagonal state" or "NOS". We store NOS
+ * in the program key so it's considered when searching for a program. If
+ * we haven't seen a particular combination before, we have to recompile a
+ * new specialized version.
+ *
+ * Shader compilation should not look up state in gl_context directly, but
+ * instead use the copy in the program key. This guarantees recompiles will
+ * happen correctly.
+ *
+ * @{
+ */
+
+enum PACKED gen6_gather_sampler_wa {
+ WA_SIGN = 1, /* whether we need to sign extend */
+ WA_8BIT = 2, /* if we have an 8bit format needing wa */
+ WA_16BIT = 4, /* if we have a 16bit format needing wa */
+};
+
+/**
+ * Sampler information needed by VS, WM, and GS program cache keys.
+ */
+struct brw_sampler_prog_key_data {
+ /**
+ * EXT_texture_swizzle and DEPTH_TEXTURE_MODE swizzles.
+ */
+ uint16_t swizzles[MAX_SAMPLERS];
+
+ uint32_t gl_clamp_mask[3];
+
+ /**
+ * For RG32F, gather4's channel select is broken.
+ */
+ uint32_t gather_channel_quirk_mask;
+
+ /**
+ * Whether this sampler uses the compressed multisample surface layout.
+ */
+ uint32_t compressed_multisample_layout_mask;
+
+ /**
+ * For Sandybridge, which shader w/a we need for gather quirks.
+ */
+ enum gen6_gather_sampler_wa gen6_gather_wa[MAX_SAMPLERS];
+};
+
+
+/** The program key for Vertex Shaders. */
+struct brw_vs_prog_key {
+ unsigned program_string_id;
+
+ /*
+ * Per-attribute workaround flags
+ */
+ uint8_t gl_attrib_wa_flags[VERT_ATTRIB_MAX];
+
+ bool copy_edgeflag:1;
+
+ bool clamp_vertex_color:1;
+
+ /**
+ * How many user clipping planes are being uploaded to the vertex shader as
+ * push constants.
+ *
+ * These are used for lowering legacy gl_ClipVertex/gl_Position clipping to
+ * clip distances.
+ */
+ unsigned nr_userclip_plane_consts:4;
+
+ /**
+ * For pre-Gen6 hardware, a bitfield indicating which texture coordinates
+ * are going to be replaced with point coordinates (as a consequence of a
+ * call to glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)). Because
+ * our SF thread requires exact matching between VS outputs and FS inputs,
+ * these texture coordinates will need to be unconditionally included in
+ * the VUE, even if they aren't written by the vertex shader.
+ */
+ uint8_t point_coord_replace;
+
+ struct brw_sampler_prog_key_data tex;
+};
+
+/** The program key for Geometry Shaders. */
+struct brw_gs_prog_key
+{
+ unsigned program_string_id;
+
+ struct brw_sampler_prog_key_data tex;
+};
+
+/** The program key for Fragment/Pixel Shaders. */
+struct brw_wm_prog_key {
+ uint8_t iz_lookup;
+ bool stats_wm:1;
+ bool flat_shade:1;
+ bool persample_shading:1;
+ bool persample_2x:1;
+ unsigned nr_color_regions:5;
+ bool replicate_alpha:1;
+ bool render_to_fbo:1;
+ bool clamp_fragment_color:1;
+ bool compute_pos_offset:1;
+ bool compute_sample_id:1;
+ unsigned line_aa:2;
+ bool high_quality_derivatives:1;
+
+ uint16_t drawable_height;
+ uint64_t input_slots_valid;
+ unsigned program_string_id;
+ GLenum alpha_test_func; /* < For Gen4/5 MRT alpha test */
+ float alpha_test_ref;
+
+ struct brw_sampler_prog_key_data tex;
+};
+
+struct brw_cs_prog_key {
+ uint32_t program_string_id;
+ struct brw_sampler_prog_key_data tex;
+};
+
+/*
+ * Image metadata structure as laid out in the shader parameter
+ * buffer. Entries have to be 16B-aligned for the vec4 back-end to be
+ * able to use them. That's okay because the padding and any unused
+ * entries [most of them except when we're doing untyped surface
+ * access] will be removed by the uniform packing pass.
+ */
+#define BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET 0
+#define BRW_IMAGE_PARAM_OFFSET_OFFSET 4
+#define BRW_IMAGE_PARAM_SIZE_OFFSET 8
+#define BRW_IMAGE_PARAM_STRIDE_OFFSET 12
+#define BRW_IMAGE_PARAM_TILING_OFFSET 16
+#define BRW_IMAGE_PARAM_SWIZZLING_OFFSET 20
+#define BRW_IMAGE_PARAM_SIZE 24
+
+struct brw_image_param {
+ /** Surface binding table index. */
+ uint32_t surface_idx;
+
+ /** Offset applied to the X and Y surface coordinates. */
+ uint32_t offset[2];
+
+ /** Surface X, Y and Z dimensions. */
+ uint32_t size[3];
+
+ /** X-stride in bytes, Y-stride in pixels, horizontal slice stride in
+ * pixels, vertical slice stride in pixels.
+ */
+ uint32_t stride[4];
+
+ /** Log2 of the tiling modulus in the X, Y and Z dimension. */
+ uint32_t tiling[3];
+
+ /**
+ * Right shift to apply for bit 6 address swizzling. Two different
+ * swizzles can be specified and will be applied one after the other. The
+ * resulting address will be:
+ *
+ * addr' = addr ^ ((1 << 6) & ((addr >> swizzling[0]) ^
+ * (addr >> swizzling[1])))
+ *
+ * Use \c 0xff if any of the swizzles is not required.
+ */
+ uint32_t swizzling[2];
+};
+
+struct brw_stage_prog_data {
+ struct {
+ /** size of our binding table. */
+ uint32_t size_bytes;
+
+ /** @{
+ * surface indices for the various groups of surfaces
+ */
+ uint32_t pull_constants_start;
+ uint32_t texture_start;
+ uint32_t gather_texture_start;
+ uint32_t ubo_start;
+ uint32_t ssbo_start;
+ uint32_t abo_start;
+ uint32_t image_start;
+ uint32_t shader_time_start;
+ /** @} */
+ } binding_table;
+
+ GLuint nr_params; /**< number of float params/constants */
+ GLuint nr_pull_params;
+ unsigned nr_image_params;
+
+ unsigned curb_read_length;
+ unsigned total_scratch;
+
+ /**
+ * Register where the thread expects to find input data from the URB
+ * (typically uniforms, followed by vertex or fragment attributes).
+ */
+ unsigned dispatch_grf_start_reg;
+
+ bool use_alt_mode; /**< Use ALT floating point mode? Otherwise, IEEE. */
+
+ /* Pointers to tracked values (only valid once
+ * _mesa_load_state_parameters has been called at runtime).
+ */
+ const union gl_constant_value **param;
+ const union gl_constant_value **pull_param;
+
+ /** Image metadata passed to the shader as uniforms. */
+ struct brw_image_param *image_param;
+};
+
+/* Data about a particular attempt to compile a program. Note that
+ * there can be many of these, each in a different GL state
+ * corresponding to a different brw_wm_prog_key struct, with different
+ * compiled programs.
+ */
+struct brw_wm_prog_data {
+ struct brw_stage_prog_data base;
+
+ GLuint num_varying_inputs;
+
+ GLuint dispatch_grf_start_reg_16;
+ GLuint reg_blocks;
+ GLuint reg_blocks_16;
+
+ struct {
+ /** @{
+ * surface indices the WM-specific surfaces
+ */
+ uint32_t render_target_start;
+ /** @} */
+ } binding_table;
+
+ uint8_t computed_depth_mode;
+
+ bool early_fragment_tests;
+ bool no_8;
+ bool dual_src_blend;
+ bool uses_pos_offset;
+ bool uses_omask;
+ bool uses_kill;
+ bool pulls_bary;
+ uint32_t prog_offset_16;
+
+ /**
+ * Mask of which interpolation modes are required by the fragment shader.
+ * Used in hardware setup on gen6+.
+ */
+ uint32_t barycentric_interp_modes;
+
+ /**
+ * Map from gl_varying_slot to the position within the FS setup data
+ * payload where the varying's attribute vertex deltas should be delivered.
+ * For varying slots that are not used by the FS, the value is -1.
+ */
+ int urb_setup[VARYING_SLOT_MAX];
+};
+
+struct brw_cs_prog_data {
+ struct brw_stage_prog_data base;
+
+ GLuint dispatch_grf_start_reg_16;
+ unsigned local_size[3];
+ unsigned simd_size;
+ bool uses_barrier;
+ bool uses_num_work_groups;
+ unsigned local_invocation_id_regs;
+
+ struct {
+ /** @{
+ * surface indices the CS-specific surfaces
+ */
+ uint32_t work_groups_start;
+ /** @} */
+ } binding_table;
+};
+
+/**
+ * Enum representing the i965-specific vertex results that don't correspond
+ * exactly to any element of gl_varying_slot. The values of this enum are
+ * assigned such that they don't conflict with gl_varying_slot.
+ */
+typedef enum
+{
+ BRW_VARYING_SLOT_NDC = VARYING_SLOT_MAX,
+ BRW_VARYING_SLOT_PAD,
+ /**
+ * Technically this is not a varying but just a placeholder that
+ * compile_sf_prog() inserts into its VUE map to cause the gl_PointCoord
+ * builtin variable to be compiled correctly. see compile_sf_prog() for
+ * more info.
+ */
+ BRW_VARYING_SLOT_PNTC,
+ BRW_VARYING_SLOT_COUNT
+} brw_varying_slot;
+
+/**
+ * Data structure recording the relationship between the gl_varying_slot enum
+ * and "slots" within the vertex URB entry (VUE). A "slot" is defined as a
+ * single octaword within the VUE (128 bits).
+ *
+ * Note that each BRW register contains 256 bits (2 octawords), so when
+ * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two
+ * consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as
+ * in a vertex shader), each register corresponds to a single VUE slot, since
+ * it contains data for two separate vertices.
+ */
+struct brw_vue_map {
+ /**
+ * Bitfield representing all varying slots that are (a) stored in this VUE
+ * map, and (b) actually written by the shader. Does not include any of
+ * the additional varying slots defined in brw_varying_slot.
+ */
+ GLbitfield64 slots_valid;
+
+ /**
+ * Is this VUE map for a separate shader pipeline?
+ *
+ * Separable programs (GL_ARB_separate_shader_objects) can be mixed and matched
+ * without the linker having a chance to dead code eliminate unused varyings.
+ *
+ * This means that we have to use a fixed slot layout, based on the output's
+ * location field, rather than assigning slots in a compact contiguous block.
+ */
+ bool separate;
+
+ /**
+ * Map from gl_varying_slot value to VUE slot. For gl_varying_slots that are
+ * not stored in a slot (because they are not written, or because
+ * additional processing is applied before storing them in the VUE), the
+ * value is -1.
+ */
+ signed char varying_to_slot[BRW_VARYING_SLOT_COUNT];
+
+ /**
+ * Map from VUE slot to gl_varying_slot value. For slots that do not
+ * directly correspond to a gl_varying_slot, the value comes from
+ * brw_varying_slot.
+ *
+ * For slots that are not in use, the value is BRW_VARYING_SLOT_COUNT (this
+ * simplifies code that uses the value stored in slot_to_varying to
+ * create a bit mask).
+ */
+ signed char slot_to_varying[BRW_VARYING_SLOT_COUNT];
+
+ /**
+ * Total number of VUE slots in use
+ */
+ int num_slots;
+};
+
+/**
+ * Convert a VUE slot number into a byte offset within the VUE.
+ */
+static inline GLuint brw_vue_slot_to_offset(GLuint slot)
+{
+ return 16*slot;
+}
+
+/**
+ * Convert a vertex output (brw_varying_slot) into a byte offset within the
+ * VUE.
+ */
+static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map,
+ GLuint varying)
+{
+ return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]);
+}
+
+void brw_compute_vue_map(const struct brw_device_info *devinfo,
+ struct brw_vue_map *vue_map,
+ GLbitfield64 slots_valid,
+ bool separate_shader);
+
+enum shader_dispatch_mode {
+ DISPATCH_MODE_4X1_SINGLE = 0,
+ DISPATCH_MODE_4X2_DUAL_INSTANCE = 1,
+ DISPATCH_MODE_4X2_DUAL_OBJECT = 2,
+ DISPATCH_MODE_SIMD8 = 3,
+};
+
+struct brw_vue_prog_data {
+ struct brw_stage_prog_data base;
+ struct brw_vue_map vue_map;
+
+ GLuint urb_read_length;
+ GLuint total_grf;
+
+ /* Used for calculating urb partitions. In the VS, this is the size of the
+ * URB entry used for both input and output to the thread. In the GS, this
+ * is the size of the URB entry used for output.
+ */
+ GLuint urb_entry_size;
+
+ enum shader_dispatch_mode dispatch_mode;
+};
+
+struct brw_vs_prog_data {
+ struct brw_vue_prog_data base;
+
+ GLbitfield64 inputs_read;
+
+ unsigned nr_attributes;
+
+ bool uses_vertexid;
+ bool uses_instanceid;
+};
+
+struct brw_gs_prog_data
+{
+ struct brw_vue_prog_data base;
+
+ /**
+ * Size of an output vertex, measured in HWORDS (32 bytes).
+ */
+ unsigned output_vertex_size_hwords;
+
+ unsigned output_topology;
+
+ /**
+ * Size of the control data (cut bits or StreamID bits), in hwords (32
+ * bytes). 0 if there is no control data.
+ */
+ unsigned control_data_header_size_hwords;
+
+ /**
+ * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
+ * if the control data is StreamID bits, or
+ * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
+ * Ignored if control_data_header_size is 0.
+ */
+ unsigned control_data_format;
+
+ bool include_primitive_id;
+
+ /**
+ * The number of vertices emitted, if constant - otherwise -1.
+ */
+ int static_vertex_count;
+
+ int invocations;
+
+ /**
+ * Gen6 transform feedback enabled flag.
+ */
+ bool gen6_xfb_enabled;
+
+ /**
+ * Gen6: Provoking vertex convention for odd-numbered triangles
+ * in tristrips.
+ */
+ GLuint pv_first:1;
+
+ /**
+ * Gen6: Number of varyings that are output to transform feedback.
+ */
+ GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */
+
+ /**
+ * Gen6: Map from the index of a transform feedback binding table entry to the
+ * gl_varying_slot that should be streamed out through that binding table
+ * entry.
+ */
+ unsigned char transform_feedback_bindings[64 /* BRW_MAX_SOL_BINDINGS */];
+
+ /**
+ * Gen6: Map from the index of a transform feedback binding table entry to the
+ * swizzles that should be used when streaming out data through that
+ * binding table entry.
+ */
+ unsigned char transform_feedback_swizzles[64 /* BRW_MAX_SOL_BINDINGS */];
+};
+
+
+/** @} */
+
+/**
+ * Compile a vertex shader.
+ *
+ * Returns the final assembly and the program's size.
+ */
+const unsigned *
+brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
+ void *mem_ctx,
+ const struct brw_vs_prog_key *key,
+ struct brw_vs_prog_data *prog_data,
+ const struct nir_shader *shader,
+ gl_clip_plane *clip_planes,
+ bool use_legacy_snorm_formula,
+ int shader_time_index,
+ unsigned *final_assembly_size,
+ char **error_str);
+
+/**
+ * Scratch data used when compiling a GLSL geometry shader.
+ */
+struct brw_gs_compile
+{
+ struct brw_gs_prog_key key;
+ struct brw_gs_prog_data prog_data;
+ struct brw_vue_map input_vue_map;
+
+ struct brw_geometry_program *gp;
+
+ unsigned control_data_bits_per_vertex;
+ unsigned control_data_header_size_bits;
+};
+
+/**
+ * Compile a vertex shader.
+ *
+ * Returns the final assembly and the program's size.
+ */
+const unsigned *
+brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
+ struct brw_gs_compile *c,
+ const struct nir_shader *shader,
+ struct gl_shader_program *shader_prog,
+ void *mem_ctx,
+ int shader_time_index,
+ unsigned *final_assembly_size,
+ char **error_str);
+
+/**
+ * Compile a fragment shader.
+ *
+ * Returns the final assembly and the program's size.
+ */
+const unsigned *
+brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
+ void *mem_ctx,
+ const struct brw_wm_prog_key *key,
+ struct brw_wm_prog_data *prog_data,
+ const struct nir_shader *shader,
+ struct gl_program *prog,
+ int shader_time_index8,
+ int shader_time_index16,
+ bool use_rep_send,
+ unsigned *final_assembly_size,
+ char **error_str);
+
+/**
+ * Compile a compute shader.
+ *
+ * Returns the final assembly and the program's size.
+ */
+const unsigned *
+brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
+ void *mem_ctx,
+ const struct brw_cs_prog_key *key,
+ struct brw_cs_prog_data *prog_data,
+ const struct nir_shader *shader,
+ int shader_time_index,
+ unsigned *final_assembly_size,
+ char **error_str);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 6b2bbd21703..3b125448e14 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -51,7 +51,7 @@
#include "brw_context.h"
#include "brw_defines.h"
-#include "brw_shader.h"
+#include "brw_compiler.h"
#include "brw_draw.h"
#include "brw_state.h"
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 4aba7b814c6..4f503ae4869 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -40,6 +40,7 @@
#include "main/mm.h"
#include "main/mtypes.h"
#include "brw_structs.h"
+#include "brw_compiler.h"
#include "intel_aub.h"
#include "program/prog_parameter.h"
@@ -340,261 +341,6 @@ struct brw_shader {
bool compiled_once;
};
-struct brw_stage_prog_data {
- struct {
- /** size of our binding table. */
- uint32_t size_bytes;
-
- /** @{
- * surface indices for the various groups of surfaces
- */
- uint32_t pull_constants_start;
- uint32_t texture_start;
- uint32_t gather_texture_start;
- uint32_t ubo_start;
- uint32_t ssbo_start;
- uint32_t abo_start;
- uint32_t image_start;
- uint32_t shader_time_start;
- /** @} */
- } binding_table;
-
- GLuint nr_params; /**< number of float params/constants */
- GLuint nr_pull_params;
- unsigned nr_image_params;
-
- unsigned curb_read_length;
- unsigned total_scratch;
-
- /**
- * Register where the thread expects to find input data from the URB
- * (typically uniforms, followed by vertex or fragment attributes).
- */
- unsigned dispatch_grf_start_reg;
-
- bool use_alt_mode; /**< Use ALT floating point mode? Otherwise, IEEE. */
-
- /* Pointers to tracked values (only valid once
- * _mesa_load_state_parameters has been called at runtime).
- */
- const gl_constant_value **param;
- const gl_constant_value **pull_param;
-
- /** Image metadata passed to the shader as uniforms. */
- struct brw_image_param *image_param;
-};
-
-/*
- * Image metadata structure as laid out in the shader parameter
- * buffer. Entries have to be 16B-aligned for the vec4 back-end to be
- * able to use them. That's okay because the padding and any unused
- * entries [most of them except when we're doing untyped surface
- * access] will be removed by the uniform packing pass.
- */
-#define BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET 0
-#define BRW_IMAGE_PARAM_OFFSET_OFFSET 4
-#define BRW_IMAGE_PARAM_SIZE_OFFSET 8
-#define BRW_IMAGE_PARAM_STRIDE_OFFSET 12
-#define BRW_IMAGE_PARAM_TILING_OFFSET 16
-#define BRW_IMAGE_PARAM_SWIZZLING_OFFSET 20
-#define BRW_IMAGE_PARAM_SIZE 24
-
-struct brw_image_param {
- /** Surface binding table index. */
- uint32_t surface_idx;
-
- /** Offset applied to the X and Y surface coordinates. */
- uint32_t offset[2];
-
- /** Surface X, Y and Z dimensions. */
- uint32_t size[3];
-
- /** X-stride in bytes, Y-stride in pixels, horizontal slice stride in
- * pixels, vertical slice stride in pixels.
- */
- uint32_t stride[4];
-
- /** Log2 of the tiling modulus in the X, Y and Z dimension. */
- uint32_t tiling[3];
-
- /**
- * Right shift to apply for bit 6 address swizzling. Two different
- * swizzles can be specified and will be applied one after the other. The
- * resulting address will be:
- *
- * addr' = addr ^ ((1 << 6) & ((addr >> swizzling[0]) ^
- * (addr >> swizzling[1])))
- *
- * Use \c 0xff if any of the swizzles is not required.
- */
- uint32_t swizzling[2];
-};
-
-/* Data about a particular attempt to compile a program. Note that
- * there can be many of these, each in a different GL state
- * corresponding to a different brw_wm_prog_key struct, with different
- * compiled programs.
- */
-struct brw_wm_prog_data {
- struct brw_stage_prog_data base;
-
- GLuint num_varying_inputs;
-
- GLuint dispatch_grf_start_reg_16;
- GLuint reg_blocks;
- GLuint reg_blocks_16;
-
- struct {
- /** @{
- * surface indices the WM-specific surfaces
- */
- uint32_t render_target_start;
- /** @} */
- } binding_table;
-
- uint8_t computed_depth_mode;
-
- bool early_fragment_tests;
- bool no_8;
- bool dual_src_blend;
- bool uses_pos_offset;
- bool uses_omask;
- bool uses_kill;
- bool pulls_bary;
- uint32_t prog_offset_16;
-
- /**
- * Mask of which interpolation modes are required by the fragment shader.
- * Used in hardware setup on gen6+.
- */
- uint32_t barycentric_interp_modes;
-
- /**
- * Map from gl_varying_slot to the position within the FS setup data
- * payload where the varying's attribute vertex deltas should be delivered.
- * For varying slots that are not used by the FS, the value is -1.
- */
- int urb_setup[VARYING_SLOT_MAX];
-};
-
-struct brw_cs_prog_data {
- struct brw_stage_prog_data base;
-
- GLuint dispatch_grf_start_reg_16;
- unsigned local_size[3];
- unsigned simd_size;
- bool uses_barrier;
- bool uses_num_work_groups;
- unsigned local_invocation_id_regs;
-
- struct {
- /** @{
- * surface indices the CS-specific surfaces
- */
- uint32_t work_groups_start;
- /** @} */
- } binding_table;
-};
-
-/**
- * Enum representing the i965-specific vertex results that don't correspond
- * exactly to any element of gl_varying_slot. The values of this enum are
- * assigned such that they don't conflict with gl_varying_slot.
- */
-typedef enum
-{
- BRW_VARYING_SLOT_NDC = VARYING_SLOT_MAX,
- BRW_VARYING_SLOT_PAD,
- /**
- * Technically this is not a varying but just a placeholder that
- * compile_sf_prog() inserts into its VUE map to cause the gl_PointCoord
- * builtin variable to be compiled correctly. see compile_sf_prog() for
- * more info.
- */
- BRW_VARYING_SLOT_PNTC,
- BRW_VARYING_SLOT_COUNT
-} brw_varying_slot;
-
-
-/**
- * Data structure recording the relationship between the gl_varying_slot enum
- * and "slots" within the vertex URB entry (VUE). A "slot" is defined as a
- * single octaword within the VUE (128 bits).
- *
- * Note that each BRW register contains 256 bits (2 octawords), so when
- * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two
- * consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as
- * in a vertex shader), each register corresponds to a single VUE slot, since
- * it contains data for two separate vertices.
- */
-struct brw_vue_map {
- /**
- * Bitfield representing all varying slots that are (a) stored in this VUE
- * map, and (b) actually written by the shader. Does not include any of
- * the additional varying slots defined in brw_varying_slot.
- */
- GLbitfield64 slots_valid;
-
- /**
- * Is this VUE map for a separate shader pipeline?
- *
- * Separable programs (GL_ARB_separate_shader_objects) can be mixed and matched
- * without the linker having a chance to dead code eliminate unused varyings.
- *
- * This means that we have to use a fixed slot layout, based on the output's
- * location field, rather than assigning slots in a compact contiguous block.
- */
- bool separate;
-
- /**
- * Map from gl_varying_slot value to VUE slot. For gl_varying_slots that are
- * not stored in a slot (because they are not written, or because
- * additional processing is applied before storing them in the VUE), the
- * value is -1.
- */
- signed char varying_to_slot[BRW_VARYING_SLOT_COUNT];
-
- /**
- * Map from VUE slot to gl_varying_slot value. For slots that do not
- * directly correspond to a gl_varying_slot, the value comes from
- * brw_varying_slot.
- *
- * For slots that are not in use, the value is BRW_VARYING_SLOT_COUNT (this
- * simplifies code that uses the value stored in slot_to_varying to
- * create a bit mask).
- */
- signed char slot_to_varying[BRW_VARYING_SLOT_COUNT];
-
- /**
- * Total number of VUE slots in use
- */
- int num_slots;
-};
-
-/**
- * Convert a VUE slot number into a byte offset within the VUE.
- */
-static inline GLuint brw_vue_slot_to_offset(GLuint slot)
-{
- return 16*slot;
-}
-
-/**
- * Convert a vertex output (brw_varying_slot) into a byte offset within the
- * VUE.
- */
-static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map,
- GLuint varying)
-{
- return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]);
-}
-
-void brw_compute_vue_map(const struct brw_device_info *devinfo,
- struct brw_vue_map *vue_map,
- GLbitfield64 slots_valid,
- bool separate_shader);
-
-
/**
* Bitmask indicating which fragment shader inputs represent varyings (and
* hence have to be delivered to the fragment shader by the SF/SBE stage).
@@ -671,41 +417,6 @@ struct brw_ff_gs_prog_data {
unsigned svbi_postincrement_value;
};
-enum shader_dispatch_mode {
- DISPATCH_MODE_4X1_SINGLE = 0,
- DISPATCH_MODE_4X2_DUAL_INSTANCE = 1,
- DISPATCH_MODE_4X2_DUAL_OBJECT = 2,
- DISPATCH_MODE_SIMD8 = 3,
-};
-
-struct brw_vue_prog_data {
- struct brw_stage_prog_data base;
- struct brw_vue_map vue_map;
-
- GLuint urb_read_length;
- GLuint total_grf;
-
- /* Used for calculating urb partitions. In the VS, this is the size of the
- * URB entry used for both input and output to the thread. In the GS, this
- * is the size of the URB entry used for output.
- */
- GLuint urb_entry_size;
-
- enum shader_dispatch_mode dispatch_mode;
-};
-
-
-struct brw_vs_prog_data {
- struct brw_vue_prog_data base;
-
- GLbitfield64 inputs_read;
-
- unsigned nr_attributes;
-
- bool uses_vertexid;
- bool uses_instanceid;
-};
-
/** Number of texture sampler units */
#define BRW_MAX_TEX_UNIT 32
@@ -763,71 +474,6 @@ struct brw_vs_prog_data {
#define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
-struct brw_gs_prog_data
-{
- struct brw_vue_prog_data base;
-
- /**
- * Size of an output vertex, measured in HWORDS (32 bytes).
- */
- unsigned output_vertex_size_hwords;
-
- unsigned output_topology;
-
- /**
- * Size of the control data (cut bits or StreamID bits), in hwords (32
- * bytes). 0 if there is no control data.
- */
- unsigned control_data_header_size_hwords;
-
- /**
- * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
- * if the control data is StreamID bits, or
- * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
- * Ignored if control_data_header_size is 0.
- */
- unsigned control_data_format;
-
- bool include_primitive_id;
-
- /**
- * The number of vertices emitted, if constant - otherwise -1.
- */
- int static_vertex_count;
-
- int invocations;
-
- /**
- * Gen6 transform feedback enabled flag.
- */
- bool gen6_xfb_enabled;
-
- /**
- * Gen6: Provoking vertex convention for odd-numbered triangles
- * in tristrips.
- */
- GLuint pv_first:1;
-
- /**
- * Gen6: Number of varyings that are output to transform feedback.
- */
- GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */
-
- /**
- * Gen6: Map from the index of a transform feedback binding table entry to the
- * gl_varying_slot that should be streamed out through that binding table
- * entry.
- */
- unsigned char transform_feedback_bindings[BRW_MAX_SOL_BINDINGS];
-
- /**
- * Gen6: Map from the index of a transform feedback binding table entry to the
- * swizzles that should be used when streaming out data through that
- * binding table entry.
- */
- unsigned char transform_feedback_swizzles[BRW_MAX_SOL_BINDINGS];
-};
-
/**
* Stride in bytes between shader_time entries.
*
diff --git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h
index aac519f1fd1..899e340f14e 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.h
+++ b/src/mesa/drivers/dri/i965/brw_cs.h
@@ -27,11 +27,6 @@
#include "brw_program.h"
-struct brw_cs_prog_key {
- uint32_t program_string_id;
- struct brw_sampler_prog_key_data tex;
-};
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -39,18 +34,6 @@ extern "C" {
void
brw_upload_cs_prog(struct brw_context *brw);
-struct nir_shader;
-
-const unsigned *
-brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
- void *mem_ctx,
- const struct brw_cs_prog_key *key,
- struct brw_cs_prog_data *prog_data,
- const struct nir_shader *shader,
- int shader_time_index,
- unsigned *final_assembly_size,
- char **error_str);
-
void
brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
void *buffer, uint32_t threads, uint32_t stride);
diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h
index cf0522a8b10..f8cf2b062c8 100644
--- a/src/mesa/drivers/dri/i965/brw_program.h
+++ b/src/mesa/drivers/dri/i965/brw_program.h
@@ -24,129 +24,7 @@
#ifndef BRW_PROGRAM_H
#define BRW_PROGRAM_H
-/**
- * Program key structures.
- *
- * When drawing, we look for the currently bound shaders in the program
- * cache. This is essentially a hash table lookup, and these are the keys.
- *
- * Sometimes OpenGL features specified as state need to be simulated via
- * shader code, due to a mismatch between the API and the hardware. This
- * is often referred to as "non-orthagonal state" or "NOS". We store NOS
- * in the program key so it's considered when searching for a program. If
- * we haven't seen a particular combination before, we have to recompile a
- * new specialized version.
- *
- * Shader compilation should not look up state in gl_context directly, but
- * instead use the copy in the program key. This guarantees recompiles will
- * happen correctly.
- *
- * @{
- */
-
-enum PACKED gen6_gather_sampler_wa {
- WA_SIGN = 1, /* whether we need to sign extend */
- WA_8BIT = 2, /* if we have an 8bit format needing wa */
- WA_16BIT = 4, /* if we have a 16bit format needing wa */
-};
-
-/**
- * Sampler information needed by VS, WM, and GS program cache keys.
- */
-struct brw_sampler_prog_key_data {
- /**
- * EXT_texture_swizzle and DEPTH_TEXTURE_MODE swizzles.
- */
- uint16_t swizzles[MAX_SAMPLERS];
-
- uint32_t gl_clamp_mask[3];
-
- /**
- * For RG32F, gather4's channel select is broken.
- */
- uint32_t gather_channel_quirk_mask;
-
- /**
- * Whether this sampler uses the compressed multisample surface layout.
- */
- uint32_t compressed_multisample_layout_mask;
-
- /**
- * For Sandybridge, which shader w/a we need for gather quirks.
- */
- enum gen6_gather_sampler_wa gen6_gather_wa[MAX_SAMPLERS];
-};
-
-
-/** The program key for Vertex Shaders. */
-struct brw_vs_prog_key {
- unsigned program_string_id;
-
- /*
- * Per-attribute workaround flags
- */
- uint8_t gl_attrib_wa_flags[VERT_ATTRIB_MAX];
-
- bool copy_edgeflag:1;
-
- bool clamp_vertex_color:1;
-
- /**
- * How many user clipping planes are being uploaded to the vertex shader as
- * push constants.
- *
- * These are used for lowering legacy gl_ClipVertex/gl_Position clipping to
- * clip distances.
- */
- unsigned nr_userclip_plane_consts:4;
-
- /**
- * For pre-Gen6 hardware, a bitfield indicating which texture coordinates
- * are going to be replaced with point coordinates (as a consequence of a
- * call to glTexEnvi(GL_POINT_SPRITE, GL_COORD_REPLACE, GL_TRUE)). Because
- * our SF thread requires exact matching between VS outputs and FS inputs,
- * these texture coordinates will need to be unconditionally included in
- * the VUE, even if they aren't written by the vertex shader.
- */
- uint8_t point_coord_replace;
-
- struct brw_sampler_prog_key_data tex;
-};
-
-/** The program key for Geometry Shaders. */
-struct brw_gs_prog_key
-{
- unsigned program_string_id;
-
- struct brw_sampler_prog_key_data tex;
-};
-
-/** The program key for Fragment/Pixel Shaders. */
-struct brw_wm_prog_key {
- uint8_t iz_lookup;
- bool stats_wm:1;
- bool flat_shade:1;
- bool persample_shading:1;
- bool persample_2x:1;
- unsigned nr_color_regions:5;
- bool replicate_alpha:1;
- bool render_to_fbo:1;
- bool clamp_fragment_color:1;
- bool compute_pos_offset:1;
- bool compute_sample_id:1;
- unsigned line_aa:2;
- bool high_quality_derivatives:1;
-
- uint16_t drawable_height;
- uint64_t input_slots_valid;
- unsigned program_string_id;
- GLenum alpha_test_func; /* < For Gen4/5 MRT alpha test */
- float alpha_test_ref;
-
- struct brw_sampler_prog_key_data tex;
-};
-
-/** @} */
+#include "brw_compiler.h"
#ifdef __cplusplus
extern "C" {
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 6d4cf048390..b33b08f40d7 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -38,64 +38,6 @@
#define MAX_SAMPLER_MESSAGE_SIZE 11
#define MAX_VGRF_SIZE 16
-struct brw_compiler {
- const struct brw_device_info *devinfo;
-
- struct {
- struct ra_regs *regs;
-
- /**
- * Array of the ra classes for the unaligned contiguous register
- * block sizes used.
- */
- int *classes;
-
- /**
- * Mapping for register-allocated objects in *regs to the first
- * GRF for that object.
- */
- uint8_t *ra_reg_to_grf;
- } vec4_reg_set;
-
- struct {
- struct ra_regs *regs;
-
- /**
- * Array of the ra classes for the unaligned contiguous register
- * block sizes used, indexed by register size.
- */
- int classes[16];
-
- /**
- * Mapping from classes to ra_reg ranges. Each of the per-size
- * classes corresponds to a range of ra_reg nodes. This array stores
- * those ranges in the form of first ra_reg in each class and the
- * total number of ra_reg elements in the last array element. This
- * way the range of the i'th class is given by:
- * [ class_to_ra_reg_range[i], class_to_ra_reg_range[i+1] )
- */
- int class_to_ra_reg_range[17];
-
- /**
- * Mapping for register-allocated objects in *regs to the first
- * GRF for that object.
- */
- uint8_t *ra_reg_to_grf;
-
- /**
- * ra class for the aligned pairs we use for PLN, which doesn't
- * appear in *classes.
- */
- int aligned_pairs_class;
- } fs_reg_sets[2];
-
- void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
- void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
-
- bool scalar_vs;
- struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES];
-};
-
enum PACKED register_file {
BAD_FILE,
GRF,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
index df33e941d24..c52552768c8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
@@ -32,39 +32,6 @@
#include "brw_vec4.h"
-/**
- * Scratch data used when compiling a GLSL geometry shader.
- */
-struct brw_gs_compile
-{
- struct brw_gs_prog_key key;
- struct brw_gs_prog_data prog_data;
- struct brw_vue_map input_vue_map;
-
- struct brw_geometry_program *gp;
-
- unsigned control_data_bits_per_vertex;
- unsigned control_data_header_size_bits;
-};
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-const unsigned *brw_compile_gs(const struct brw_compiler *compiler,
- void *log_data,
- struct brw_gs_compile *c,
- const nir_shader *shader,
- struct gl_shader_program *shader_prog,
- void *mem_ctx,
- int shader_time_index,
- unsigned *final_assembly_size,
- char **error_str);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
#ifdef __cplusplus
namespace brw {
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index aebb76f7bd0..bcb5e7b0b2a 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -54,18 +54,6 @@
extern "C" {
#endif
-struct nir_shader;
-
-const unsigned *brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
- void *mem_ctx,
- const struct brw_vs_prog_key *key,
- struct brw_vs_prog_data *prog_data,
- const struct nir_shader *shader,
- gl_clip_plane *clip_planes,
- bool use_legacy_snorm_formula,
- int shader_time_index,
- unsigned *final_assembly_size,
- char **error_str);
void brw_vs_debug_recompile(struct brw_context *brw,
struct gl_shader_program *prog,
const struct brw_vs_prog_key *key);
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 7e4bf35d089..53a642ee8bb 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -61,26 +61,6 @@
extern "C" {
#endif
-struct nir_shader;
-
-/**
- * Compile a fragment shader.
- *
- * Returns the final assembly and the program's size.
- */
-const unsigned *brw_compile_fs(const struct brw_compiler *compiler,
- void *log_data,
- void *mem_ctx,
- const struct brw_wm_prog_key *key,
- struct brw_wm_prog_data *prog_data,
- const struct nir_shader *shader,
- struct gl_program *prog,
- int shader_time_index8,
- int shader_time_index16,
- bool use_rep_send,
- unsigned *final_assembly_size,
- char **error_str);
-
GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog);
struct gl_shader *brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type);