diff options
Diffstat (limited to 'src/mesa')
88 files changed, 3638 insertions, 1481 deletions
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index 34fb4461985..de0e330b7d1 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -345,7 +345,6 @@ TNL_FILES = \ tnl/tnl.h \ tnl/t_pipeline.c \ tnl/t_pipeline.h \ - tnl/t_rasterpos.c \ tnl/t_vb_cliptmp.h \ tnl/t_vb_fog.c \ tnl/t_vb_light.c \ @@ -424,6 +423,8 @@ STATETRACKER_FILES = \ state_tracker/st_cb_clear.h \ state_tracker/st_cb_condrender.c \ state_tracker/st_cb_condrender.h \ + state_tracker/st_cb_copyimage.c \ + state_tracker/st_cb_copyimage.h \ state_tracker/st_cb_drawpixels.c \ state_tracker/st_cb_drawpixels.h \ state_tracker/st_cb_drawpixels_shader.c \ diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 3d1fccb3ab4..752aaf6c006 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -33,6 +33,7 @@ #include "main/mipmap.h" #include "main/queryobj.h" #include "main/readpix.h" +#include "main/rastpos.h" #include "main/renderbuffer.h" #include "main/shaderobj.h" #include "main/texcompress.h" @@ -81,7 +82,7 @@ _mesa_init_driver_functions(struct dd_function_table *driver) /* framebuffer/image functions */ driver->Clear = _swrast_Clear; - driver->RasterPos = _tnl_RasterPos; + driver->RasterPos = _mesa_RasterPos; driver->DrawPixels = _swrast_DrawPixels; driver->ReadPixels = _mesa_readpixels; driver->CopyPixels = _swrast_CopyPixels; diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index 04b3f9cc8ce..9d003e48bd8 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -59,6 +59,7 @@ TESTS = \ test_fs_saturate_propagation \ test_eu_compact \ test_vf_float_conversions \ + test_vec4_cmod_propagation \ test_vec4_copy_propagation \ test_vec4_register_coalesce @@ -94,6 +95,12 @@ test_vec4_copy_propagation_LDADD = \ $(top_builddir)/src/gtest/libgtest.la \ $(TEST_LIBS) +test_vec4_cmod_propagation_SOURCES = \ + 
test_vec4_cmod_propagation.cpp +test_vec4_cmod_propagation_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + $(TEST_LIBS) + test_eu_compact_SOURCES = \ test_eu_compact.c nodist_EXTRA_test_eu_compact_SOURCES = dummy.cpp diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index ccd540dabca..ed2654ef329 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -58,6 +58,7 @@ i965_compiler_FILES = \ brw_util.c \ brw_util.h \ brw_vec4_builder.h \ + brw_vec4_cmod_propagation.cpp \ brw_vec4_copy_propagation.cpp \ brw_vec4.cpp \ brw_vec4_cse.cpp \ diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp b/src/mesa/drivers/dri/i965/brw_cfg.cpp index 10bcd4bafd4..5d46615bc7b 100644 --- a/src/mesa/drivers/dri/i965/brw_cfg.cpp +++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp @@ -528,7 +528,9 @@ cfg_t::dump_domtree() { printf("digraph DominanceTree {\n"); foreach_block(block, this) { - printf("\t%d -> %d\n", block->idom->num, block->num); + if (block->idom) { + printf("\t%d -> %d\n", block->idom->num, block->num); + } } printf("}\n"); } diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h b/src/mesa/drivers/dri/i965/brw_cfg.h index a06b0aa1cd0..69e39e8964d 100644 --- a/src/mesa/drivers/dri/i965/brw_cfg.h +++ b/src/mesa/drivers/dri/i965/brw_cfg.h @@ -90,6 +90,8 @@ struct bblock_t { struct exec_list parents; struct exec_list children; int num; + + unsigned cycle_count; }; static inline struct backend_instruction * @@ -285,6 +287,8 @@ struct cfg_t { int num_blocks; bool idom_dirty; + + unsigned cycle_count; }; /* Note that this is implemented with a double for loop -- break will diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index d9967143d8a..e5133ef5a3d 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -338,6 +338,7 @@ struct brw_wm_prog_data { } binding_table; uint8_t computed_depth_mode; 
+ bool computed_stencil; bool early_fragment_tests; bool no_8; @@ -443,9 +444,7 @@ struct brw_vue_map { * directly correspond to a gl_varying_slot, the value comes from * brw_varying_slot. * - * For slots that are not in use, the value is BRW_VARYING_SLOT_COUNT (this - * simplifies code that uses the value stored in slot_to_varying to - * create a bit mask). + * For slots that are not in use, the value is BRW_VARYING_SLOT_PAD. */ signed char slot_to_varying[BRW_VARYING_SLOT_COUNT]; @@ -467,8 +466,8 @@ static inline GLuint brw_vue_slot_to_offset(GLuint slot) * Convert a vertex output (brw_varying_slot) into a byte offset within the * VUE. */ -static inline GLuint brw_varying_to_offset(struct brw_vue_map *vue_map, - GLuint varying) +static inline +GLuint brw_varying_to_offset(const struct brw_vue_map *vue_map, GLuint varying) { return brw_vue_slot_to_offset(vue_map->varying_to_slot[varying]); } diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 4f503ae4869..c83f47bdff7 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -501,8 +501,6 @@ struct brw_cache_item { }; -typedef void (*cache_aux_free_func)(const void *aux); - struct brw_cache { struct brw_context *brw; @@ -512,9 +510,6 @@ struct brw_cache { uint32_t next_offset; bool bo_used_by_gpu; - - /** Optional functions for freeing other pointers attached to a prog_data. 
*/ - cache_aux_free_func aux_free[BRW_MAX_CACHE]; }; @@ -1177,7 +1172,7 @@ struct brw_context int num_atoms[BRW_NUM_PIPELINES]; const struct brw_tracked_state render_atoms[60]; - const struct brw_tracked_state compute_atoms[8]; + const struct brw_tracked_state compute_atoms[9]; /* If (INTEL_DEBUG & DEBUG_BATCH) */ struct { @@ -1463,7 +1458,7 @@ void brw_upload_ubo_surfaces(struct brw_context *brw, struct brw_stage_prog_data *prog_data, bool dword_pitch); void brw_upload_abo_surfaces(struct brw_context *brw, - struct gl_shader_program *prog, + struct gl_shader *shader, struct brw_stage_state *stage_state, struct brw_stage_prog_data *prog_data); void brw_upload_image_surfaces(struct brw_context *brw, @@ -1680,6 +1675,7 @@ struct opcode_desc { extern const struct opcode_desc opcode_descs[128]; extern const char * const conditional_modifier[16]; +extern const char *const pred_ctrl_align16[16]; void brw_emit_depthbuffer(struct brw_context *brw); diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 169d092f90e..754da9fc3da 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -913,20 +913,15 @@ enum opcode { /** * Same as FS_OPCODE_FB_WRITE but expects its arguments separately as - * individual sources instead of as a single payload blob: - * - * Source 0: [required] Color 0. - * Source 1: [optional] Color 1 (for dual source blend messages). - * Source 2: [optional] Src0 Alpha. - * Source 3: [optional] Source Depth (gl_FragDepth) - * Source 4: [optional (gen4-5)] Destination Depth passthrough from thread - * Source 5: [optional] Sample Mask (gl_SampleMask). - * Source 6: [required] Number of color components (as a UD immediate). + * individual sources instead of as a single payload blob. The + * position/ordering of the arguments are defined by the enum + * fb_write_logical_srcs. 
*/ FS_OPCODE_FB_WRITE_LOGICAL, FS_OPCODE_BLORP_FB_WRITE, FS_OPCODE_REP_FB_WRITE, + FS_OPCODE_PACK_STENCIL_REF, SHADER_OPCODE_RCP, SHADER_OPCODE_RSQ, SHADER_OPCODE_SQRT, @@ -1332,6 +1327,17 @@ enum brw_urb_write_flags { BRW_URB_WRITE_ALLOCATE | BRW_URB_WRITE_COMPLETE, }; +enum fb_write_logical_srcs { + FB_WRITE_LOGICAL_SRC_COLOR0, /* REQUIRED */ + FB_WRITE_LOGICAL_SRC_COLOR1, /* for dual source blend messages */ + FB_WRITE_LOGICAL_SRC_SRC0_ALPHA, + FB_WRITE_LOGICAL_SRC_SRC_DEPTH, /* gl_FragDepth */ + FB_WRITE_LOGICAL_SRC_DST_DEPTH, /* GEN4-5: passthrough from thread */ + FB_WRITE_LOGICAL_SRC_SRC_STENCIL, /* gl_FragStencilRefARB */ + FB_WRITE_LOGICAL_SRC_OMASK, /* Sample Mask (gl_SampleMask) */ + FB_WRITE_LOGICAL_SRC_COMPONENTS, /* REQUIRED */ +}; + #ifdef __cplusplus /** * Allow brw_urb_write_flags enums to be ORed together. diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c b/src/mesa/drivers/dri/i965/brw_device_info.c index 65172490da3..6372fb5c55f 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.c +++ b/src/mesa/drivers/dri/i965/brw_device_info.c @@ -311,7 +311,7 @@ static const struct brw_device_info brw_device_info_chv = { .max_gs_threads = 336, \ .max_hs_threads = 336, \ .max_ds_threads = 336, \ - .max_wm_threads = 64 * 6, \ + .max_wm_threads = 64 * 9, \ .max_cs_threads = 56, \ .urb = { \ .size = 384, \ @@ -335,6 +335,10 @@ static const struct brw_device_info brw_device_info_skl_gt3 = { GEN9_FEATURES, .gt = 3, }; +static const struct brw_device_info brw_device_info_skl_gt4 = { + GEN9_FEATURES, .gt = 4, +}; + static const struct brw_device_info brw_device_info_bxt = { GEN9_FEATURES, .is_broxton = 1, @@ -359,7 +363,7 @@ static const struct brw_device_info brw_device_info_bxt = { }; const struct brw_device_info * -brw_get_device_info(int devid, int revision) +brw_get_device_info(int devid) { const struct brw_device_info *devinfo; switch (devid) { diff --git a/src/mesa/drivers/dri/i965/brw_device_info.h b/src/mesa/drivers/dri/i965/brw_device_info.h 
index 7bab5716b43..6f4a250e874 100644 --- a/src/mesa/drivers/dri/i965/brw_device_info.h +++ b/src/mesa/drivers/dri/i965/brw_device_info.h @@ -86,5 +86,5 @@ struct brw_device_info /** @} */ }; -const struct brw_device_info *brw_get_device_info(int devid, int revision); +const struct brw_device_info *brw_get_device_info(int devid); const char *brw_get_device_name(int devid); diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index db23a187a93..df747107188 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -252,7 +252,7 @@ static const char *const pred_inv[2] = { [1] = "-" }; -static const char *const pred_ctrl_align16[16] = { +const char *const pred_ctrl_align16[16] = { [1] = "", [2] = ".x", [3] = ".y", @@ -726,7 +726,7 @@ reg(FILE *file, unsigned _reg_file, unsigned _reg_nr) switch (_reg_nr & 0xf0) { case BRW_ARF_NULL: string(file, "null"); - return -1; + break; case BRW_ARF_ADDRESS: format(file, "a%d", _reg_nr & 0x0f); break; @@ -908,7 +908,6 @@ src_ia1(FILE *file, unsigned _addr_subreg_nr, unsigned _negate, unsigned __abs, - unsigned _addr_mode, unsigned _horiz_stride, unsigned _width, unsigned _vert_stride) { int err = 0; @@ -1143,7 +1142,6 @@ src0(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst) brw_inst_src0_ia_subreg_nr(devinfo, inst), brw_inst_src0_negate(devinfo, inst), brw_inst_src0_abs(devinfo, inst), - brw_inst_src0_address_mode(devinfo, inst), brw_inst_src0_hstride(devinfo, inst), brw_inst_src0_width(devinfo, inst), brw_inst_src0_vstride(devinfo, inst)); @@ -1200,7 +1198,6 @@ src1(FILE *file, const struct brw_device_info *devinfo, brw_inst *inst) brw_inst_src1_ia_subreg_nr(devinfo, inst), brw_inst_src1_negate(devinfo, inst), brw_inst_src1_abs(devinfo, inst), - brw_inst_src1_address_mode(devinfo, inst), brw_inst_src1_hstride(devinfo, inst), brw_inst_src1_width(devinfo, inst), brw_inst_src1_vstride(devinfo, inst)); diff --git 
a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index 1f4a3516fa2..40ec87d38f0 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -261,7 +261,7 @@ void brw_disassemble(const struct brw_device_info *devinfo, void *assembly, int start, int end, FILE *out) { - bool dump_hex = false; + bool dump_hex = (INTEL_DEBUG & DEBUG_HEX) != 0; for (int offset = start; offset < end;) { brw_inst *insn = assembly + offset; diff --git a/src/mesa/drivers/dri/i965/brw_eu_compact.c b/src/mesa/drivers/dri/i965/brw_eu_compact.c index f787ea3d4f8..07ace6bfbcb 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_compact.c +++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c @@ -1407,6 +1407,9 @@ void brw_compact_instructions(struct brw_codegen *p, int start_offset, int num_annotations, struct annotation *annotation) { + if (unlikely(INTEL_DEBUG & DEBUG_NO_COMPACTION)) + return; + const struct brw_device_info *devinfo = p->devinfo; void *store = p->store + start_offset / 16; /* For an instruction at byte offset 16*i before compaction, this is the diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index bf2fee9ed48..a6fbb542919 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -410,7 +410,7 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg) if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.dw1.bits.indirect_offset); } else { - brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.dw1.bits.indirect_offset); + brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.dw1.bits.indirect_offset); } } @@ -2511,12 +2511,20 @@ brw_send_indirect_message(struct brw_codegen *p, struct brw_reg desc) { const struct brw_device_info *devinfo = p->devinfo; - struct brw_inst *send, *setup; + struct brw_inst *send; + int setup; assert(desc.type == BRW_REGISTER_TYPE_UD); + /* We hold on to 
the setup instruction (the SEND in the direct case, the OR + * in the indirect case) by its index in the instruction store. The + * pointer returned by next_insn() may become invalid if emitting the SEND + * in the indirect case reallocs the store. + */ + if (desc.file == BRW_IMMEDIATE_VALUE) { - setup = send = next_insn(p, BRW_OPCODE_SEND); + setup = p->nr_insn; + send = next_insn(p, BRW_OPCODE_SEND); brw_set_src1(p, send, desc); } else { @@ -2531,7 +2539,8 @@ brw_send_indirect_message(struct brw_codegen *p, * caller can specify additional descriptor bits with the usual * brw_set_*_message() helper functions. */ - setup = brw_OR(p, addr, desc, brw_imm_ud(0)); + setup = p->nr_insn; + brw_OR(p, addr, desc, brw_imm_ud(0)); brw_pop_insn_state(p); @@ -2543,7 +2552,7 @@ brw_send_indirect_message(struct brw_codegen *p, brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD)); brw_inst_set_sfid(devinfo, send, sfid); - return setup; + return &p->store[setup]; } static struct brw_inst * @@ -2906,11 +2915,10 @@ brw_untyped_surface_read(struct brw_codegen *p, const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? 
HSW_SFID_DATAPORT_DATA_CACHE_1 : GEN7_SFID_DATAPORT_DATA_CACHE); - const bool align1 = (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1); struct brw_inst *insn = brw_send_indirect_surface_message( p, sfid, dst, payload, surface, msg_length, brw_surface_payload_size(p, num_channels, true, true), - align1); + false); brw_set_dp_untyped_surface_read_message( p, insn, num_channels); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 8320cd77299..e218a85a363 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -88,8 +88,6 @@ fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, case IMM: case UNIFORM: unreachable("Invalid destination register file"); - default: - unreachable("Invalid register file"); } this->writes_accumulator = false; @@ -538,18 +536,6 @@ fs_visitor::get_timestamp(const fs_builder &bld) */ bld.group(4, 0).exec_all().MOV(dst, ts); - /* The caller wants the low 32 bits of the timestamp. Since it's running - * at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds, - * which is plenty of time for our purposes. It is identical across the - * EUs, but since it's tracking GPU core speed it will increment at a - * varying rate as render P-states change. - * - * The caller could also check if render P-states have changed (or anything - * else that might disrupt timing) by setting smear to 2 and checking if - * that field is != 0. - */ - dst.set_smear(0); - return dst; } @@ -557,6 +543,14 @@ void fs_visitor::emit_shader_time_begin() { shader_start_time = get_timestamp(bld.annotate("shader time start")); + + /* We want only the low 32 bits of the timestamp. Since it's running + * at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds, + * which is plenty of time for our purposes. It is identical across the + * EUs, but since it's tracking GPU core speed it will increment at a + * varying rate as render P-states change. 
+ */ + shader_start_time.set_smear(0); } void @@ -570,6 +564,15 @@ fs_visitor::emit_shader_time_end() fs_reg shader_end_time = get_timestamp(ibld); + /* We only use the low 32 bits of the timestamp - see + * emit_shader_time_begin(). + * + * We could also check if render P-states have changed (or anything + * else that might disrupt timing) by setting smear to 2 and checking if + * that field is != 0. + */ + shader_end_time.set_smear(0); + /* Check that there weren't any timestamp reset events (assuming these * were the only two timestamp reads that happened). */ @@ -700,10 +703,10 @@ fs_inst::components_read(unsigned i) const return 2; case FS_OPCODE_FB_WRITE_LOGICAL: - assert(src[6].file == IMM); + assert(src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM); /* First/second FB write color. */ if (i < 2) - return src[6].fixed_hw_reg.dw1.ud; + return src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud; else return 1; @@ -841,9 +844,8 @@ fs_inst::regs_read(int arg) const REG_SIZE); case MRF: unreachable("MRF registers are not allowed as sources"); - default: - unreachable("Invalid register file"); } + return 0; } bool @@ -1283,9 +1285,9 @@ fs_visitor::emit_sampleid_setup() fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::int_type)); if (key->compute_sample_id) { - fs_reg t1 = vgrf(glsl_type::int_type); - fs_reg t2 = vgrf(glsl_type::int_type); - t2.type = BRW_REGISTER_TYPE_UW; + fs_reg t1(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_D); + t1.set_smear(0); + fs_reg t2(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_W); /* The PS will be run in MSDISPMODE_PERSAMPLE. For example with * 8x multisampling, subspan 0 will represent sample N (where N @@ -1306,13 +1308,13 @@ fs_visitor::emit_sampleid_setup() * are sample 1 of subspan 0; the third group is sample 0 of * subspan 1, and finally sample 1 of subspan 1. 
*/ - abld.exec_all() - .AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)), + abld.exec_all().group(1, 0) + .AND(t1, fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)), fs_reg(0xc0)); - abld.exec_all().SHR(t1, t1, fs_reg(5)); + abld.exec_all().group(1, 0).SHR(t1, t1, fs_reg(5)); /* This works for both SIMD8 and SIMD16 */ - abld.exec_all() + abld.exec_all().group(4, 0) .MOV(t2, brw_imm_v(key->persample_2x ? 0x1010 : 0x3210)); /* This special instruction takes care of setting vstride=1, @@ -1443,6 +1445,9 @@ fs_visitor::calculate_urb_setup() } } } else { + bool include_vue_header = + nir->info.inputs_read & (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT); + /* We have enough input varyings that the SF/SBE pipeline stage can't * arbitrarily rearrange them to suit our whim; we have to put them * in an order that matches the output of the previous pipeline stage @@ -1452,15 +1457,14 @@ fs_visitor::calculate_urb_setup() brw_compute_vue_map(devinfo, &prev_stage_vue_map, key->input_slots_valid, nir->info.separate_shader); - int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET; + int first_slot = + include_vue_header ? 0 : 2 * BRW_SF_URB_ENTRY_READ_OFFSET; + assert(prev_stage_vue_map.num_slots <= first_slot + 32); for (int slot = first_slot; slot < prev_stage_vue_map.num_slots; slot++) { int varying = prev_stage_vue_map.slot_to_varying[slot]; - /* Note that varying == BRW_VARYING_SLOT_COUNT when a slot is - * unused. 
- */ - if (varying != BRW_VARYING_SLOT_COUNT && + if (varying != BRW_VARYING_SLOT_PAD && (nir->info.inputs_read & BRW_FS_VARYING_INPUT_MASK & BITFIELD64_BIT(varying))) { prog_data->urb_setup[varying] = slot - first_slot; @@ -2615,7 +2619,7 @@ fs_visitor::eliminate_find_live_channel() case SHADER_OPCODE_FIND_LIVE_CHANNEL: if (depth == 0) { inst->opcode = BRW_OPCODE_MOV; - inst->src[0] = fs_reg(0); + inst->src[0] = fs_reg(0u); inst->sources = 1; inst->force_writemask_all = true; progress = true; @@ -2643,8 +2647,9 @@ fs_visitor::emit_repclear_shader() fs_inst *mov; if (uniforms == 1) { - mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)), - fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F)); + mov = bld.exec_all().group(4, 0) + .MOV(brw_message_reg(color_mrf), + fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F)); } else { struct brw_reg reg = brw_reg(BRW_GENERAL_REGISTER_FILE, @@ -2653,8 +2658,8 @@ fs_visitor::emit_repclear_shader() BRW_WIDTH_2, BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); - mov = bld.exec_all().MOV(vec4(brw_message_reg(color_mrf)), - fs_reg(reg)); + mov = bld.exec_all().group(4, 0) + .MOV(vec4(brw_message_reg(color_mrf)), fs_reg(reg)); } fs_inst *write; @@ -3366,15 +3371,17 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, const brw_wm_prog_key *key, const fs_visitor::thread_payload &payload) { - assert(inst->src[6].file == IMM); + assert(inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].file == IMM); const brw_device_info *devinfo = bld.shader->devinfo; - const fs_reg &color0 = inst->src[0]; - const fs_reg &color1 = inst->src[1]; - const fs_reg &src0_alpha = inst->src[2]; - const fs_reg &src_depth = inst->src[3]; - const fs_reg &dst_depth = inst->src[4]; - fs_reg sample_mask = inst->src[5]; - const unsigned components = inst->src[6].fixed_hw_reg.dw1.ud; + const fs_reg &color0 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR0]; + const fs_reg &color1 = inst->src[FB_WRITE_LOGICAL_SRC_COLOR1]; + const fs_reg &src0_alpha = 
inst->src[FB_WRITE_LOGICAL_SRC_SRC0_ALPHA]; + const fs_reg &src_depth = inst->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH]; + const fs_reg &dst_depth = inst->src[FB_WRITE_LOGICAL_SRC_DST_DEPTH]; + const fs_reg &src_stencil = inst->src[FB_WRITE_LOGICAL_SRC_SRC_STENCIL]; + fs_reg sample_mask = inst->src[FB_WRITE_LOGICAL_SRC_OMASK]; + const unsigned components = + inst->src[FB_WRITE_LOGICAL_SRC_COMPONENTS].fixed_hw_reg.dw1.ud; /* We can potentially have a message length of up to 15, so we have to set * base_mrf to either 0 or 1 in order to fit in m0..m15. @@ -3464,6 +3471,17 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, length++; } + if (src_stencil.file != BAD_FILE) { + assert(devinfo->gen >= 9); + assert(bld.dispatch_width() != 16); + + sources[length] = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.exec_all().annotate("FB write OS") + .emit(FS_OPCODE_PACK_STENCIL_REF, sources[length], + retype(src_stencil, BRW_REGISTER_TYPE_UB)); + length++; + } + fs_inst *load; if (devinfo->gen >= 7) { /* Send from the GRF */ @@ -4073,7 +4091,7 @@ fs_visitor::lower_logical_sends() case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: lower_surface_logical_send(ibld, inst, SHADER_OPCODE_UNTYPED_SURFACE_READ, - fs_reg(0xffff)); + fs_reg()); break; case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: @@ -4202,10 +4220,12 @@ get_lowered_simd_width(const struct brw_device_info *devinfo, /* Gen6 doesn't support SIMD16 depth writes but we cannot handle them * here. */ - assert(devinfo->gen != 6 || inst->src[3].file == BAD_FILE || + assert(devinfo->gen != 6 || + inst->src[FB_WRITE_LOGICAL_SRC_SRC_DEPTH].file == BAD_FILE || inst->exec_size == 8); /* Dual-source FB writes are unsupported in SIMD16 mode. */ - return (inst->src[1].file != BAD_FILE ? 8 : inst->exec_size); + return (inst->src[FB_WRITE_LOGICAL_SRC_COLOR1].file != BAD_FILE ? + 8 : inst->exec_size); case SHADER_OPCODE_TXD_LOGICAL: /* TXD is unsupported in SIMD16 mode. 
*/ @@ -4499,9 +4519,8 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) if (inst->dst.fixed_hw_reg.subnr) fprintf(file, "+%d", inst->dst.fixed_hw_reg.subnr); break; - default: - fprintf(file, "???"); - break; + case IMM: + unreachable("not reached"); } fprintf(file, ":%s, ", brw_reg_type_letters(inst->dst.type)); @@ -4594,9 +4613,6 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) if (inst->src[i].fixed_hw_reg.abs) fprintf(file, "|"); break; - default: - fprintf(file, "???"); - break; } if (inst->src[i].abs) fprintf(file, "|"); @@ -4977,8 +4993,7 @@ fs_visitor::allocate_registers() if (failed) return; - if (!allocated_without_spills) - schedule_instructions(SCHEDULE_POST); + schedule_instructions(SCHEDULE_POST); if (last_scratch > 0) prog_data->total_scratch = brw_get_scratch_size(last_scratch); @@ -5236,6 +5251,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, prog_data->uses_omask = shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); prog_data->computed_depth_mode = computed_depth_mode(shader); + prog_data->computed_stencil = + shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL); prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 50e98becf03..8058b344b7a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -145,6 +145,8 @@ public: void assign_vs_urb_setup(); bool assign_regs(bool allow_spilling); void assign_regs_trivial(); + void calculate_payload_ranges(int payload_node_count, + int *payload_last_use_ip); void setup_payload_interference(struct ra_graph *g, int payload_reg_count, int first_payload_node); int choose_spill_reg(struct ra_graph *g); @@ -337,6 +339,7 @@ public: int *push_constant_loc; fs_reg frag_depth; + fs_reg frag_stencil; fs_reg sample_mask; fs_reg outputs[VARYING_SLOT_MAX]; unsigned 
output_components[VARYING_SLOT_MAX]; @@ -427,6 +430,8 @@ private: void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload); void generate_urb_write(fs_inst *inst, struct brw_reg payload); void generate_cs_terminate(fs_inst *inst, struct brw_reg payload); + void generate_stencil_ref_packing(fs_inst *inst, struct brw_reg dst, + struct brw_reg src); void generate_barrier(fs_inst *inst, struct brw_reg src); void generate_blorp_fb_write(fs_inst *inst); void generate_linterp(fs_inst *inst, struct brw_reg dst, diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h index df10a9de293..f121f3463d3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_builder.h +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h @@ -390,14 +390,21 @@ namespace brw { src_reg emit_uniformize(const src_reg &src) const { + /* FIXME: We use a vector chan_index and dst to allow constant and + * copy propagation to move result all the way into the consuming + * instruction (typically a surface index or sampler index for a + * send). This uses 1 or 3 extra hw registers in 16 or 32 wide + * dispatch. Once we teach const/copy propagation about scalars we + * should go back to scalar destinations here. 
+ */ const fs_builder ubld = exec_all(); - const dst_reg chan_index = component(vgrf(BRW_REGISTER_TYPE_UD), 0); - const dst_reg dst = component(vgrf(src.type), 0); + const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD); + const dst_reg dst = vgrf(src.type); ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index); - ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index); + ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 0)); - return src_reg(dst); + return src_reg(component(dst, 0)); } /** diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 5589716239a..26204827156 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -416,9 +416,10 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) inst->src[arg].subreg_offset = offset % 32; } break; - default: - unreachable("Invalid register file"); - break; + + case MRF: + case IMM: + unreachable("not reached"); } if (has_source_modifiers) { @@ -612,6 +613,21 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) } break; + case SHADER_OPCODE_UNTYPED_ATOMIC: + case SHADER_OPCODE_UNTYPED_SURFACE_READ: + case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: + case SHADER_OPCODE_TYPED_ATOMIC: + case SHADER_OPCODE_TYPED_SURFACE_READ: + case SHADER_OPCODE_TYPED_SURFACE_WRITE: + /* We only propagate into the surface argument of the + * instruction. Everything else goes through LOAD_PAYLOAD. 
+ */ + if (i == 1) { + inst->src[i] = val; + progress = true; + } + break; + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: case SHADER_OPCODE_BROADCAST: inst->src[i] = val; diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index c7628dcc2f4..3a28c8d591d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -93,7 +93,8 @@ is_expression(const fs_visitor *v, const fs_inst *const inst) case SHADER_OPCODE_LOAD_PAYLOAD: return !inst->is_copy_payload(v->alloc); default: - return inst->is_send_from_grf() && !inst->has_side_effects(); + return inst->is_send_from_grf() && !inst->has_side_effects() && + !inst->is_volatile(); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index bb7e792044f..e207a77fdc1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -42,9 +42,13 @@ static uint32_t brw_file_from_reg(fs_reg *reg) return BRW_MESSAGE_REGISTER_FILE; case IMM: return BRW_IMMEDIATE_VALUE; - default: + case BAD_FILE: + case HW_REG: + case ATTR: + case UNIFORM: unreachable("not reached"); } + return 0; } static struct brw_reg @@ -116,7 +120,8 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg, unsigned gen) /* Probably unused. 
*/ brw_reg = brw_null_reg(); break; - default: + case ATTR: + case UNIFORM: unreachable("not reached"); } if (reg->abs) @@ -317,6 +322,14 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload) brw_imm_ud(inst->target)); } + /* Set computes stencil to render target */ + if (prog_data->computed_stencil) { + brw_OR(p, + vec1(retype(payload, BRW_REGISTER_TYPE_UD)), + vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)), + brw_imm_ud(0x1 << 14)); + } + implied_header = brw_null_reg(); } else { implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); @@ -437,6 +450,47 @@ fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload) } void +fs_generator::generate_stencil_ref_packing(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src) +{ + assert(dispatch_width == 8); + assert(devinfo->gen >= 9); + + /* Stencil value updates are provided in 8 slots of 1 byte per slot. + * Presumably, in order to save memory bandwidth, the stencil reference + * values written from the FS need to be packed into 2 dwords (this makes + * sense because the stencil values are limited to 1 byte each and a SIMD8 + * send, so stencil slots 0-3 in dw0, and 4-7 in dw1.) + * + * The spec is confusing here because in the payload definition of MDP_RTW_S8 + * (Message Data Payload for Render Target Writes with Stencil 8b) the + * stencil value seems to be dw4.0-dw4.7. 
However, if you look at the type of + * dw4 it is type MDPR_STENCIL (Message Data Payload Register) which is the + * packed values specified above and diagrammed below: + * + * 31 0 + * -------------------------------- + * DW | | + * 2-7 | IGNORED | + * | | + * -------------------------------- + * DW1 | STC | STC | STC | STC | + * | slot7 | slot6 | slot5 | slot4| + * -------------------------------- + * DW0 | STC | STC | STC | STC | + * | slot3 | slot2 | slot1 | slot0| + * -------------------------------- + */ + + src.vstride = BRW_VERTICAL_STRIDE_4; + src.width = BRW_WIDTH_1; + src.hstride = BRW_HORIZONTAL_STRIDE_0; + assert(src.type == BRW_REGISTER_TYPE_UB); + brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UB), src); +} + +void fs_generator::generate_barrier(fs_inst *inst, struct brw_reg src) { brw_barrier(p, src); @@ -1455,18 +1509,18 @@ fs_generator::generate_set_sample_id(fs_inst *inst, assert(src0.type == BRW_REGISTER_TYPE_D || src0.type == BRW_REGISTER_TYPE_UD); - brw_push_insn_state(p); - brw_set_default_exec_size(p, BRW_EXECUTE_8); - brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - struct brw_reg reg = retype(stride(src1, 1, 4, 0), BRW_REGISTER_TYPE_UW); - if (dispatch_width == 8) { + struct brw_reg reg = stride(src1, 1, 4, 0); + if (devinfo->gen >= 8 || dispatch_width == 8) { brw_ADD(p, dst, src0, reg); } else if (dispatch_width == 16) { + brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_8); + brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_ADD(p, firsthalf(dst), firsthalf(src0), reg); + brw_set_default_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_ADD(p, sechalf(dst), sechalf(src0), suboffset(reg, 2)); + brw_pop_insn_state(p); } - brw_pop_insn_state(p); } void @@ -2182,6 +2236,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) generate_barrier(inst, src[0]); break; + case FS_OPCODE_PACK_STENCIL_REF: + 
generate_stencil_ref_packing(inst, dst, src[0]); + break; + default: unreachable("Unsupported opcode"); @@ -2216,9 +2274,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) if (unlikely(debug_flag)) { fprintf(stderr, "Native code for %s\n" - "SIMD%d shader: %d instructions. %d loops. %d:%d spills:fills. Promoted %u constants. Compacted %d to %d" + "SIMD%d shader: %d instructions. %d loops. %u cycles. %d:%d spills:fills. Promoted %u constants. Compacted %d to %d" " bytes (%.0f%%)\n", - shader_name, dispatch_width, before_size / 16, loop_count, + shader_name, dispatch_width, before_size / 16, loop_count, cfg->cycle_count, spill_count, fill_count, promoted_constants, before_size, after_size, 100.0f * (before_size - after_size) / before_size); @@ -2228,12 +2286,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) } compiler->shader_debug_log(log_data, - "%s SIMD%d shader: %d inst, %d loops, " + "%s SIMD%d shader: %d inst, %d loops, %u cycles, " "%d:%d spills:fills, Promoted %u constants, " "compacted %d to %d bytes.\n", stage_abbrev, dispatch_width, before_size / 16, - loop_count, spill_count, fill_count, - promoted_constants, before_size, after_size); + loop_count, cfg->cycle_count, spill_count, + fill_count, promoted_constants, before_size, + after_size); return start_offset; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 7b5a0482519..486741bea31 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -71,6 +71,14 @@ fs_visitor::nir_setup_inputs() var->data.origin_upper_left); emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(), input, reg), 0xF); + } else if (var->data.location == VARYING_SLOT_LAYER) { + struct brw_reg reg = suboffset(interp_reg(VARYING_SLOT_LAYER, 1), 3); + reg.type = BRW_REGISTER_TYPE_D; + bld.emit(FS_OPCODE_CINTERP, retype(input, BRW_REGISTER_TYPE_D), reg); + } else if (var->data.location == 
VARYING_SLOT_VIEWPORT) { + struct brw_reg reg = suboffset(interp_reg(VARYING_SLOT_VIEWPORT, 2), 3); + reg.type = BRW_REGISTER_TYPE_D; + bld.emit(FS_OPCODE_CINTERP, retype(input, BRW_REGISTER_TYPE_D), reg); } else { emit_general_interpolation(input, var->name, var->type, (glsl_interp_qualifier) var->data.interpolation, @@ -114,6 +122,8 @@ fs_visitor::nir_setup_outputs() } } else if (var->data.location == FRAG_RESULT_DEPTH) { this->frag_depth = reg; + } else if (var->data.location == FRAG_RESULT_STENCIL) { + this->frag_stencil = reg; } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) { this->sample_mask = reg; } else { @@ -896,12 +906,11 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then * subtract the result from 31 to convert the MSB count into an LSB count. */ - bld.CMP(bld.null_reg_d(), result, fs_reg(-1), BRW_CONDITIONAL_NZ); - fs_reg neg_result(result); - neg_result.negate = true; - inst = bld.ADD(result, neg_result, fs_reg(31)); + + inst = bld.ADD(result, result, fs_reg(31)); inst->predicate = BRW_PREDICATE_NORMAL; + inst->src[0].negate = true; break; } @@ -1322,6 +1331,15 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } + case nir_intrinsic_shader_clock: { + /* We cannot do anything if there is an event, so ignore it for now */ + fs_reg shader_clock = get_timestamp(bld); + const fs_reg srcs[] = { shader_clock.set_smear(0), shader_clock.set_smear(1) }; + + bld.LOAD_PAYLOAD(dest, srcs, ARRAY_SIZE(srcs), 0); + break; + } + case nir_intrinsic_image_size: { /* Get the referenced image variable and type. 
*/ const nir_variable *var = instr->variables[0]->var; @@ -1509,7 +1527,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr surf_index = vgrf(glsl_type::uint_type); bld.ADD(surf_index, get_nir_src(instr->src[0]), fs_reg(stage_prog_data->binding_table.ssbo_start)); - surf_index = bld.emit_uniformize(surf_index); /* Assume this may touch any UBO. It would be nice to provide * a tighter bound, but the array information is already lowered away. @@ -1520,34 +1537,21 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr } /* Get the offset to read from */ - fs_reg offset_reg = vgrf(glsl_type::uint_type); - unsigned const_offset_bytes = 0; + fs_reg offset_reg; if (has_indirect) { - bld.MOV(offset_reg, get_nir_src(instr->src[1])); + offset_reg = get_nir_src(instr->src[1]); } else { - const_offset_bytes = instr->const_index[0]; - bld.MOV(offset_reg, fs_reg(const_offset_bytes)); + offset_reg = fs_reg(instr->const_index[0]); } /* Read the vector */ - for (int i = 0; i < instr->num_components; i++) { - fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg, - 1 /* dims */, 1 /* size */, - BRW_PREDICATE_NONE); - read_result.type = dest.type; - bld.MOV(dest, read_result); - dest = offset(dest, bld, 1); - - /* Vector components are stored contiguous in memory */ - if (i < instr->num_components) { - if (!has_indirect) { - const_offset_bytes += 4; - bld.MOV(offset_reg, fs_reg(const_offset_bytes)); - } else { - bld.ADD(offset_reg, offset_reg, brw_imm_ud(4)); - } - } - } + fs_reg read_result = emit_untyped_read(bld, surf_index, offset_reg, + 1 /* dims */, + instr->num_components, + BRW_PREDICATE_NONE); + read_result.type = dest.type; + for (int i = 0; i < instr->num_components; i++) + bld.MOV(offset(dest, bld, i), offset(read_result, bld, i)); break; } @@ -1765,52 +1769,46 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr surf_index = vgrf(glsl_type::uint_type); 
bld.ADD(surf_index, get_nir_src(instr->src[1]), fs_reg(stage_prog_data->binding_table.ssbo_start)); - surf_index = bld.emit_uniformize(surf_index); brw_mark_surface_used(prog_data, stage_prog_data->binding_table.ssbo_start + nir->info.num_ssbos - 1); } - /* Offset */ - fs_reg offset_reg = vgrf(glsl_type::uint_type); - unsigned const_offset_bytes = 0; - if (has_indirect) { - bld.MOV(offset_reg, get_nir_src(instr->src[2])); - } else { - const_offset_bytes = instr->const_index[0]; - bld.MOV(offset_reg, fs_reg(const_offset_bytes)); - } - /* Value */ fs_reg val_reg = get_nir_src(instr->src[0]); /* Writemask */ unsigned writemask = instr->const_index[1]; - /* Write each component present in the writemask */ - unsigned skipped_channels = 0; - for (int i = 0; i < instr->num_components; i++) { - int component_mask = 1 << i; - if (writemask & component_mask) { - if (skipped_channels) { - if (!has_indirect) { - const_offset_bytes += 4 * skipped_channels; - bld.MOV(offset_reg, fs_reg(const_offset_bytes)); - } else { - bld.ADD(offset_reg, offset_reg, - brw_imm_ud(4 * skipped_channels)); - } - skipped_channels = 0; - } + /* Combine groups of consecutive enabled channels in one write + * message. We use ffs to find the first enabled channel and then ffs on + * the bit-inverse, down-shifted writemask to determine the length of + * the block of enabled bits. 
+ */ + while (writemask) { + unsigned first_component = ffs(writemask) - 1; + unsigned length = ffs(~(writemask >> first_component)) - 1; + fs_reg offset_reg; - emit_untyped_write(bld, surf_index, offset_reg, - offset(val_reg, bld, i), - 1 /* dims */, 1 /* size */, - BRW_PREDICATE_NONE); + if (!has_indirect) { + offset_reg = fs_reg(instr->const_index[0] + 4 * first_component); + } else { + offset_reg = vgrf(glsl_type::uint_type); + bld.ADD(offset_reg, + retype(get_nir_src(instr->src[2]), BRW_REGISTER_TYPE_UD), + fs_reg(4 * first_component)); } - skipped_channels++; + emit_untyped_write(bld, surf_index, offset_reg, + offset(val_reg, bld, first_component), + 1 /* dims */, length, + BRW_PREDICATE_NONE); + + /* Clear the bits in the writemask that we just wrote, then try + * again to see if more channels are left. + */ + writemask &= (15 << (first_component + length)); } break; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 36388fad98d..9251d9552a5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -330,32 +330,12 @@ count_to_loop_end(const bblock_t *block) unreachable("not reached"); } -/** - * Sets up interference between thread payload registers and the virtual GRFs - * to be allocated for program temporaries. - * - * We want to be able to reallocate the payload for our virtual GRFs, notably - * because the setup coefficients for a full set of 16 FS inputs takes up 8 of - * our 128 registers. - * - * The layout of the payload registers is: - * - * 0..payload.num_regs-1: fixed function setup (including bary coordinates). - * payload.num_regs..payload.num_regs+curb_read_lengh-1: uniform data - * payload.num_regs+curb_read_lengh..first_non_payload_grf-1: setup coefficients. - * - * And we have payload_node_count nodes covering these registers in order - * (note that in SIMD16, a node is two registers). 
- */ -void -fs_visitor::setup_payload_interference(struct ra_graph *g, - int payload_node_count, - int first_payload_node) +void fs_visitor::calculate_payload_ranges(int payload_node_count, + int *payload_last_use_ip) { int loop_depth = 0; int loop_end_ip = 0; - int payload_last_use_ip[payload_node_count]; for (int i = 0; i < payload_node_count; i++) payload_last_use_ip[i] = -1; @@ -426,6 +406,33 @@ fs_visitor::setup_payload_interference(struct ra_graph *g, ip++; } +} + + +/** + * Sets up interference between thread payload registers and the virtual GRFs + * to be allocated for program temporaries. + * + * We want to be able to reallocate the payload for our virtual GRFs, notably + * because the setup coefficients for a full set of 16 FS inputs takes up 8 of + * our 128 registers. + * + * The layout of the payload registers is: + * + * 0..payload.num_regs-1: fixed function setup (including bary coordinates). + * payload.num_regs..payload.num_regs+curb_read_lengh-1: uniform data + * payload.num_regs+curb_read_lengh..first_non_payload_grf-1: setup coefficients. + * + * And we have payload_node_count nodes covering these registers in order + * (note that in SIMD16, a node is two registers). + */ +void +fs_visitor::setup_payload_interference(struct ra_graph *g, + int payload_node_count, + int first_payload_node) +{ + int payload_last_use_ip[payload_node_count]; + calculate_payload_ranges(payload_node_count, payload_last_use_ip); for (int i = 0; i < payload_node_count; i++) { if (payload_last_use_ip[i] == -1) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 7cc4f3c927a..5c57944ca39 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -697,7 +697,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld, const fs_reg dst_depth = (payload.dest_depth_reg ? 
fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0)) : fs_reg()); - fs_reg src_depth; + fs_reg src_depth, src_stencil; if (source_depth_to_render_target) { if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) @@ -706,10 +706,14 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld, src_depth = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)); } + if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) + src_stencil = frag_stencil; + const fs_reg sources[] = { - color0, color1, src0_alpha, src_depth, dst_depth, sample_mask, - fs_reg(components) + color0, color1, src0_alpha, src_depth, dst_depth, src_stencil, + sample_mask, fs_reg(components) }; + assert(ARRAY_SIZE(sources) - 1 == FB_WRITE_LOGICAL_SRC_COMPONENTS); fs_inst *write = bld.emit(FS_OPCODE_FB_WRITE_LOGICAL, fs_reg(), sources, ARRAY_SIZE(sources)); @@ -740,6 +744,16 @@ fs_visitor::emit_fb_writes() no16("Missing support for simd16 depth writes on gen6\n"); } + if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) { + /* From the 'Render Target Write message' section of the docs: + * "Output Stencil is not supported with SIMD16 Render Target Write + * Messages." 
+ * + * FINISHME: split 16 into 2 8s + */ + no16("FINISHME: support 2 simd8 writes for gl_FragStencilRefARB\n"); + } + if (do_dual_src) { const fs_builder abld = bld.annotate("FB dual-source write"); diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c index 00125c0f405..76ed237d88a 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c @@ -105,8 +105,8 @@ brw_upload_gs_abo_surfaces(struct brw_context *brw) if (prog) { /* BRW_NEW_GS_PROG_DATA */ - brw_upload_abo_surfaces(brw, prog, &brw->gs.base, - &brw->gs.prog_data->base.base); + brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_GEOMETRY], + &brw->gs.base, &brw->gs.prog_data->base.base); } } diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index 7726e4b78a0..4417555f18e 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -97,7 +97,9 @@ byte_offset(fs_reg reg, unsigned delta) case MRF: reg.reg += delta / 32; break; - default: + case IMM: + case HW_REG: + case UNIFORM: assert(delta == 0); } reg.subreg_offset += delta % 32; @@ -119,7 +121,7 @@ horiz_offset(fs_reg reg, unsigned delta) case MRF: case ATTR: return byte_offset(reg, delta * reg.stride * type_sz(reg.type)); - default: + case HW_REG: assert(delta == 0); } return reg; @@ -163,7 +165,6 @@ half(fs_reg reg, unsigned idx) case ATTR: case HW_REG: - default: unreachable("Cannot take half of this register type"); } return reg; diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h index 1b57b65db27..29642c6d2a4 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h @@ -161,9 +161,6 @@ public: const src_reg &src1 = src_reg(), const src_reg &src2 = src_reg()); - struct brw_reg get_dst(unsigned gen); - struct brw_reg get_src(const struct brw_vue_prog_data *prog_data, 
int i); - dst_reg dst; src_reg src[3]; @@ -186,6 +183,27 @@ public: return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2; } + bool reads_flag(unsigned c) + { + if (opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2) + return true; + + switch (predicate) { + case BRW_PREDICATE_NONE: + return false; + case BRW_PREDICATE_ALIGN16_REPLICATE_X: + return c == 0; + case BRW_PREDICATE_ALIGN16_REPLICATE_Y: + return c == 1; + case BRW_PREDICATE_ALIGN16_REPLICATE_Z: + return c == 2; + case BRW_PREDICATE_ALIGN16_REPLICATE_W: + return c == 3; + default: + return true; + } + } + bool writes_flag() { return (conditional_mod && (opcode != BRW_OPCODE_SEL && diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 9a33188cb5c..8c1a34ee17a 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -205,6 +205,9 @@ brw_create_nir(struct brw_context *brw, if (shader_prog) { nir_lower_samplers(nir, shader_prog); nir_validate_shader(nir); + + nir_lower_atomics(nir, shader_prog); + nir_validate_shader(nir); } brw_postprocess_nir(nir, brw->intelScreen->devinfo, is_scalar); @@ -278,9 +281,6 @@ brw_postprocess_nir(nir_shader *nir, nir_lower_system_values(nir); nir_validate_shader(nir); - nir_lower_atomics(nir); - nir_validate_shader(nir); - nir_optimize(nir, is_scalar); if (devinfo->gen >= 6) { diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h index 87e7e011541..083c46a3726 100644 --- a/src/mesa/drivers/dri/i965/brw_reg.h +++ b/src/mesa/drivers/dri/i965/brw_reg.h @@ -205,7 +205,7 @@ enum PACKED brw_reg_type { /** @} */ /** Immediates only: @{ */ - BRW_REGISTER_TYPE_UV, + BRW_REGISTER_TYPE_UV, /* Gen6+ */ BRW_REGISTER_TYPE_V, BRW_REGISTER_TYPE_VF, /** @} */ diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index b710c60148c..88c45f74333 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ 
b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -26,6 +26,7 @@ */ #include "brw_fs.h" +#include "brw_fs_live_variables.h" #include "brw_vec4.h" #include "brw_cfg.h" #include "brw_shader.h" @@ -400,22 +401,49 @@ schedule_node::set_latency_gen7(bool is_haswell) class instruction_scheduler { public: instruction_scheduler(backend_shader *s, int grf_count, + int hw_reg_count, int block_count, instruction_scheduler_mode mode) { this->bs = s; this->mem_ctx = ralloc_context(NULL); this->grf_count = grf_count; + this->hw_reg_count = hw_reg_count; this->instructions.make_empty(); this->instructions_to_schedule = 0; this->post_reg_alloc = (mode == SCHEDULE_POST); this->mode = mode; this->time = 0; if (!post_reg_alloc) { - this->remaining_grf_uses = rzalloc_array(mem_ctx, int, grf_count); - this->grf_active = rzalloc_array(mem_ctx, bool, grf_count); + this->reg_pressure_in = rzalloc_array(mem_ctx, int, block_count); + + this->livein = ralloc_array(mem_ctx, BITSET_WORD *, block_count); + for (int i = 0; i < block_count; i++) + this->livein[i] = rzalloc_array(mem_ctx, BITSET_WORD, + BITSET_WORDS(grf_count)); + + this->liveout = ralloc_array(mem_ctx, BITSET_WORD *, block_count); + for (int i = 0; i < block_count; i++) + this->liveout[i] = rzalloc_array(mem_ctx, BITSET_WORD, + BITSET_WORDS(grf_count)); + + this->hw_liveout = ralloc_array(mem_ctx, BITSET_WORD *, block_count); + for (int i = 0; i < block_count; i++) + this->hw_liveout[i] = rzalloc_array(mem_ctx, BITSET_WORD, + BITSET_WORDS(hw_reg_count)); + + this->written = rzalloc_array(mem_ctx, bool, grf_count); + + this->reads_remaining = rzalloc_array(mem_ctx, int, grf_count); + + this->hw_reads_remaining = rzalloc_array(mem_ctx, int, hw_reg_count); } else { - this->remaining_grf_uses = NULL; - this->grf_active = NULL; + this->reg_pressure_in = NULL; + this->livein = NULL; + this->liveout = NULL; + this->hw_liveout = NULL; + this->written = NULL; + this->reads_remaining = NULL; + this->hw_reads_remaining = NULL; } 
} @@ -442,7 +470,8 @@ public: */ virtual int issue_time(backend_instruction *inst) = 0; - virtual void count_remaining_grf_uses(backend_instruction *inst) = 0; + virtual void count_reads_remaining(backend_instruction *inst) = 0; + virtual void setup_liveness(cfg_t *cfg) = 0; virtual void update_register_pressure(backend_instruction *inst) = 0; virtual int get_register_pressure_benefit(backend_instruction *inst) = 0; @@ -453,33 +482,63 @@ public: bool post_reg_alloc; int instructions_to_schedule; int grf_count; + int hw_reg_count; int time; + int reg_pressure; + int block_idx; exec_list instructions; backend_shader *bs; instruction_scheduler_mode mode; - /** - * Number of instructions left to schedule that reference each vgrf. - * - * Used so that we can prefer scheduling instructions that will end the - * live intervals of multiple variables, to reduce register pressure. + /* + * The register pressure at the beginning of each basic block. */ - int *remaining_grf_uses; - /** - * Tracks whether each VGRF has had an instruction scheduled that uses it. - * - * This is used to estimate whether scheduling a new instruction will - * increase register pressure. + int *reg_pressure_in; + + /* + * The virtual GRF's whose range overlaps the beginning of each basic block. + */ + + BITSET_WORD **livein; + + /* + * The virtual GRF's whose range overlaps the end of each basic block. + */ + + BITSET_WORD **liveout; + + /* + * The hardware GRF's whose range overlaps the end of each basic block. + */ + + BITSET_WORD **hw_liveout; + + /* + * Whether we've scheduled a write for this virtual GRF yet. + */ + + bool *written; + + /* + * How many reads we haven't scheduled for this virtual GRF yet. + */ + + int *reads_remaining; + + /* + * How many reads we haven't scheduled for this hardware GRF yet. 
*/ - bool *grf_active; + + int *hw_reads_remaining; }; class fs_instruction_scheduler : public instruction_scheduler { public: - fs_instruction_scheduler(fs_visitor *v, int grf_count, + fs_instruction_scheduler(fs_visitor *v, int grf_count, int hw_reg_count, + int block_count, instruction_scheduler_mode mode); void calculate_deps(); bool is_compressed(fs_inst *inst); @@ -487,35 +546,109 @@ public: int issue_time(backend_instruction *inst); fs_visitor *v; - void count_remaining_grf_uses(backend_instruction *inst); + void count_reads_remaining(backend_instruction *inst); + void setup_liveness(cfg_t *cfg); void update_register_pressure(backend_instruction *inst); int get_register_pressure_benefit(backend_instruction *inst); }; fs_instruction_scheduler::fs_instruction_scheduler(fs_visitor *v, - int grf_count, + int grf_count, int hw_reg_count, + int block_count, instruction_scheduler_mode mode) - : instruction_scheduler(v, grf_count, mode), + : instruction_scheduler(v, grf_count, hw_reg_count, block_count, mode), v(v) { } +static bool +is_src_duplicate(fs_inst *inst, int src) +{ + for (int i = 0; i < src; i++) + if (inst->src[i].equals(inst->src[src])) + return true; + + return false; +} + void -fs_instruction_scheduler::count_remaining_grf_uses(backend_instruction *be) +fs_instruction_scheduler::count_reads_remaining(backend_instruction *be) { fs_inst *inst = (fs_inst *)be; - if (!remaining_grf_uses) + if (!reads_remaining) return; - if (inst->dst.file == GRF) - remaining_grf_uses[inst->dst.reg]++; - for (int i = 0; i < inst->sources; i++) { - if (inst->src[i].file != GRF) + if (is_src_duplicate(inst, i)) continue; - remaining_grf_uses[inst->src[i].reg]++; + if (inst->src[i].file == GRF) { + reads_remaining[inst->src[i].reg]++; + } else if (inst->src[i].file == HW_REG && + inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) { + if (inst->src[i].fixed_hw_reg.nr >= hw_reg_count) + continue; + + for (int j = 0; j < inst->regs_read(i); j++) + 
hw_reads_remaining[inst->src[i].fixed_hw_reg.nr + j]++; + } + } +} + +void +fs_instruction_scheduler::setup_liveness(cfg_t *cfg) +{ + /* First, compute liveness on a per-GRF level using the in/out sets from + * liveness calculation. + */ + for (int block = 0; block < cfg->num_blocks; block++) { + for (int i = 0; i < v->live_intervals->num_vars; i++) { + if (BITSET_TEST(v->live_intervals->block_data[block].livein, i)) { + int vgrf = v->live_intervals->vgrf_from_var[i]; + if (!BITSET_TEST(livein[block], vgrf)) { + reg_pressure_in[block] += v->alloc.sizes[vgrf]; + BITSET_SET(livein[block], vgrf); + } + } + + if (BITSET_TEST(v->live_intervals->block_data[block].liveout, i)) + BITSET_SET(liveout[block], v->live_intervals->vgrf_from_var[i]); + } + } + + /* Now, extend the live in/live out sets for when a range crosses a block + * boundary, which matches what our register allocator/interference code + * does to account for force_writemask_all and incompatible exec_mask's. + */ + for (int block = 0; block < cfg->num_blocks - 1; block++) { + for (int i = 0; i < grf_count; i++) { + if (v->virtual_grf_start[i] <= cfg->blocks[block]->end_ip && + v->virtual_grf_end[i] >= cfg->blocks[block + 1]->start_ip) { + if (!BITSET_TEST(livein[block + 1], i)) { + reg_pressure_in[block + 1] += v->alloc.sizes[i]; + BITSET_SET(livein[block + 1], i); + } + + BITSET_SET(liveout[block], i); + } + } + } + + int payload_last_use_ip[hw_reg_count]; + v->calculate_payload_ranges(hw_reg_count, payload_last_use_ip); + + for (int i = 0; i < hw_reg_count; i++) { + if (payload_last_use_ip[i] == -1) + continue; + + for (int block = 0; block < cfg->num_blocks; block++) { + if (cfg->blocks[block]->start_ip <= payload_last_use_ip[i]) + reg_pressure_in[block]++; + + if (cfg->blocks[block]->end_ip <= payload_last_use_ip[i]) + BITSET_SET(hw_liveout[block], i); + } } } @@ -524,18 +657,24 @@ fs_instruction_scheduler::update_register_pressure(backend_instruction *be) { fs_inst *inst = (fs_inst *)be; - if 
(!remaining_grf_uses) + if (!reads_remaining) return; if (inst->dst.file == GRF) { - remaining_grf_uses[inst->dst.reg]--; - grf_active[inst->dst.reg] = true; + written[inst->dst.reg] = true; } for (int i = 0; i < inst->sources; i++) { + if (is_src_duplicate(inst, i)) + continue; + if (inst->src[i].file == GRF) { - remaining_grf_uses[inst->src[i].reg]--; - grf_active[inst->src[i].reg] = true; + reads_remaining[inst->src[i].reg]--; + } else if (inst->src[i].file == HW_REG && + inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE && + inst->src[i].fixed_hw_reg.nr < hw_reg_count) { + for (int off = 0; off < inst->regs_read(i); off++) + hw_reads_remaining[inst->src[i].fixed_hw_reg.nr + off]--; } } } @@ -547,20 +686,31 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be) int benefit = 0; if (inst->dst.file == GRF) { - if (remaining_grf_uses[inst->dst.reg] == 1) - benefit += v->alloc.sizes[inst->dst.reg]; - if (!grf_active[inst->dst.reg]) + if (!BITSET_TEST(livein[block_idx], inst->dst.reg) && + !written[inst->dst.reg]) benefit -= v->alloc.sizes[inst->dst.reg]; } for (int i = 0; i < inst->sources; i++) { - if (inst->src[i].file != GRF) + if (is_src_duplicate(inst, i)) continue; - if (remaining_grf_uses[inst->src[i].reg] == 1) + if (inst->src[i].file == GRF && + !BITSET_TEST(liveout[block_idx], inst->src[i].reg) && + reads_remaining[inst->src[i].reg] == 1) benefit += v->alloc.sizes[inst->src[i].reg]; - if (!grf_active[inst->src[i].reg]) - benefit -= v->alloc.sizes[inst->src[i].reg]; + + if (inst->src[i].file == HW_REG && + inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE && + inst->src[i].fixed_hw_reg.nr < hw_reg_count) { + for (int off = 0; off < inst->regs_read(i); off++) { + int reg = inst->src[i].fixed_hw_reg.nr + off; + if (!BITSET_TEST(hw_liveout[block_idx], reg) && + hw_reads_remaining[reg] == 1) { + benefit++; + } + } + } } return benefit; @@ -575,20 +725,26 @@ public: int issue_time(backend_instruction *inst); 
vec4_visitor *v; - void count_remaining_grf_uses(backend_instruction *inst); + void count_reads_remaining(backend_instruction *inst); + void setup_liveness(cfg_t *cfg); void update_register_pressure(backend_instruction *inst); int get_register_pressure_benefit(backend_instruction *inst); }; vec4_instruction_scheduler::vec4_instruction_scheduler(vec4_visitor *v, int grf_count) - : instruction_scheduler(v, grf_count, SCHEDULE_POST), + : instruction_scheduler(v, grf_count, 0, 0, SCHEDULE_POST), v(v) { } void -vec4_instruction_scheduler::count_remaining_grf_uses(backend_instruction *be) +vec4_instruction_scheduler::count_reads_remaining(backend_instruction *be) +{ +} + +void +vec4_instruction_scheduler::setup_liveness(cfg_t *cfg) { } @@ -822,7 +978,7 @@ fs_instruction_scheduler::calculate_deps() inst->src[i].file != IMM && inst->src[i].file != UNIFORM && (inst->src[i].file != HW_REG || - inst->src[i].fixed_hw_reg.file != IMM)) { + inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) { assert(inst->src[i].file != MRF); add_barrier_deps(n); } @@ -927,10 +1083,10 @@ fs_instruction_scheduler::calculate_deps() if (inst->src[i].file == GRF) { if (post_reg_alloc) { for (int r = 0; r < inst->regs_read(i); r++) - add_dep(n, last_grf_write[inst->src[i].reg + r]); + add_dep(n, last_grf_write[inst->src[i].reg + r], 0); } else { for (int r = 0; r < inst->regs_read(i); r++) { - add_dep(n, last_grf_write[inst->src[i].reg * 16 + inst->src[i].reg_offset + r]); + add_dep(n, last_grf_write[inst->src[i].reg * 16 + inst->src[i].reg_offset + r], 0); } } } else if (inst->src[i].file == HW_REG && @@ -941,17 +1097,17 @@ fs_instruction_scheduler::calculate_deps() if (inst->src[i].fixed_hw_reg.vstride == BRW_VERTICAL_STRIDE_0) size = 1; for (int r = 0; r < size; r++) - add_dep(n, last_grf_write[inst->src[i].fixed_hw_reg.nr + r]); + add_dep(n, last_grf_write[inst->src[i].fixed_hw_reg.nr + r], 0); } else { - add_dep(n, last_fixed_grf_write); + add_dep(n, last_fixed_grf_write, 0); } } else if 
(inst->src[i].is_accumulator()) { - add_dep(n, last_accumulator_write); + add_dep(n, last_accumulator_write, 0); } else if (inst->src[i].file != BAD_FILE && inst->src[i].file != IMM && inst->src[i].file != UNIFORM && (inst->src[i].file != HW_REG || - inst->src[i].fixed_hw_reg.file != IMM)) { + inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) { assert(inst->src[i].file != MRF); add_barrier_deps(n); } @@ -1080,7 +1236,7 @@ vec4_instruction_scheduler::calculate_deps() inst->src[i].file != IMM && inst->src[i].file != UNIFORM && (inst->src[i].file != HW_REG || - inst->src[i].fixed_hw_reg.file != IMM)) { + inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) { /* No reads from MRF, and ATTR is already translated away */ assert(inst->src[i].file != MRF && inst->src[i].file != ATTR); @@ -1177,7 +1333,7 @@ vec4_instruction_scheduler::calculate_deps() inst->src[i].file != IMM && inst->src[i].file != UNIFORM && (inst->src[i].file != HW_REG || - inst->src[i].fixed_hw_reg.file != IMM)) { + inst->src[i].fixed_hw_reg.file != BRW_IMMEDIATE_VALUE)) { assert(inst->src[i].file != MRF && inst->src[i].file != ATTR); add_barrier_deps(n); @@ -1387,6 +1543,9 @@ instruction_scheduler::schedule_instructions(bblock_t *block) const struct brw_device_info *devinfo = bs->devinfo; backend_instruction *inst = block->end(); time = 0; + if (!post_reg_alloc) + reg_pressure = reg_pressure_in[block->num]; + block_idx = block->num; /* Remove non-DAG heads from the list. */ foreach_in_list_safe(schedule_node, n, &instructions) { @@ -1403,23 +1562,30 @@ instruction_scheduler::schedule_instructions(bblock_t *block) chosen->remove(); inst->insert_before(block, chosen->inst); instructions_to_schedule--; - update_register_pressure(chosen->inst); - /* Update the clock for how soon an instruction could start after the - * chosen one. 
- */ - time += issue_time(chosen->inst); + if (!post_reg_alloc) { + reg_pressure -= get_register_pressure_benefit(chosen->inst); + update_register_pressure(chosen->inst); + } /* If we expected a delay for scheduling, then bump the clock to reflect - * that as well. In reality, the hardware will switch to another - * hyperthread and may not return to dispatching our thread for a while - * even after we're unblocked. + * that. In reality, the hardware will switch to another hyperthread + * and may not return to dispatching our thread for a while even after + * we're unblocked. After this, we have the time when the chosen + * instruction will start executing. */ time = MAX2(time, chosen->unblocked_time); + /* Update the clock for how soon an instruction could start after the + * chosen one. + */ + time += issue_time(chosen->inst); + if (debug) { fprintf(stderr, "clock %4d, scheduled: ", time); bs->dump_instruction(chosen->inst); + if (!post_reg_alloc) + fprintf(stderr, "(register pressure %d)\n", reg_pressure); } /* Now that we've scheduled a new instruction, some of its @@ -1466,30 +1632,53 @@ instruction_scheduler::schedule_instructions(bblock_t *block) if (block->end()->opcode == BRW_OPCODE_NOP) block->end()->remove(block); assert(instructions_to_schedule == 0); + + block->cycle_count = time; +} + +static unsigned get_cycle_count(cfg_t *cfg) +{ + unsigned count = 0, multiplier = 1; + foreach_block(block, cfg) { + if (block->start()->opcode == BRW_OPCODE_DO) + multiplier *= 10; /* assume that loops execute ~10 times */ + + count += block->cycle_count * multiplier; + + if (block->end()->opcode == BRW_OPCODE_WHILE) + multiplier /= 10; + } + + return count; } void instruction_scheduler::run(cfg_t *cfg) { - if (debug) { + if (debug && !post_reg_alloc) { fprintf(stderr, "\nInstructions before scheduling (reg_alloc %d)\n", post_reg_alloc); - bs->dump_instructions(); + bs->dump_instructions(); } - /* Populate the remaining GRF uses array to improve the pre-regalloc - * 
scheduling. - */ - if (remaining_grf_uses) { - foreach_block_and_inst(block, backend_instruction, inst, cfg) { - count_remaining_grf_uses(inst); - } - } + if (!post_reg_alloc) + setup_liveness(cfg); foreach_block(block, cfg) { if (block->end_ip - block->start_ip <= 1) continue; + if (reads_remaining) { + memset(reads_remaining, 0, + grf_count * sizeof(*reads_remaining)); + memset(hw_reads_remaining, 0, + hw_reg_count * sizeof(*hw_reads_remaining)); + memset(written, 0, grf_count * sizeof(*written)); + + foreach_inst_in_block(fs_inst, inst, block) + count_reads_remaining(inst); + } + add_insts_from_block(block); calculate_deps(); @@ -1501,23 +1690,29 @@ instruction_scheduler::run(cfg_t *cfg) schedule_instructions(block); } - if (debug) { + if (debug && !post_reg_alloc) { fprintf(stderr, "\nInstructions after scheduling (reg_alloc %d)\n", post_reg_alloc); bs->dump_instructions(); } + + cfg->cycle_count = get_cycle_count(cfg); } void fs_visitor::schedule_instructions(instruction_scheduler_mode mode) { + if (mode != SCHEDULE_POST) + calculate_live_intervals(); + int grf_count; if (mode == SCHEDULE_POST) grf_count = grf_used; else grf_count = alloc.count; - fs_instruction_scheduler sched(this, grf_count, mode); + fs_instruction_scheduler sched(this, grf_count, first_non_payload_grf, + cfg->num_blocks, mode); sched.run(cfg); if (unlikely(debug_enabled) && mode == SCHEDULE_POST) { diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index e48f559afa7..063cb84a958 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -298,6 +298,8 @@ brw_instruction_name(enum opcode op) return "fb_write"; case FS_OPCODE_FB_WRITE_LOGICAL: return "fb_write_logical"; + case FS_OPCODE_PACK_STENCIL_REF: + return "pack_stencil_ref"; case FS_OPCODE_BLORP_FB_WRITE: return "blorp_fb_write"; case FS_OPCODE_REP_FB_WRITE: @@ -988,6 +990,20 @@ backend_instruction::has_side_effects() const } } +bool 
+backend_instruction::is_volatile() const +{ + switch (opcode) { + case SHADER_OPCODE_UNTYPED_SURFACE_READ: + case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: + case SHADER_OPCODE_TYPED_SURFACE_READ: + case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: + return true; + default: + return false; + } +} + #ifndef NDEBUG static bool inst_is_in_block(const bblock_t *block, const backend_instruction *inst) @@ -1178,9 +1194,9 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage, stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0; } - if (shader_prog && shader_prog->NumAtomicBuffers) { + if (shader && shader->NumAtomicBuffers) { stage_prog_data->binding_table.abo_start = next_binding_table_offset; - next_binding_table_offset += shader_prog->NumAtomicBuffers; + next_binding_table_offset += shader->NumAtomicBuffers; } else { stage_prog_data->binding_table.abo_start = 0xd0d0d0d0; } diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 8899b30c1ae..f4647cca4f9 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -115,6 +115,12 @@ struct backend_instruction : public exec_node { * optimize these out unless you know what you are doing. */ bool has_side_effects() const; + + /** + * True if the instruction might be affected by side effects of other + * instructions. 
+ */ + bool is_volatile() const; #else struct backend_instruction { struct exec_node link; diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index dc2b9415673..2aa1248fea6 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -49,6 +49,7 @@ extern const struct brw_tracked_state brw_clip_unit; extern const struct brw_tracked_state brw_vs_pull_constants; extern const struct brw_tracked_state brw_gs_pull_constants; extern const struct brw_tracked_state brw_wm_pull_constants; +extern const struct brw_tracked_state brw_cs_pull_constants; extern const struct brw_tracked_state brw_constant_buffer; extern const struct brw_tracked_state brw_curbe_offsets; extern const struct brw_tracked_state brw_invariant_state; @@ -220,7 +221,7 @@ bool brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, GLuint key_size, - uint32_t *inout_offset, void *out_aux); + uint32_t *inout_offset, void *inout_aux); void brw_state_cache_check_size( struct brw_context *brw ); void brw_init_caches( struct brw_context *brw ); @@ -345,7 +346,8 @@ calculate_attr_overrides(const struct brw_context *brw, uint16_t *attr_overrides, uint32_t *point_sprite_enables, uint32_t *flat_enables, - uint32_t *urb_entry_read_length); + uint32_t *urb_entry_read_length, + uint32_t *urb_entry_read_offset); /* gen6_surface_state.c */ void gen6_init_vtable_surface_functions(struct brw_context *brw); diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 2fbcd146750..f7c0a2037d9 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -137,7 +137,7 @@ bool brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, GLuint key_size, - uint32_t *inout_offset, void *out_aux) + uint32_t *inout_offset, void *inout_aux) { struct brw_context *brw = cache->brw; struct brw_cache_item 
*item; @@ -155,11 +155,12 @@ brw_search_cache(struct brw_cache *cache, if (item == NULL) return false; - *(void **)out_aux = ((char *)item->key + item->key_size); + void *aux = ((char *) item->key) + item->key_size; - if (item->offset != *inout_offset) { + if (item->offset != *inout_offset || aux != *((void **) inout_aux)) { brw->ctx.NewDriverState |= (1 << cache_id); *inout_offset = item->offset; + *((void **) inout_aux) = aux; } return true; @@ -349,11 +350,6 @@ brw_init_caches(struct brw_context *brw) 4096, 64); if (brw->has_llc) drm_intel_gem_bo_map_unsynchronized(cache->bo); - - cache->aux_free[BRW_CACHE_VS_PROG] = brw_stage_prog_data_free; - cache->aux_free[BRW_CACHE_GS_PROG] = brw_stage_prog_data_free; - cache->aux_free[BRW_CACHE_FS_PROG] = brw_stage_prog_data_free; - cache->aux_free[BRW_CACHE_CS_PROG] = brw_stage_prog_data_free; } static void @@ -367,9 +363,12 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) for (i = 0; i < cache->size; i++) { for (c = cache->items[i]; c; c = next) { next = c->next; - if (cache->aux_free[c->cache_id]) { + if (c->cache_id == BRW_CACHE_VS_PROG || + c->cache_id == BRW_CACHE_GS_PROG || + c->cache_id == BRW_CACHE_FS_PROG || + c->cache_id == BRW_CACHE_CS_PROG) { const void *item_aux = c->key + c->key_size; - cache->aux_free[c->cache_id](item_aux); + brw_stage_prog_data_free(item_aux); } free((void *)c->key); free(c); diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 79b8301954e..0344b8a7fb0 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -259,6 +259,7 @@ static const struct brw_tracked_state *gen7_compute_atoms[] = &brw_state_base_address, &brw_cs_image_surfaces, &gen7_cs_push_constants, + &brw_cs_pull_constants, &brw_cs_ubo_surfaces, &brw_cs_abo_surfaces, &brw_texture_surfaces, @@ -353,6 +354,7 @@ static const struct brw_tracked_state *gen8_compute_atoms[] = &gen8_state_base_address, 
&brw_cs_image_surfaces, &gen7_cs_push_constants, + &brw_cs_pull_constants, &brw_cs_ubo_surfaces, &brw_cs_abo_surfaces, &brw_texture_surfaces, diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 3e7078d0b32..01eb1580953 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1370,9 +1370,10 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) vec4_instruction *inst = (vec4_instruction *)be_inst; if (inst->predicate) { - fprintf(file, "(%cf0.%d) ", + fprintf(file, "(%cf0.%d%s) ", inst->predicate_inverse ? '-' : '+', - inst->flag_subreg); + inst->flag_subreg, + pred_ctrl_align16[inst->predicate]); } fprintf(file, "%s", brw_instruction_name(inst->opcode)); @@ -1426,9 +1427,10 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) case BAD_FILE: fprintf(file, "(null)"); break; - default: - fprintf(file, "???"); - break; + case IMM: + case ATTR: + case UNIFORM: + unreachable("not reached"); } if (inst->dst.writemask != WRITEMASK_XYZW) { fprintf(file, "."); @@ -1520,9 +1522,8 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) case BAD_FILE: fprintf(file, "(null)"); break; - default: - fprintf(file, "???"); - break; + case MRF: + unreachable("not reached"); } /* Don't print .0; and only VGRFs have reg_offsets and sizes */ @@ -1787,13 +1788,100 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value) emit(MOV(offset, src_reg(index * SHADER_TIME_STRIDE))); time.type = BRW_REGISTER_TYPE_UD; - emit(MOV(time, src_reg(value))); + emit(MOV(time, value)); vec4_instruction *inst = emit(SHADER_OPCODE_SHADER_TIME_ADD, dst_reg(), src_reg(dst)); inst->mlen = 2; } +void +vec4_visitor::convert_to_hw_regs() +{ + foreach_block_and_inst(block, vec4_instruction, inst, cfg) { + for (int i = 0; i < 3; i++) { + struct src_reg &src = inst->src[i]; + struct brw_reg reg; + switch (src.file) { + case GRF: + reg = 
brw_vec8_grf(src.reg + src.reg_offset, 0); + reg.type = src.type; + reg.dw1.bits.swizzle = src.swizzle; + reg.abs = src.abs; + reg.negate = src.negate; + break; + + case IMM: + reg = brw_imm_reg(src.type); + reg.dw1.ud = src.fixed_hw_reg.dw1.ud; + break; + + case UNIFORM: + reg = stride(brw_vec4_grf(prog_data->base.dispatch_grf_start_reg + + (src.reg + src.reg_offset) / 2, + ((src.reg + src.reg_offset) % 2) * 4), + 0, 4, 1); + reg.type = src.type; + reg.dw1.bits.swizzle = src.swizzle; + reg.abs = src.abs; + reg.negate = src.negate; + + /* This should have been moved to pull constants. */ + assert(!src.reladdr); + break; + + case HW_REG: + assert(src.type == src.fixed_hw_reg.type); + continue; + + case BAD_FILE: + /* Probably unused. */ + reg = brw_null_reg(); + break; + + case MRF: + case ATTR: + unreachable("not reached"); + } + src.fixed_hw_reg = reg; + } + + dst_reg &dst = inst->dst; + struct brw_reg reg; + + switch (inst->dst.file) { + case GRF: + reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0); + reg.type = dst.type; + reg.dw1.bits.writemask = dst.writemask; + break; + + case MRF: + assert(((dst.reg + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF(devinfo->gen)); + reg = brw_message_reg(dst.reg + dst.reg_offset); + reg.type = dst.type; + reg.dw1.bits.writemask = dst.writemask; + break; + + case HW_REG: + assert(dst.type == dst.fixed_hw_reg.type); + reg = dst.fixed_hw_reg; + break; + + case BAD_FILE: + reg = brw_null_reg(); + break; + + case IMM: + case ATTR: + case UNIFORM: + unreachable("not reached"); + } + + dst.fixed_hw_reg = reg; + } +} + bool vec4_visitor::run() { @@ -1862,6 +1950,7 @@ vec4_visitor::run() OPT(dead_code_eliminate); OPT(dead_control_flow_eliminate, this); OPT(opt_copy_propagation); + OPT(opt_cmod_propagation); OPT(opt_cse); OPT(opt_algebraic); OPT(opt_register_coalesce); @@ -1914,6 +2003,8 @@ vec4_visitor::run() opt_set_dependency_control(); + convert_to_hw_regs(); + if (last_scratch > 0) { prog_data->base.total_scratch = 
brw_get_scratch_size(last_scratch * REG_SIZE); @@ -2020,9 +2111,9 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, return NULL; } - vec4_generator g(compiler, log_data, &prog_data->base, - mem_ctx, INTEL_DEBUG & DEBUG_VS, "vertex", "VS"); - assembly = g.generate_assembly(v.cfg, final_assembly_size, shader); + assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, + shader, &prog_data->base, v.cfg, + final_assembly_size); } return assembly; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index d861b2e85df..ec8abf49cd8 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -52,6 +52,15 @@ extern "C" { extern "C" { #endif +const unsigned * +brw_vec4_generate_assembly(const struct brw_compiler *compiler, + void *log_data, + void *mem_ctx, + const nir_shader *nir, + struct brw_vue_prog_data *prog_data, + const struct cfg_t *cfg, + unsigned *out_assembly_size); + #ifdef __cplusplus } /* extern "C" */ @@ -149,6 +158,7 @@ public: int var_range_start(unsigned v, unsigned n) const; int var_range_end(unsigned v, unsigned n) const; bool virtual_grf_interferes(int a, int b); + bool opt_cmod_propagation(); bool opt_copy_propagation(bool do_constant_prop = true); bool opt_cse_local(bblock_t *block); bool opt_cse(); @@ -158,6 +168,7 @@ public: bool is_dep_ctrl_unsafe(const vec4_instruction *inst); void opt_set_dependency_control(); void opt_schedule_instructions(); + void convert_to_hw_regs(); vec4_instruction *emit(vec4_instruction *inst); @@ -381,117 +392,6 @@ private: unsigned last_scratch; /**< measured in 32-byte (register size) units */ }; - -/** - * The vertex shader code generator. - * - * Translates VS IR to actual i965 assembly code. 
- */ -class vec4_generator -{ -public: - vec4_generator(const struct brw_compiler *compiler, void *log_data, - struct brw_vue_prog_data *prog_data, - void *mem_ctx, - bool debug_flag, - const char *stage_name, - const char *stage_abbrev); - ~vec4_generator(); - - const unsigned *generate_assembly(const cfg_t *cfg, unsigned *asm_size, - const nir_shader *nir); - -private: - void generate_code(const cfg_t *cfg, const nir_shader *nir); - - void generate_math1_gen4(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src); - void generate_math2_gen4(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1); - void generate_math_gen6(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1); - - void generate_tex(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src, - struct brw_reg sampler_index); - - void generate_vs_urb_write(vec4_instruction *inst); - void generate_gs_urb_write(vec4_instruction *inst); - void generate_gs_urb_write_allocate(vec4_instruction *inst); - void generate_gs_thread_end(vec4_instruction *inst); - void generate_gs_set_write_offset(struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1); - void generate_gs_set_vertex_count(struct brw_reg dst, - struct brw_reg src); - void generate_gs_svb_write(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1); - void generate_gs_svb_set_destination_index(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src); - void generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src); - void generate_gs_prepare_channel_masks(struct brw_reg dst); - void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src); - void generate_gs_get_instance_id(struct brw_reg dst); - void generate_gs_ff_sync_set_primitives(struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1, - struct brw_reg src2); - void generate_gs_ff_sync(vec4_instruction *inst, - 
struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1); - void generate_gs_set_primitive_id(struct brw_reg dst); - void generate_oword_dual_block_offsets(struct brw_reg m1, - struct brw_reg index); - void generate_scratch_write(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src, - struct brw_reg index); - void generate_scratch_read(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg index); - void generate_pull_constant_load(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset); - void generate_pull_constant_load_gen7(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg surf_index, - struct brw_reg offset); - void generate_set_simd4x2_header_gen9(vec4_instruction *inst, - struct brw_reg dst); - - void generate_get_buffer_size(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src, - struct brw_reg index); - - void generate_unpack_flags(struct brw_reg dst); - - const struct brw_compiler *compiler; - void *log_data; /* Passed to compiler->*_log functions */ - - const struct brw_device_info *devinfo; - - struct brw_codegen *p; - - struct brw_vue_prog_data *prog_data; - - void *mem_ctx; - const char *stage_name; - const char *stage_abbrev; - const bool debug_flag; -}; - } /* namespace brw */ #endif /* __cplusplus */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp new file mode 100644 index 00000000000..329f24269ce --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_cmod_propagation.cpp @@ -0,0 +1,157 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the 
Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +/** @file brw_vec4_cmod_propagation.cpp + * + * Really similar to brw_fs_cmod_propagation but adapted to vec4 needs. Check + * brw_fs_cmod_propagation for further details on the rationale behind this + * optimization. 
+ */ + +#include "brw_vec4.h" +#include "brw_cfg.h" + +namespace brw { + +static bool +opt_cmod_propagation_local(bblock_t *block) +{ + bool progress = false; + int ip = block->end_ip + 1; + + foreach_inst_in_block_reverse_safe(vec4_instruction, inst, block) { + ip--; + + if ((inst->opcode != BRW_OPCODE_AND && + inst->opcode != BRW_OPCODE_CMP && + inst->opcode != BRW_OPCODE_MOV) || + inst->predicate != BRW_PREDICATE_NONE || + !inst->dst.is_null() || + inst->src[0].file != GRF || + inst->src[0].abs) + continue; + + if (inst->opcode == BRW_OPCODE_AND && + !(inst->src[1].is_one() && + inst->conditional_mod == BRW_CONDITIONAL_NZ && + !inst->src[0].negate)) + continue; + + if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero()) + continue; + + if (inst->opcode == BRW_OPCODE_MOV && + inst->conditional_mod != BRW_CONDITIONAL_NZ) + continue; + + bool read_flag = false; + foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst, inst) { + if (inst->src[0].in_range(scan_inst->dst, + scan_inst->regs_written)) { + if ((scan_inst->predicate && scan_inst->opcode != BRW_OPCODE_SEL) || + scan_inst->dst.reg_offset != inst->src[0].reg_offset || + (scan_inst->dst.writemask != WRITEMASK_X && + scan_inst->dst.writemask != WRITEMASK_XYZW) || + (scan_inst->dst.writemask == WRITEMASK_XYZW && + inst->src[0].swizzle != BRW_SWIZZLE_XYZW) || + (inst->dst.writemask & ~scan_inst->dst.writemask) != 0) { + break; + } + + /* CMP's result is the same regardless of dest type. */ + if (inst->conditional_mod == BRW_CONDITIONAL_NZ && + scan_inst->opcode == BRW_OPCODE_CMP && + (inst->dst.type == BRW_REGISTER_TYPE_D || + inst->dst.type == BRW_REGISTER_TYPE_UD)) { + inst->remove(block); + progress = true; + break; + } + + /* If the AND wasn't handled by the previous case, it isn't safe + * to remove it. 
+ */ + if (inst->opcode == BRW_OPCODE_AND) + break; + + /* Comparisons operate differently for ints and floats */ + if (scan_inst->dst.type != inst->dst.type && + (scan_inst->dst.type == BRW_REGISTER_TYPE_F || + inst->dst.type == BRW_REGISTER_TYPE_F)) + break; + + /* If the instruction generating inst's source also wrote the + * flag, and inst is doing a simple .nz comparison, then inst + * is redundant - the appropriate value is already in the flag + * register. Delete inst. + */ + if (inst->conditional_mod == BRW_CONDITIONAL_NZ && + !inst->src[0].negate && + scan_inst->writes_flag()) { + inst->remove(block); + progress = true; + break; + } + + /* Otherwise, try propagating the conditional. */ + enum brw_conditional_mod cond = + inst->src[0].negate ? brw_swap_cmod(inst->conditional_mod) + : inst->conditional_mod; + + if (scan_inst->can_do_cmod() && + ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) || + scan_inst->conditional_mod == cond)) { + scan_inst->conditional_mod = cond; + inst->remove(block); + progress = true; + } + break; + } + + if (scan_inst->writes_flag()) + break; + + read_flag = read_flag || scan_inst->reads_flag(); + } + } + + return progress; +} + +bool +vec4_visitor::opt_cmod_propagation() +{ + bool progress = false; + + foreach_block_reverse(block, cfg) { + progress = opt_cmod_propagation_local(block) || progress; + } + + if (progress) + invalidate_live_intervals(); + + return progress; +} + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp index 8fc7a365bfc..284e0a8d0a5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp @@ -78,13 +78,19 @@ vec4_visitor::dead_code_eliminate() sizeof(BITSET_WORD)); foreach_inst_in_block_reverse(vec4_instruction, inst, block) { - if (inst->dst.file == GRF && !inst->has_side_effects()) { + if 
((inst->dst.file == GRF && !inst->has_side_effects()) || + (inst->dst.is_null() && inst->writes_flag())){ bool result_live[4] = { false }; - for (unsigned i = 0; i < inst->regs_written; i++) { - for (int c = 0; c < 4; c++) - result_live[c] |= BITSET_TEST( - live, var_from_reg(alloc, offset(inst->dst, i), c)); + if (inst->dst.file == GRF) { + for (unsigned i = 0; i < inst->regs_written; i++) { + for (int c = 0; c < 4; c++) + result_live[c] |= BITSET_TEST( + live, var_from_reg(alloc, offset(inst->dst, i), c)); + } + } else { + for (unsigned c = 0; c < 4; c++) + result_live[c] = BITSET_TEST(flag_live, c); } /* If the instruction can't do writemasking, then it's all or @@ -117,7 +123,11 @@ vec4_visitor::dead_code_eliminate() } if (inst->dst.is_null() && inst->writes_flag()) { - if (!BITSET_TEST(flag_live, 0)) { + bool combined_live = false; + for (unsigned c = 0; c < 4; c++) + combined_live |= BITSET_TEST(flag_live, c); + + if (!combined_live) { inst->opcode = BRW_OPCODE_NOP; progress = true; continue; @@ -136,7 +146,8 @@ vec4_visitor::dead_code_eliminate() } if (inst->writes_flag()) { - BITSET_CLEAR(flag_live, 0); + for (unsigned c = 0; c < 4; c++) + BITSET_CLEAR(flag_live, c); } for (int i = 0; i < 3; i++) { @@ -150,8 +161,10 @@ vec4_visitor::dead_code_eliminate() } } - if (inst->reads_flag()) { - BITSET_SET(flag_live, 0); + for (unsigned c = 0; c < 4; c++) { + if (inst->reads_flag(c)) { + BITSET_SET(flag_live, c); + } } } } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index a84f6c47471..8bc21df5ffc 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -20,146 +20,17 @@ * IN THE SOFTWARE. 
*/ -#include <ctype.h> #include "glsl/glsl_parser_extras.h" #include "brw_vec4.h" #include "brw_cfg.h" -extern "C" { -#include "brw_eu.h" -#include "main/macros.h" -#include "program/prog_print.h" -#include "program/prog_parameter.h" -}; +using namespace brw; -namespace brw { - -struct brw_reg -vec4_instruction::get_dst(unsigned gen) -{ - struct brw_reg brw_reg; - - switch (dst.file) { - case GRF: - brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0); - brw_reg = retype(brw_reg, dst.type); - brw_reg.dw1.bits.writemask = dst.writemask; - break; - - case MRF: - assert(((dst.reg + dst.reg_offset) & ~(1 << 7)) < BRW_MAX_MRF(gen)); - brw_reg = brw_message_reg(dst.reg + dst.reg_offset); - brw_reg = retype(brw_reg, dst.type); - brw_reg.dw1.bits.writemask = dst.writemask; - break; - - case HW_REG: - assert(dst.type == dst.fixed_hw_reg.type); - brw_reg = dst.fixed_hw_reg; - break; - - case BAD_FILE: - brw_reg = brw_null_reg(); - break; - - default: - unreachable("not reached"); - } - return brw_reg; -} - -struct brw_reg -vec4_instruction::get_src(const struct brw_vue_prog_data *prog_data, int i) -{ - struct brw_reg brw_reg; - - switch (src[i].file) { - case GRF: - brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0); - brw_reg = retype(brw_reg, src[i].type); - brw_reg.dw1.bits.swizzle = src[i].swizzle; - if (src[i].abs) - brw_reg = brw_abs(brw_reg); - if (src[i].negate) - brw_reg = negate(brw_reg); - break; - - case IMM: - switch (src[i].type) { - case BRW_REGISTER_TYPE_F: - brw_reg = brw_imm_f(src[i].fixed_hw_reg.dw1.f); - break; - case BRW_REGISTER_TYPE_D: - brw_reg = brw_imm_d(src[i].fixed_hw_reg.dw1.d); - break; - case BRW_REGISTER_TYPE_UD: - brw_reg = brw_imm_ud(src[i].fixed_hw_reg.dw1.ud); - break; - case BRW_REGISTER_TYPE_VF: - brw_reg = brw_imm_vf(src[i].fixed_hw_reg.dw1.ud); - break; - default: - unreachable("not reached"); - } - break; - - case UNIFORM: - brw_reg = stride(brw_vec4_grf(prog_data->base.dispatch_grf_start_reg + - (src[i].reg + src[i].reg_offset) 
/ 2, - ((src[i].reg + src[i].reg_offset) % 2) * 4), - 0, 4, 1); - brw_reg = retype(brw_reg, src[i].type); - brw_reg.dw1.bits.swizzle = src[i].swizzle; - if (src[i].abs) - brw_reg = brw_abs(brw_reg); - if (src[i].negate) - brw_reg = negate(brw_reg); - - /* This should have been moved to pull constants. */ - assert(!src[i].reladdr); - break; - - case HW_REG: - assert(src[i].type == src[i].fixed_hw_reg.type); - brw_reg = src[i].fixed_hw_reg; - break; - - case BAD_FILE: - /* Probably unused. */ - brw_reg = brw_null_reg(); - break; - case ATTR: - default: - unreachable("not reached"); - } - - return brw_reg; -} - -vec4_generator::vec4_generator(const struct brw_compiler *compiler, - void *log_data, - struct brw_vue_prog_data *prog_data, - void *mem_ctx, - bool debug_flag, - const char *stage_name, - const char *stage_abbrev) - : compiler(compiler), log_data(log_data), devinfo(compiler->devinfo), - prog_data(prog_data), - mem_ctx(mem_ctx), stage_name(stage_name), stage_abbrev(stage_abbrev), - debug_flag(debug_flag) -{ - p = rzalloc(mem_ctx, struct brw_codegen); - brw_init_codegen(devinfo, p, mem_ctx); -} - -vec4_generator::~vec4_generator() -{ -} - -void -vec4_generator::generate_math1_gen4(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src) +static void +generate_math1_gen4(struct brw_codegen *p, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src) { gen4_math(p, dst, @@ -178,11 +49,12 @@ check_gen6_math_src_arg(struct brw_reg src) assert(src.dw1.bits.swizzle == BRW_SWIZZLE_XYZW); } -void -vec4_generator::generate_math_gen6(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1) +static void +generate_math_gen6(struct brw_codegen *p, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) { /* Can't do writemask because math can't be align16. 
*/ assert(dst.dw1.bits.writemask == WRITEMASK_XYZW); @@ -196,11 +68,12 @@ vec4_generator::generate_math_gen6(vec4_instruction *inst, brw_set_default_access_mode(p, BRW_ALIGN_16); } -void -vec4_generator::generate_math2_gen4(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1) +static void +generate_math2_gen4(struct brw_codegen *p, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) { /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13 * "Message Payload": @@ -229,12 +102,15 @@ vec4_generator::generate_math2_gen4(vec4_instruction *inst, BRW_MATH_PRECISION_FULL); } -void -vec4_generator::generate_tex(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src, - struct brw_reg sampler_index) +static void +generate_tex(struct brw_codegen *p, + struct brw_vue_prog_data *prog_data, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg sampler_index) { + const struct brw_device_info *devinfo = p->devinfo; int msg_type = -1; if (devinfo->gen >= 5) { @@ -440,8 +316,8 @@ vec4_generator::generate_tex(vec4_instruction *inst, } } -void -vec4_generator::generate_vs_urb_write(vec4_instruction *inst) +static void +generate_vs_urb_write(struct brw_codegen *p, vec4_instruction *inst) { brw_urb_WRITE(p, brw_null_reg(), /* dest */ @@ -454,8 +330,8 @@ vec4_generator::generate_vs_urb_write(vec4_instruction *inst) BRW_URB_SWIZZLE_INTERLEAVE); } -void -vec4_generator::generate_gs_urb_write(vec4_instruction *inst) +static void +generate_gs_urb_write(struct brw_codegen *p, vec4_instruction *inst) { struct brw_reg src = brw_message_reg(inst->base_mrf); brw_urb_WRITE(p, @@ -469,14 +345,14 @@ vec4_generator::generate_gs_urb_write(vec4_instruction *inst) BRW_URB_SWIZZLE_INTERLEAVE); } -void -vec4_generator::generate_gs_urb_write_allocate(vec4_instruction *inst) +static void +generate_gs_urb_write_allocate(struct brw_codegen *p, vec4_instruction *inst) { struct 
brw_reg src = brw_message_reg(inst->base_mrf); /* We pass the temporary passed in src0 as the writeback register */ brw_urb_WRITE(p, - inst->get_src(this->prog_data, 0), /* dest */ + inst->src[0].fixed_hw_reg, /* dest */ inst->base_mrf, /* starting mrf reg nr */ src, BRW_URB_WRITE_ALLOCATE_COMPLETE, @@ -489,14 +365,13 @@ vec4_generator::generate_gs_urb_write_allocate(vec4_instruction *inst) brw_push_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_MOV(p, get_element_ud(inst->get_dst(devinfo->gen), 0), - get_element_ud(inst->get_src(this->prog_data, 0), 0)); - brw_set_default_access_mode(p, BRW_ALIGN_16); + brw_MOV(p, get_element_ud(inst->dst.fixed_hw_reg, 0), + get_element_ud(inst->src[0].fixed_hw_reg, 0)); brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_thread_end(vec4_instruction *inst) +static void +generate_gs_thread_end(struct brw_codegen *p, vec4_instruction *inst) { struct brw_reg src = brw_message_reg(inst->base_mrf); brw_urb_WRITE(p, @@ -510,10 +385,11 @@ vec4_generator::generate_gs_thread_end(vec4_instruction *inst) BRW_URB_SWIZZLE_INTERLEAVE); } -void -vec4_generator::generate_gs_set_write_offset(struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1) +static void +generate_gs_set_write_offset(struct brw_codegen *p, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) { /* From p22 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message * Header: M0.3): @@ -536,29 +412,29 @@ vec4_generator::generate_gs_set_write_offset(struct brw_reg dst, brw_push_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); brw_set_default_mask_control(p, BRW_MASK_DISABLE); - assert(devinfo->gen >= 7 && + assert(p->devinfo->gen >= 7 && src1.file == BRW_IMMEDIATE_VALUE && src1.type == BRW_REGISTER_TYPE_UD && src1.dw1.ud <= USHRT_MAX); - if (src0.file == IMM) { + if (src0.file == BRW_IMMEDIATE_VALUE) { brw_MOV(p, suboffset(stride(dst, 2, 2, 1), 3), 
brw_imm_ud(src0.dw1.ud * src1.dw1.ud)); } else { brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4), retype(src1, BRW_REGISTER_TYPE_UW)); } - brw_set_default_access_mode(p, BRW_ALIGN_16); brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_set_vertex_count(struct brw_reg dst, - struct brw_reg src) +static void +generate_gs_set_vertex_count(struct brw_codegen *p, + struct brw_reg dst, + struct brw_reg src) { brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); - if (devinfo->gen >= 8) { + if (p->devinfo->gen >= 8) { /* Move the vertex count into the second MRF for the EOT write. */ brw_MOV(p, retype(brw_message_reg(dst.nr + 1), BRW_REGISTER_TYPE_UD), src); @@ -580,16 +456,17 @@ vec4_generator::generate_gs_set_vertex_count(struct brw_reg dst, brw_MOV(p, suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4), stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0)); - brw_set_default_access_mode(p, BRW_ALIGN_16); } brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_svb_write(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1) +static void +generate_gs_svb_write(struct brw_codegen *p, + struct brw_vue_prog_data *prog_data, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) { int binding = inst->sol_binding; bool final_write = inst->sol_final_write; @@ -623,12 +500,12 @@ vec4_generator::generate_gs_svb_write(vec4_instruction *inst, brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_svb_set_destination_index(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src) +static void +generate_gs_svb_set_destination_index(struct brw_codegen *p, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src) { - int vertex = inst->sol_vertex; brw_push_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); @@ -637,8 +514,10 @@ vec4_generator::generate_gs_svb_set_destination_index(vec4_instruction *inst, 
brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src) +static void +generate_gs_set_dword_2(struct brw_codegen *p, + struct brw_reg dst, + struct brw_reg src) { brw_push_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); @@ -647,8 +526,9 @@ vec4_generator::generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src) brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst) +static void +generate_gs_prepare_channel_masks(struct brw_codegen *p, + struct brw_reg dst) { /* We want to left shift just DWORD 4 (the x component belonging to the * second geometry shader invocation) by 4 bits. So generate the @@ -664,9 +544,10 @@ vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst) brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst, - struct brw_reg src) +static void +generate_gs_set_channel_masks(struct brw_codegen *p, + struct brw_reg dst, + struct brw_reg src) { /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message * Header: M0.5): @@ -727,8 +608,9 @@ vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst, brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_get_instance_id(struct brw_reg dst) +static void +generate_gs_get_instance_id(struct brw_codegen *p, + struct brw_reg dst) { /* We want to right shift R0.0 & R0.1 by GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT * and store into dst.0 & dst.4. 
So generate the instruction: @@ -744,11 +626,12 @@ vec4_generator::generate_gs_get_instance_id(struct brw_reg dst) brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1, - struct brw_reg src2) +static void +generate_gs_ff_sync_set_primitives(struct brw_codegen *p, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1, + struct brw_reg src2) { brw_push_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); @@ -765,11 +648,12 @@ vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst, brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_ff_sync(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src0, - struct brw_reg src1) +static void +generate_gs_ff_sync(struct brw_codegen *p, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) { /* This opcode uses an implied MRF register for: * - the header of the ff_sync message. 
And as such it is expected to be @@ -811,8 +695,8 @@ vec4_generator::generate_gs_ff_sync(vec4_instruction *inst, brw_pop_insn_state(p); } -void -vec4_generator::generate_gs_set_primitive_id(struct brw_reg dst) +static void +generate_gs_set_primitive_id(struct brw_codegen *p, struct brw_reg dst) { /* In gen6, PrimitiveID is delivered in R0.1 of the payload */ struct brw_reg src = brw_vec8_grf(0, 0); @@ -823,13 +707,14 @@ vec4_generator::generate_gs_set_primitive_id(struct brw_reg dst) brw_pop_insn_state(p); } -void -vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1, - struct brw_reg index) +static void +generate_oword_dual_block_offsets(struct brw_codegen *p, + struct brw_reg m1, + struct brw_reg index) { int second_vertex_offset; - if (devinfo->gen >= 6) + if (p->devinfo->gen >= 6) second_vertex_offset = 1; else second_vertex_offset = 16; @@ -860,8 +745,9 @@ vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1, brw_pop_insn_state(p); } -void -vec4_generator::generate_unpack_flags(struct brw_reg dst) +static void +generate_unpack_flags(struct brw_codegen *p, + struct brw_reg dst) { brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); @@ -878,16 +764,18 @@ vec4_generator::generate_unpack_flags(struct brw_reg dst) brw_pop_insn_state(p); } -void -vec4_generator::generate_scratch_read(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg index) +static void +generate_scratch_read(struct brw_codegen *p, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg index) { + const struct brw_device_info *devinfo = p->devinfo; struct brw_reg header = brw_vec8_grf(0, 0); gen6_resolve_implied_move(p, &header, inst->base_mrf); - generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), + generate_oword_dual_block_offsets(p, brw_message_reg(inst->base_mrf + 1), index); uint32_t msg_type; @@ -906,7 +794,7 @@ vec4_generator::generate_scratch_read(vec4_instruction *inst, brw_set_dest(p, send, dst); 
brw_set_src0(p, send, header); if (devinfo->gen < 6) - brw_inst_set_cond_modifier(p->devinfo, send, inst->base_mrf); + brw_inst_set_cond_modifier(devinfo, send, inst->base_mrf); brw_set_dp_read_message(p, send, 255, /* binding table index: stateless access */ BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, @@ -917,12 +805,14 @@ vec4_generator::generate_scratch_read(vec4_instruction *inst, 1 /* rlen */); } -void -vec4_generator::generate_scratch_write(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src, - struct brw_reg index) +static void +generate_scratch_write(struct brw_codegen *p, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg index) { + const struct brw_device_info *devinfo = p->devinfo; struct brw_reg header = brw_vec8_grf(0, 0); bool write_commit; @@ -933,7 +823,7 @@ vec4_generator::generate_scratch_write(vec4_instruction *inst, gen6_resolve_implied_move(p, &header, inst->base_mrf); - generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), + generate_oword_dual_block_offsets(p, brw_message_reg(inst->base_mrf + 1), index); brw_MOV(p, @@ -990,12 +880,15 @@ vec4_generator::generate_scratch_write(vec4_instruction *inst, write_commit); } -void -vec4_generator::generate_pull_constant_load(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset) +static void +generate_pull_constant_load(struct brw_codegen *p, + struct brw_vue_prog_data *prog_data, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg index, + struct brw_reg offset) { + const struct brw_device_info *devinfo = p->devinfo; assert(index.file == BRW_IMMEDIATE_VALUE && index.type == BRW_REGISTER_TYPE_UD); uint32_t surf_index = index.dw1.ud; @@ -1036,13 +929,15 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst, brw_mark_surface_used(&prog_data->base, surf_index); } -void -vec4_generator::generate_get_buffer_size(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg src, 
- struct brw_reg surf_index) +static void +generate_get_buffer_size(struct brw_codegen *p, + struct brw_vue_prog_data *prog_data, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg src, + struct brw_reg surf_index) { - assert(devinfo->gen >= 7); + assert(p->devinfo->gen >= 7); assert(surf_index.type == BRW_REGISTER_TYPE_UD && surf_index.file == BRW_IMMEDIATE_VALUE); @@ -1062,11 +957,13 @@ vec4_generator::generate_get_buffer_size(vec4_instruction *inst, brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud); } -void -vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, - struct brw_reg dst, - struct brw_reg surf_index, - struct brw_reg offset) +static void +generate_pull_constant_load_gen7(struct brw_codegen *p, + struct brw_vue_prog_data *prog_data, + vec4_instruction *inst, + struct brw_reg dst, + struct brw_reg surf_index, + struct brw_reg offset) { assert(surf_index.type == BRW_REGISTER_TYPE_UD); @@ -1123,9 +1020,10 @@ vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst, } } -void -vec4_generator::generate_set_simd4x2_header_gen9(vec4_instruction *inst, - struct brw_reg dst) +static void +generate_set_simd4x2_header_gen9(struct brw_codegen *p, + vec4_instruction *inst, + struct brw_reg dst) { brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); @@ -1140,9 +1038,18 @@ vec4_generator::generate_set_simd4x2_header_gen9(vec4_instruction *inst, brw_pop_insn_state(p); } -void -vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir) +static void +generate_code(struct brw_codegen *p, + const struct brw_compiler *compiler, + void *log_data, + const nir_shader *nir, + struct brw_vue_prog_data *prog_data, + const struct cfg_t *cfg) { + const struct brw_device_info *devinfo = p->devinfo; + const char *stage_abbrev = _mesa_shader_stage_to_abbrev(nir->stage); + bool debug_flag = INTEL_DEBUG & + intel_debug_flag_for_shader_stage(nir->stage); struct annotation_info annotation; 
memset(&annotation, 0, sizeof(annotation)); int loop_count = 0; @@ -1154,9 +1061,9 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir) annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset); for (unsigned int i = 0; i < 3; i++) { - src[i] = inst->get_src(this->prog_data, i); + src[i] = inst->src[i].fixed_hw_reg; } - dst = inst->get_dst(devinfo->gen); + dst = inst->dst.fixed_hw_reg; brw_set_default_predicate_control(p, inst->predicate); brw_set_default_predicate_inverse(p, inst->predicate_inverse); @@ -1383,9 +1290,9 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir) gen6_math(p, dst, brw_math_function(inst->opcode), src[0], brw_null_reg()); } else if (devinfo->gen == 6) { - generate_math_gen6(inst, dst, src[0], brw_null_reg()); + generate_math_gen6(p, inst, dst, src[0], brw_null_reg()); } else { - generate_math1_gen4(inst, dst, src[0]); + generate_math1_gen4(p, inst, dst, src[0]); } break; @@ -1396,9 +1303,9 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir) if (devinfo->gen >= 7) { gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]); } else if (devinfo->gen == 6) { - generate_math_gen6(inst, dst, src[0], src[1]); + generate_math_gen6(p, inst, dst, src[0], src[1]); } else { - generate_math2_gen4(inst, dst, src[0], src[1]); + generate_math2_gen4(p, inst, dst, src[0], src[1]); } break; @@ -1412,92 +1319,92 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir) case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: case SHADER_OPCODE_SAMPLEINFO: - generate_tex(inst, dst, src[0], src[1]); + generate_tex(p, prog_data, inst, dst, src[0], src[1]); break; case VS_OPCODE_URB_WRITE: - generate_vs_urb_write(inst); + generate_vs_urb_write(p, inst); break; case SHADER_OPCODE_GEN4_SCRATCH_READ: - generate_scratch_read(inst, dst, src[0]); + generate_scratch_read(p, inst, dst, src[0]); break; case SHADER_OPCODE_GEN4_SCRATCH_WRITE: - generate_scratch_write(inst, dst, 
src[0], src[1]); + generate_scratch_write(p, inst, dst, src[0], src[1]); break; case VS_OPCODE_PULL_CONSTANT_LOAD: - generate_pull_constant_load(inst, dst, src[0], src[1]); + generate_pull_constant_load(p, prog_data, inst, dst, src[0], src[1]); break; case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: - generate_pull_constant_load_gen7(inst, dst, src[0], src[1]); + generate_pull_constant_load_gen7(p, prog_data, inst, dst, src[0], src[1]); break; case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9: - generate_set_simd4x2_header_gen9(inst, dst); + generate_set_simd4x2_header_gen9(p, inst, dst); break; case VS_OPCODE_GET_BUFFER_SIZE: - generate_get_buffer_size(inst, dst, src[0], src[1]); + generate_get_buffer_size(p, prog_data, inst, dst, src[0], src[1]); break; case GS_OPCODE_URB_WRITE: - generate_gs_urb_write(inst); + generate_gs_urb_write(p, inst); break; case GS_OPCODE_URB_WRITE_ALLOCATE: - generate_gs_urb_write_allocate(inst); + generate_gs_urb_write_allocate(p, inst); break; case GS_OPCODE_SVB_WRITE: - generate_gs_svb_write(inst, dst, src[0], src[1]); + generate_gs_svb_write(p, prog_data, inst, dst, src[0], src[1]); break; case GS_OPCODE_SVB_SET_DST_INDEX: - generate_gs_svb_set_destination_index(inst, dst, src[0]); + generate_gs_svb_set_destination_index(p, inst, dst, src[0]); break; case GS_OPCODE_THREAD_END: - generate_gs_thread_end(inst); + generate_gs_thread_end(p, inst); break; case GS_OPCODE_SET_WRITE_OFFSET: - generate_gs_set_write_offset(dst, src[0], src[1]); + generate_gs_set_write_offset(p, dst, src[0], src[1]); break; case GS_OPCODE_SET_VERTEX_COUNT: - generate_gs_set_vertex_count(dst, src[0]); + generate_gs_set_vertex_count(p, dst, src[0]); break; case GS_OPCODE_FF_SYNC: - generate_gs_ff_sync(inst, dst, src[0], src[1]); + generate_gs_ff_sync(p, inst, dst, src[0], src[1]); break; case GS_OPCODE_FF_SYNC_SET_PRIMITIVES: - generate_gs_ff_sync_set_primitives(dst, src[0], src[1], src[2]); + generate_gs_ff_sync_set_primitives(p, dst, src[0], src[1], src[2]); break; case 
GS_OPCODE_SET_PRIMITIVE_ID: - generate_gs_set_primitive_id(dst); + generate_gs_set_primitive_id(p, dst); break; case GS_OPCODE_SET_DWORD_2: - generate_gs_set_dword_2(dst, src[0]); + generate_gs_set_dword_2(p, dst, src[0]); break; case GS_OPCODE_PREPARE_CHANNEL_MASKS: - generate_gs_prepare_channel_masks(dst); + generate_gs_prepare_channel_masks(p, dst); break; case GS_OPCODE_SET_CHANNEL_MASKS: - generate_gs_set_channel_masks(dst, src[0]); + generate_gs_set_channel_masks(p, dst, src[0]); break; case GS_OPCODE_GET_INSTANCE_ID: - generate_gs_get_instance_id(dst); + generate_gs_get_instance_id(p, dst); break; case SHADER_OPCODE_SHADER_TIME_ADD: @@ -1556,7 +1463,7 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir) break; case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: - generate_unpack_flags(dst); + generate_unpack_flags(p, dst); break; case VEC4_OPCODE_MOV_BYTES: { @@ -1651,10 +1558,10 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir) nir->info.label ? nir->info.label : "unnamed", _mesa_shader_stage_to_string(nir->stage), nir->info.name); - fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. Compacted %d to %d" - " bytes (%.0f%%)\n", + fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles." 
+ "Compacted %d to %d bytes (%.0f%%)\n", stage_abbrev, - before_size / 16, loop_count, before_size, after_size, + before_size / 16, loop_count, cfg->cycle_count, before_size, after_size, 100.0f * (before_size - after_size) / before_size); dump_assembly(p->store, annotation.ann_count, annotation.ann, @@ -1663,21 +1570,27 @@ vec4_generator::generate_code(const cfg_t *cfg, const nir_shader *nir) } compiler->shader_debug_log(log_data, - "%s vec4 shader: %d inst, %d loops, " + "%s vec4 shader: %d inst, %d loops, %u cycles, " "compacted %d to %d bytes.\n", - stage_abbrev, before_size / 16, loop_count, + stage_abbrev, before_size / 16, + loop_count, cfg->cycle_count, before_size, after_size); } -const unsigned * -vec4_generator::generate_assembly(const cfg_t *cfg, - unsigned *assembly_size, - const nir_shader *nir) +extern "C" const unsigned * +brw_vec4_generate_assembly(const struct brw_compiler *compiler, + void *log_data, + void *mem_ctx, + const nir_shader *nir, + struct brw_vue_prog_data *prog_data, + const struct cfg_t *cfg, + unsigned *out_assembly_size) { + struct brw_codegen *p = rzalloc(mem_ctx, struct brw_codegen); + brw_init_codegen(compiler->devinfo, p, mem_ctx); brw_set_default_access_mode(p, BRW_ALIGN_16); - generate_code(cfg, nir); - return brw_get_program(p, assembly_size); -} + generate_code(p, compiler, log_data, nir, prog_data, cfg); -} /* namespace brw */ + return brw_get_program(p, out_assembly_size); +} diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 9402489e628..cfb5cd95cb1 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -768,7 +768,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, output_size_bytes += 32; assert(output_size_bytes >= 1); - int max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES; + unsigned max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES; if 
(compiler->devinfo->gen == 6) max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES; if (output_size_bytes > max_output_size_bytes) @@ -824,9 +824,9 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, vec4_gs_visitor v(compiler, log_data, &c, prog_data, shader, mem_ctx, true /* no_spills */, shader_time_index); if (v.run()) { - vec4_generator g(compiler, log_data, &prog_data->base, mem_ctx, - INTEL_DEBUG & DEBUG_GS, "geometry", "GS"); - return g.generate_assembly(v.cfg, final_assembly_size, shader); + return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, + shader, &prog_data->base, v.cfg, + final_assembly_size); } } } @@ -875,9 +875,9 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, if (error_str) *error_str = ralloc_strdup(mem_ctx, gs->fail_msg); } else { - vec4_generator g(compiler, log_data, &prog_data->base, mem_ctx, - INTEL_DEBUG & DEBUG_GS, "geometry", "GS"); - ret = g.generate_assembly(gs->cfg, final_assembly_size, shader); + ret = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, shader, + &prog_data->base, gs->cfg, + final_assembly_size); } delete gs; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp index 678237901f2..aa9a6572eee 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp @@ -86,9 +86,10 @@ vec4_live_variables::setup_def_use() } } } - if (inst->reads_flag()) { - if (!BITSET_TEST(bd->flag_def, 0)) { - BITSET_SET(bd->flag_use, 0); + for (unsigned c = 0; c < 4; c++) { + if (inst->reads_flag(c) && + !BITSET_TEST(bd->flag_def, c)) { + BITSET_SET(bd->flag_use, c); } } @@ -110,8 +111,11 @@ vec4_live_variables::setup_def_use() } } if (inst->writes_flag()) { - if (!BITSET_TEST(bd->flag_use, 0)) { - BITSET_SET(bd->flag_def, 0); + for (unsigned c = 0; c < 4; c++) { + if ((inst->dst.writemask & (1 << c)) && + !BITSET_TEST(bd->flag_use, c)) { + 
BITSET_SET(bd->flag_def, c); + } } } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index e79a9f3b5b9..1fb1773f856 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -193,7 +193,9 @@ vec4_visitor::nir_emit_if(nir_if *if_stmt) vec4_instruction *inst = emit(MOV(dst_null_d(), condition)); inst->conditional_mod = BRW_CONDITIONAL_NZ; - emit(IF(BRW_PREDICATE_NORMAL)); + /* We can just predicate based on the X channel, as the condition only + * goes on its own line */ + emit(IF(BRW_PREDICATE_ALIGN16_REPLICATE_X)); nir_emit_cf_list(&if_stmt->then_list); @@ -806,6 +808,16 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) break; } + case nir_intrinsic_shader_clock: { + /* We cannot do anything if there is an event, so ignore it for now */ + const src_reg shader_clock = get_timestamp(); + const enum brw_reg_type type = brw_type_for_base_type(glsl_type::uvec2_type); + + dest = get_nir_dest(instr->dest, type); + emit(MOV(dest, shader_clock)); + break; + } + default: unreachable("Unknown intrinsic"); } @@ -1144,26 +1156,10 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_ball_iequal3: case nir_op_ball_fequal4: case nir_op_ball_iequal4: { - dst_reg tmp = dst_reg(this, glsl_type::bool_type); - - switch (instr->op) { - case nir_op_ball_fequal2: - case nir_op_ball_iequal2: - tmp.writemask = WRITEMASK_XY; - break; - case nir_op_ball_fequal3: - case nir_op_ball_iequal3: - tmp.writemask = WRITEMASK_XYZ; - break; - case nir_op_ball_fequal4: - case nir_op_ball_iequal4: - tmp.writemask = WRITEMASK_XYZW; - break; - default: - unreachable("not reached"); - } + unsigned swiz = + brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]); - emit(CMP(tmp, op[0], op[1], + emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz), brw_conditional_for_nir_comparison(instr->op))); emit(MOV(dst, src_reg(0))); inst = emit(MOV(dst, src_reg(~0))); @@ 
-1177,26 +1173,10 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_bany_inequal3: case nir_op_bany_fnequal4: case nir_op_bany_inequal4: { - dst_reg tmp = dst_reg(this, glsl_type::bool_type); + unsigned swiz = + brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]); - switch (instr->op) { - case nir_op_bany_fnequal2: - case nir_op_bany_inequal2: - tmp.writemask = WRITEMASK_XY; - break; - case nir_op_bany_fnequal3: - case nir_op_bany_inequal3: - tmp.writemask = WRITEMASK_XYZ; - break; - case nir_op_bany_fnequal4: - case nir_op_bany_inequal4: - tmp.writemask = WRITEMASK_XYZW; - break; - default: - unreachable("not reached"); - } - - emit(CMP(tmp, op[0], op[1], + emit(CMP(dst_null_d(), swizzle(op[0], swiz), swizzle(op[1], swiz), brw_conditional_for_nir_comparison(instr->op))); emit(MOV(dst, src_reg(0))); @@ -1321,26 +1301,18 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_ufind_msb: case nir_op_ifind_msb: { - src_reg temp = src_reg(this, glsl_type::uint_type); - - inst = emit(FBH(dst_reg(temp), op[0])); - inst->dst.writemask = WRITEMASK_XYZW; + emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0])); /* FBH counts from the MSB side, while GLSL's findMSB() wants the count * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then * subtract the result from 31 to convert the MSB count into an LSB count. */ + src_reg src(dst); + emit(CMP(dst_null_d(), src, src_reg(-1), BRW_CONDITIONAL_NZ)); - /* FBH only supports UD type for dst, so use a MOV to convert UD to D. 
*/ - temp.swizzle = BRW_SWIZZLE_NOOP; - emit(MOV(dst, temp)); - - src_reg src_tmp = src_reg(dst); - emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ)); - - src_tmp.negate = true; - inst = emit(ADD(dst, src_tmp, src_reg(31))); + inst = emit(ADD(dst, src, src_reg(31))); inst->predicate = BRW_PREDICATE_NORMAL; + inst->src[0].negate = true; break; } @@ -1461,11 +1433,11 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_bany2: case nir_op_bany3: case nir_op_bany4: { - dst_reg tmp = dst_reg(this, glsl_type::bool_type); - tmp.writemask = brw_writemask_for_size(nir_op_infos[instr->op].input_sizes[0]); - - emit(CMP(tmp, op[0], src_reg(0), BRW_CONDITIONAL_NZ)); + unsigned swiz = + brw_swizzle_for_size(nir_op_infos[instr->op].input_sizes[0]); + emit(CMP(dst_null_d(), swizzle(op[0], swiz), src_reg(0), + BRW_CONDITIONAL_NZ)); emit(MOV(dst, src_reg(0))); inst = emit(MOV(dst, src_reg(~0))); inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 6d155285820..92b089d7ff6 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -883,6 +883,18 @@ vec4_visitor::emit_texture(ir_texture_opcode op, uint32_t sampler, src_reg sampler_reg) { + /* The sampler can only meaningfully compute LOD for fragment shader + * messages. For all other stages, we change the opcode to TXL and hardcode + * the LOD to 0. + * + * textureQueryLevels() is implemented in terms of TXS so we need to pass a + * valid LOD argument. 
+ */ + if (op == ir_tex || op == ir_query_levels) { + assert(lod.file == BAD_FILE); + lod = src_reg(0.0f); + } + enum opcode opcode; switch (op) { case ir_tex: opcode = SHADER_OPCODE_TXL; break; diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 5db4b3a86af..0b805b1c0c4 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -311,7 +311,8 @@ brw_vs_populate_key(struct brw_context *brw, key->program_string_id = vp->id; if (ctx->Transform.ClipPlanesEnabled != 0 && - ctx->API == API_OPENGL_COMPAT && + (ctx->API == API_OPENGL_COMPAT || + ctx->API == API_OPENGLES) && vp->program.Base.ClipDistanceArraySize == 0) { key->nr_userclip_plane_consts = _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1; diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index f65258a52a5..d7473845c72 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -177,8 +177,8 @@ brw_upload_vs_abo_surfaces(struct brw_context *brw) if (prog) { /* BRW_NEW_VS_PROG_DATA */ - brw_upload_abo_surfaces(brw, prog, &brw->vs.base, - &brw->vs.prog_data->base.base); + brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX], + &brw->vs.base, &brw->vs.prog_data->base.base); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 6ebe6481c32..f88f8d59196 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1029,7 +1029,7 @@ const struct brw_tracked_state brw_cs_ubo_surfaces = { void brw_upload_abo_surfaces(struct brw_context *brw, - struct gl_shader_program *prog, + struct gl_shader *shader, struct brw_stage_state *stage_state, struct brw_stage_prog_data *prog_data) { @@ -1037,21 +1037,22 @@ brw_upload_abo_surfaces(struct brw_context *brw, uint32_t *surf_offsets = 
&stage_state->surf_offset[prog_data->binding_table.abo_start]; - for (unsigned i = 0; i < prog->NumAtomicBuffers; i++) { - struct gl_atomic_buffer_binding *binding = - &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding]; - struct intel_buffer_object *intel_bo = - intel_buffer_object(binding->BufferObject); - drm_intel_bo *bo = intel_bufferobj_buffer( - brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset); - - brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo, - binding->Offset, BRW_SURFACEFORMAT_RAW, - bo->size - binding->Offset, 1, true); - } + if (shader && shader->NumAtomicBuffers) { + for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) { + struct gl_atomic_buffer_binding *binding = + &ctx->AtomicBufferBindings[shader->AtomicBuffers[i]->Binding]; + struct intel_buffer_object *intel_bo = + intel_buffer_object(binding->BufferObject); + drm_intel_bo *bo = intel_bufferobj_buffer( + brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset); + + brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo, + binding->Offset, BRW_SURFACEFORMAT_RAW, + bo->size - binding->Offset, 1, true); + } - if (prog->NumAtomicBuffers) brw->ctx.NewDriverState |= BRW_NEW_SURFACES; + } } static void @@ -1063,8 +1064,8 @@ brw_upload_wm_abo_surfaces(struct brw_context *brw) if (prog) { /* BRW_NEW_FS_PROG_DATA */ - brw_upload_abo_surfaces(brw, prog, &brw->wm.base, - &brw->wm.prog_data->base); + brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT], + &brw->wm.base, &brw->wm.prog_data->base); } } @@ -1088,8 +1089,8 @@ brw_upload_cs_abo_surfaces(struct brw_context *brw) if (prog) { /* BRW_NEW_CS_PROG_DATA */ - brw_upload_abo_surfaces(brw, prog, &brw->cs.base, - &brw->cs.prog_data->base); + brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE], + &brw->cs.base, &brw->cs.prog_data->base); } } diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 
4068f2844a2..2634e6ba6fd 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -60,6 +60,23 @@ get_attr_override(const struct brw_vue_map *vue_map, int urb_entry_read_offset, /* Find the VUE slot for this attribute. */ int slot = vue_map->varying_to_slot[fs_attr]; + /* Viewport and Layer are stored in the VUE header. We need to override + * them to zero if earlier stages didn't write them, as GL requires that + * they read back as zero when not explicitly set. + */ + if (fs_attr == VARYING_SLOT_VIEWPORT || fs_attr == VARYING_SLOT_LAYER) { + unsigned override = + ATTRIBUTE_0_OVERRIDE_X | ATTRIBUTE_0_OVERRIDE_W | + ATTRIBUTE_CONST_0000 << ATTRIBUTE_0_CONST_SOURCE_SHIFT; + + if (!(vue_map->slots_valid & VARYING_BIT_LAYER)) + override |= ATTRIBUTE_0_OVERRIDE_Y; + if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT)) + override |= ATTRIBUTE_0_OVERRIDE_Z; + + return override; + } + /* If there was only a back color written but not front, use back * as the color instead of undefined */ @@ -159,14 +176,30 @@ calculate_attr_overrides(const struct brw_context *brw, uint16_t *attr_overrides, uint32_t *point_sprite_enables, uint32_t *flat_enables, - uint32_t *urb_entry_read_length) + uint32_t *urb_entry_read_length, + uint32_t *urb_entry_read_offset) { - const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; uint32_t max_source_attr = 0; *point_sprite_enables = 0; *flat_enables = 0; + *urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; + + /* BRW_NEW_FRAGMENT_PROGRAM + * + * If the fragment shader reads VARYING_SLOT_LAYER, then we need to pass in + * the full vertex header. 
Otherwise, we can program the SF to start + * reading at an offset of 1 (2 varying slots) to skip unnecessary data: + * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5 + * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+ + */ + + bool fs_needs_vue_header = brw->fragment_program->Base.InputsRead & + (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT); + + *urb_entry_read_offset = fs_needs_vue_header ? 0 : 1; + /* _NEW_LIGHT */ bool shade_model_flat = brw->ctx.Light.ShadeModel == GL_FLAT; @@ -228,7 +261,7 @@ calculate_attr_overrides(const struct brw_context *brw, /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */ uint16_t attr_override = point_sprite ? 0 : get_attr_override(&brw->vue_map_geom_out, - urb_entry_read_offset, attr, + *urb_entry_read_offset, attr, brw->ctx.VertexProgram._TwoSideEnabled, &max_source_attr); @@ -276,7 +309,6 @@ upload_sf_state(struct brw_context *brw) bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; - const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; float point_size; uint16_t attr_overrides[16]; uint32_t point_sprite_origin; @@ -411,8 +443,10 @@ upload_sf_state(struct brw_context *brw) * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA */ uint32_t urb_entry_read_length; + uint32_t urb_entry_read_offset; calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables, - &flat_enables, &urb_entry_read_length); + &flat_enables, &urb_entry_read_length, + &urb_entry_read_offset); dw1 |= (urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT); diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c index 6aeb0cb243f..2d7c04f4ad2 100644 --- a/src/mesa/drivers/dri/i965/gen7_cs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c @@ -285,3 +285,34 @@ const struct brw_tracked_state gen7_cs_push_constants = { 
}, .emit = gen7_upload_cs_push_constants, }; + +/** + * Creates a new CS constant buffer reflecting the current CS program's + * constants, if needed by the CS program. + */ +static void +brw_upload_cs_pull_constants(struct brw_context *brw) +{ + struct brw_stage_state *stage_state = &brw->cs.base; + + /* BRW_NEW_COMPUTE_PROGRAM */ + struct brw_compute_program *cp = + (struct brw_compute_program *) brw->compute_program; + + /* BRW_NEW_CS_PROG_DATA */ + const struct brw_stage_prog_data *prog_data = &brw->cs.prog_data->base; + + /* _NEW_PROGRAM_CONSTANTS */ + brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &cp->program.Base, + stage_state, prog_data, true); +} + +const struct brw_tracked_state brw_cs_pull_constants = { + .dirty = { + .mesa = _NEW_PROGRAM_CONSTANTS, + .brw = BRW_NEW_BATCH | + BRW_NEW_COMPUTE_PROGRAM | + BRW_NEW_CS_PROG_DATA, + }, + .emit = brw_upload_cs_pull_constants, +}; diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index 698b3d491bc..b1f13aceba4 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -40,7 +40,6 @@ upload_sbe_state(struct brw_context *brw) uint32_t point_sprite_enables; uint32_t flat_enables; int i; - const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; uint16_t attr_overrides[16]; /* _NEW_BUFFERS */ bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); @@ -65,8 +64,10 @@ upload_sbe_state(struct brw_context *brw) * _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | BRW_NEW_FS_PROG_DATA */ uint32_t urb_entry_read_length; + uint32_t urb_entry_read_offset; calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables, - &flat_enables, &urb_entry_read_length); + &flat_enables, &urb_entry_read_length, + &urb_entry_read_offset); dw1 |= urb_entry_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | urb_entry_read_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT; diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c 
b/src/mesa/drivers/dri/i965/gen8_ps_state.c index 8f0507413a7..10e433b1d59 100644 --- a/src/mesa/drivers/dri/i965/gen8_ps_state.c +++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c @@ -95,6 +95,11 @@ gen8_upload_ps_extra(struct brw_context *brw, !brw_color_buffer_write_enabled(brw)) dw1 |= GEN8_PSX_SHADER_HAS_UAV; + if (prog_data->computed_stencil) { + assert(brw->gen >= 9); + dw1 |= GEN9_PSX_SHADER_COMPUTES_STENCIL; + } + BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2)); OUT_BATCH(dw1); diff --git a/src/mesa/drivers/dri/i965/gen8_sf_state.c b/src/mesa/drivers/dri/i965/gen8_sf_state.c index 6b655ee493e..8b6f31f3be6 100644 --- a/src/mesa/drivers/dri/i965/gen8_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c @@ -37,6 +37,7 @@ upload_sbe(struct brw_context *brw) uint32_t num_outputs = brw->wm.prog_data->num_varying_inputs; uint16_t attr_overrides[VARYING_SLOT_MAX]; uint32_t urb_entry_read_length; + uint32_t urb_entry_read_offset; uint32_t point_sprite_enables; uint32_t flat_enables; int sbe_cmd_length; @@ -66,7 +67,8 @@ upload_sbe(struct brw_context *brw) calculate_attr_overrides(brw, attr_overrides, &point_sprite_enables, &flat_enables, - &urb_entry_read_length); + &urb_entry_read_length, + &urb_entry_read_offset); /* Typically, the URB entry read length and offset should be programmed in * 3DSTATE_VS and 3DSTATE_GS; SBE inherits it from the last active stage @@ -78,7 +80,7 @@ upload_sbe(struct brw_context *brw) */ dw1 |= urb_entry_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | - BRW_SF_URB_ENTRY_READ_OFFSET << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT | + urb_entry_read_offset << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT | GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH | GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET; diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index 18b86652fd2..140a6544983 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ 
-183,6 +183,14 @@ gen8_emit_buffer_surface_state(struct brw_context *brw, } static void +gen8_emit_fast_clear_color(struct brw_context *brw, + struct intel_mipmap_tree *mt, + uint32_t *surf) +{ + surf[7] |= mt->fast_clear_color_value; +} + +static void gen8_emit_texture_surface_state(struct brw_context *brw, struct intel_mipmap_tree *mt, GLenum target, @@ -284,11 +292,10 @@ gen8_emit_texture_surface_state(struct brw_context *brw, SET_FIELD((aux_mt->pitch / tile_w) - 1, GEN8_SURFACE_AUX_PITCH) | aux_mode; - } else { - surf[6] = 0; } - surf[7] = mt->fast_clear_color_value | + gen8_emit_fast_clear_color(brw, mt, surf); + surf[7] |= SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0)), GEN7_SURFACE_SCS_R) | SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1)), GEN7_SURFACE_SCS_G) | SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2)), GEN7_SURFACE_SCS_B) | @@ -302,11 +309,7 @@ gen8_emit_texture_surface_state(struct brw_context *brw, aux_mt->bo, 0, I915_GEM_DOMAIN_SAMPLER, (rw ? I915_GEM_DOMAIN_SAMPLER : 0)); - } else { - surf[10] = 0; - surf[11] = 0; } - surf[12] = 0; /* Emit relocation to surface contents */ drm_intel_bo_emit_reloc(brw->batch.bo, @@ -514,15 +517,13 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, SET_FIELD((aux_mt->pitch / tile_w) - 1, GEN8_SURFACE_AUX_PITCH) | aux_mode; - } else { - surf[6] = 0; } - surf[7] = mt->fast_clear_color_value | - SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | - SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | - SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | - SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A); + gen8_emit_fast_clear_color(brw, mt, surf); + surf[7] |= SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | + SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | + SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | + SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A); assert(mt->offset % mt->cpp == 0); *((uint64_t *) &surf[8]) = mt->bo->offset64 + mt->offset; /* reloc */ @@ -533,11 +534,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, offset + 
10 * 4, aux_mt->bo, 0, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); - } else { - surf[10] = 0; - surf[11] = 0; } - surf[12] = 0; drm_intel_bo_emit_reloc(brw->batch.bo, offset + 8 * 4, diff --git a/src/mesa/drivers/dri/i965/intel_debug.c b/src/mesa/drivers/dri/i965/intel_debug.c index f7c02c8a38d..c00d2e786f3 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.c +++ b/src/mesa/drivers/dri/i965/intel_debug.c @@ -73,6 +73,8 @@ static const struct debug_control debug_control[] = { { "spill_fs", DEBUG_SPILL_FS }, { "spill_vec4", DEBUG_SPILL_VEC4 }, { "cs", DEBUG_CS }, + { "hex", DEBUG_HEX }, + { "nocompact", DEBUG_NO_COMPACTION }, { NULL, 0 } }; diff --git a/src/mesa/drivers/dri/i965/intel_debug.h b/src/mesa/drivers/dri/i965/intel_debug.h index 0a6e1b90b98..98bd7e93956 100644 --- a/src/mesa/drivers/dri/i965/intel_debug.h +++ b/src/mesa/drivers/dri/i965/intel_debug.h @@ -67,6 +67,8 @@ extern uint64_t INTEL_DEBUG; #define DEBUG_SPILL_FS (1ull << 31) #define DEBUG_SPILL_VEC4 (1ull << 32) #define DEBUG_CS (1ull << 33) +#define DEBUG_HEX (1ull << 34) +#define DEBUG_NO_COMPACTION (1ull << 35) #ifdef HAVE_ANDROID_PLATFORM #define LOG_TAG "INTEL-MESA" diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 3f9afd16c71..4643ea3e87b 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -287,6 +287,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_conditional_render_inverted = true; ctx->Extensions.ARB_draw_buffers_blend = true; ctx->Extensions.ARB_ES3_compatibility = true; + ctx->Extensions.ARB_fragment_layer_viewport = true; ctx->Extensions.ARB_sample_shading = true; ctx->Extensions.ARB_shading_language_420pack = true; ctx->Extensions.ARB_shading_language_packing = true; @@ -324,6 +325,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_framebuffer_no_attachments = true; ctx->Extensions.ARB_gpu_shader5 = true; 
ctx->Extensions.ARB_shader_atomic_counters = true; + ctx->Extensions.ARB_shader_clock = true; ctx->Extensions.ARB_shader_image_load_store = true; ctx->Extensions.ARB_shader_image_size = true; ctx->Extensions.ARB_shader_texture_image_samples = true; @@ -358,6 +360,7 @@ intelInitExtensions(struct gl_context *ctx) if (brw->gen >= 9) { ctx->Extensions.KHR_texture_compression_astc_ldr = true; ctx->Extensions.KHR_texture_compression_astc_hdr = true; + ctx->Extensions.ARB_shader_stencil_export = true; } if (ctx->API == API_OPENGL_CORE) diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c index 5a6b0dd1ec5..3a4a53a07e6 100644 --- a/src/mesa/drivers/dri/i965/intel_fbo.c +++ b/src/mesa/drivers/dri/i965/intel_fbo.c @@ -343,19 +343,15 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx, if (image->planar_format && image->planar_format->nplanes > 1) { _mesa_error(ctx, GL_INVALID_OPERATION, "glEGLImageTargetRenderbufferStorage(planar buffers are not " - "supported as render targets."); + "supported as render targets.)"); return; } /* __DRIimage is opaque to the core so it has to be checked here */ - switch (image->format) { - case MESA_FORMAT_R8G8B8A8_UNORM: + if (!brw->format_supported_as_render_target[image->format]) { _mesa_error(ctx, GL_INVALID_OPERATION, - "glEGLImageTargetRenderbufferStorage(unsupported image format"); + "glEGLImageTargetRenderbufferStorage(unsupported image format)"); return; - break; - default: - break; } irb = intel_renderbuffer(rb); diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 590c45d93ea..fb95fb629ad 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -1357,7 +1357,16 @@ set_max_gl_versions(struct intel_screen *screen) } } -static int +/** + * Return the revision (generally the revid field of the PCI header) of the + * graphics device. 
+ * + * XXX: This function is useful to keep around even if it is not currently in + * use. It is necessary for new platforms and revision specific workarounds or + * features. Please don't remove it so that we know it at least continues to + * build. + */ +static __attribute__((__unused__)) int brw_get_revision(int fd) { struct drm_i915_getparam gp; @@ -1416,8 +1425,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) return false; intelScreen->deviceID = drm_intel_bufmgr_gem_get_devid(intelScreen->bufmgr); - intelScreen->devinfo = brw_get_device_info(intelScreen->deviceID, - brw_get_revision(psp->fd)); + intelScreen->devinfo = brw_get_device_info(intelScreen->deviceID); if (!intelScreen->devinfo) return false; diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp index 5f80f90a91d..62d39f70ec4 100644 --- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp @@ -84,7 +84,7 @@ instruction(bblock_t *block, int num) static bool cmod_propagation(fs_visitor *v) { - const bool print = false; + const bool print = getenv("TEST_DEBUG"); if (print) { fprintf(stderr, "= Before =\n"); diff --git a/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp new file mode 100644 index 00000000000..9aa2fcc7907 --- /dev/null +++ b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp @@ -0,0 +1,822 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following 
conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Based on test_fs_cmod_propagation.cpp + */ + +#include <gtest/gtest.h> +#include "brw_vec4.h" +#include "brw_vec4_builder.h" +#include "brw_cfg.h" +#include "program/program.h" + +using namespace brw; + +class cmod_propagation_test : public ::testing::Test { + virtual void SetUp(); + +public: + struct brw_compiler *compiler; + struct brw_device_info *devinfo; + struct gl_context *ctx; + struct gl_shader_program *shader_prog; + struct brw_vertex_program *vp; + vec4_visitor *v; +}; + +class cmod_propagation_vec4_visitor : public vec4_visitor +{ +public: + cmod_propagation_vec4_visitor(struct brw_compiler *compiler, + nir_shader *shader) + : vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL, + false, -1) {} + +protected: + /* Dummy implementation for pure virtual methods */ + virtual dst_reg *make_reg_for_system_value(int location, + const glsl_type *type) + { + unreachable("Not reached"); + } + + virtual void setup_payload() + { + unreachable("Not reached"); + } + + virtual void emit_prolog() + { + unreachable("Not reached"); + } + + virtual void emit_program_code() + { + unreachable("Not reached"); + } + + virtual void emit_thread_end() + { + unreachable("Not reached"); + } + + virtual void emit_urb_write_header(int mrf) + { + unreachable("Not reached"); + } + + virtual vec4_instruction 
*emit_urb_write_opcode(bool complete) + { + unreachable("Not reached"); + } +}; + + +void cmod_propagation_test::SetUp() +{ + ctx = (struct gl_context *)calloc(1, sizeof(*ctx)); + compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler)); + devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo)); + compiler->devinfo = devinfo; + + vp = ralloc(NULL, struct brw_vertex_program); + + nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL); + + v = new cmod_propagation_vec4_visitor(compiler, shader); + + _mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0); + + devinfo->gen = 4; +} + +static vec4_instruction * +instruction(bblock_t *block, int num) +{ + vec4_instruction *inst = (vec4_instruction *)block->start(); + for (int i = 0; i < num; i++) { + inst = (vec4_instruction *)inst->next; + } + return inst; +} + +static bool +cmod_propagation(vec4_visitor *v) +{ + const bool print = getenv("TEST_DEBUG"); + + if (print) { + fprintf(stderr, "= Before =\n"); + v->dump_instructions(); + } + + bool ret = v->opt_cmod_propagation(); + + if (print) { + fprintf(stderr, "\n= After =\n"); + v->dump_instructions(); + } + + return ret; +} + +TEST_F(cmod_propagation_test, basic) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::float_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg src1 = src_reg(v, glsl_type::float_type); + src_reg zero(0.0f); + dst_reg dest_null = bld.null_reg_f(); + dest_null.writemask = WRITEMASK_X; + + bld.ADD(dest, src0, src1); + bld.CMP(dest_null, src_reg(dest), zero, BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: add dest.x src0.xxxx src1.xxxx + * 1: cmp.ge.f0 null.x dest.xxxx 0.0f + * + * = After = + * 0: add.ge.f0 dest.x src0.xxxx src1.xxxx + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + 
ASSERT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, basic_different_dst_writemask) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::float_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg src1 = src_reg(v, glsl_type::float_type); + src_reg zero(0.0f); + dst_reg dest_null = bld.null_reg_f(); + + bld.ADD(dest, src0, src1); + bld.CMP(dest_null, src_reg(dest), zero, BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: add dest.x src0 src1 + * 1: cmp.ge.f0 null.xyzw dest 0.0f + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod); +} + +TEST_F(cmod_propagation_test, andz_one) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::int_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg zero(0.0f); + src_reg one(1); + + bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L); + set_condmod(BRW_CONDITIONAL_Z, + bld.AND(bld.null_reg_d(), src_reg(dest), one)); + + /* = Before = + * 0: cmp.l.f0 dest:F src0:F 0F + * 1: and.z.f0 null:D dest:D 1D + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, 
block0->end_ip); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_AND, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_EQ, instruction(block0, 1)->conditional_mod); +} + +TEST_F(cmod_propagation_test, non_cmod_instruction) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::uint_type); + src_reg src0 = src_reg(v, glsl_type::uint_type); + src_reg zero(0u); + bld.FBL(dest, src0); + bld.CMP(bld.null_reg_ud(), src_reg(dest), zero, BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: fbl dest src0 + * 1: cmp.ge.f0 null dest 0u + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_FBL, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod); +} + +TEST_F(cmod_propagation_test, intervening_flag_write) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::float_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg src1 = src_reg(v, glsl_type::float_type); + src_reg src2 = src_reg(v, glsl_type::float_type); + src_reg zero(0.0f); + bld.ADD(dest, src0, src1); + bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE); + bld.CMP(bld.null_reg_f(), src_reg(dest), zero, BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: add dest src0 src1 + * 1: cmp.ge.f0 null src2 0.0f + * 2: cmp.ge.f0 null dest 0.0f + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + 
ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(2, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod); +} + +TEST_F(cmod_propagation_test, intervening_flag_read) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest0 = dst_reg(v, glsl_type::float_type); + dst_reg dest1 = dst_reg(v, glsl_type::float_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg src1 = src_reg(v, glsl_type::float_type); + src_reg src2 = src_reg(v, glsl_type::float_type); + src_reg zero(0.0f); + bld.ADD(dest0, src0, src1); + set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)); + bld.CMP(bld.null_reg_f(), src_reg(dest0), zero, BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: add dest0 src0 src1 + * 1: (+f0) sel dest1 src2 0.0f + * 2: cmp.ge.f0 null dest0 0.0f + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(2, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod); +} + +TEST_F(cmod_propagation_test, intervening_dest_write) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::vec4_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg src1 = src_reg(v, glsl_type::float_type); + src_reg src2 = src_reg(v, 
glsl_type::vec2_type); + src_reg zero(0.0f); + bld.ADD(offset(dest, 2), src0, src1); + bld.emit(SHADER_OPCODE_TEX, dest, src2) + ->regs_written = 4; + bld.CMP(bld.null_reg_f(), offset(src_reg(dest), 2), zero, BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: add dest+2 src0 src1 + * 1: tex rlen 4 dest+0 src2 + * 2: cmp.ge.f0 null dest+2 0.0f + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(2, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(SHADER_OPCODE_TEX, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 1)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod); +} + +TEST_F(cmod_propagation_test, intervening_flag_read_same_value) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest0 = dst_reg(v, glsl_type::float_type); + dst_reg dest1 = dst_reg(v, glsl_type::float_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg src1 = src_reg(v, glsl_type::float_type); + src_reg src2 = src_reg(v, glsl_type::float_type); + src_reg zero(0.0f); + dst_reg dest_null = bld.null_reg_f(); + dest_null.writemask = WRITEMASK_X; + + set_condmod(BRW_CONDITIONAL_GE, bld.ADD(dest0, src0, src1)); + set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero)); + bld.CMP(dest_null, src_reg(dest0), zero, BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: add.ge.f0 dest0 src0 src1 + * 1: (+f0) sel dest1 src2 0.0f + * 2: cmp.ge.f0 null.x dest0 0.0f + * + * = After = + * 0: add.ge.f0 dest0 src0 src1 + * 1: (+f0) sel dest1 src2 0.0f + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + 
EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate); +} + +TEST_F(cmod_propagation_test, negate) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::float_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg src1 = src_reg(v, glsl_type::float_type); + src_reg zero(0.0f); + bld.ADD(dest, src0, src1); + src_reg tmp_src = src_reg(dest); + tmp_src.negate = true; + dst_reg dest_null = bld.null_reg_f(); + dest_null.writemask = WRITEMASK_X; + bld.CMP(dest_null, tmp_src, zero, BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: add dest src0 src1 + * 1: cmp.ge.f0 null.x -dest 0.0f + * + * = After = + * 0: add.le.f0 dest src0 src1 + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_LE, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, movnz) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::float_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg src1 = src_reg(v, glsl_type::float_type); + dst_reg dest_null = bld.null_reg_f(); + dest_null.writemask = WRITEMASK_X; + + bld.CMP(dest, src0, src1, BRW_CONDITIONAL_L); + set_condmod(BRW_CONDITIONAL_NZ, + bld.MOV(dest_null, src_reg(dest))); + + /* = Before = + * + * 0: cmp.l.f0 dest:F src0:F src1:F + * 1: mov.nz.f0 null.x dest:F + * + * = 
After = + * 0: cmp.l.f0 dest src0:F src1:F + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, different_types_cmod_with_zero) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::int_type); + src_reg src0 = src_reg(v, glsl_type::int_type); + src_reg src1 = src_reg(v, glsl_type::int_type); + src_reg zero(0.0f); + bld.ADD(dest, src0, src1); + bld.CMP(bld.null_reg_f(), retype(src_reg(dest), BRW_REGISTER_TYPE_F), zero, + BRW_CONDITIONAL_GE); + + /* = Before = + * + * 0: add dest:D src0:D src1:D + * 1: cmp.ge.f0 null:F dest:F 0.0f + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod); +} + +TEST_F(cmod_propagation_test, andnz_non_one) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::int_type); + src_reg src0 = src_reg(v, glsl_type::float_type); + src_reg zero(0.0f); + src_reg nonone(38); + + bld.CMP(retype(dest, BRW_REGISTER_TYPE_F), src0, zero, BRW_CONDITIONAL_L); + set_condmod(BRW_CONDITIONAL_NZ, + bld.AND(bld.null_reg_d(), src_reg(dest), nonone)); + + /* = Before = + * 0: cmp.l.f0 dest:F src0:F 0F + * 1: and.nz.f0 null:D dest:D 38D + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 
= v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_AND, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod); +} + +/* Note that basic is using glsl_type::float_type, while this one is using + * glsl_type::vec4_type */ +TEST_F(cmod_propagation_test, basic_vec4) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::vec4_type); + src_reg src0 = src_reg(v, glsl_type::vec4_type); + src_reg src1 = src_reg(v, glsl_type::vec4_type); + src_reg zero(0.0f); + + bld.MUL(dest, src0, src1); + bld.CMP(bld.null_reg_f(), src_reg(dest), zero, BRW_CONDITIONAL_NZ); + + /* = Before = + * 0: mul dest.xyzw src0.xyzw src1.xyzw + * 1: cmp.nz.f0.0 null.xyzw dest.xyzw 0.0f + * + * = After = + * 0: mul.nz.f0.0 dest.xyzw src0.xyzw src1.xyzw + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, basic_vec4_different_dst_writemask) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::vec4_type); + dest.writemask = WRITEMASK_X; + src_reg src0 = src_reg(v, glsl_type::vec4_type); + src_reg src1 = src_reg(v, glsl_type::vec4_type); + src_reg zero(0.0f); + dst_reg dest_null = bld.null_reg_f(); + + bld.MUL(dest, src0, src1); + bld.CMP(dest_null, src_reg(dest), zero, BRW_CONDITIONAL_NZ); + + /* = Before = + * 0: mul dest.x 
src0 src1 + * 1: cmp.nz.f0.0 null dest 0.0f + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod); +} + +TEST_F(cmod_propagation_test, mad_one_component_vec4) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::vec4_type); + dest.writemask = WRITEMASK_X; + src_reg src0 = src_reg(v, glsl_type::vec4_type); + src_reg src1 = src_reg(v, glsl_type::vec4_type); + src_reg src2 = src_reg(v, glsl_type::vec4_type); + src0.swizzle = src1.swizzle = src2.swizzle = BRW_SWIZZLE_XXXX; + src2.negate = true; + src_reg zero(0.0f); + src_reg tmp(dest); + tmp.swizzle = BRW_SWIZZLE_XXXX; + dst_reg dest_null = bld.null_reg_f(); + dest_null.writemask = WRITEMASK_X; + + bld.MAD(dest, src0, src1, src2); + bld.CMP(dest_null, tmp, zero, BRW_CONDITIONAL_L); + + /* = Before = + * + * 0: mad dest.x:F src0.xxxx:F src1.xxxx:F -src2.xxxx:F + * 1: cmp.l.f0.0 null.x:F dest.xxxx:F 0.0f + * + * = After = + * 0: mad.l.f0 dest.x:F src0.xxxx:F src1.xxxx:F -src2.xxxx:F + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_MAD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, mad_more_one_component_vec4) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = 
dst_reg(v, glsl_type::vec4_type); + dest.writemask = WRITEMASK_XW; + src_reg src0 = src_reg(v, glsl_type::vec4_type); + src_reg src1 = src_reg(v, glsl_type::vec4_type); + src_reg src2 = src_reg(v, glsl_type::vec4_type); + src0.swizzle = src1.swizzle = src2.swizzle = BRW_SWIZZLE_XXXX; + src2.negate = true; + src_reg zero(0.0f); + src_reg tmp(dest); + tmp.swizzle = BRW_SWIZZLE_XXXX; + dst_reg dest_null = bld.null_reg_f(); + + bld.MAD(dest, src0, src1, src2); + bld.CMP(dest_null, tmp, zero, BRW_CONDITIONAL_L); + + /* = Before = + * + * 0: mad dest.xw:F src0.xxxx:F src1.xxxx:F -src2.xxxx:F + * 1: cmp.l.f0.0 null:F dest.xxxx:F 0.0f + * + * = After = + * (No changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_MAD, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 1)->conditional_mod); +} + +TEST_F(cmod_propagation_test, cmp_mov_vec4) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::ivec4_type); + dest.writemask = WRITEMASK_X; + src_reg src0 = src_reg(v, glsl_type::ivec4_type); + src0.swizzle = BRW_SWIZZLE_XXXX; + src0.file = UNIFORM; + src_reg nonone = retype(src_reg(16), BRW_REGISTER_TYPE_D); + src_reg mov_src = src_reg(dest); + mov_src.swizzle = BRW_SWIZZLE_XXXX; + dst_reg dest_null = bld.null_reg_d(); + dest_null.writemask = WRITEMASK_X; + + bld.CMP(dest, src0, nonone, BRW_CONDITIONAL_GE); + set_condmod(BRW_CONDITIONAL_NZ, + bld.MOV(dest_null, mov_src)); + + /* = Before = + * + * 0: cmp.ge.f0 dest.x:D u.xxxx:D 16D + * 1: mov.nz.f0 null.x:D dest.xxxx:D + * + * = After = + * 0: cmp.ge.f0 dest.x:D u.xxxx:D 16D + */ + + 
v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(0, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod); +} + +TEST_F(cmod_propagation_test, mul_cmp_different_channels_vec4) +{ + const vec4_builder bld = vec4_builder(v).at_end(); + dst_reg dest = dst_reg(v, glsl_type::vec4_type); + src_reg src0 = src_reg(v, glsl_type::vec4_type); + src_reg src1 = src_reg(v, glsl_type::vec4_type); + src_reg zero(0.0f); + src_reg cmp_src = src_reg(dest); + cmp_src.swizzle = BRW_SWIZZLE4(0,1,3,2); + + bld.MUL(dest, src0, src1); + bld.CMP(bld.null_reg_f(), cmp_src, zero, BRW_CONDITIONAL_NZ); + + /* = Before = + * 0: mul dest src0 src1 + * 1: cmp.nz.f0.0 null dest.xywz 0.0f + * + * = After = + * (No changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(cmod_propagation(v)); + + ASSERT_EQ(0, block0->start_ip); + ASSERT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NONE, instruction(block0, 0)->conditional_mod); + EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode); + EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod); +} diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.c b/src/mesa/drivers/dri/nouveau/nouveau_context.c index a049d9b8de7..cb854b81933 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_context.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.c @@ -188,7 +188,7 @@ nouveau_context_init(struct gl_context *ctx, gl_api api, ctx->Extensions.EXT_blend_minmax = true; ctx->Extensions.EXT_texture_filter_anisotropic = true; ctx->Extensions.NV_texture_env_combine4 = true; - ctx->Const.MaxColorAttachments = 1; + 
ctx->Const.MaxDrawBuffers = ctx->Const.MaxColorAttachments = 1; /* This effectively disables 3D textures */ ctx->Const.Max3DTextureLevels = 1; diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c index a46c1944e96..a49018953ae 100644 --- a/src/mesa/main/api_validate.c +++ b/src/mesa/main/api_validate.c @@ -698,16 +698,39 @@ valid_draw_indirect(struct gl_context *ctx, { const GLsizeiptr end = (GLsizeiptr)indirect + size; + /* OpenGL ES 3.1 spec. section 10.5: + * + * "DrawArraysIndirect requires that all data sourced for the + * command, including the DrawArraysIndirectCommand + * structure, be in buffer objects, and may not be called when + * the default vertex array object is bound." + */ + if (ctx->Array.VAO == ctx->Array.DefaultVAO) { + _mesa_error(ctx, GL_INVALID_OPERATION, "(no VAO bound)"); + return GL_FALSE; + } + if (!_mesa_valid_prim_mode(ctx, mode, name)) return GL_FALSE; + /* OpenGL ES 3.1 specification, section 10.5: + * + * "An INVALID_OPERATION error is generated if + * transform feedback is active and not paused." + */ + if (_mesa_is_gles31(ctx) && _mesa_is_xfb_active_and_unpaused(ctx)) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "%s(TransformFeedback is active and not paused)", name); + } - /* From the ARB_draw_indirect specification: - * "An INVALID_OPERATION error is generated [...] if <indirect> is no - * word aligned." + /* From OpenGL version 4.4. section 10.5 + * and OpenGL ES 3.1, section 10.6: + * + * "An INVALID_VALUE error is generated if indirect is not a + * multiple of the size, in basic machine units, of uint." 
*/ if ((GLsizeiptr)indirect & (sizeof(GLuint) - 1)) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, GL_INVALID_VALUE, "%s(indirect is not aligned)", name); return GL_FALSE; } @@ -895,7 +918,12 @@ check_valid_to_compute(struct gl_context *ctx, const char *function) return false; } - prog = ctx->Shader.CurrentProgram[MESA_SHADER_COMPUTE]; + /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders: + * + * "An INVALID_OPERATION error is generated if there is no active program + * for the compute shader stage." + */ + prog = ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; if (prog == NULL || prog->_LinkedShaders[MESA_SHADER_COMPUTE] == NULL) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(no active compute shader)", @@ -917,6 +945,24 @@ _mesa_validate_DispatchCompute(struct gl_context *ctx, return GL_FALSE; for (i = 0; i < 3; i++) { + /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders: + * + * "An INVALID_VALUE error is generated if any of num_groups_x, + * num_groups_y and num_groups_z are greater than or equal to the + * maximum work group count for the corresponding dimension." + * + * However, the "or equal to" portion appears to be a specification + * bug. In all other areas, the specification appears to indicate that + * the number of workgroups can match the MAX_COMPUTE_WORK_GROUP_COUNT + * value. For example, under DispatchComputeIndirect: + * + * "If any of num_groups_x, num_groups_y or num_groups_z is greater than + * the value of MAX_COMPUTE_WORK_GROUP_COUNT for the corresponding + * dimension then the results are undefined." + * + * Additionally, the OpenGLES 3.1 specification does not contain "or + * equal to" as an error condition. 
+ */ if (num_groups[i] > ctx->Const.MaxComputeWorkGroupCount[i]) { _mesa_error(ctx, GL_INVALID_VALUE, "glDispatchCompute(num_groups_%c)", 'x' + i); @@ -937,24 +983,29 @@ valid_dispatch_indirect(struct gl_context *ctx, if (!check_valid_to_compute(ctx, name)) return GL_FALSE; - /* From the ARB_compute_shader specification: + /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders: * - * "An INVALID_OPERATION error is generated [...] if <indirect> is less - * than zero or not a multiple of the size, in basic machine units, of - * uint." + * "An INVALID_VALUE error is generated if indirect is negative or is not a + * multiple of four." */ if ((GLintptr)indirect & (sizeof(GLuint) - 1)) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, GL_INVALID_VALUE, "%s(indirect is not aligned)", name); return GL_FALSE; } if ((GLintptr)indirect < 0) { - _mesa_error(ctx, GL_INVALID_OPERATION, + _mesa_error(ctx, GL_INVALID_VALUE, "%s(indirect is less than zero)", name); return GL_FALSE; } + /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders: + * + * "An INVALID_OPERATION error is generated if no buffer is bound to the + * DRAW_INDIRECT_BUFFER binding, or if the command would source data + * beyond the end of the buffer object." 
+ */ if (!_mesa_is_bufferobj(ctx->DispatchIndirectBuffer)) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s: no buffer bound to DISPATCH_INDIRECT_BUFFER", name); @@ -967,11 +1018,6 @@ valid_dispatch_indirect(struct gl_context *ctx, return GL_FALSE; } - /* From the ARB_compute_shader specification: - * - * "An INVALID_OPERATION error is generated if this command sources data - * beyond the end of the buffer object [...]" - */ if (ctx->DispatchIndirectBuffer->Size < end) { _mesa_error(ctx, GL_INVALID_OPERATION, "%s(DISPATCH_INDIRECT_BUFFER too small)", name); diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index b2c88c37366..d964f030ecb 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -152,6 +152,7 @@ static const struct extension extension_table[] = { { "GL_ARB_separate_shader_objects", o(dummy_true), GL, 2010 }, { "GL_ARB_shader_atomic_counters", o(ARB_shader_atomic_counters), GL, 2011 }, { "GL_ARB_shader_bit_encoding", o(ARB_shader_bit_encoding), GL, 2010 }, + { "GL_ARB_shader_clock", o(ARB_shader_clock), GL, 2015 }, { "GL_ARB_shader_image_load_store", o(ARB_shader_image_load_store), GL, 2011 }, { "GL_ARB_shader_image_size", o(ARB_shader_image_size), GL, 2012 }, { "GL_ARB_shader_objects", o(dummy_true), GL, 2002 }, @@ -229,6 +230,7 @@ static const struct extension extension_table[] = { { "GL_EXT_depth_bounds_test", o(EXT_depth_bounds_test), GL, 2002 }, { "GL_EXT_draw_buffers", o(dummy_true), ES2, 2012 }, { "GL_EXT_draw_buffers2", o(EXT_draw_buffers2), GL, 2006 }, + { "GL_EXT_draw_elements_base_vertex", o(ARB_draw_elements_base_vertex), ES2, 2014 }, { "GL_EXT_draw_instanced", o(ARB_draw_instanced), GL, 2006 }, { "GL_EXT_draw_range_elements", o(dummy_true), GLL, 1997 }, { "GL_EXT_fog_coord", o(dummy_true), GLL, 1999 }, @@ -305,6 +307,7 @@ static const struct extension extension_table[] = { { "GL_OES_depth32", o(dummy_false), DISABLE, 2005 }, { "GL_OES_depth_texture", o(ARB_depth_texture), ES2, 2006 }, { 
"GL_OES_depth_texture_cube_map", o(OES_depth_texture_cube_map), ES2, 2012 }, + { "GL_OES_draw_elements_base_vertex", o(ARB_draw_elements_base_vertex), ES2, 2014 }, { "GL_OES_draw_texture", o(OES_draw_texture), ES1, 2004 }, { "GL_OES_EGL_sync", o(dummy_true), ES1 | ES2, 2010 }, /* FIXME: Mesa expects GL_OES_EGL_image to be available in OpenGL contexts. */ diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index c295615b475..fbc7b8f8602 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -460,6 +460,7 @@ descriptor=[ [ "MAX_COMPUTE_UNIFORM_COMPONENTS", "CONST(MAX_COMPUTE_UNIFORM_COMPONENTS), extra_ARB_compute_shader_es31" ], [ "MAX_COMPUTE_IMAGE_UNIFORMS", "CONST(MAX_COMPUTE_IMAGE_UNIFORMS), extra_ARB_compute_shader_es31" ], [ "DISPATCH_INDIRECT_BUFFER_BINDING", "LOC_CUSTOM, TYPE_INT, 0, extra_ARB_compute_shader_es31" ], + [ "MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS", "CONTEXT_INT(Const.Program[MESA_SHADER_COMPUTE].MaxCombinedUniformComponents), extra_ARB_compute_shader_es31" ], # GL_ARB_framebuffer_no_attachments / GLES 3.1 ["MAX_FRAMEBUFFER_WIDTH", "CONTEXT_INT(Const.MaxFramebufferWidth), extra_ARB_framebuffer_no_attachments"], diff --git a/src/mesa/main/lines.c b/src/mesa/main/lines.c index c020fb3eb9e..93b80af0dc4 100644 --- a/src/mesa/main/lines.c +++ b/src/mesa/main/lines.c @@ -45,6 +45,10 @@ _mesa_LineWidth( GLfloat width ) if (MESA_VERBOSE & VERBOSE_API) _mesa_debug(ctx, "glLineWidth %f\n", width); + /* If width is unchanged, there can't be an error */ + if (ctx->Line.Width == width) + return; + if (width <= 0.0F) { _mesa_error( ctx, GL_INVALID_VALUE, "glLineWidth" ); return; @@ -68,9 +72,6 @@ _mesa_LineWidth( GLfloat width ) return; } - if (ctx->Line.Width == width) - return; - FLUSH_VERTICES(ctx, _NEW_LINE); ctx->Line.Width = width; diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index ab4fa083672..02dd257d79d 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ 
-2292,6 +2292,7 @@ struct gl_shader struct exec_list *ir; struct exec_list *packed_varyings; + struct exec_list *fragdata_arrays; struct glsl_symbol_table *symbols; bool uses_builtin_functions; @@ -2389,6 +2390,9 @@ struct gl_shader */ GLuint NumImages; + struct gl_active_atomic_buffer **AtomicBuffers; + unsigned NumAtomicBuffers; + /** * Whether early fragment tests are enabled as defined by * ARB_shader_image_load_store. @@ -3680,6 +3684,7 @@ struct gl_extensions GLboolean ARB_seamless_cube_map; GLboolean ARB_shader_atomic_counters; GLboolean ARB_shader_bit_encoding; + GLboolean ARB_shader_clock; GLboolean ARB_shader_image_load_store; GLboolean ARB_shader_image_size; GLboolean ARB_shader_precision; @@ -4501,7 +4506,7 @@ static inline bool _mesa_active_fragment_shader_has_atomic_ops(const struct gl_context *ctx) { return ctx->Shader._CurrentFragmentProgram != NULL && - ctx->Shader._CurrentFragmentProgram->NumAtomicBuffers > 0; + ctx->Shader._CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]->NumAtomicBuffers > 0; } #ifdef __cplusplus diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c index 51ee10ff858..699a2ae47eb 100644 --- a/src/mesa/main/pipelineobj.c +++ b/src/mesa/main/pipelineobj.c @@ -230,6 +230,10 @@ _mesa_UseProgramStages(GLuint pipeline, GLbitfield stages, GLuint program) struct gl_shader_program *shProg = NULL; GLbitfield any_valid_stages; + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glUseProgramStages(%u, 0x%x, %u)\n", + pipeline, stages, program); + if (!pipe) { _mesa_error(ctx, GL_INVALID_OPERATION, "glUseProgramStages(pipeline)"); return; @@ -251,6 +255,8 @@ _mesa_UseProgramStages(GLuint pipeline, GLbitfield stages, GLuint program) if (_mesa_has_tessellation(ctx)) any_valid_stages |= GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT; + if (_mesa_has_compute_shaders(ctx)) + any_valid_stages |= GL_COMPUTE_SHADER_BIT; if (stages != GL_ALL_SHADER_BITS && (stages & ~any_valid_stages) != 0) { 
_mesa_error(ctx, GL_INVALID_VALUE, "glUseProgramStages(Stages)"); @@ -332,6 +338,9 @@ _mesa_UseProgramStages(GLuint pipeline, GLbitfield stages, GLuint program) if ((stages & GL_TESS_EVALUATION_SHADER_BIT) != 0) _mesa_use_shader_program(ctx, GL_TESS_EVALUATION_SHADER, shProg, pipe); + + if ((stages & GL_COMPUTE_SHADER_BIT) != 0) + _mesa_use_shader_program(ctx, GL_COMPUTE_SHADER, shProg, pipe); } /** @@ -345,6 +354,9 @@ _mesa_ActiveShaderProgram(GLuint pipeline, GLuint program) struct gl_shader_program *shProg = NULL; struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline); + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glActiveShaderProgram(%u, %u)\n", pipeline, program); + if (program != 0) { shProg = _mesa_lookup_shader_program_err(ctx, program, "glActiveShaderProgram(program)"); @@ -380,6 +392,9 @@ _mesa_BindProgramPipeline(GLuint pipeline) GET_CURRENT_CONTEXT(ctx); struct gl_pipeline_object *newObj = NULL; + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glBindProgramPipeline(%u)\n", pipeline); + /* Rebinding the same pipeline object: no change. 
*/ if (ctx->_Shader->Name == pipeline) @@ -467,6 +482,9 @@ _mesa_DeleteProgramPipelines(GLsizei n, const GLuint *pipelines) GET_CURRENT_CONTEXT(ctx); GLsizei i; + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glDeleteProgramPipelines(%d, %p)\n", n, pipelines); + if (n < 0) { _mesa_error(ctx, GL_INVALID_VALUE, "glDeleteProgramPipelines(n<0)"); return; @@ -551,6 +569,9 @@ _mesa_GenProgramPipelines(GLsizei n, GLuint *pipelines) { GET_CURRENT_CONTEXT(ctx); + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glGenProgramPipelines(%d, %p)\n", n, pipelines); + create_program_pipelines(ctx, n, pipelines, false); } @@ -559,6 +580,9 @@ _mesa_CreateProgramPipelines(GLsizei n, GLuint *pipelines) { GET_CURRENT_CONTEXT(ctx); + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glCreateProgramPipelines(%d, %p)\n", n, pipelines); + create_program_pipelines(ctx, n, pipelines, true); } @@ -574,6 +598,9 @@ _mesa_IsProgramPipeline(GLuint pipeline) { GET_CURRENT_CONTEXT(ctx); + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glIsProgramPipeline(%u)\n", pipeline); + struct gl_pipeline_object *obj = _mesa_lookup_pipeline_object(ctx, pipeline); if (obj == NULL) return GL_FALSE; @@ -590,6 +617,10 @@ _mesa_GetProgramPipelineiv(GLuint pipeline, GLenum pname, GLint *params) GET_CURRENT_CONTEXT(ctx); struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline); + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glGetProgramPipelineiv(%u, %d, %p)\n", + pipeline, pname, params); + /* Are geometry shaders available in this context? */ const bool has_gs = _mesa_has_geometry_shaders(ctx); @@ -643,6 +674,12 @@ _mesa_GetProgramPipelineiv(GLuint pipeline, GLenum pname, GLint *params) *params = pipe->CurrentProgram[MESA_SHADER_FRAGMENT] ? pipe->CurrentProgram[MESA_SHADER_FRAGMENT]->Name : 0; return; + case GL_COMPUTE_SHADER: + if (!_mesa_has_compute_shaders(ctx)) + break; + *params = pipe->CurrentProgram[MESA_SHADER_COMPUTE] + ? 
pipe->CurrentProgram[MESA_SHADER_COMPUTE]->Name : 0; + return; default: break; } @@ -857,6 +894,9 @@ _mesa_ValidateProgramPipeline(GLuint pipeline) { GET_CURRENT_CONTEXT(ctx); + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glValidateProgramPipeline(%u)\n", pipeline); + struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline); if (!pipe) { @@ -875,6 +915,10 @@ _mesa_GetProgramPipelineInfoLog(GLuint pipeline, GLsizei bufSize, { GET_CURRENT_CONTEXT(ctx); + if (MESA_VERBOSE & VERBOSE_API) + _mesa_debug(ctx, "glGetProgramPipelineInfoLog(%u, %d, %p, %p)\n", + pipeline, bufSize, length, infoLog); + struct gl_pipeline_object *pipe = _mesa_lookup_pipeline_object(ctx, pipeline); if (!pipe) { diff --git a/src/mesa/main/program_resource.c b/src/mesa/main/program_resource.c index eb71fdde703..b7e25fe3840 100644 --- a/src/mesa/main/program_resource.c +++ b/src/mesa/main/program_resource.c @@ -119,7 +119,6 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface, case GL_MAX_NUM_ACTIVE_VARIABLES: switch (programInterface) { case GL_UNIFORM_BLOCK: - case GL_SHADER_STORAGE_BLOCK: for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) { if (shProg->ProgramResourceList[i].Type == programInterface) { struct gl_uniform_block *block = @@ -129,6 +128,26 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface, } } break; + case GL_SHADER_STORAGE_BLOCK: + for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) { + if (shProg->ProgramResourceList[i].Type == programInterface) { + struct gl_uniform_block *block = + (struct gl_uniform_block *) + shProg->ProgramResourceList[i].Data; + GLint block_params = 0; + for (unsigned j = 0; j < block->NumUniforms; j++) { + const char *iname = block->Uniforms[j].IndexName; + struct gl_program_resource *uni = + _mesa_program_resource_find_name(shProg, GL_BUFFER_VARIABLE, + iname, NULL); + if (!uni) + continue; + block_params++; + } + *params = MAX2(*params, block_params); + } + } 
+ break; case GL_ATOMIC_COUNTER_BUFFER: for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) { if (shProg->ProgramResourceList[i].Type == programInterface) { diff --git a/src/mesa/main/rastpos.c b/src/mesa/main/rastpos.c index 54b2125a80f..b468219e688 100644 --- a/src/mesa/main/rastpos.c +++ b/src/mesa/main/rastpos.c @@ -36,6 +36,447 @@ #include "rastpos.h" #include "state.h" #include "main/dispatch.h" +#include "main/viewport.h" +#include "util/simple_list.h" + + + +/** + * Clip a point against the view volume. + * + * \param v vertex vector describing the point to clip. + * + * \return zero if outside view volume, or one if inside. + */ +static GLuint +viewclip_point_xy( const GLfloat v[] ) +{ + if ( v[0] > v[3] || v[0] < -v[3] + || v[1] > v[3] || v[1] < -v[3] ) { + return 0; + } + else { + return 1; + } +} + + +/** + * Clip a point against the far/near Z clipping planes. + * + * \param v vertex vector describing the point to clip. + * + * \return zero if outside view volume, or one if inside. + */ +static GLuint +viewclip_point_z( const GLfloat v[] ) +{ + if (v[2] > v[3] || v[2] < -v[3] ) { + return 0; + } + else { + return 1; + } +} + + +/** + * Clip a point against the user clipping planes. + * + * \param ctx GL context. + * \param v vertex vector describing the point to clip. + * + * \return zero if the point was clipped, or one otherwise. + */ +static GLuint +userclip_point( struct gl_context *ctx, const GLfloat v[] ) +{ + GLuint p; + + for (p = 0; p < ctx->Const.MaxClipPlanes; p++) { + if (ctx->Transform.ClipPlanesEnabled & (1 << p)) { + GLfloat dot = v[0] * ctx->Transform._ClipUserPlane[p][0] + + v[1] * ctx->Transform._ClipUserPlane[p][1] + + v[2] * ctx->Transform._ClipUserPlane[p][2] + + v[3] * ctx->Transform._ClipUserPlane[p][3]; + if (dot < 0.0F) { + return 0; + } + } + } + + return 1; +} + + +/** + * Compute lighting for the raster position. RGB modes computed. 
+ * \param ctx the context + * \param vertex vertex location + * \param normal normal vector + * \param Rcolor returned color + * \param Rspec returned specular color (if separate specular enabled) + */ +static void +shade_rastpos(struct gl_context *ctx, + const GLfloat vertex[4], + const GLfloat normal[3], + GLfloat Rcolor[4], + GLfloat Rspec[4]) +{ + /*const*/ GLfloat (*base)[3] = ctx->Light._BaseColor; + const struct gl_light *light; + GLfloat diffuseColor[4], specularColor[4]; /* for RGB mode only */ + + COPY_3V(diffuseColor, base[0]); + diffuseColor[3] = CLAMP( + ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_DIFFUSE][3], 0.0F, 1.0F ); + ASSIGN_4V(specularColor, 0.0, 0.0, 0.0, 1.0); + + foreach (light, &ctx->Light.EnabledList) { + GLfloat attenuation = 1.0; + GLfloat VP[3]; /* vector from vertex to light pos */ + GLfloat n_dot_VP; + GLfloat diffuseContrib[3], specularContrib[3]; + + if (!(light->_Flags & LIGHT_POSITIONAL)) { + /* light at infinity */ + COPY_3V(VP, light->_VP_inf_norm); + attenuation = light->_VP_inf_spot_attenuation; + } + else { + /* local/positional light */ + GLfloat d; + + /* VP = vector from vertex pos to light[i].pos */ + SUB_3V(VP, light->_Position, vertex); + /* d = length(VP) */ + d = (GLfloat) LEN_3FV( VP ); + if (d > 1.0e-6F) { + /* normalize VP */ + GLfloat invd = 1.0F / d; + SELF_SCALE_SCALAR_3V(VP, invd); + } + + /* atti */ + attenuation = 1.0F / (light->ConstantAttenuation + d * + (light->LinearAttenuation + d * + light->QuadraticAttenuation)); + + if (light->_Flags & LIGHT_SPOT) { + GLfloat PV_dot_dir = - DOT3(VP, light->_NormSpotDirection); + + if (PV_dot_dir<light->_CosCutoff) { + continue; + } + else { + GLfloat spot = powf(PV_dot_dir, light->SpotExponent); + attenuation *= spot; + } + } + } + + if (attenuation < 1e-3F) + continue; + + n_dot_VP = DOT3( normal, VP ); + + if (n_dot_VP < 0.0F) { + ACC_SCALE_SCALAR_3V(diffuseColor, attenuation, light->_MatAmbient[0]); + continue; + } + + /* Ambient + diffuse */ + 
COPY_3V(diffuseContrib, light->_MatAmbient[0]); + ACC_SCALE_SCALAR_3V(diffuseContrib, n_dot_VP, light->_MatDiffuse[0]); + + /* Specular */ + { + const GLfloat *h; + GLfloat n_dot_h; + + ASSIGN_3V(specularContrib, 0.0, 0.0, 0.0); + + if (ctx->Light.Model.LocalViewer) { + GLfloat v[3]; + COPY_3V(v, vertex); + NORMALIZE_3FV(v); + SUB_3V(VP, VP, v); + NORMALIZE_3FV(VP); + h = VP; + } + else if (light->_Flags & LIGHT_POSITIONAL) { + ACC_3V(VP, ctx->_EyeZDir); + NORMALIZE_3FV(VP); + h = VP; + } + else { + h = light->_h_inf_norm; + } + + n_dot_h = DOT3(normal, h); + + if (n_dot_h > 0.0F) { + GLfloat shine; + GLfloat spec_coef; + + shine = ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS][0]; + spec_coef = powf(n_dot_h, shine); + + if (spec_coef > 1.0e-10F) { + if (ctx->Light.Model.ColorControl==GL_SEPARATE_SPECULAR_COLOR) { + ACC_SCALE_SCALAR_3V( specularContrib, spec_coef, + light->_MatSpecular[0]); + } + else { + ACC_SCALE_SCALAR_3V( diffuseContrib, spec_coef, + light->_MatSpecular[0]); + } + } + } + } + + ACC_SCALE_SCALAR_3V( diffuseColor, attenuation, diffuseContrib ); + ACC_SCALE_SCALAR_3V( specularColor, attenuation, specularContrib ); + } + + Rcolor[0] = CLAMP(diffuseColor[0], 0.0F, 1.0F); + Rcolor[1] = CLAMP(diffuseColor[1], 0.0F, 1.0F); + Rcolor[2] = CLAMP(diffuseColor[2], 0.0F, 1.0F); + Rcolor[3] = CLAMP(diffuseColor[3], 0.0F, 1.0F); + Rspec[0] = CLAMP(specularColor[0], 0.0F, 1.0F); + Rspec[1] = CLAMP(specularColor[1], 0.0F, 1.0F); + Rspec[2] = CLAMP(specularColor[2], 0.0F, 1.0F); + Rspec[3] = CLAMP(specularColor[3], 0.0F, 1.0F); +} + + +/** + * Do texgen needed for glRasterPos. 
+ * \param ctx rendering context + * \param vObj object-space vertex coordinate + * \param vEye eye-space vertex coordinate + * \param normal vertex normal + * \param unit texture unit number + * \param texcoord incoming texcoord and resulting texcoord + */ +static void +compute_texgen(struct gl_context *ctx, const GLfloat vObj[4], const GLfloat vEye[4], + const GLfloat normal[3], GLuint unit, GLfloat texcoord[4]) +{ + const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + + /* always compute sphere map terms, just in case */ + GLfloat u[3], two_nu, rx, ry, rz, m, mInv; + COPY_3V(u, vEye); + NORMALIZE_3FV(u); + two_nu = 2.0F * DOT3(normal, u); + rx = u[0] - normal[0] * two_nu; + ry = u[1] - normal[1] * two_nu; + rz = u[2] - normal[2] * two_nu; + m = rx * rx + ry * ry + (rz + 1.0F) * (rz + 1.0F); + if (m > 0.0F) + mInv = 0.5F * (1.0f / sqrtf(m)); + else + mInv = 0.0F; + + if (texUnit->TexGenEnabled & S_BIT) { + switch (texUnit->GenS.Mode) { + case GL_OBJECT_LINEAR: + texcoord[0] = DOT4(vObj, texUnit->GenS.ObjectPlane); + break; + case GL_EYE_LINEAR: + texcoord[0] = DOT4(vEye, texUnit->GenS.EyePlane); + break; + case GL_SPHERE_MAP: + texcoord[0] = rx * mInv + 0.5F; + break; + case GL_REFLECTION_MAP: + texcoord[0] = rx; + break; + case GL_NORMAL_MAP: + texcoord[0] = normal[0]; + break; + default: + _mesa_problem(ctx, "Bad S texgen in compute_texgen()"); + return; + } + } + + if (texUnit->TexGenEnabled & T_BIT) { + switch (texUnit->GenT.Mode) { + case GL_OBJECT_LINEAR: + texcoord[1] = DOT4(vObj, texUnit->GenT.ObjectPlane); + break; + case GL_EYE_LINEAR: + texcoord[1] = DOT4(vEye, texUnit->GenT.EyePlane); + break; + case GL_SPHERE_MAP: + texcoord[1] = ry * mInv + 0.5F; + break; + case GL_REFLECTION_MAP: + texcoord[1] = ry; + break; + case GL_NORMAL_MAP: + texcoord[1] = normal[1]; + break; + default: + _mesa_problem(ctx, "Bad T texgen in compute_texgen()"); + return; + } + } + + if (texUnit->TexGenEnabled & R_BIT) { + switch (texUnit->GenR.Mode) { + case 
GL_OBJECT_LINEAR: + texcoord[2] = DOT4(vObj, texUnit->GenR.ObjectPlane); + break; + case GL_EYE_LINEAR: + texcoord[2] = DOT4(vEye, texUnit->GenR.EyePlane); + break; + case GL_REFLECTION_MAP: + texcoord[2] = rz; + break; + case GL_NORMAL_MAP: + texcoord[2] = normal[2]; + break; + default: + _mesa_problem(ctx, "Bad R texgen in compute_texgen()"); + return; + } + } + + if (texUnit->TexGenEnabled & Q_BIT) { + switch (texUnit->GenQ.Mode) { + case GL_OBJECT_LINEAR: + texcoord[3] = DOT4(vObj, texUnit->GenQ.ObjectPlane); + break; + case GL_EYE_LINEAR: + texcoord[3] = DOT4(vEye, texUnit->GenQ.EyePlane); + break; + default: + _mesa_problem(ctx, "Bad Q texgen in compute_texgen()"); + return; + } + } +} + + +/** + * glRasterPos transformation. Typically called via ctx->Driver.RasterPos(). + * + * \param vObj vertex position in object space + */ +void +_mesa_RasterPos(struct gl_context *ctx, const GLfloat vObj[4]) +{ + if (ctx->VertexProgram._Enabled) { + /* XXX implement this */ + _mesa_problem(ctx, "Vertex programs not implemented for glRasterPos"); + return; + } + else { + GLfloat eye[4], clip[4], ndc[3], d; + GLfloat *norm, eyenorm[3]; + GLfloat *objnorm = ctx->Current.Attrib[VERT_ATTRIB_NORMAL]; + float scale[3], translate[3]; + + /* apply modelview matrix: eye = MV * obj */ + TRANSFORM_POINT( eye, ctx->ModelviewMatrixStack.Top->m, vObj ); + /* apply projection matrix: clip = Proj * eye */ + TRANSFORM_POINT( clip, ctx->ProjectionMatrixStack.Top->m, eye ); + + /* clip to view volume. 
*/ + if (!ctx->Transform.DepthClamp) { + if (viewclip_point_z(clip) == 0) { + ctx->Current.RasterPosValid = GL_FALSE; + return; + } + } + if (!ctx->Transform.RasterPositionUnclipped) { + if (viewclip_point_xy(clip) == 0) { + ctx->Current.RasterPosValid = GL_FALSE; + return; + } + } + + /* clip to user clipping planes */ + if (ctx->Transform.ClipPlanesEnabled && !userclip_point(ctx, clip)) { + ctx->Current.RasterPosValid = GL_FALSE; + return; + } + + /* ndc = clip / W */ + d = (clip[3] == 0.0F) ? 1.0F : 1.0F / clip[3]; + ndc[0] = clip[0] * d; + ndc[1] = clip[1] * d; + ndc[2] = clip[2] * d; + /* wincoord = viewport_mapping(ndc) */ + _mesa_get_viewport_xform(ctx, 0, scale, translate); + ctx->Current.RasterPos[0] = ndc[0] * scale[0] + translate[0]; + ctx->Current.RasterPos[1] = ndc[1] * scale[1] + translate[1]; + ctx->Current.RasterPos[2] = ndc[2] * scale[2] + translate[2]; + ctx->Current.RasterPos[3] = clip[3]; + + if (ctx->Transform.DepthClamp) { + ctx->Current.RasterPos[3] = CLAMP(ctx->Current.RasterPos[3], + ctx->ViewportArray[0].Near, + ctx->ViewportArray[0].Far); + } + + /* compute raster distance */ + if (ctx->Fog.FogCoordinateSource == GL_FOG_COORDINATE_EXT) + ctx->Current.RasterDistance = ctx->Current.Attrib[VERT_ATTRIB_FOG][0]; + else + ctx->Current.RasterDistance = + sqrtf( eye[0]*eye[0] + eye[1]*eye[1] + eye[2]*eye[2] ); + + /* compute transformed normal vector (for lighting or texgen) */ + if (ctx->_NeedEyeCoords) { + const GLfloat *inv = ctx->ModelviewMatrixStack.Top->inv; + TRANSFORM_NORMAL( eyenorm, objnorm, inv ); + norm = eyenorm; + } + else { + norm = objnorm; + } + + /* update raster color */ + if (ctx->Light.Enabled) { + /* lighting */ + shade_rastpos( ctx, vObj, norm, + ctx->Current.RasterColor, + ctx->Current.RasterSecondaryColor ); + } + else { + /* use current color */ + COPY_4FV(ctx->Current.RasterColor, + ctx->Current.Attrib[VERT_ATTRIB_COLOR0]); + COPY_4FV(ctx->Current.RasterSecondaryColor, + ctx->Current.Attrib[VERT_ATTRIB_COLOR1]); + } + + 
/* texture coords */ + { + GLuint u; + for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) { + GLfloat tc[4]; + COPY_4V(tc, ctx->Current.Attrib[VERT_ATTRIB_TEX0 + u]); + if (ctx->Texture.Unit[u].TexGenEnabled) { + compute_texgen(ctx, vObj, eye, norm, u, tc); + } + TRANSFORM_POINT(ctx->Current.RasterTexCoords[u], + ctx->TextureMatrixStack[u].Top->m, tc); + } + } + + ctx->Current.RasterPosValid = GL_TRUE; + } + + if (ctx->RenderMode == GL_SELECT) { + _mesa_update_hitflag( ctx, ctx->Current.RasterPos[2] ); + } +} /** diff --git a/src/mesa/main/rastpos.h b/src/mesa/main/rastpos.h index dc28c68d41b..90b8f957b9f 100644 --- a/src/mesa/main/rastpos.h +++ b/src/mesa/main/rastpos.h @@ -41,6 +41,9 @@ struct gl_context; extern void _mesa_init_rastpos(struct gl_context *ctx); +void +_mesa_RasterPos(struct gl_context *ctx, const GLfloat vObj[4]); + void GLAPIENTRY _mesa_RasterPos2d(GLdouble x, GLdouble y); void GLAPIENTRY diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index 8182d3dcc04..dd51bba3386 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -543,13 +543,55 @@ _mesa_program_resource_find_name(struct gl_shader_program *shProg, /* Resource basename. */ const char *rname = _mesa_program_resource_name(res); unsigned baselen = strlen(rname); + unsigned baselen_without_array_index = baselen; + const char *rname_last_square_bracket = strrchr(rname, '['); + bool found = false; + bool rname_has_array_index_zero = false; + /* From ARB_program_interface_query spec: + * + * "uint GetProgramResourceIndex(uint program, enum programInterface, + * const char *name); + * [...] + * If <name> exactly matches the name string of one of the active + * resources for <programInterface>, the index of the matched resource is + * returned. Additionally, if <name> would exactly match the name string + * of an active resource if "[0]" were appended to <name>, the index of + * the matched resource is returned. 
[...]" + * + * "A string provided to GetProgramResourceLocation or + * GetProgramResourceLocationIndex is considered to match an active variable + * if: + * + * * the string exactly matches the name of the active variable; + * + * * if the string identifies the base name of an active array, where the + * string would exactly match the name of the variable if the suffix + * "[0]" were appended to the string; [...]" + */ + /* Remove array's index from interface block name comparison only if + * array's index is zero and the resulting string length is the same + * as the provided name's length. + */ + if (rname_last_square_bracket) { + baselen_without_array_index -= strlen(rname_last_square_bracket); + rname_has_array_index_zero = + (strncmp(rname_last_square_bracket, "[0]\0", 4) == 0) && + (baselen_without_array_index == strlen(name)); + } + + if (strncmp(rname, name, baselen) == 0) + found = true; + else if (rname_has_array_index_zero && + strncmp(rname, name, baselen_without_array_index) == 0) + found = true; - if (strncmp(rname, name, baselen) == 0) { + if (found) { switch (programInterface) { case GL_UNIFORM_BLOCK: case GL_SHADER_STORAGE_BLOCK: /* Basename match, check if array or struct. */ - if (rname_has_array_index_zero || + name[baselen] == '\0' || name[baselen] == '[' || name[baselen] == '.') { return res; @@ -627,6 +669,20 @@ _mesa_program_resource_index(struct gl_shader_program *shProg, } } +/** + * Find a program resource that points to given data. + */ +static struct gl_program_resource* +program_resource_find_data(struct gl_shader_program *shProg, void *data) +{ + struct gl_program_resource *res = shProg->ProgramResourceList; + for (unsigned i = 0; i < shProg->NumProgramResourceList; i++, res++) { + if (res->Data == data) + return res; + } + return NULL; +} + /* Find a program resource with specific index in given interface. 
*/ struct gl_program_resource * @@ -808,6 +864,14 @@ program_resource_location(struct gl_shader_program *shProg, if (RESOURCE_UNI(res)->builtin) return -1; + /* From page 79 of the OpenGL 4.2 spec: + * + * "A valid name cannot be a structure, an array of structures, or any + * portion of a single vector or a matrix." + */ + if (RESOURCE_UNI(res)->type->without_array()->is_record()) + return -1; + /* From the GL_ARB_uniform_buffer_object spec: * * "The value -1 will be returned if <name> does not correspond to an @@ -1016,8 +1080,18 @@ get_buffer_property(struct gl_shader_program *shProg, *val = RESOURCE_ATC(res)->NumUniforms; return 1; case GL_ACTIVE_VARIABLES: - for (unsigned i = 0; i < RESOURCE_ATC(res)->NumUniforms; i++) - *val++ = RESOURCE_ATC(res)->Uniforms[i]; + for (unsigned i = 0; i < RESOURCE_ATC(res)->NumUniforms; i++) { + /* Active atomic buffer contains index to UniformStorage. Find + * out gl_program_resource via data pointer and then calculate + * index of that uniform. + */ + unsigned idx = RESOURCE_ATC(res)->Uniforms[i]; + struct gl_program_resource *uni = + program_resource_find_data(shProg, + &shProg->UniformStorage[idx]); + assert(uni); + *val++ = _mesa_program_resource_index(shProg, uni); + } return RESOURCE_ATC(res)->NumUniforms; } } diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index 765602e50db..ac40891f435 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -630,9 +630,16 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname, case GL_ACTIVE_ATTRIBUTE_MAX_LENGTH: *params = _mesa_longest_attribute_name_length(shProg); return; - case GL_ACTIVE_UNIFORMS: - *params = shProg->NumUniformStorage - shProg->NumHiddenUniforms; + case GL_ACTIVE_UNIFORMS: { + unsigned i; + const unsigned num_uniforms = + shProg->NumUniformStorage - shProg->NumHiddenUniforms; + for (*params = 0, i = 0; i < num_uniforms; i++) { + if (!shProg->UniformStorage[i].is_shader_storage) + (*params)++; + } return; + } case 
GL_ACTIVE_UNIFORM_MAX_LENGTH: { unsigned i; GLint max_len = 0; @@ -640,6 +647,9 @@ get_programiv(struct gl_context *ctx, GLuint program, GLenum pname, shProg->NumUniformStorage - shProg->NumHiddenUniforms; for (i = 0; i < num_uniforms; i++) { + if (shProg->UniformStorage[i].is_shader_storage) + continue; + /* Add one for the terminating NUL character for a non-array, and * 4 for the "[0]" and the NUL for an array. */ diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c index 84973d3fe5d..a8ac19e40d7 100644 --- a/src/mesa/main/texcompress.c +++ b/src/mesa/main/texcompress.c @@ -243,28 +243,6 @@ _mesa_gl_compressed_format_base_format(GLenum format) * what GL_NUM_COMPRESSED_TEXTURE_FORMATS and * GL_COMPRESSED_TEXTURE_FORMATS return." * - * The KHR_texture_compression_astc_hdr spec says: - * - * "Interactions with OpenGL 4.2 - * - * OpenGL 4.2 supports the feature that compressed textures can be - * compressed online, by passing the compressed texture format enum as - * the internal format when uploading a texture using TexImage1D, - * TexImage2D or TexImage3D (see Section 3.9.3, Texture Image - * Specification, subsection Encoding of Special Internal Formats). - * - * Due to the complexity of the ASTC compression algorithm, it is not - * usually suitable for online use, and therefore ASTC support will be - * limited to pre-compressed textures only. Where on-device compression - * is required, a domain-specific limited compressor will typically - * be used, and this is therefore not suitable for implementation in - * the driver. - * - * In particular, the ASTC format specifiers will not be added to - * Table 3.14, and thus will not be accepted by the TexImage*D - * functions, and will not be returned by the (already deprecated) - * COMPRESSED_TEXTURE_FORMATS query." - * * There is no formal spec for GL_ATI_texture_compression_3dc. 
Since the * formats added by this extension are luminance-alpha formats, it is * reasonable to expect them to follow the same rules as @@ -286,7 +264,8 @@ GLuint _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats) { GLuint n = 0; - if (ctx->Extensions.TDFX_texture_compression_FXT1) { + if (_mesa_is_desktop_gl(ctx) && + ctx->Extensions.TDFX_texture_compression_FXT1) { if (formats) { formats[n++] = GL_COMPRESSED_RGB_FXT1_3DFX; formats[n++] = GL_COMPRESSED_RGBA_FXT1_3DFX; @@ -396,6 +375,69 @@ _mesa_get_compressed_formats(struct gl_context *ctx, GLint *formats) n += 10; } } + + /* The KHR_texture_compression_astc_hdr spec says: + * + * "Interactions with OpenGL 4.2 + * + * OpenGL 4.2 supports the feature that compressed textures can be + * compressed online, by passing the compressed texture format enum as + * the internal format when uploading a texture using TexImage1D, + * TexImage2D or TexImage3D (see Section 3.9.3, Texture Image + * Specification, subsection Encoding of Special Internal Formats). + * + * Due to the complexity of the ASTC compression algorithm, it is not + * usually suitable for online use, and therefore ASTC support will be + * limited to pre-compressed textures only. Where on-device compression + * is required, a domain-specific limited compressor will typically + * be used, and this is therefore not suitable for implementation in + * the driver. + * + * In particular, the ASTC format specifiers will not be added to + * Table 3.14, and thus will not be accepted by the TexImage*D + * functions, and will not be returned by the (already deprecated) + * COMPRESSED_TEXTURE_FORMATS query." + * + * The ES and the desktop specs diverge here. In OpenGL ES, the COMPRESSED_TEXTURE_FORMATS + * query returns the set of supported specific compressed formats. 
+ */ + if (ctx->API == API_OPENGLES2 && + ctx->Extensions.KHR_texture_compression_astc_ldr) { + if (formats) { + formats[n++] = GL_COMPRESSED_RGBA_ASTC_4x4_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_5x4_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_5x5_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_6x5_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_6x6_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_8x5_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_8x6_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_8x8_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_10x5_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_10x6_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_10x8_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_10x10_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_12x10_KHR; + formats[n++] = GL_COMPRESSED_RGBA_ASTC_12x12_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR; + formats[n++] = GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR; + } + else { + n += 28; + } + } + return n; } diff --git a/src/mesa/main/texgetimage.c b/src/mesa/main/texgetimage.c index 682b72755c7..945890aeeb5 100644 --- a/src/mesa/main/texgetimage.c +++ b/src/mesa/main/texgetimage.c @@ -297,8 +297,7 @@ get_tex_rgba_compressed(struct gl_context *ctx, GLuint dimensions, uint8_t rebaseSwizzle[4]; /* Decompress into 
temp float buffer, then pack into user buffer */ - tempImage = malloc(width * height * depth - * 4 * sizeof(GLfloat)); + tempImage = malloc(width * height * depth * 4 * sizeof(GLfloat)); if (!tempImage) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glGetTexImage()"); return; diff --git a/src/mesa/main/vdpau.c b/src/mesa/main/vdpau.c index 0efa56e4f41..44be3a37443 100644 --- a/src/mesa/main/vdpau.c +++ b/src/mesa/main/vdpau.c @@ -163,9 +163,10 @@ register_surface(struct gl_context *ctx, GLboolean isOutput, return (GLintptr)NULL; } - if (tex->Target == 0) + if (tex->Target == 0) { tex->Target = target; - else if (tex->Target != target) { + tex->TargetIndex = _mesa_tex_target_to_index(ctx, target); + } else if (tex->Target != target) { _mesa_unlock_texture(ctx, tex); free(surf); _mesa_error(ctx, GL_INVALID_OPERATION, diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c index acaa85d9356..20f8b3df99d 100644 --- a/src/mesa/state_tracker/st_atom_constbuf.c +++ b/src/mesa/state_tracker/st_atom_constbuf.c @@ -73,7 +73,8 @@ void st_upload_constants( struct st_context *st, * the parameters list are explicitly set by the user with glUniform, * glProgramParameter(), etc. */ - _mesa_load_state_parameters(st->ctx, params); + if (params->StateFlags) + _mesa_load_state_parameters(st->ctx, params); /* We always need to get a new buffer, to keep the drivers simple and * avoid gratuitous rendering synchronization. diff --git a/src/mesa/state_tracker/st_cb_copyimage.c b/src/mesa/state_tracker/st_cb_copyimage.c new file mode 100644 index 00000000000..75114cdb712 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_copyimage.c @@ -0,0 +1,582 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "state_tracker/st_context.h" +#include "state_tracker/st_cb_copyimage.h" +#include "state_tracker/st_cb_fbo.h" +#include "state_tracker/st_texture.h" + +#include "util/u_box.h" +#include "util/u_format.h" +#include "util/u_inlines.h" + + +/** + * Return an equivalent canonical format without "X" channels. + * + * Copying between incompatible formats is easier when the format is + * canonicalized, meaning that it is in a standard form. + * + * The returned format has the same component sizes and swizzles as + * the source format, the type is changed to UINT or UNORM, depending on + * which one has the most swizzle combinations in their group. + * + * If it's not an array format, return a memcpy-equivalent array format. 
+ * + * The key feature is that swizzled versions of formats of the same + * component size always return the same component type. + * + * X returns A. + * Luminance, intensity, alpha, depth, stencil, and 8-bit and 16-bit packed + * formats are not supported. (same as ARB_copy_image) + */ +static enum pipe_format +get_canonical_format(enum pipe_format format) +{ + const struct util_format_description *desc = + util_format_description(format); + + /* Packed formats. Return the equivalent array format. */ + if (format == PIPE_FORMAT_R11G11B10_FLOAT || + format == PIPE_FORMAT_R9G9B9E5_FLOAT) + return get_canonical_format(PIPE_FORMAT_R8G8B8A8_UINT); + + if (desc->nr_channels == 4 && + desc->channel[0].size == 10 && + desc->channel[1].size == 10 && + desc->channel[2].size == 10 && + desc->channel[3].size == 2) { + if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X && + desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_Y && + desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_Z) + return get_canonical_format(PIPE_FORMAT_R8G8B8A8_UINT); + + return PIPE_FORMAT_NONE; + } + +#define RETURN_FOR_SWIZZLE1(x, format) \ + if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_##x) \ + return format + +#define RETURN_FOR_SWIZZLE2(x, y, format) \ + if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_##x && \ + desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_##y) \ + return format + +#define RETURN_FOR_SWIZZLE3(x, y, z, format) \ + if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_##x && \ + desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_##y && \ + desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_##z) \ + return format + +#define RETURN_FOR_SWIZZLE4(x, y, z, w, format) \ + if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_##x && \ + desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_##y && \ + desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_##z && \ + desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_##w) \ + return format + + /* Array formats. 
*/ + if (desc->is_array) { + switch (desc->nr_channels) { + case 1: + switch (desc->channel[0].size) { + case 8: + RETURN_FOR_SWIZZLE1(X, PIPE_FORMAT_R8_UINT); + break; + + case 16: + RETURN_FOR_SWIZZLE1(X, PIPE_FORMAT_R16_UINT); + break; + + case 32: + RETURN_FOR_SWIZZLE1(X, PIPE_FORMAT_R32_UINT); + break; + } + break; + + case 2: + switch (desc->channel[0].size) { + case 8: + /* All formats in each group must be of the same type. + * We can't use UINT for R8G8 while using UNORM for G8R8. + */ + RETURN_FOR_SWIZZLE2(X, Y, PIPE_FORMAT_R8G8_UNORM); + RETURN_FOR_SWIZZLE2(Y, X, PIPE_FORMAT_G8R8_UNORM); + break; + + case 16: + RETURN_FOR_SWIZZLE2(X, Y, PIPE_FORMAT_R16G16_UNORM); + RETURN_FOR_SWIZZLE2(Y, X, PIPE_FORMAT_G16R16_UNORM); + break; + + case 32: + RETURN_FOR_SWIZZLE2(X, Y, PIPE_FORMAT_R32G32_UINT); + break; + } + break; + + case 3: + switch (desc->channel[0].size) { + case 8: + RETURN_FOR_SWIZZLE3(X, Y, Z, PIPE_FORMAT_R8G8B8_UINT); + break; + + case 16: + RETURN_FOR_SWIZZLE3(X, Y, Z, PIPE_FORMAT_R16G16B16_UINT); + break; + + case 32: + RETURN_FOR_SWIZZLE3(X, Y, Z, PIPE_FORMAT_R32G32B32_UINT); + break; + } + break; + + case 4: + switch (desc->channel[0].size) { + case 8: + RETURN_FOR_SWIZZLE4(X, Y, Z, W, PIPE_FORMAT_R8G8B8A8_UNORM); + RETURN_FOR_SWIZZLE4(X, Y, Z, 1, PIPE_FORMAT_R8G8B8A8_UNORM); + RETURN_FOR_SWIZZLE4(Z, Y, X, W, PIPE_FORMAT_B8G8R8A8_UNORM); + RETURN_FOR_SWIZZLE4(Z, Y, X, 1, PIPE_FORMAT_B8G8R8A8_UNORM); + RETURN_FOR_SWIZZLE4(W, Z, Y, X, PIPE_FORMAT_A8B8G8R8_UNORM); + RETURN_FOR_SWIZZLE4(1, Z, Y, X, PIPE_FORMAT_A8B8G8R8_UNORM); + RETURN_FOR_SWIZZLE4(W, X, Y, Z, PIPE_FORMAT_A8R8G8B8_UNORM); + RETURN_FOR_SWIZZLE4(1, X, Y, Z, PIPE_FORMAT_A8R8G8B8_UNORM); + break; + + case 16: + RETURN_FOR_SWIZZLE4(X, Y, Z, W, PIPE_FORMAT_R16G16B16A16_UINT); + RETURN_FOR_SWIZZLE4(X, Y, Z, 1, PIPE_FORMAT_R16G16B16A16_UINT); + break; + + case 32: + RETURN_FOR_SWIZZLE4(X, Y, Z, W, PIPE_FORMAT_R32G32B32A32_UINT); + RETURN_FOR_SWIZZLE4(X, Y, Z, 1, 
PIPE_FORMAT_R32G32B32A32_UINT); + break; + } + } + + assert(!"unknown array format"); + return PIPE_FORMAT_NONE; + } + + assert(!"unknown packed format"); + return PIPE_FORMAT_NONE; +} + +/** + * Return true if the swizzle is XYZW in case of a 4-channel format, + * XY in case of a 2-channel format, or X in case of a 1-channel format. + */ +static bool +has_identity_swizzle(const struct util_format_description *desc) +{ + int i; + + for (i = 0; i < desc->nr_channels; i++) + if (desc->swizzle[i] != UTIL_FORMAT_SWIZZLE_X + i) + return false; + + return true; +} + +/** + * Return a canonical format for the given bits and channel size. + */ +static enum pipe_format +canonical_format_from_bits(unsigned bits, unsigned channel_size) +{ + switch (bits) { + case 8: + if (channel_size == 8) + return get_canonical_format(PIPE_FORMAT_R8_UINT); + break; + + case 16: + if (channel_size == 8) + return get_canonical_format(PIPE_FORMAT_R8G8_UINT); + if (channel_size == 16) + return get_canonical_format(PIPE_FORMAT_R16_UINT); + break; + + case 32: + if (channel_size == 8) + return get_canonical_format(PIPE_FORMAT_R8G8B8A8_UINT); + if (channel_size == 16) + return get_canonical_format(PIPE_FORMAT_R16G16_UINT); + if (channel_size == 32) + return get_canonical_format(PIPE_FORMAT_R32_UINT); + break; + + case 64: + if (channel_size == 16) + return get_canonical_format(PIPE_FORMAT_R16G16B16A16_UINT); + if (channel_size == 32) + return get_canonical_format(PIPE_FORMAT_R32G32_UINT); + break; + + case 128: + if (channel_size == 32) + return get_canonical_format(PIPE_FORMAT_R32G32B32A32_UINT); + break; + } + + assert(!"impossible format"); + return PIPE_FORMAT_NONE; +} + +static void +blit(struct pipe_context *pipe, + struct pipe_resource *dst, + enum pipe_format dst_format, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + enum pipe_format src_format, + unsigned src_level, + const struct pipe_box *src_box) +{ + struct pipe_blit_info blit = 
{{0}}; + + blit.src.resource = src; + blit.dst.resource = dst; + blit.src.format = src_format; + blit.dst.format = dst_format; + blit.src.level = src_level; + blit.dst.level = dst_level; + blit.src.box = *src_box; + u_box_3d(dstx, dsty, dstz, src_box->width, src_box->height, + src_box->depth, &blit.dst.box); + blit.mask = PIPE_MASK_RGBA; + blit.filter = PIPE_TEX_FILTER_NEAREST; + + pipe->blit(pipe, &blit); +} + +static void +swizzled_copy(struct pipe_context *pipe, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + const struct util_format_description *src_desc, *dst_desc; + unsigned bits; + enum pipe_format blit_src_format, blit_dst_format; + + /* Get equivalent canonical formats. Those are always array formats and + * copying between compatible canonical formats behaves either like + * memcpy or like swizzled memcpy. The idea is that we won't have to care + * about the channel type from this point on. + * Only the swizzle and channel size. + */ + blit_src_format = get_canonical_format(src->format); + blit_dst_format = get_canonical_format(dst->format); + + assert(blit_src_format != PIPE_FORMAT_NONE); + assert(blit_dst_format != PIPE_FORMAT_NONE); + + src_desc = util_format_description(blit_src_format); + dst_desc = util_format_description(blit_dst_format); + + assert(src_desc->block.bits == dst_desc->block.bits); + bits = src_desc->block.bits; + + if (dst_desc->channel[0].size == src_desc->channel[0].size) { + /* Only the swizzle is different, which means we can just blit, + * e.g. RGBA -> BGRA. + */ + } else if (has_identity_swizzle(src_desc)) { + /* Src is unswizzled and dst can be swizzled, so src is typecast + * to an equivalent dst-compatible format. + * e.g. 
R32 -> BGRA8 is realized as RGBA8 -> BGRA8 + */ + blit_src_format = + canonical_format_from_bits(bits, dst_desc->channel[0].size); + } else if (has_identity_swizzle(dst_desc)) { + /* Dst is unswizzled and src can be swizzled, so dst is typecast + * to an equivalent src-compatible format. + * e.g. BGRA8 -> R32 is realized as BGRA8 -> RGBA8 + */ + blit_dst_format = + canonical_format_from_bits(bits, src_desc->channel[0].size); + } else { + assert(!"This should have been handled by handle_complex_copy."); + return; + } + + blit(pipe, dst, blit_dst_format, dst_level, dstx, dsty, dstz, + src, blit_src_format, src_level, src_box); +} + +static bool +same_size_and_swizzle(const struct util_format_description *d1, + const struct util_format_description *d2) +{ + int i; + + if (d1->layout != d2->layout || + d1->nr_channels != d2->nr_channels || + d1->is_array != d2->is_array) + return false; + + for (i = 0; i < d1->nr_channels; i++) { + if (d1->channel[i].size != d2->channel[i].size) + return false; + + if (d1->swizzle[i] <= UTIL_FORMAT_SWIZZLE_W && + d2->swizzle[i] <= UTIL_FORMAT_SWIZZLE_W && + d1->swizzle[i] != d2->swizzle[i]) + return false; + } + + return true; +} + +static struct pipe_resource * +create_texture(struct pipe_screen *screen, enum pipe_format format, + unsigned nr_samples, + unsigned width, unsigned height, unsigned depth) +{ + struct pipe_resource templ; + + memset(&templ, 0, sizeof(templ)); + templ.format = format; + templ.width0 = width; + templ.height0 = height; + templ.depth0 = 1; + templ.array_size = depth; + templ.nr_samples = nr_samples; + templ.usage = PIPE_USAGE_DEFAULT; + templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; + + if (depth > 1) + templ.target = PIPE_TEXTURE_2D_ARRAY; + else + templ.target = PIPE_TEXTURE_2D; + + return screen->resource_create(screen, &templ); +} + +/** + * Handle complex format conversions using 2 blits with a temporary texture + * in between, e.g. blitting from B10G10R10A2 to G16R16. 
+ * + * This example is implemented this way: + * 1) First, blit from B10G10R10A2 to R10G10B10A2, which is canonical, so it + * can be reinterpreted as a different canonical format of the same bpp, + * such as R16G16. This blit only swaps R and B 10-bit components. + * 2) Finally, blit the result, which is R10G10B10A2, as R16G16 to G16R16. + * This blit only swaps R and G 16-bit components. + */ +static bool +handle_complex_copy(struct pipe_context *pipe, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box, + enum pipe_format noncanon_format, + enum pipe_format canon_format) +{ + struct pipe_box temp_box; + struct pipe_resource *temp = NULL; + const struct util_format_description *src_desc, *dst_desc; + const struct util_format_description *canon_desc, *noncanon_desc; + bool src_is_canon; + bool src_is_noncanon; + bool dst_is_canon; + bool dst_is_noncanon; + + src_desc = util_format_description(src->format); + dst_desc = util_format_description(dst->format); + canon_desc = util_format_description(canon_format); + noncanon_desc = util_format_description(noncanon_format); + + src_is_canon = same_size_and_swizzle(src_desc, canon_desc); + dst_is_canon = same_size_and_swizzle(dst_desc, canon_desc); + src_is_noncanon = same_size_and_swizzle(src_desc, noncanon_desc); + dst_is_noncanon = same_size_and_swizzle(dst_desc, noncanon_desc); + + if (src_is_noncanon) { + /* Simple case - only types differ (e.g. UNORM and UINT). */ + if (dst_is_noncanon) { + blit(pipe, dst, noncanon_format, dst_level, dstx, dsty, dstz, src, + noncanon_format, src_level, src_box); + return true; + } + + /* Simple case - only types and swizzles differ. */ + if (dst_is_canon) { + blit(pipe, dst, canon_format, dst_level, dstx, dsty, dstz, src, + noncanon_format, src_level, src_box); + return true; + } + + /* Use the temporary texture. 
Src is converted to a canonical format, + * then proceed with the generic swizzled_copy. + */ + temp = create_texture(pipe->screen, canon_format, src->nr_samples, + src_box->width, + src_box->height, src_box->depth); + + u_box_3d(0, 0, 0, src_box->width, src_box->height, src_box->depth, + &temp_box); + + blit(pipe, temp, canon_format, 0, 0, 0, 0, src, noncanon_format, + src_level, src_box); + swizzled_copy(pipe, dst, dst_level, dstx, dsty, dstz, temp, 0, + &temp_box); + pipe_resource_reference(&temp, NULL); + return true; + } + + if (dst_is_noncanon) { + /* Simple case - only types and swizzles differ. */ + if (src_is_canon) { + blit(pipe, dst, noncanon_format, dst_level, dstx, dsty, dstz, src, + canon_format, src_level, src_box); + return true; + } + + /* Use the temporary texture. First, use the generic copy, but use + * a canonical format in the destination. Then convert to noncanon_format. */ + temp = create_texture(pipe->screen, canon_format, dst->nr_samples, + src_box->width, + src_box->height, src_box->depth); + + u_box_3d(0, 0, 0, src_box->width, src_box->height, src_box->depth, + &temp_box); + + swizzled_copy(pipe, temp, 0, 0, 0, 0, src, src_level, src_box); + blit(pipe, dst, noncanon_format, dst_level, dstx, dsty, dstz, temp, + canon_format, 0, &temp_box); + pipe_resource_reference(&temp, NULL); + return true; + } + + return false; +} + +static void +copy_image(struct pipe_context *pipe, + struct pipe_resource *dst, + unsigned dst_level, + unsigned dstx, unsigned dsty, unsigned dstz, + struct pipe_resource *src, + unsigned src_level, + const struct pipe_box *src_box) +{ + if (src->format == dst->format || + util_format_is_compressed(src->format) || + util_format_is_compressed(dst->format)) { + pipe->resource_copy_region(pipe, dst, dst_level, dstx, dsty, dstz, + src, src_level, src_box); + return; + } + + /* Copying to/from B10G10R10*2 needs 2 blits with R10G10B10A2 + * as a temporary texture in between. 
+ */ + if (handle_complex_copy(pipe, dst, dst_level, dstx, dsty, dstz, src, + src_level, src_box, PIPE_FORMAT_B10G10R10A2_UINT, + PIPE_FORMAT_R10G10B10A2_UINT)) + return; + + /* Copying to/from G8R8 needs 2 blits with R8G8 as a temporary texture + * in between. + */ + if (handle_complex_copy(pipe, dst, dst_level, dstx, dsty, dstz, src, + src_level, src_box, PIPE_FORMAT_G8R8_UNORM, + PIPE_FORMAT_R8G8_UNORM)) + return; + + /* Copying to/from G16R16 needs 2 blits with R16G16 as a temporary texture + * in between. + */ + if (handle_complex_copy(pipe, dst, dst_level, dstx, dsty, dstz, src, + src_level, src_box, PIPE_FORMAT_G16R16_UNORM, + PIPE_FORMAT_R16G16_UNORM)) + return; + + /* Only allow non-identity swizzling on RGBA8 formats. */ + + /* Simple copy, memcpy with swizzling, no format conversion. */ + swizzled_copy(pipe, dst, dst_level, dstx, dsty, dstz, src, src_level, + src_box); +} + +static void +st_CopyImageSubData(struct gl_context *ctx, + struct gl_texture_image *src_image, + struct gl_renderbuffer *src_renderbuffer, + int src_x, int src_y, int src_z, + struct gl_texture_image *dst_image, + struct gl_renderbuffer *dst_renderbuffer, + int dst_x, int dst_y, int dst_z, + int src_width, int src_height) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct pipe_resource *src_res, *dst_res; + struct pipe_box box; + int src_level, dst_level; + + if (src_image) { + struct st_texture_image *src = st_texture_image(src_image); + src_res = src->pt; + src_level = src_image->Level; + src_z += src_image->Face; + } else { + struct st_renderbuffer *src = st_renderbuffer(src_renderbuffer); + src_res = src->texture; + src_level = 0; + } + + if (dst_image) { + struct st_texture_image *dst = st_texture_image(dst_image); + dst_res = dst->pt; + dst_level = dst_image->Level; + dst_z += dst_image->Face; + } else { + struct st_renderbuffer *dst = st_renderbuffer(dst_renderbuffer); + dst_res = dst->texture; + dst_level = 0; + } + + 
u_box_2d_zslice(src_x, src_y, src_z, src_width, src_height, &box); + + copy_image(pipe, dst_res, dst_level, dst_x, dst_y, dst_z, + src_res, src_level, &box); +} + +void +st_init_copy_image_functions(struct dd_function_table *functions) +{ + functions->CopyImageSubData = st_CopyImageSubData; +} diff --git a/src/mesa/state_tracker/st_cb_copyimage.h b/src/mesa/state_tracker/st_cb_copyimage.h new file mode 100644 index 00000000000..d17f35c0953 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_copyimage.h @@ -0,0 +1,33 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef ST_CB_COPY_IMAGE_H +#define ST_CB_COPY_IMAGE_H + +struct dd_function_table; + +extern void +st_init_copy_image_functions(struct dd_function_table *functions); + +#endif /* ST_CB_COPY_IMAGE_H */ diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c index b9997dacfd2..747b41464ae 100644 --- a/src/mesa/state_tracker/st_cb_rasterpos.c +++ b/src/mesa/state_tracker/st_cb_rasterpos.c @@ -39,6 +39,7 @@ #include "main/imports.h" #include "main/macros.h" #include "main/feedback.h" +#include "main/rastpos.h" #include "st_context.h" #include "st_atom.h" @@ -224,6 +225,15 @@ st_RasterPos(struct gl_context *ctx, const GLfloat v[4]) struct rastpos_stage *rs; const struct gl_client_array **saved_arrays = ctx->Array._DrawArrays; + if (ctx->VertexProgram._Current == NULL || + ctx->VertexProgram._Current == ctx->VertexProgram._TnlProgram) { + /* No vertex shader/program is enabled, use the simple/fast fixed- + * function implementation of RasterPos. + */ + _mesa_RasterPos(ctx, v); + return; + } + if (st->rastpos_stage) { /* get rastpos stage info */ rs = rastpos_stage(st->rastpos_stage); diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 5d25fed317e..d4c916e8057 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -1873,55 +1873,6 @@ st_TextureView(struct gl_context *ctx, return GL_TRUE; } -/* HACK: this is only enough for the most basic uses of CopyImage. Must fix - * before actually exposing the extension. 
- */ -static void -st_CopyImageSubData(struct gl_context *ctx, - struct gl_texture_image *src_image, - struct gl_renderbuffer *src_renderbuffer, - int src_x, int src_y, int src_z, - struct gl_texture_image *dst_image, - struct gl_renderbuffer *dst_renderbuffer, - int dst_x, int dst_y, int dst_z, - int src_width, int src_height) -{ - struct st_context *st = st_context(ctx); - struct pipe_context *pipe = st->pipe; - struct pipe_resource *src_res, *dst_res; - struct pipe_box box; - int src_level, dst_level; - - if (src_image) { - struct st_texture_image *src = st_texture_image(src_image); - src_res = src->pt; - src_level = src_image->Level; - } - else { - struct st_renderbuffer *src = st_renderbuffer(src_renderbuffer); - src_res = src->texture; - src_level = 0; - } - - if (dst_image) { - struct st_texture_image *dst = st_texture_image(dst_image); - dst_res = dst->pt; - dst_level = dst_image->Level; - } - else { - struct st_renderbuffer *dst = st_renderbuffer(dst_renderbuffer); - dst_res = dst->texture; - dst_level = 0; - } - - u_box_2d_zslice(src_x, src_y, src_z, src_width, src_height, &box); - pipe->resource_copy_region(pipe, dst_res, dst_level, - dst_x, dst_y, dst_z, - src_res, src_level, - &box); -} - - void st_init_texture_functions(struct dd_function_table *functions) { @@ -1953,6 +1904,4 @@ st_init_texture_functions(struct dd_function_table *functions) functions->AllocTextureStorage = st_AllocTextureStorage; functions->TextureView = st_TextureView; - - functions->CopyImageSubData = st_CopyImageSubData; } diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 5abb17385c2..6e20fd1fda2 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -44,6 +44,7 @@ #include "st_cb_bufferobjects.h" #include "st_cb_clear.h" #include "st_cb_condrender.h" +#include "st_cb_copyimage.h" #include "st_cb_drawpixels.h" #include "st_cb_rasterpos.h" #include "st_cb_drawtex.h" @@ -430,6 +431,7 @@ void 
st_init_driver_functions(struct pipe_screen *screen, st_init_bufferobject_functions(functions); st_init_clear_functions(functions); st_init_bitmap_functions(functions); + st_init_copy_image_functions(functions); st_init_drawpixels_functions(functions); st_init_rasterpos_functions(functions); diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index d4724b46e0a..bd7cbccc20c 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -439,6 +439,7 @@ void st_init_extensions(struct pipe_screen *screen, { o(ARB_base_instance), PIPE_CAP_START_INSTANCE }, { o(ARB_buffer_storage), PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT }, { o(ARB_color_buffer_float), PIPE_CAP_VERTEX_COLOR_UNCLAMPED }, + { o(ARB_copy_image), PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS }, { o(ARB_depth_clamp), PIPE_CAP_DEPTH_CLIP_DISABLE }, { o(ARB_depth_texture), PIPE_CAP_TEXTURE_SHADOW_MAP }, { o(ARB_draw_buffers_blend), PIPE_CAP_INDEP_BLEND_FUNC }, diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 26e1c21f6c5..b3700406df0 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -61,6 +61,8 @@ compute_num_levels(struct gl_context *ctx, numLevels = texObj->BaseLevel + baseImage->MaxNumLevels; numLevels = MIN2(numLevels, (GLuint) texObj->MaxLevel + 1); + if (texObj->Immutable) + numLevels = MIN2(numLevels, texObj->NumLevels); assert(numLevels >= 1); return numLevels; @@ -99,38 +101,40 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target, */ stObj->lastLevel = lastLevel; - if (pt->last_level < lastLevel) { - /* The current gallium texture doesn't have space for all the - * mipmap levels we need to generate. So allocate a new texture. 
- */ - struct pipe_resource *oldTex = stObj->pt; - - /* create new texture with space for more levels */ - stObj->pt = st_texture_create(st, - oldTex->target, - oldTex->format, - lastLevel, - oldTex->width0, - oldTex->height0, - oldTex->depth0, - oldTex->array_size, - 0, - oldTex->bind); - - /* This will copy the old texture's base image into the new texture - * which we just allocated. - */ - st_finalize_texture(ctx, st->pipe, texObj); - - /* release the old tex (will likely be freed too) */ - pipe_resource_reference(&oldTex, NULL); - st_texture_release_all_sampler_views(st, stObj); - } - else { - /* Make sure that the base texture image data is present in the - * texture buffer. - */ - st_finalize_texture(ctx, st->pipe, texObj); + if (!texObj->Immutable) { + if (pt->last_level < lastLevel) { + /* The current gallium texture doesn't have space for all the + * mipmap levels we need to generate. So allocate a new texture. + */ + struct pipe_resource *oldTex = stObj->pt; + + /* create new texture with space for more levels */ + stObj->pt = st_texture_create(st, + oldTex->target, + oldTex->format, + lastLevel, + oldTex->width0, + oldTex->height0, + oldTex->depth0, + oldTex->array_size, + 0, + oldTex->bind); + + /* This will copy the old texture's base image into the new texture + * which we just allocated. + */ + st_finalize_texture(ctx, st->pipe, texObj); + + /* release the old tex (will likely be freed too) */ + pipe_resource_reference(&oldTex, NULL); + st_texture_release_all_sampler_views(st, stObj); + } + else { + /* Make sure that the base texture image data is present in the + * texture buffer. + */ + st_finalize_texture(ctx, st->pipe, texObj); + } } pt = stObj->pt; diff --git a/src/mesa/tnl/t_rasterpos.c b/src/mesa/tnl/t_rasterpos.c deleted file mode 100644 index 4bd9ac8539e..00000000000 --- a/src/mesa/tnl/t_rasterpos.c +++ /dev/null @@ -1,478 +0,0 @@ -/* - * Mesa 3-D graphics library - * - * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - - -#include "c99_math.h" -#include "main/glheader.h" -#include "main/feedback.h" -#include "main/light.h" -#include "main/macros.h" -#include "util/simple_list.h" -#include "main/mtypes.h" -#include "main/viewport.h" - -#include "math/m_matrix.h" -#include "tnl/tnl.h" - - - -/** - * Clip a point against the view volume. - * - * \param v vertex vector describing the point to clip. - * - * \return zero if outside view volume, or one if inside. - */ -static GLuint -viewclip_point_xy( const GLfloat v[] ) -{ - if ( v[0] > v[3] || v[0] < -v[3] - || v[1] > v[3] || v[1] < -v[3] ) { - return 0; - } - else { - return 1; - } -} - - -/** - * Clip a point against the far/near Z clipping planes. - * - * \param v vertex vector describing the point to clip. - * - * \return zero if outside view volume, or one if inside. 
- */ -static GLuint -viewclip_point_z( const GLfloat v[] ) -{ - if (v[2] > v[3] || v[2] < -v[3] ) { - return 0; - } - else { - return 1; - } -} - - -/** - * Clip a point against the user clipping planes. - * - * \param ctx GL context. - * \param v vertex vector describing the point to clip. - * - * \return zero if the point was clipped, or one otherwise. - */ -static GLuint -userclip_point( struct gl_context *ctx, const GLfloat v[] ) -{ - GLuint p; - - for (p = 0; p < ctx->Const.MaxClipPlanes; p++) { - if (ctx->Transform.ClipPlanesEnabled & (1 << p)) { - GLfloat dot = v[0] * ctx->Transform._ClipUserPlane[p][0] - + v[1] * ctx->Transform._ClipUserPlane[p][1] - + v[2] * ctx->Transform._ClipUserPlane[p][2] - + v[3] * ctx->Transform._ClipUserPlane[p][3]; - if (dot < 0.0F) { - return 0; - } - } - } - - return 1; -} - - -/** - * Compute lighting for the raster position. RGB modes computed. - * \param ctx the context - * \param vertex vertex location - * \param normal normal vector - * \param Rcolor returned color - * \param Rspec returned specular color (if separate specular enabled) - */ -static void -shade_rastpos(struct gl_context *ctx, - const GLfloat vertex[4], - const GLfloat normal[3], - GLfloat Rcolor[4], - GLfloat Rspec[4]) -{ - /*const*/ GLfloat (*base)[3] = ctx->Light._BaseColor; - const struct gl_light *light; - GLfloat diffuseColor[4], specularColor[4]; /* for RGB mode only */ - - COPY_3V(diffuseColor, base[0]); - diffuseColor[3] = CLAMP( - ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_DIFFUSE][3], 0.0F, 1.0F ); - ASSIGN_4V(specularColor, 0.0, 0.0, 0.0, 1.0); - - foreach (light, &ctx->Light.EnabledList) { - GLfloat attenuation = 1.0; - GLfloat VP[3]; /* vector from vertex to light pos */ - GLfloat n_dot_VP; - GLfloat diffuseContrib[3], specularContrib[3]; - - if (!(light->_Flags & LIGHT_POSITIONAL)) { - /* light at infinity */ - COPY_3V(VP, light->_VP_inf_norm); - attenuation = light->_VP_inf_spot_attenuation; - } - else { - /* local/positional light */ - 
GLfloat d; - - /* VP = vector from vertex pos to light[i].pos */ - SUB_3V(VP, light->_Position, vertex); - /* d = length(VP) */ - d = (GLfloat) LEN_3FV( VP ); - if (d > 1.0e-6F) { - /* normalize VP */ - GLfloat invd = 1.0F / d; - SELF_SCALE_SCALAR_3V(VP, invd); - } - - /* atti */ - attenuation = 1.0F / (light->ConstantAttenuation + d * - (light->LinearAttenuation + d * - light->QuadraticAttenuation)); - - if (light->_Flags & LIGHT_SPOT) { - GLfloat PV_dot_dir = - DOT3(VP, light->_NormSpotDirection); - - if (PV_dot_dir<light->_CosCutoff) { - continue; - } - else { - GLfloat spot = powf(PV_dot_dir, light->SpotExponent); - attenuation *= spot; - } - } - } - - if (attenuation < 1e-3F) - continue; - - n_dot_VP = DOT3( normal, VP ); - - if (n_dot_VP < 0.0F) { - ACC_SCALE_SCALAR_3V(diffuseColor, attenuation, light->_MatAmbient[0]); - continue; - } - - /* Ambient + diffuse */ - COPY_3V(diffuseContrib, light->_MatAmbient[0]); - ACC_SCALE_SCALAR_3V(diffuseContrib, n_dot_VP, light->_MatDiffuse[0]); - - /* Specular */ - { - const GLfloat *h; - GLfloat n_dot_h; - - ASSIGN_3V(specularContrib, 0.0, 0.0, 0.0); - - if (ctx->Light.Model.LocalViewer) { - GLfloat v[3]; - COPY_3V(v, vertex); - NORMALIZE_3FV(v); - SUB_3V(VP, VP, v); - NORMALIZE_3FV(VP); - h = VP; - } - else if (light->_Flags & LIGHT_POSITIONAL) { - ACC_3V(VP, ctx->_EyeZDir); - NORMALIZE_3FV(VP); - h = VP; - } - else { - h = light->_h_inf_norm; - } - - n_dot_h = DOT3(normal, h); - - if (n_dot_h > 0.0F) { - GLfloat shine; - GLfloat spec_coef; - - shine = ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS][0]; - spec_coef = powf(n_dot_h, shine); - - if (spec_coef > 1.0e-10F) { - if (ctx->Light.Model.ColorControl==GL_SEPARATE_SPECULAR_COLOR) { - ACC_SCALE_SCALAR_3V( specularContrib, spec_coef, - light->_MatSpecular[0]); - } - else { - ACC_SCALE_SCALAR_3V( diffuseContrib, spec_coef, - light->_MatSpecular[0]); - } - } - } - } - - ACC_SCALE_SCALAR_3V( diffuseColor, attenuation, diffuseContrib ); - ACC_SCALE_SCALAR_3V( 
specularColor, attenuation, specularContrib ); - } - - Rcolor[0] = CLAMP(diffuseColor[0], 0.0F, 1.0F); - Rcolor[1] = CLAMP(diffuseColor[1], 0.0F, 1.0F); - Rcolor[2] = CLAMP(diffuseColor[2], 0.0F, 1.0F); - Rcolor[3] = CLAMP(diffuseColor[3], 0.0F, 1.0F); - Rspec[0] = CLAMP(specularColor[0], 0.0F, 1.0F); - Rspec[1] = CLAMP(specularColor[1], 0.0F, 1.0F); - Rspec[2] = CLAMP(specularColor[2], 0.0F, 1.0F); - Rspec[3] = CLAMP(specularColor[3], 0.0F, 1.0F); -} - - -/** - * Do texgen needed for glRasterPos. - * \param ctx rendering context - * \param vObj object-space vertex coordinate - * \param vEye eye-space vertex coordinate - * \param normal vertex normal - * \param unit texture unit number - * \param texcoord incoming texcoord and resulting texcoord - */ -static void -compute_texgen(struct gl_context *ctx, const GLfloat vObj[4], const GLfloat vEye[4], - const GLfloat normal[3], GLuint unit, GLfloat texcoord[4]) -{ - const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - - /* always compute sphere map terms, just in case */ - GLfloat u[3], two_nu, rx, ry, rz, m, mInv; - COPY_3V(u, vEye); - NORMALIZE_3FV(u); - two_nu = 2.0F * DOT3(normal, u); - rx = u[0] - normal[0] * two_nu; - ry = u[1] - normal[1] * two_nu; - rz = u[2] - normal[2] * two_nu; - m = rx * rx + ry * ry + (rz + 1.0F) * (rz + 1.0F); - if (m > 0.0F) - mInv = 0.5F * (1.0f / sqrtf(m)); - else - mInv = 0.0F; - - if (texUnit->TexGenEnabled & S_BIT) { - switch (texUnit->GenS.Mode) { - case GL_OBJECT_LINEAR: - texcoord[0] = DOT4(vObj, texUnit->GenS.ObjectPlane); - break; - case GL_EYE_LINEAR: - texcoord[0] = DOT4(vEye, texUnit->GenS.EyePlane); - break; - case GL_SPHERE_MAP: - texcoord[0] = rx * mInv + 0.5F; - break; - case GL_REFLECTION_MAP: - texcoord[0] = rx; - break; - case GL_NORMAL_MAP: - texcoord[0] = normal[0]; - break; - default: - _mesa_problem(ctx, "Bad S texgen in compute_texgen()"); - return; - } - } - - if (texUnit->TexGenEnabled & T_BIT) { - switch (texUnit->GenT.Mode) { - case 
GL_OBJECT_LINEAR: - texcoord[1] = DOT4(vObj, texUnit->GenT.ObjectPlane); - break; - case GL_EYE_LINEAR: - texcoord[1] = DOT4(vEye, texUnit->GenT.EyePlane); - break; - case GL_SPHERE_MAP: - texcoord[1] = ry * mInv + 0.5F; - break; - case GL_REFLECTION_MAP: - texcoord[1] = ry; - break; - case GL_NORMAL_MAP: - texcoord[1] = normal[1]; - break; - default: - _mesa_problem(ctx, "Bad T texgen in compute_texgen()"); - return; - } - } - - if (texUnit->TexGenEnabled & R_BIT) { - switch (texUnit->GenR.Mode) { - case GL_OBJECT_LINEAR: - texcoord[2] = DOT4(vObj, texUnit->GenR.ObjectPlane); - break; - case GL_EYE_LINEAR: - texcoord[2] = DOT4(vEye, texUnit->GenR.EyePlane); - break; - case GL_REFLECTION_MAP: - texcoord[2] = rz; - break; - case GL_NORMAL_MAP: - texcoord[2] = normal[2]; - break; - default: - _mesa_problem(ctx, "Bad R texgen in compute_texgen()"); - return; - } - } - - if (texUnit->TexGenEnabled & Q_BIT) { - switch (texUnit->GenQ.Mode) { - case GL_OBJECT_LINEAR: - texcoord[3] = DOT4(vObj, texUnit->GenQ.ObjectPlane); - break; - case GL_EYE_LINEAR: - texcoord[3] = DOT4(vEye, texUnit->GenQ.EyePlane); - break; - default: - _mesa_problem(ctx, "Bad Q texgen in compute_texgen()"); - return; - } - } -} - - -/** - * glRasterPos transformation. Typically called via ctx->Driver.RasterPos(). - * XXX some of this code (such as viewport xform, clip testing and setting - * of ctx->Current.Raster* fields) could get lifted up into the - * main/rasterpos.c code. 
- * - * \param vObj vertex position in object space - */ -void -_tnl_RasterPos(struct gl_context *ctx, const GLfloat vObj[4]) -{ - if (ctx->VertexProgram._Enabled) { - /* XXX implement this */ - _mesa_problem(ctx, "Vertex programs not implemented for glRasterPos"); - return; - } - else { - GLfloat eye[4], clip[4], ndc[3], d; - GLfloat *norm, eyenorm[3]; - GLfloat *objnorm = ctx->Current.Attrib[VERT_ATTRIB_NORMAL]; - float scale[3], translate[3]; - - /* apply modelview matrix: eye = MV * obj */ - TRANSFORM_POINT( eye, ctx->ModelviewMatrixStack.Top->m, vObj ); - /* apply projection matrix: clip = Proj * eye */ - TRANSFORM_POINT( clip, ctx->ProjectionMatrixStack.Top->m, eye ); - - /* clip to view volume. */ - if (!ctx->Transform.DepthClamp) { - if (viewclip_point_z(clip) == 0) { - ctx->Current.RasterPosValid = GL_FALSE; - return; - } - } - if (!ctx->Transform.RasterPositionUnclipped) { - if (viewclip_point_xy(clip) == 0) { - ctx->Current.RasterPosValid = GL_FALSE; - return; - } - } - - /* clip to user clipping planes */ - if (ctx->Transform.ClipPlanesEnabled && !userclip_point(ctx, clip)) { - ctx->Current.RasterPosValid = GL_FALSE; - return; - } - - /* ndc = clip / W */ - d = (clip[3] == 0.0F) ? 
1.0F : 1.0F / clip[3]; - ndc[0] = clip[0] * d; - ndc[1] = clip[1] * d; - ndc[2] = clip[2] * d; - /* wincoord = viewport_mapping(ndc) */ - _mesa_get_viewport_xform(ctx, 0, scale, translate); - ctx->Current.RasterPos[0] = ndc[0] * scale[0] + translate[0]; - ctx->Current.RasterPos[1] = ndc[1] * scale[1] + translate[1]; - ctx->Current.RasterPos[2] = ndc[2] * scale[2] + translate[2]; - ctx->Current.RasterPos[3] = clip[3]; - - if (ctx->Transform.DepthClamp) { - ctx->Current.RasterPos[3] = CLAMP(ctx->Current.RasterPos[3], - ctx->ViewportArray[0].Near, - ctx->ViewportArray[0].Far); - } - - /* compute raster distance */ - if (ctx->Fog.FogCoordinateSource == GL_FOG_COORDINATE_EXT) - ctx->Current.RasterDistance = ctx->Current.Attrib[VERT_ATTRIB_FOG][0]; - else - ctx->Current.RasterDistance = - sqrtf( eye[0]*eye[0] + eye[1]*eye[1] + eye[2]*eye[2] ); - - /* compute transformed normal vector (for lighting or texgen) */ - if (ctx->_NeedEyeCoords) { - const GLfloat *inv = ctx->ModelviewMatrixStack.Top->inv; - TRANSFORM_NORMAL( eyenorm, objnorm, inv ); - norm = eyenorm; - } - else { - norm = objnorm; - } - - /* update raster color */ - if (ctx->Light.Enabled) { - /* lighting */ - shade_rastpos( ctx, vObj, norm, - ctx->Current.RasterColor, - ctx->Current.RasterSecondaryColor ); - } - else { - /* use current color */ - COPY_4FV(ctx->Current.RasterColor, - ctx->Current.Attrib[VERT_ATTRIB_COLOR0]); - COPY_4FV(ctx->Current.RasterSecondaryColor, - ctx->Current.Attrib[VERT_ATTRIB_COLOR1]); - } - - /* texture coords */ - { - GLuint u; - for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) { - GLfloat tc[4]; - COPY_4V(tc, ctx->Current.Attrib[VERT_ATTRIB_TEX0 + u]); - if (ctx->Texture.Unit[u].TexGenEnabled) { - compute_texgen(ctx, vObj, eye, norm, u, tc); - } - TRANSFORM_POINT(ctx->Current.RasterTexCoords[u], - ctx->TextureMatrixStack[u].Top->m, tc); - } - } - - ctx->Current.RasterPosValid = GL_TRUE; - } - - if (ctx->RenderMode == GL_SELECT) { - _mesa_update_hitflag( ctx, 
ctx->Current.RasterPos[2] ); - } -} diff --git a/src/mesa/vbo/vbo_context.h b/src/mesa/vbo/vbo_context.h index e6b9d890d5f..6293a8b9edc 100644 --- a/src/mesa/vbo/vbo_context.h +++ b/src/mesa/vbo/vbo_context.h @@ -207,7 +207,8 @@ vbo_compute_max_verts(const struct vbo_exec_context *exec) { unsigned n = (VBO_VERT_BUFFER_SIZE - exec->vtx.buffer_used) / (exec->vtx.vertex_size * sizeof(GLfloat)); - assert(n > 0); + if (n == 0) + return 0; /* Subtract one so we're always sure to have room for an extra * vertex for GL_LINE_LOOP -> GL_LINE_STRIP conversion. */ diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c index a23d5aa08aa..a614b26cae4 100644 --- a/src/mesa/vbo/vbo_exec_api.c +++ b/src/mesa/vbo/vbo_exec_api.c @@ -132,8 +132,7 @@ static void vbo_exec_wrap_buffers( struct vbo_exec_context *exec ) static void vbo_exec_vtx_wrap(struct vbo_exec_context *exec) { - fi_type *data = exec->vtx.copied.buffer; - GLuint i; + unsigned numComponents; /* Run pipeline on current vertices, copy wrapped vertices * to exec->vtx.copied. 
@@ -149,13 +148,12 @@ vbo_exec_vtx_wrap(struct vbo_exec_context *exec) */ assert(exec->vtx.max_vert - exec->vtx.vert_count > exec->vtx.copied.nr); - for (i = 0 ; i < exec->vtx.copied.nr ; i++) { - memcpy( exec->vtx.buffer_ptr, data, - exec->vtx.vertex_size * sizeof(GLfloat)); - exec->vtx.buffer_ptr += exec->vtx.vertex_size; - data += exec->vtx.vertex_size; - exec->vtx.vert_count++; - } + numComponents = exec->vtx.copied.nr * exec->vtx.vertex_size; + memcpy(exec->vtx.buffer_ptr, + exec->vtx.copied.buffer, + numComponents * sizeof(fi_type)); + exec->vtx.buffer_ptr += numComponents; + exec->vtx.vert_count += exec->vtx.copied.nr; exec->vtx.copied.nr = 0; } diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index 34d2c1d3d6b..e27fdd90532 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -1807,13 +1807,20 @@ vbo_initialize_exec_dispatch(const struct gl_context *ctx, SET_EvalMesh2(exec, vbo_exec_EvalMesh2); } - if (_mesa_is_desktop_gl(ctx)) { + if (ctx->API != API_OPENGLES && + ctx->Extensions.ARB_draw_elements_base_vertex) { SET_DrawElementsBaseVertex(exec, vbo_exec_DrawElementsBaseVertex); - SET_DrawRangeElementsBaseVertex(exec, vbo_exec_DrawRangeElementsBaseVertex); SET_MultiDrawElementsBaseVertex(exec, vbo_exec_MultiDrawElementsBaseVertex); + + if (_mesa_is_desktop_gl(ctx) || _mesa_is_gles3(ctx)) { + SET_DrawRangeElementsBaseVertex(exec, vbo_exec_DrawRangeElementsBaseVertex); + SET_DrawElementsInstancedBaseVertex(exec, vbo_exec_DrawElementsInstancedBaseVertex); + } + } + + if (_mesa_is_desktop_gl(ctx)) { SET_DrawArraysInstancedBaseInstance(exec, vbo_exec_DrawArraysInstancedBaseInstance); SET_DrawElementsInstancedBaseInstance(exec, vbo_exec_DrawElementsInstancedBaseInstance); - SET_DrawElementsInstancedBaseVertex(exec, vbo_exec_DrawElementsInstancedBaseVertex); SET_DrawElementsInstancedBaseVertexBaseInstance(exec, vbo_exec_DrawElementsInstancedBaseVertexBaseInstance); } diff --git a/src/mesa/vbo/vbo_save_api.c 
b/src/mesa/vbo/vbo_save_api.c index d49aa15b1b7..97a1dfdeb3f 100644 --- a/src/mesa/vbo/vbo_save_api.c +++ b/src/mesa/vbo/vbo_save_api.c @@ -601,8 +601,7 @@ static void _save_wrap_filled_vertex(struct gl_context *ctx) { struct vbo_save_context *save = &vbo_context(ctx)->save; - fi_type *data = save->copied.buffer; - GLuint i; + unsigned numComponents; /* Emit a glEnd to close off the last vertex list. */ @@ -612,12 +611,12 @@ _save_wrap_filled_vertex(struct gl_context *ctx) */ assert(save->max_vert - save->vert_count > save->copied.nr); - for (i = 0; i < save->copied.nr; i++) { - memcpy(save->buffer_ptr, data, save->vertex_size * sizeof(GLfloat)); - data += save->vertex_size; - save->buffer_ptr += save->vertex_size; - save->vert_count++; - } + numComponents = save->copied.nr * save->vertex_size; + memcpy(save->buffer_ptr, + save->copied.buffer, + numComponents * sizeof(fi_type)); + save->buffer_ptr += numComponents; + save->vert_count += save->copied.nr; } |