diff options
author | Axel Davy <[email protected]> | 2019-01-20 22:55:01 +0100 |
---|---|---|
committer | Axel Davy <[email protected]> | 2019-04-30 19:18:52 +0200 |
commit | 7761cda686d8cb59c39d363ed5b6d95d1591bb66 (patch) | |
tree | 5f71ec87cecf9867646dc5e2f823c9797b341359 /src/gallium/state_trackers | |
parent | db404507b4adc39a2f67fcd3c28c80f41ad2d46a (diff) |
st/nine: Prepare constant compaction in nine_shader
When indirect addressing is not used, we know exactly
which constants are accessed, and thus can
have them located in consecutive slots.
We thus parse the shader again with a slot map
for compaction.
This patch contains the work inside nine_shader.c for this
path, but it needs some other commits to work, and thus
is not enabled yet by this commit.
Signed-off-by: Axel Davy <[email protected]>
Diffstat (limited to 'src/gallium/state_trackers')
-rw-r--r-- | src/gallium/state_trackers/nine/nine_shader.c | 144 | ||||
-rw-r--r-- | src/gallium/state_trackers/nine/nine_shader.h | 2 |
2 files changed, 118 insertions, 28 deletions
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index b651f065585..130d63b61dd 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -509,6 +509,7 @@ struct shader_translator unsigned num_lconstb; boolean slots_used[NINE_MAX_CONST_ALL]; + unsigned *slot_map; unsigned num_slots; boolean indirect_const_access; @@ -556,6 +557,9 @@ nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex, static struct ureg_src nine_float_constant_src(struct shader_translator *tx, int idx) { struct ureg_src src; + + if (tx->slot_map) + idx = tx->slot_map[idx]; /* vswp constant handling: we use two buffers * to fit all the float constants. The special handling * doesn't need to be elsewhere, because all the instructions @@ -589,6 +593,8 @@ static struct ureg_src nine_integer_constant_src(struct shader_translator *tx, i src = ureg_src_dimension(src, 2); } else { unsigned slot_idx = tx->info->const_i_base + idx; + if (tx->slot_map) + slot_idx = tx->slot_map[slot_idx]; src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx); src = ureg_src_dimension(src, 0); tx->slots_used[slot_idx] = TRUE; @@ -615,6 +621,8 @@ static struct ureg_src nine_boolean_constant_src(struct shader_translator *tx, i src = ureg_src_dimension(src, 3); } else { unsigned slot_idx = tx->info->const_b_base + r; + if (tx->slot_map) + slot_idx = tx->slot_map[slot_idx]; src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx); src = ureg_src_dimension(src, 0); tx->slots_used[slot_idx] = TRUE; @@ -3656,6 +3664,8 @@ tx_ctor(struct shader_translator *tx, struct pipe_screen *screen, struct nine_sh static void tx_dtor(struct shader_translator *tx) { + if (tx->slot_map) + FREE(tx->slot_map); if (tx->num_inst_labels) FREE(tx->inst_labels); FREE(tx->lconstf); @@ -3736,6 +3746,48 @@ shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col) ureg_MOV(ureg, ureg_writemask(oCol0, 
TGSI_WRITEMASK_W), src_col); } +static void parse_shader(struct shader_translator *tx) +{ + struct nine_shader_info *info = tx->info; + + while (!sm1_parse_eof(tx) && !tx->failure) + sm1_parse_instruction(tx); + tx->parse++; /* for byte_size */ + + if (tx->failure) + return; + + if (IS_PS && tx->version.major < 3) { + if (tx->version.major < 2) { + assert(tx->num_temp); /* there must be color output */ + info->rt_mask |= 0x1; + shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0])); + } else { + shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0])); + } + } + + if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) { + tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0); + ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f)); + } + + if (info->position_t) + ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE); + + if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) { + struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0); + ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min)); + ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max)); + info->point_size = TRUE; + } + + if (info->process_vertices) + shader_add_vs_viewport_transform(tx); + + ureg_END(tx->ureg); +} + HRESULT nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe) { @@ -3743,6 +3795,7 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, HRESULT hr = D3D_OK; const unsigned processor = info->type; struct pipe_screen *screen = info->process_vertices ? 
device->screen_sw : device->screen; + unsigned *const_ranges = NULL; user_assert(processor != ~0, D3DERR_INVALIDCALL); @@ -3771,9 +3824,7 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS", tx->version.major, tx->version.minor); - while (!sm1_parse_eof(tx) && !tx->failure) - sm1_parse_instruction(tx); - tx->parse++; /* for byte_size */ + parse_shader(tx); if (tx->failure) { /* For VS shaders, we print the warning later, @@ -3785,36 +3836,69 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, goto out; } - if (IS_PS && tx->version.major < 3) { - if (tx->version.major < 2) { - assert(tx->num_temp); /* there must be color output */ - info->rt_mask |= 0x1; - shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0])); - } else { - shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0])); - } - } + /* Recompile after compacting constant slots if possible */ + if (!tx->indirect_const_access && !info->swvp_on && tx->num_slots > 0 && 0) { + unsigned *slot_map; + unsigned c; + int i, j, num_ranges, prev; - if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) { - tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0); - ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f)); - } + DBG("Recompiling shader for constant compaction\n"); + ureg_destroy(tx->ureg); - if (info->position_t) - ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE); + if (tx->num_inst_labels) + FREE(tx->inst_labels); + FREE(tx->lconstf); + FREE(tx->regs.r); + + num_ranges = 0; + prev = -2; + for (i = 0; i < NINE_MAX_CONST_ALL; i++) { + if (tx->slots_used[i]) { + if (prev != i - 1) + num_ranges++; + prev = i; + } + } + slot_map = MALLOC(NINE_MAX_CONST_ALL * sizeof(unsigned)); + const_ranges = CALLOC(num_ranges + 1, 2 * sizeof(unsigned)); /* ranges stop when last is of size 0 */ + 
if (!slot_map || !const_ranges) { + hr = E_OUTOFMEMORY; + goto out; + } + c = 0; + j = -1; + prev = -2; + for (i = 0; i < NINE_MAX_CONST_ALL; i++) { + if (tx->slots_used[i]) { + if (prev != i - 1) + j++; + /* Initialize first slot of the range */ + if (!const_ranges[2*j+1]) + const_ranges[2*j] = i; + const_ranges[2*j+1]++; + prev = i; + slot_map[i] = c++; + } + } - if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) { - struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0); - ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min)); - ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max)); - info->point_size = TRUE; + if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) { + hr = E_OUTOFMEMORY; + goto out; + } + tx->slot_map = slot_map; + parse_shader(tx); + assert(!tx->failure); +#if !defined(NDEBUG) + i = 0; + j = 0; + while (const_ranges[i*2+1] != 0) { + j += const_ranges[i*2+1]; + i++; + } + assert(j == tx->num_slots); +#endif } - if (info->process_vertices) - shader_add_vs_viewport_transform(tx); - - ureg_END(tx->ureg); - /* record local constants */ if (tx->num_lconstf && tx->indirect_const_access) { struct nine_range *ranges; @@ -3920,8 +4004,12 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, goto out; } + info->const_ranges = const_ranges; + const_ranges = NULL; info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD); out: + if (const_ranges) + FREE(const_ranges); tx_dtor(tx); return hr; } diff --git a/src/gallium/state_trackers/nine/nine_shader.h b/src/gallium/state_trackers/nine/nine_shader.h index a2eb91d0d8c..6eb97127b9c 100644 --- a/src/gallium/state_trackers/nine/nine_shader.h +++ b/src/gallium/state_trackers/nine/nine_shader.h @@ -82,6 +82,8 @@ struct nine_shader_info unsigned const_int_slots; unsigned const_bool_slots; + unsigned *const_ranges; + struct nine_lconstf lconstf; /* out, NOTE: members to be free'd by 
user */ uint8_t bumpenvmat_needed; |