summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
author Axel Davy <[email protected]> 2019-01-20 22:55:01 +0100
committer Axel Davy <[email protected]> 2019-04-30 19:18:52 +0200
commit 7761cda686d8cb59c39d363ed5b6d95d1591bb66 (patch)
tree 5f71ec87cecf9867646dc5e2f823c9797b341359 /src/gallium
parent db404507b4adc39a2f67fcd3c28c80f41ad2d46a (diff)
st/nine: Prepare constant compaction in nine_shader
When indirect addressing is not used, we know exactly which constants are accessed, and thus can have them located in consecutive slots. We thus parse the shader again with a slot map for compaction. This patch contains the work inside nine_shader.c for this path, but it needs some other commits to work, and thus is not enabled yet by this commit. Signed-off-by: Axel Davy <[email protected]>
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/state_trackers/nine/nine_shader.c 144
-rw-r--r-- src/gallium/state_trackers/nine/nine_shader.h 2
2 files changed, 118 insertions, 28 deletions
diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c
index b651f065585..130d63b61dd 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -509,6 +509,7 @@ struct shader_translator
unsigned num_lconstb;
boolean slots_used[NINE_MAX_CONST_ALL];
+ unsigned *slot_map;
unsigned num_slots;
boolean indirect_const_access;
@@ -556,6 +557,9 @@ nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
static struct ureg_src nine_float_constant_src(struct shader_translator *tx, int idx)
{
struct ureg_src src;
+
+ if (tx->slot_map)
+ idx = tx->slot_map[idx];
/* vswp constant handling: we use two buffers
* to fit all the float constants. The special handling
* doesn't need to be elsewhere, because all the instructions
@@ -589,6 +593,8 @@ static struct ureg_src nine_integer_constant_src(struct shader_translator *tx, i
src = ureg_src_dimension(src, 2);
} else {
unsigned slot_idx = tx->info->const_i_base + idx;
+ if (tx->slot_map)
+ slot_idx = tx->slot_map[slot_idx];
src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
src = ureg_src_dimension(src, 0);
tx->slots_used[slot_idx] = TRUE;
@@ -615,6 +621,8 @@ static struct ureg_src nine_boolean_constant_src(struct shader_translator *tx, i
src = ureg_src_dimension(src, 3);
} else {
unsigned slot_idx = tx->info->const_b_base + r;
+ if (tx->slot_map)
+ slot_idx = tx->slot_map[slot_idx];
src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
src = ureg_src_dimension(src, 0);
tx->slots_used[slot_idx] = TRUE;
@@ -3656,6 +3664,8 @@ tx_ctor(struct shader_translator *tx, struct pipe_screen *screen, struct nine_sh
static void
tx_dtor(struct shader_translator *tx)
{
+ if (tx->slot_map)
+ FREE(tx->slot_map);
if (tx->num_inst_labels)
FREE(tx->inst_labels);
FREE(tx->lconstf);
@@ -3736,6 +3746,48 @@ shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
}
+static void parse_shader(struct shader_translator *tx)
+{
+ struct nine_shader_info *info = tx->info;
+
+ while (!sm1_parse_eof(tx) && !tx->failure)
+ sm1_parse_instruction(tx);
+ tx->parse++; /* for byte_size */
+
+ if (tx->failure)
+ return;
+
+ if (IS_PS && tx->version.major < 3) {
+ if (tx->version.major < 2) {
+ assert(tx->num_temp); /* there must be color output */
+ info->rt_mask |= 0x1;
+ shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
+ } else {
+ shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
+ }
+ }
+
+ if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
+ tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
+ ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
+ }
+
+ if (info->position_t)
+ ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
+
+ if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
+ struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
+ ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
+ ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
+ info->point_size = TRUE;
+ }
+
+ if (info->process_vertices)
+ shader_add_vs_viewport_transform(tx);
+
+ ureg_END(tx->ureg);
+}
+
HRESULT
nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe)
{
@@ -3743,6 +3795,7 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info,
HRESULT hr = D3D_OK;
const unsigned processor = info->type;
struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
+ unsigned *const_ranges = NULL;
user_assert(processor != ~0, D3DERR_INVALIDCALL);
@@ -3771,9 +3824,7 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info,
DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
tx->version.major, tx->version.minor);
- while (!sm1_parse_eof(tx) && !tx->failure)
- sm1_parse_instruction(tx);
- tx->parse++; /* for byte_size */
+ parse_shader(tx);
if (tx->failure) {
/* For VS shaders, we print the warning later,
@@ -3785,36 +3836,69 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info,
goto out;
}
- if (IS_PS && tx->version.major < 3) {
- if (tx->version.major < 2) {
- assert(tx->num_temp); /* there must be color output */
- info->rt_mask |= 0x1;
- shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
- } else {
- shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
- }
- }
+ /* Recompile after compacting constant slots if possible */
+ if (!tx->indirect_const_access && !info->swvp_on && tx->num_slots > 0 && 0) {
+ unsigned *slot_map;
+ unsigned c;
+ int i, j, num_ranges, prev;
- if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
- tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
- ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
- }
+ DBG("Recompiling shader for constant compaction\n");
+ ureg_destroy(tx->ureg);
- if (info->position_t)
- ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
+ if (tx->num_inst_labels)
+ FREE(tx->inst_labels);
+ FREE(tx->lconstf);
+ FREE(tx->regs.r);
+
+ num_ranges = 0;
+ prev = -2;
+ for (i = 0; i < NINE_MAX_CONST_ALL; i++) {
+ if (tx->slots_used[i]) {
+ if (prev != i - 1)
+ num_ranges++;
+ prev = i;
+ }
+ }
+ slot_map = MALLOC(NINE_MAX_CONST_ALL * sizeof(unsigned));
+ const_ranges = CALLOC(num_ranges + 1, 2 * sizeof(unsigned)); /* ranges stop when last is of size 0 */
+ if (!slot_map || !const_ranges) {
+ hr = E_OUTOFMEMORY;
+ goto out;
+ }
+ c = 0;
+ j = -1;
+ prev = -2;
+ for (i = 0; i < NINE_MAX_CONST_ALL; i++) {
+ if (tx->slots_used[i]) {
+ if (prev != i - 1)
+ j++;
+ /* Initialize first slot of the range */
+ if (!const_ranges[2*j+1])
+ const_ranges[2*j] = i;
+ const_ranges[2*j+1]++;
+ prev = i;
+ slot_map[i] = c++;
+ }
+ }
- if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
- struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
- ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
- ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
- info->point_size = TRUE;
+ if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
+ hr = E_OUTOFMEMORY;
+ goto out;
+ }
+ tx->slot_map = slot_map;
+ parse_shader(tx);
+ assert(!tx->failure);
+#if !defined(NDEBUG)
+ i = 0;
+ j = 0;
+ while (const_ranges[i*2+1] != 0) {
+ j += const_ranges[i*2+1];
+ i++;
+ }
+ assert(j == tx->num_slots);
+#endif
}
- if (info->process_vertices)
- shader_add_vs_viewport_transform(tx);
-
- ureg_END(tx->ureg);
-
/* record local constants */
if (tx->num_lconstf && tx->indirect_const_access) {
struct nine_range *ranges;
@@ -3920,8 +4004,12 @@ nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info,
goto out;
}
+ info->const_ranges = const_ranges;
+ const_ranges = NULL;
info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
out:
+ if (const_ranges)
+ FREE(const_ranges);
tx_dtor(tx);
return hr;
}
diff --git a/src/gallium/state_trackers/nine/nine_shader.h b/src/gallium/state_trackers/nine/nine_shader.h
index a2eb91d0d8c..6eb97127b9c 100644
--- a/src/gallium/state_trackers/nine/nine_shader.h
+++ b/src/gallium/state_trackers/nine/nine_shader.h
@@ -82,6 +82,8 @@ struct nine_shader_info
unsigned const_int_slots;
unsigned const_bool_slots;
+ unsigned *const_ranges;
+
struct nine_lconstf lconstf; /* out, NOTE: members to be free'd by user */
uint8_t bumpenvmat_needed;