diff options
author | Chris Wilson <[email protected]> | 2017-07-21 16:36:52 +0100 |
---|---|---|
committer | Kenneth Graunke <[email protected]> | 2017-08-04 10:26:37 -0700 |
commit | 6c530ad1160518d9f035da4aba5a9d4df7369972 (patch) | |
tree | 8b144bdd9332dacd3466ba17211ffe381ec7c84e /src/mesa/drivers/dri/i965/hsw_queryobj.c | |
parent | 2aacd22c0b7935b40911593c3b01f0b8d12eddd4 (diff) |
i965: Reduce passing 2x32b of reloc_domains to 2 bits
The kernel only cares about whether the object is to be written to or
not, only reduces (reloc.read_domains, reloc.write_domain) down to just
!!reloc.write_domain. When we use NO_RELOC, the kernel doesn't even read
those relocs and instead userspace has to pass that information in the
execobject.flags. We can simplify our reloc api by also removing the
unused read/write domains and only pass the resultant flags.
The caveat to the above are when we need to make the kernel aware that
certain objects need to take into account different work arounds.
Previously, this was done using the magic (INSTRUCTION, INSTRUCTION)
reloc domains. NO_RELOC requires this to be passed in the execobject
flags as well, and now we push that up the callstack.
The API is more compact, more expressive of what happens underneath, but
unfortunately requires more knowledge of the system at the point of use.
Conversely it also means that knowledge is specific and not generally
applied and so not overused.
text data bss dec hex filename
8502991 356912 424944 9284847 8dacef lib/i965_dri.so (before)
8500455 356912 424944 9282311 8da307 lib/i965_dri.so (after)
v2: (by Ken) Rebase.
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/i965/hsw_queryobj.c')
-rw-r--r-- | src/mesa/drivers/dri/i965/hsw_queryobj.c | 43 |
1 files changed, 6 insertions, 37 deletions
diff --git a/src/mesa/drivers/dri/i965/hsw_queryobj.c b/src/mesa/drivers/dri/i965/hsw_queryobj.c index b81ab3b6f88..9dc3b3de865 100644 --- a/src/mesa/drivers/dri/i965/hsw_queryobj.c +++ b/src/mesa/drivers/dri/i965/hsw_queryobj.c @@ -193,36 +193,16 @@ load_overflow_data_to_cs_gprs(struct brw_context *brw, { int offset = idx * sizeof(uint64_t) * 4; - brw_load_register_mem64(brw, - HSW_CS_GPR(1), - query->bo, - I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, - offset); + brw_load_register_mem64(brw, HSW_CS_GPR(1), query->bo, offset); offset += sizeof(uint64_t); - brw_load_register_mem64(brw, - HSW_CS_GPR(2), - query->bo, - I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, - offset); + brw_load_register_mem64(brw, HSW_CS_GPR(2), query->bo, offset); offset += sizeof(uint64_t); - brw_load_register_mem64(brw, - HSW_CS_GPR(3), - query->bo, - I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, - offset); + brw_load_register_mem64(brw, HSW_CS_GPR(3), query->bo, offset); offset += sizeof(uint64_t); - brw_load_register_mem64(brw, - HSW_CS_GPR(4), - query->bo, - I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, - offset); + brw_load_register_mem64(brw, HSW_CS_GPR(4), query->bo, offset); } /* @@ -302,8 +282,6 @@ hsw_result_to_gpr0(struct gl_context *ctx, struct brw_query_object *query, brw_load_register_mem64(brw, HSW_CS_GPR(0), query->bo, - I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, 2 * sizeof(uint64_t)); return; } @@ -321,8 +299,6 @@ hsw_result_to_gpr0(struct gl_context *ctx, struct brw_query_object *query, brw_load_register_mem64(brw, HSW_CS_GPR(0), query->bo, - I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, 0 * sizeof(uint64_t)); } else if (query->Base.Target == GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB || query->Base.Target == GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB) { @@ -333,14 +309,10 @@ hsw_result_to_gpr0(struct gl_context *ctx, struct brw_query_object *query, brw_load_register_mem64(brw, HSW_CS_GPR(1), query->bo, - I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, 0 * sizeof(uint64_t)); brw_load_register_mem64(brw, HSW_CS_GPR(2), query->bo, - I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, 1 * sizeof(uint64_t)); BEGIN_BATCH(5); @@ -417,7 +389,6 @@ set_predicate(struct brw_context *brw, struct brw_bo *query_bo) /* Load query availability into SRC0 */ brw_load_register_mem64(brw, MI_PREDICATE_SRC0, query_bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, 2 * sizeof(uint64_t)); /* predicate = !(query_availability == 0); */ @@ -450,11 +421,9 @@ store_query_result_reg(struct brw_context *brw, struct brw_bo *bo, (cmd_size - 2)); OUT_BATCH(reg + 4 * i); if (brw->gen >= 8) { - OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, offset + 4 * i); + OUT_RELOC64(bo, RELOC_WRITE, offset + 4 * i); } else { - OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION, offset + 4 * i); + OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, offset + 4 * i); } } ADVANCE_BATCH(); |