diff options
author | Jason Ekstrand <[email protected]> | 2019-02-21 10:32:01 -0600 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2019-02-28 16:58:20 -0600 |
commit | 95ae400abcda4f692fd31c9132462d904f939ec3 (patch) | |
tree | bf0407b0d37574601b858790144558075f80fa53 /src/intel/compiler | |
parent | aeaba24fcb98839be73a59f6bb74a39523d79a3d (diff) |
intel/schedule_instructions: Move some comments
Reviewed-by: Caio Marcelo de Oliveira Filho <[email protected]>
Diffstat (limited to 'src/intel/compiler')
-rw-r--r-- | src/intel/compiler/brw_schedule_instructions.cpp | 74 |
1 file changed, 37 insertions, 37 deletions
diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index 4a516223cf9..1d5ee56bd4a 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -368,44 +368,13 @@ schedule_node::set_latency_gen7(bool is_haswell) break; case SHADER_OPCODE_UNTYPED_ATOMIC: - /* Test code: - * mov(8) g112<1>ud 0x00000000ud { align1 WE_all 1Q }; - * mov(1) g112.7<1>ud g1.7<0,1,0>ud { align1 WE_all }; - * mov(8) g113<1>ud 0x00000000ud { align1 WE_normal 1Q }; - * send(8) g4<1>ud g112<8,8,1>ud - * data (38, 5, 6) mlen 2 rlen 1 { align1 WE_normal 1Q }; - * - * Running it 100 times as fragment shader on a 128x128 quad - * gives an average latency of 13867 cycles per atomic op, - * standard deviation 3%. Note that this is a rather - * pessimistic estimate, the actual latency in cases with few - * collisions between threads and favorable pipelining has been - * seen to be reduced by a factor of 100. - */ + /* See GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP */ latency = 14000; break; case SHADER_OPCODE_UNTYPED_SURFACE_READ: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: - /* Test code: - * mov(8) g112<1>UD 0x00000000UD { align1 WE_all 1Q }; - * mov(1) g112.7<1>UD g1.7<0,1,0>UD { align1 WE_all }; - * mov(8) g113<1>UD 0x00000000UD { align1 WE_normal 1Q }; - * send(8) g4<1>UD g112<8,8,1>UD - * data (38, 6, 5) mlen 2 rlen 1 { align1 WE_normal 1Q }; - * . - * . [repeats 8 times] - * . - * mov(8) g112<1>UD 0x00000000UD { align1 WE_all 1Q }; - * mov(1) g112.7<1>UD g1.7<0,1,0>UD { align1 WE_all }; - * mov(8) g113<1>UD 0x00000000UD { align1 WE_normal 1Q }; - * send(8) g4<1>UD g112<8,8,1>UD - * data (38, 6, 5) mlen 2 rlen 1 { align1 WE_normal 1Q }; - * - * Running it 100 times as fragment shader on a 128x128 quad - * gives an average latency of 583 cycles per surface read, - * standard deviation 0.9%. - */ + /* See also GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ */ latency = is_haswell ? 
300 : 600; break; @@ -460,13 +429,44 @@ schedule_node::set_latency_gen7(bool is_haswell) case GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ: case GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE: - /* See also SHADER_OPCODE_UNTYPED_SURFACE_READ */ + /* Test code: + * mov(8) g112<1>UD 0x00000000UD { align1 WE_all 1Q }; + * mov(1) g112.7<1>UD g1.7<0,1,0>UD { align1 WE_all }; + * mov(8) g113<1>UD 0x00000000UD { align1 WE_normal 1Q }; + * send(8) g4<1>UD g112<8,8,1>UD + * data (38, 6, 5) mlen 2 rlen 1 { align1 WE_normal 1Q }; + * . + * . [repeats 8 times] + * . + * mov(8) g112<1>UD 0x00000000UD { align1 WE_all 1Q }; + * mov(1) g112.7<1>UD g1.7<0,1,0>UD { align1 WE_all }; + * mov(8) g113<1>UD 0x00000000UD { align1 WE_normal 1Q }; + * send(8) g4<1>UD g112<8,8,1>UD + * data (38, 6, 5) mlen 2 rlen 1 { align1 WE_normal 1Q }; + * + * Running it 100 times as fragment shader on a 128x128 quad + * gives an average latency of 583 cycles per surface read, + * standard deviation 0.9%. + */ assert(!is_haswell); latency = 600; break; case GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP: - /* See also SHADER_OPCODE_UNTYPED_ATOMIC */ + /* Test code: + * mov(8) g112<1>ud 0x00000000ud { align1 WE_all 1Q }; + * mov(1) g112.7<1>ud g1.7<0,1,0>ud { align1 WE_all }; + * mov(8) g113<1>ud 0x00000000ud { align1 WE_normal 1Q }; + * send(8) g4<1>ud g112<8,8,1>ud + * data (38, 5, 6) mlen 2 rlen 1 { align1 WE_normal 1Q }; + * + * Running it 100 times as fragment shader on a 128x128 quad + * gives an average latency of 13867 cycles per atomic op, + * standard deviation 3%. Note that this is a rather + * pessimistic estimate, the actual latency in cases with few + * collisions between threads and favorable pipelining has been + * seen to be reduced by a factor of 100. 
+ */ assert(!is_haswell); latency = 14000; break; @@ -486,7 +486,7 @@ schedule_node::set_latency_gen7(bool is_haswell) case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ: case GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE: case GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ: - /* See also SHADER_OPCODE_UNTYPED_SURFACE_READ */ + /* See also GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ */ latency = 300; break; @@ -497,7 +497,7 @@ schedule_node::set_latency_gen7(bool is_haswell) case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP: case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP: case GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP: - /* See also SHADER_OPCODE_UNTYPED_ATOMIC */ + /* See also GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP */ latency = 14000; break; |