diff options
Diffstat (limited to 'src/gallium/drivers')
112 files changed, 7006 insertions, 2077 deletions
diff --git a/src/gallium/drivers/ddebug/dd_context.c b/src/gallium/drivers/ddebug/dd_context.c index 3ae7764ff3f..9dfaa0af289 100644 --- a/src/gallium/drivers/ddebug/dd_context.c +++ b/src/gallium/drivers/ddebug/dd_context.c @@ -415,30 +415,6 @@ dd_context_sampler_view_destroy(struct pipe_context *_pipe, pipe->sampler_view_destroy(pipe, view); } -static struct pipe_image_view * -dd_context_create_image_view(struct pipe_context *_pipe, - struct pipe_resource *resource, - const struct pipe_image_view *templ) -{ - struct pipe_context *pipe = dd_context(_pipe)->pipe; - struct pipe_image_view *view = - pipe->create_image_view(pipe, resource, templ); - - if (!view) - return NULL; - view->context = _pipe; - return view; -} - -static void -dd_context_image_view_destroy(struct pipe_context *_pipe, - struct pipe_image_view *view) -{ - struct pipe_context *pipe = dd_context(_pipe)->pipe; - - pipe->image_view_destroy(pipe, view); -} - static struct pipe_stream_output_target * dd_context_create_stream_output_target(struct pipe_context *_pipe, struct pipe_resource *res, @@ -486,7 +462,7 @@ dd_context_set_sampler_views(struct pipe_context *_pipe, unsigned shader, static void dd_context_set_shader_images(struct pipe_context *_pipe, unsigned shader, unsigned start, unsigned num, - struct pipe_image_view **views) + struct pipe_image_view *views) { struct dd_context *dctx = dd_context(_pipe); struct pipe_context *pipe = dctx->pipe; @@ -744,8 +720,6 @@ dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe) CTX_INIT(sampler_view_destroy); CTX_INIT(create_surface); CTX_INIT(surface_destroy); - CTX_INIT(create_image_view); - CTX_INIT(image_view_destroy); CTX_INIT(transfer_map); CTX_INIT(transfer_flush_region); CTX_INIT(transfer_unmap); diff --git a/src/gallium/drivers/ddebug/dd_pipe.h b/src/gallium/drivers/ddebug/dd_pipe.h index 80098dcb644..c9bbd569abe 100644 --- a/src/gallium/drivers/ddebug/dd_pipe.h +++ b/src/gallium/drivers/ddebug/dd_pipe.h @@ -94,7 +94,7 @@ struct 
dd_context struct pipe_constant_buffer constant_buffers[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS]; struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; struct dd_state *sampler_states[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS]; - struct pipe_image_view *shader_images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES]; + struct pipe_image_view shader_images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES]; struct pipe_shader_buffer shader_buffers[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS]; struct dd_state *velems; diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h index d23111352b7..71ee55054d3 100644 --- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h +++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h @@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 
21:20:31) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) Copyright (C) 2013-2015 by the following authors: - Rob Clark <[email protected]> (robclark) +- Ilia Mirkin <[email protected]> (imirkin) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h index c4f253b836c..c6286a1f290 100644 --- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h +++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h @@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25) +- 
/home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) -Copyright (C) 2013-2015 by the following authors: +Copyright (C) 2013-2016 by the following authors: - Rob Clark <[email protected]> (robclark) +- Ilia Mirkin <[email protected]> (imirkin) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -255,11 +256,273 @@ enum a3xx_color_fmt { RB_R32G32B32A32_UINT = 59, }; +enum a3xx_cp_perfcounter_select { + CP_ALWAYS_COUNT = 0, + CP_AHB_PFPTRANS_WAIT = 3, + CP_AHB_NRTTRANS_WAIT = 6, + CP_CSF_NRT_READ_WAIT = 8, + CP_CSF_I1_FIFO_FULL = 9, + CP_CSF_I2_FIFO_FULL = 10, + CP_CSF_ST_FIFO_FULL = 11, + CP_RESERVED_12 = 12, + CP_CSF_RING_ROQ_FULL = 13, + CP_CSF_I1_ROQ_FULL = 14, + CP_CSF_I2_ROQ_FULL = 15, + CP_CSF_ST_ROQ_FULL = 16, + CP_RESERVED_17 = 17, + CP_MIU_TAG_MEM_FULL = 18, + CP_MIU_NRT_WRITE_STALLED = 22, + CP_MIU_NRT_READ_STALLED = 23, + CP_ME_REGS_RB_DONE_FIFO_FULL = 26, + CP_ME_REGS_VS_EVENT_FIFO_FULL = 27, + CP_ME_REGS_PS_EVENT_FIFO_FULL = 28, + CP_ME_REGS_CF_EVENT_FIFO_FULL = 29, + CP_ME_MICRO_RB_STARVED = 30, + CP_AHB_RBBM_DWORD_SENT = 40, + CP_ME_BUSY_CLOCKS = 41, + CP_ME_WAIT_CONTEXT_AVAIL = 42, + CP_PFP_TYPE0_PACKET = 43, + CP_PFP_TYPE3_PACKET = 44, + CP_CSF_RB_WPTR_NEQ_RPTR = 45, + CP_CSF_I1_SIZE_NEQ_ZERO = 46, + CP_CSF_I2_SIZE_NEQ_ZERO = 47, + CP_CSF_RBI1I2_FETCHING = 48, +}; + +enum a3xx_gras_tse_perfcounter_select { + GRAS_TSEPERF_INPUT_PRIM = 0, + GRAS_TSEPERF_INPUT_NULL_PRIM = 1, + GRAS_TSEPERF_TRIVAL_REJ_PRIM = 2, + GRAS_TSEPERF_CLIPPED_PRIM = 3, + GRAS_TSEPERF_NEW_PRIM = 4, + GRAS_TSEPERF_ZERO_AREA_PRIM = 5, + 
GRAS_TSEPERF_FACENESS_CULLED_PRIM = 6, + GRAS_TSEPERF_ZERO_PIXEL_PRIM = 7, + GRAS_TSEPERF_OUTPUT_NULL_PRIM = 8, + GRAS_TSEPERF_OUTPUT_VISIBLE_PRIM = 9, + GRAS_TSEPERF_PRE_CLIP_PRIM = 10, + GRAS_TSEPERF_POST_CLIP_PRIM = 11, + GRAS_TSEPERF_WORKING_CYCLES = 12, + GRAS_TSEPERF_PC_STARVE = 13, + GRAS_TSERASPERF_STALL = 14, +}; + +enum a3xx_gras_ras_perfcounter_select { + GRAS_RASPERF_16X16_TILES = 0, + GRAS_RASPERF_8X8_TILES = 1, + GRAS_RASPERF_4X4_TILES = 2, + GRAS_RASPERF_WORKING_CYCLES = 3, + GRAS_RASPERF_STALL_CYCLES_BY_RB = 4, + GRAS_RASPERF_STALL_CYCLES_BY_VSC = 5, + GRAS_RASPERF_STARVE_CYCLES_BY_TSE = 6, +}; + +enum a3xx_hlsq_perfcounter_select { + HLSQ_PERF_SP_VS_CONSTANT = 0, + HLSQ_PERF_SP_VS_INSTRUCTIONS = 1, + HLSQ_PERF_SP_FS_CONSTANT = 2, + HLSQ_PERF_SP_FS_INSTRUCTIONS = 3, + HLSQ_PERF_TP_STATE = 4, + HLSQ_PERF_QUADS = 5, + HLSQ_PERF_PIXELS = 6, + HLSQ_PERF_VERTICES = 7, + HLSQ_PERF_FS8_THREADS = 8, + HLSQ_PERF_FS16_THREADS = 9, + HLSQ_PERF_FS32_THREADS = 10, + HLSQ_PERF_VS8_THREADS = 11, + HLSQ_PERF_VS16_THREADS = 12, + HLSQ_PERF_SP_VS_DATA_BYTES = 13, + HLSQ_PERF_SP_FS_DATA_BYTES = 14, + HLSQ_PERF_ACTIVE_CYCLES = 15, + HLSQ_PERF_STALL_CYCLES_SP_STATE = 16, + HLSQ_PERF_STALL_CYCLES_SP_VS = 17, + HLSQ_PERF_STALL_CYCLES_SP_FS = 18, + HLSQ_PERF_STALL_CYCLES_UCHE = 19, + HLSQ_PERF_RBBM_LOAD_CYCLES = 20, + HLSQ_PERF_DI_TO_VS_START_SP0 = 21, + HLSQ_PERF_DI_TO_FS_START_SP0 = 22, + HLSQ_PERF_VS_START_TO_DONE_SP0 = 23, + HLSQ_PERF_FS_START_TO_DONE_SP0 = 24, + HLSQ_PERF_SP_STATE_COPY_CYCLES_VS = 25, + HLSQ_PERF_SP_STATE_COPY_CYCLES_FS = 26, + HLSQ_PERF_UCHE_LATENCY_CYCLES = 27, + HLSQ_PERF_UCHE_LATENCY_COUNT = 28, +}; + +enum a3xx_pc_perfcounter_select { + PC_PCPERF_VISIBILITY_STREAMS = 0, + PC_PCPERF_TOTAL_INSTANCES = 1, + PC_PCPERF_PRIMITIVES_PC_VPC = 2, + PC_PCPERF_PRIMITIVES_KILLED_BY_VS = 3, + PC_PCPERF_PRIMITIVES_VISIBLE_BY_VS = 4, + PC_PCPERF_DRAWCALLS_KILLED_BY_VS = 5, + PC_PCPERF_DRAWCALLS_VISIBLE_BY_VS = 6, + PC_PCPERF_VERTICES_TO_VFD = 7, + 
PC_PCPERF_REUSED_VERTICES = 8, + PC_PCPERF_CYCLES_STALLED_BY_VFD = 9, + PC_PCPERF_CYCLES_STALLED_BY_TSE = 10, + PC_PCPERF_CYCLES_STALLED_BY_VBIF = 11, + PC_PCPERF_CYCLES_IS_WORKING = 12, +}; + +enum a3xx_rb_perfcounter_select { + RB_RBPERF_ACTIVE_CYCLES_ANY = 0, + RB_RBPERF_ACTIVE_CYCLES_ALL = 1, + RB_RBPERF_STARVE_CYCLES_BY_SP = 2, + RB_RBPERF_STARVE_CYCLES_BY_RAS = 3, + RB_RBPERF_STARVE_CYCLES_BY_MARB = 4, + RB_RBPERF_STALL_CYCLES_BY_MARB = 5, + RB_RBPERF_STALL_CYCLES_BY_HLSQ = 6, + RB_RBPERF_RB_MARB_DATA = 7, + RB_RBPERF_SP_RB_QUAD = 8, + RB_RBPERF_RAS_EARLY_Z_QUADS = 9, + RB_RBPERF_GMEM_CH0_READ = 10, + RB_RBPERF_GMEM_CH1_READ = 11, + RB_RBPERF_GMEM_CH0_WRITE = 12, + RB_RBPERF_GMEM_CH1_WRITE = 13, + RB_RBPERF_CP_CONTEXT_DONE = 14, + RB_RBPERF_CP_CACHE_FLUSH = 15, + RB_RBPERF_CP_ZPASS_DONE = 16, +}; + +enum a3xx_rbbm_perfcounter_select { + RBBM_ALAWYS_ON = 0, + RBBM_VBIF_BUSY = 1, + RBBM_TSE_BUSY = 2, + RBBM_RAS_BUSY = 3, + RBBM_PC_DCALL_BUSY = 4, + RBBM_PC_VSD_BUSY = 5, + RBBM_VFD_BUSY = 6, + RBBM_VPC_BUSY = 7, + RBBM_UCHE_BUSY = 8, + RBBM_VSC_BUSY = 9, + RBBM_HLSQ_BUSY = 10, + RBBM_ANY_RB_BUSY = 11, + RBBM_ANY_TEX_BUSY = 12, + RBBM_ANY_USP_BUSY = 13, + RBBM_ANY_MARB_BUSY = 14, + RBBM_ANY_ARB_BUSY = 15, + RBBM_AHB_STATUS_BUSY = 16, + RBBM_AHB_STATUS_STALLED = 17, + RBBM_AHB_STATUS_TXFR = 18, + RBBM_AHB_STATUS_TXFR_SPLIT = 19, + RBBM_AHB_STATUS_TXFR_ERROR = 20, + RBBM_AHB_STATUS_LONG_STALL = 21, + RBBM_RBBM_STATUS_MASKED = 22, +}; + enum a3xx_sp_perfcounter_select { + SP_LM_LOAD_INSTRUCTIONS = 0, + SP_LM_STORE_INSTRUCTIONS = 1, + SP_LM_ATOMICS = 2, + SP_UCHE_LOAD_INSTRUCTIONS = 3, + SP_UCHE_STORE_INSTRUCTIONS = 4, + SP_UCHE_ATOMICS = 5, + SP_VS_TEX_INSTRUCTIONS = 6, + SP_VS_CFLOW_INSTRUCTIONS = 7, + SP_VS_EFU_INSTRUCTIONS = 8, + SP_VS_FULL_ALU_INSTRUCTIONS = 9, + SP_VS_HALF_ALU_INSTRUCTIONS = 10, + SP_FS_TEX_INSTRUCTIONS = 11, SP_FS_CFLOW_INSTRUCTIONS = 12, + SP_FS_EFU_INSTRUCTIONS = 13, SP_FS_FULL_ALU_INSTRUCTIONS = 14, - SP0_ICL1_MISSES = 26, + 
SP_FS_HALF_ALU_INSTRUCTIONS = 15, + SP_FS_BARY_INSTRUCTIONS = 16, + SP_VS_INSTRUCTIONS = 17, + SP_FS_INSTRUCTIONS = 18, + SP_ADDR_LOCK_COUNT = 19, + SP_UCHE_READ_TRANS = 20, + SP_UCHE_WRITE_TRANS = 21, + SP_EXPORT_VPC_TRANS = 22, + SP_EXPORT_RB_TRANS = 23, + SP_PIXELS_KILLED = 24, + SP_ICL1_REQUESTS = 25, + SP_ICL1_MISSES = 26, + SP_ICL0_REQUESTS = 27, + SP_ICL0_MISSES = 28, SP_ALU_ACTIVE_CYCLES = 29, + SP_EFU_ACTIVE_CYCLES = 30, + SP_STALL_CYCLES_BY_VPC = 31, + SP_STALL_CYCLES_BY_TP = 32, + SP_STALL_CYCLES_BY_UCHE = 33, + SP_STALL_CYCLES_BY_RB = 34, + SP_ACTIVE_CYCLES_ANY = 35, + SP_ACTIVE_CYCLES_ALL = 36, +}; + +enum a3xx_tp_perfcounter_select { + TPL1_TPPERF_L1_REQUESTS = 0, + TPL1_TPPERF_TP0_L1_REQUESTS = 1, + TPL1_TPPERF_TP0_L1_MISSES = 2, + TPL1_TPPERF_TP1_L1_REQUESTS = 3, + TPL1_TPPERF_TP1_L1_MISSES = 4, + TPL1_TPPERF_TP2_L1_REQUESTS = 5, + TPL1_TPPERF_TP2_L1_MISSES = 6, + TPL1_TPPERF_TP3_L1_REQUESTS = 7, + TPL1_TPPERF_TP3_L1_MISSES = 8, + TPL1_TPPERF_OUTPUT_TEXELS_POINT = 9, + TPL1_TPPERF_OUTPUT_TEXELS_BILINEAR = 10, + TPL1_TPPERF_OUTPUT_TEXELS_MIP = 11, + TPL1_TPPERF_OUTPUT_TEXELS_ANISO = 12, + TPL1_TPPERF_BILINEAR_OPS = 13, + TPL1_TPPERF_QUADSQUADS_OFFSET = 14, + TPL1_TPPERF_QUADQUADS_SHADOW = 15, + TPL1_TPPERF_QUADS_ARRAY = 16, + TPL1_TPPERF_QUADS_PROJECTION = 17, + TPL1_TPPERF_QUADS_GRADIENT = 18, + TPL1_TPPERF_QUADS_1D2D = 19, + TPL1_TPPERF_QUADS_3DCUBE = 20, + TPL1_TPPERF_ZERO_LOD = 21, + TPL1_TPPERF_OUTPUT_TEXELS = 22, + TPL1_TPPERF_ACTIVE_CYCLES_ANY = 23, + TPL1_TPPERF_ACTIVE_CYCLES_ALL = 24, + TPL1_TPPERF_STALL_CYCLES_BY_ARB = 25, + TPL1_TPPERF_LATENCY = 26, + TPL1_TPPERF_LATENCY_TRANS = 27, +}; + +enum a3xx_vfd_perfcounter_select { + VFD_PERF_UCHE_BYTE_FETCHED = 0, + VFD_PERF_UCHE_TRANS = 1, + VFD_PERF_VPC_BYPASS_COMPONENTS = 2, + VFD_PERF_FETCH_INSTRUCTIONS = 3, + VFD_PERF_DECODE_INSTRUCTIONS = 4, + VFD_PERF_ACTIVE_CYCLES = 5, + VFD_PERF_STALL_CYCLES_UCHE = 6, + VFD_PERF_STALL_CYCLES_HLSQ = 7, + VFD_PERF_STALL_CYCLES_VPC_BYPASS = 8, + 
VFD_PERF_STALL_CYCLES_VPC_ALLOC = 9, +}; + +enum a3xx_vpc_perfcounter_select { + VPC_PERF_SP_LM_PRIMITIVES = 0, + VPC_PERF_COMPONENTS_FROM_SP = 1, + VPC_PERF_SP_LM_COMPONENTS = 2, + VPC_PERF_ACTIVE_CYCLES = 3, + VPC_PERF_STALL_CYCLES_LM = 4, + VPC_PERF_STALL_CYCLES_RAS = 5, +}; + +enum a3xx_uche_perfcounter_select { + UCHE_UCHEPERF_VBIF_READ_BEATS_TP = 0, + UCHE_UCHEPERF_VBIF_READ_BEATS_VFD = 1, + UCHE_UCHEPERF_VBIF_READ_BEATS_HLSQ = 2, + UCHE_UCHEPERF_VBIF_READ_BEATS_MARB = 3, + UCHE_UCHEPERF_VBIF_READ_BEATS_SP = 4, + UCHE_UCHEPERF_READ_REQUESTS_TP = 8, + UCHE_UCHEPERF_READ_REQUESTS_VFD = 9, + UCHE_UCHEPERF_READ_REQUESTS_HLSQ = 10, + UCHE_UCHEPERF_READ_REQUESTS_MARB = 11, + UCHE_UCHEPERF_READ_REQUESTS_SP = 12, + UCHE_UCHEPERF_WRITE_REQUESTS_MARB = 13, + UCHE_UCHEPERF_WRITE_REQUESTS_SP = 14, + UCHE_UCHEPERF_TAG_CHECK_FAILS = 15, + UCHE_UCHEPERF_EVICTS = 16, + UCHE_UCHEPERF_FLUSHES = 17, + UCHE_UCHEPERF_VBIF_LATENCY_CYCLES = 18, + UCHE_UCHEPERF_VBIF_LATENCY_SAMPLES = 19, + UCHE_UCHEPERF_ACTIVE_CYCLES = 20, }; enum a3xx_rb_blend_opcode { diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 811f58bbba2..8c37992e17d 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -33,6 +33,7 @@ #include "util/u_format.h" #include "freedreno_resource.h" +#include "freedreno_query_hw.h" #include "fd3_emit.h" #include "fd3_blend.h" @@ -888,6 +889,8 @@ fd3_emit_restore(struct fd_context *ctx) fd_wfi(ctx, ring); + fd_hw_query_enable(ctx, ring); + ctx->needs_rb_fbd = true; } diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h index e8df429441e..d6fd1bb583e 100644 --- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h +++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h @@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: - 
/home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) -Copyright (C) 2013-2015 by the following authors: +Copyright (C) 2013-2016 by the following authors: - Rob Clark <[email protected]> (robclark) +- Ilia Mirkin <[email protected]> (imirkin) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -271,6 +272,545 @@ enum a4xx_tess_spacing { EVEN_SPACING = 3, }; +enum a4xx_ccu_perfcounter_select { + CCU_BUSY_CYCLES = 0, + CCU_RB_DEPTH_RETURN_STALL = 2, + CCU_RB_COLOR_RETURN_STALL = 3, + CCU_DEPTH_BLOCKS = 6, + CCU_COLOR_BLOCKS = 7, + CCU_DEPTH_BLOCK_HIT = 8, + 
CCU_COLOR_BLOCK_HIT = 9, + CCU_DEPTH_FLAG1_COUNT = 10, + CCU_DEPTH_FLAG2_COUNT = 11, + CCU_DEPTH_FLAG3_COUNT = 12, + CCU_DEPTH_FLAG4_COUNT = 13, + CCU_COLOR_FLAG1_COUNT = 14, + CCU_COLOR_FLAG2_COUNT = 15, + CCU_COLOR_FLAG3_COUNT = 16, + CCU_COLOR_FLAG4_COUNT = 17, + CCU_PARTIAL_BLOCK_READ = 18, +}; + +enum a4xx_cp_perfcounter_select { + CP_ALWAYS_COUNT = 0, + CP_BUSY = 1, + CP_PFP_IDLE = 2, + CP_PFP_BUSY_WORKING = 3, + CP_PFP_STALL_CYCLES_ANY = 4, + CP_PFP_STARVE_CYCLES_ANY = 5, + CP_PFP_STARVED_PER_LOAD_ADDR = 6, + CP_PFP_STALLED_PER_STORE_ADDR = 7, + CP_PFP_PC_PROFILE = 8, + CP_PFP_MATCH_PM4_PKT_PROFILE = 9, + CP_PFP_COND_INDIRECT_DISCARDED = 10, + CP_LONG_RESUMPTIONS = 11, + CP_RESUME_CYCLES = 12, + CP_RESUME_TO_BOUNDARY_CYCLES = 13, + CP_LONG_PREEMPTIONS = 14, + CP_PREEMPT_CYCLES = 15, + CP_PREEMPT_TO_BOUNDARY_CYCLES = 16, + CP_ME_FIFO_EMPTY_PFP_IDLE = 17, + CP_ME_FIFO_EMPTY_PFP_BUSY = 18, + CP_ME_FIFO_NOT_EMPTY_NOT_FULL = 19, + CP_ME_FIFO_FULL_ME_BUSY = 20, + CP_ME_FIFO_FULL_ME_NON_WORKING = 21, + CP_ME_WAITING_FOR_PACKETS = 22, + CP_ME_BUSY_WORKING = 23, + CP_ME_STARVE_CYCLES_ANY = 24, + CP_ME_STARVE_CYCLES_PER_PROFILE = 25, + CP_ME_STALL_CYCLES_PER_PROFILE = 26, + CP_ME_PC_PROFILE = 27, + CP_RCIU_FIFO_EMPTY = 28, + CP_RCIU_FIFO_NOT_EMPTY_NOT_FULL = 29, + CP_RCIU_FIFO_FULL = 30, + CP_RCIU_FIFO_FULL_NO_CONTEXT = 31, + CP_RCIU_FIFO_FULL_AHB_MASTER = 32, + CP_RCIU_FIFO_FULL_OTHER = 33, + CP_AHB_IDLE = 34, + CP_AHB_STALL_ON_GRANT_NO_SPLIT = 35, + CP_AHB_STALL_ON_GRANT_SPLIT = 36, + CP_AHB_STALL_ON_GRANT_SPLIT_PROFILE = 37, + CP_AHB_BUSY_WORKING = 38, + CP_AHB_BUSY_STALL_ON_HRDY = 39, + CP_AHB_BUSY_STALL_ON_HRDY_PROFILE = 40, +}; + +enum a4xx_gras_ras_perfcounter_select { + RAS_SUPER_TILES = 0, + RAS_8X8_TILES = 1, + RAS_4X4_TILES = 2, + RAS_BUSY_CYCLES = 3, + RAS_STALL_CYCLES_BY_RB = 4, + RAS_STALL_CYCLES_BY_VSC = 5, + RAS_STARVE_CYCLES_BY_TSE = 6, + RAS_SUPERTILE_CYCLES = 7, + RAS_TILE_CYCLES = 8, + RAS_FULLY_COVERED_SUPER_TILES = 9, + 
RAS_FULLY_COVERED_8X8_TILES = 10, + RAS_4X4_PRIM = 11, + RAS_8X4_4X8_PRIM = 12, + RAS_8X8_PRIM = 13, +}; + +enum a4xx_gras_tse_perfcounter_select { + TSE_INPUT_PRIM = 0, + TSE_INPUT_NULL_PRIM = 1, + TSE_TRIVAL_REJ_PRIM = 2, + TSE_CLIPPED_PRIM = 3, + TSE_NEW_PRIM = 4, + TSE_ZERO_AREA_PRIM = 5, + TSE_FACENESS_CULLED_PRIM = 6, + TSE_ZERO_PIXEL_PRIM = 7, + TSE_OUTPUT_NULL_PRIM = 8, + TSE_OUTPUT_VISIBLE_PRIM = 9, + TSE_PRE_CLIP_PRIM = 10, + TSE_POST_CLIP_PRIM = 11, + TSE_BUSY_CYCLES = 12, + TSE_PC_STARVE = 13, + TSE_RAS_STALL = 14, + TSE_STALL_BARYPLANE_FIFO_FULL = 15, + TSE_STALL_ZPLANE_FIFO_FULL = 16, +}; + +enum a4xx_hlsq_perfcounter_select { + HLSQ_SP_VS_STAGE_CONSTANT = 0, + HLSQ_SP_VS_STAGE_INSTRUCTIONS = 1, + HLSQ_SP_FS_STAGE_CONSTANT = 2, + HLSQ_SP_FS_STAGE_INSTRUCTIONS = 3, + HLSQ_TP_STATE = 4, + HLSQ_QUADS = 5, + HLSQ_PIXELS = 6, + HLSQ_VERTICES = 7, + HLSQ_SP_VS_STAGE_DATA_BYTES = 13, + HLSQ_SP_FS_STAGE_DATA_BYTES = 14, + HLSQ_BUSY_CYCLES = 15, + HLSQ_STALL_CYCLES_SP_STATE = 16, + HLSQ_STALL_CYCLES_SP_VS_STAGE = 17, + HLSQ_STALL_CYCLES_SP_FS_STAGE = 18, + HLSQ_STALL_CYCLES_UCHE = 19, + HLSQ_RBBM_LOAD_CYCLES = 20, + HLSQ_DI_TO_VS_START_SP = 21, + HLSQ_DI_TO_FS_START_SP = 22, + HLSQ_VS_STAGE_START_TO_DONE_SP = 23, + HLSQ_FS_STAGE_START_TO_DONE_SP = 24, + HLSQ_SP_STATE_COPY_CYCLES_VS_STAGE = 25, + HLSQ_SP_STATE_COPY_CYCLES_FS_STAGE = 26, + HLSQ_UCHE_LATENCY_CYCLES = 27, + HLSQ_UCHE_LATENCY_COUNT = 28, + HLSQ_STARVE_CYCLES_VFD = 29, +}; + +enum a4xx_pc_perfcounter_select { + PC_VIS_STREAMS_LOADED = 0, + PC_VPC_PRIMITIVES = 2, + PC_DEAD_PRIM = 3, + PC_LIVE_PRIM = 4, + PC_DEAD_DRAWCALLS = 5, + PC_LIVE_DRAWCALLS = 6, + PC_VERTEX_MISSES = 7, + PC_STALL_CYCLES_VFD = 9, + PC_STALL_CYCLES_TSE = 10, + PC_STALL_CYCLES_UCHE = 11, + PC_WORKING_CYCLES = 12, + PC_IA_VERTICES = 13, + PC_GS_PRIMITIVES = 14, + PC_HS_INVOCATIONS = 15, + PC_DS_INVOCATIONS = 16, + PC_DS_PRIMITIVES = 17, + PC_STARVE_CYCLES_FOR_INDEX = 20, + PC_STARVE_CYCLES_FOR_TESS_FACTOR = 21, + 
PC_STARVE_CYCLES_FOR_VIZ_STREAM = 22, + PC_STALL_CYCLES_TESS = 23, + PC_STARVE_CYCLES_FOR_POSITION = 24, + PC_MODE0_DRAWCALL = 25, + PC_MODE1_DRAWCALL = 26, + PC_MODE2_DRAWCALL = 27, + PC_MODE3_DRAWCALL = 28, + PC_MODE4_DRAWCALL = 29, + PC_PREDICATED_DEAD_DRAWCALL = 30, + PC_STALL_CYCLES_BY_TSE_ONLY = 31, + PC_STALL_CYCLES_BY_VPC_ONLY = 32, + PC_VPC_POS_DATA_TRANSACTION = 33, + PC_BUSY_CYCLES = 34, + PC_STARVE_CYCLES_DI = 35, + PC_STALL_CYCLES_VPC = 36, + TESS_WORKING_CYCLES = 37, + TESS_NUM_CYCLES_SETUP_WORKING = 38, + TESS_NUM_CYCLES_PTGEN_WORKING = 39, + TESS_NUM_CYCLES_CONNGEN_WORKING = 40, + TESS_BUSY_CYCLES = 41, + TESS_STARVE_CYCLES_PC = 42, + TESS_STALL_CYCLES_PC = 43, +}; + +enum a4xx_pwr_perfcounter_select { + PWR_CORE_CLOCK_CYCLES = 0, + PWR_BUSY_CLOCK_CYCLES = 1, +}; + +enum a4xx_rb_perfcounter_select { + RB_BUSY_CYCLES = 0, + RB_BUSY_CYCLES_BINNING = 1, + RB_BUSY_CYCLES_RENDERING = 2, + RB_BUSY_CYCLES_RESOLVE = 3, + RB_STARVE_CYCLES_BY_SP = 4, + RB_STARVE_CYCLES_BY_RAS = 5, + RB_STARVE_CYCLES_BY_MARB = 6, + RB_STALL_CYCLES_BY_MARB = 7, + RB_STALL_CYCLES_BY_HLSQ = 8, + RB_RB_RB_MARB_DATA = 9, + RB_SP_RB_QUAD = 10, + RB_RAS_RB_Z_QUADS = 11, + RB_GMEM_CH0_READ = 12, + RB_GMEM_CH1_READ = 13, + RB_GMEM_CH0_WRITE = 14, + RB_GMEM_CH1_WRITE = 15, + RB_CP_CONTEXT_DONE = 16, + RB_CP_CACHE_FLUSH = 17, + RB_CP_ZPASS_DONE = 18, + RB_STALL_FIFO0_FULL = 19, + RB_STALL_FIFO1_FULL = 20, + RB_STALL_FIFO2_FULL = 21, + RB_STALL_FIFO3_FULL = 22, + RB_RB_HLSQ_TRANSACTIONS = 23, + RB_Z_READ = 24, + RB_Z_WRITE = 25, + RB_C_READ = 26, + RB_C_WRITE = 27, + RB_C_READ_LATENCY = 28, + RB_Z_READ_LATENCY = 29, + RB_STALL_BY_UCHE = 30, + RB_MARB_UCHE_TRANSACTIONS = 31, + RB_CACHE_STALL_MISS = 32, + RB_CACHE_STALL_FIFO_FULL = 33, + RB_8BIT_BLENDER_UNITS_ACTIVE = 34, + RB_16BIT_BLENDER_UNITS_ACTIVE = 35, + RB_SAMPLER_UNITS_ACTIVE = 36, + RB_TOTAL_PASS = 38, + RB_Z_PASS = 39, + RB_Z_FAIL = 40, + RB_S_FAIL = 41, + RB_POWER0 = 42, + RB_POWER1 = 43, + RB_POWER2 = 44, + RB_POWER3 = 45, + 
RB_POWER4 = 46, + RB_POWER5 = 47, + RB_POWER6 = 48, + RB_POWER7 = 49, +}; + +enum a4xx_rbbm_perfcounter_select { + RBBM_ALWAYS_ON = 0, + RBBM_VBIF_BUSY = 1, + RBBM_TSE_BUSY = 2, + RBBM_RAS_BUSY = 3, + RBBM_PC_DCALL_BUSY = 4, + RBBM_PC_VSD_BUSY = 5, + RBBM_VFD_BUSY = 6, + RBBM_VPC_BUSY = 7, + RBBM_UCHE_BUSY = 8, + RBBM_VSC_BUSY = 9, + RBBM_HLSQ_BUSY = 10, + RBBM_ANY_RB_BUSY = 11, + RBBM_ANY_TPL1_BUSY = 12, + RBBM_ANY_SP_BUSY = 13, + RBBM_ANY_MARB_BUSY = 14, + RBBM_ANY_ARB_BUSY = 15, + RBBM_AHB_STATUS_BUSY = 16, + RBBM_AHB_STATUS_STALLED = 17, + RBBM_AHB_STATUS_TXFR = 18, + RBBM_AHB_STATUS_TXFR_SPLIT = 19, + RBBM_AHB_STATUS_TXFR_ERROR = 20, + RBBM_AHB_STATUS_LONG_STALL = 21, + RBBM_STATUS_MASKED = 22, + RBBM_CP_BUSY_GFX_CORE_IDLE = 23, + RBBM_TESS_BUSY = 24, + RBBM_COM_BUSY = 25, + RBBM_DCOM_BUSY = 32, + RBBM_ANY_CCU_BUSY = 33, + RBBM_DPM_BUSY = 34, +}; + +enum a4xx_sp_perfcounter_select { + SP_LM_LOAD_INSTRUCTIONS = 0, + SP_LM_STORE_INSTRUCTIONS = 1, + SP_LM_ATOMICS = 2, + SP_GM_LOAD_INSTRUCTIONS = 3, + SP_GM_STORE_INSTRUCTIONS = 4, + SP_GM_ATOMICS = 5, + SP_VS_STAGE_TEX_INSTRUCTIONS = 6, + SP_VS_STAGE_CFLOW_INSTRUCTIONS = 7, + SP_VS_STAGE_EFU_INSTRUCTIONS = 8, + SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 9, + SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 10, + SP_FS_STAGE_TEX_INSTRUCTIONS = 11, + SP_FS_STAGE_CFLOW_INSTRUCTIONS = 12, + SP_FS_STAGE_EFU_INSTRUCTIONS = 13, + SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 14, + SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 15, + SP_VS_INSTRUCTIONS = 17, + SP_FS_INSTRUCTIONS = 18, + SP_ADDR_LOCK_COUNT = 19, + SP_UCHE_READ_TRANS = 20, + SP_UCHE_WRITE_TRANS = 21, + SP_EXPORT_VPC_TRANS = 22, + SP_EXPORT_RB_TRANS = 23, + SP_PIXELS_KILLED = 24, + SP_ICL1_REQUESTS = 25, + SP_ICL1_MISSES = 26, + SP_ICL0_REQUESTS = 27, + SP_ICL0_MISSES = 28, + SP_ALU_WORKING_CYCLES = 29, + SP_EFU_WORKING_CYCLES = 30, + SP_STALL_CYCLES_BY_VPC = 31, + SP_STALL_CYCLES_BY_TP = 32, + SP_STALL_CYCLES_BY_UCHE = 33, + SP_STALL_CYCLES_BY_RB = 34, + SP_BUSY_CYCLES = 35, + SP_HS_INSTRUCTIONS = 
36, + SP_DS_INSTRUCTIONS = 37, + SP_GS_INSTRUCTIONS = 38, + SP_CS_INSTRUCTIONS = 39, + SP_SCHEDULER_NON_WORKING = 40, + SP_WAVE_CONTEXTS = 41, + SP_WAVE_CONTEXT_CYCLES = 42, + SP_POWER0 = 43, + SP_POWER1 = 44, + SP_POWER2 = 45, + SP_POWER3 = 46, + SP_POWER4 = 47, + SP_POWER5 = 48, + SP_POWER6 = 49, + SP_POWER7 = 50, + SP_POWER8 = 51, + SP_POWER9 = 52, + SP_POWER10 = 53, + SP_POWER11 = 54, + SP_POWER12 = 55, + SP_POWER13 = 56, + SP_POWER14 = 57, + SP_POWER15 = 58, +}; + +enum a4xx_tp_perfcounter_select { + TP_L1_REQUESTS = 0, + TP_L1_MISSES = 1, + TP_QUADS_OFFSET = 8, + TP_QUAD_SHADOW = 9, + TP_QUADS_ARRAY = 10, + TP_QUADS_GRADIENT = 11, + TP_QUADS_1D2D = 12, + TP_QUADS_3DCUBE = 13, + TP_BUSY_CYCLES = 16, + TP_STALL_CYCLES_BY_ARB = 17, + TP_STATE_CACHE_REQUESTS = 20, + TP_STATE_CACHE_MISSES = 21, + TP_POWER0 = 22, + TP_POWER1 = 23, + TP_POWER2 = 24, + TP_POWER3 = 25, + TP_POWER4 = 26, + TP_POWER5 = 27, + TP_POWER6 = 28, + TP_POWER7 = 29, +}; + +enum a4xx_uche_perfcounter_select { + UCHE_VBIF_READ_BEATS_TP = 0, + UCHE_VBIF_READ_BEATS_VFD = 1, + UCHE_VBIF_READ_BEATS_HLSQ = 2, + UCHE_VBIF_READ_BEATS_MARB = 3, + UCHE_VBIF_READ_BEATS_SP = 4, + UCHE_READ_REQUESTS_TP = 5, + UCHE_READ_REQUESTS_VFD = 6, + UCHE_READ_REQUESTS_HLSQ = 7, + UCHE_READ_REQUESTS_MARB = 8, + UCHE_READ_REQUESTS_SP = 9, + UCHE_WRITE_REQUESTS_MARB = 10, + UCHE_WRITE_REQUESTS_SP = 11, + UCHE_TAG_CHECK_FAILS = 12, + UCHE_EVICTS = 13, + UCHE_FLUSHES = 14, + UCHE_VBIF_LATENCY_CYCLES = 15, + UCHE_VBIF_LATENCY_SAMPLES = 16, + UCHE_BUSY_CYCLES = 17, + UCHE_VBIF_READ_BEATS_PC = 18, + UCHE_READ_REQUESTS_PC = 19, + UCHE_WRITE_REQUESTS_VPC = 20, + UCHE_STALL_BY_VBIF = 21, + UCHE_WRITE_REQUESTS_VSC = 22, + UCHE_POWER0 = 23, + UCHE_POWER1 = 24, + UCHE_POWER2 = 25, + UCHE_POWER3 = 26, + UCHE_POWER4 = 27, + UCHE_POWER5 = 28, + UCHE_POWER6 = 29, + UCHE_POWER7 = 30, +}; + +enum a4xx_vbif_perfcounter_select { + AXI_READ_REQUESTS_ID_0 = 0, + AXI_READ_REQUESTS_ID_1 = 1, + AXI_READ_REQUESTS_ID_2 = 2, + 
AXI_READ_REQUESTS_ID_3 = 3, + AXI_READ_REQUESTS_ID_4 = 4, + AXI_READ_REQUESTS_ID_5 = 5, + AXI_READ_REQUESTS_ID_6 = 6, + AXI_READ_REQUESTS_ID_7 = 7, + AXI_READ_REQUESTS_ID_8 = 8, + AXI_READ_REQUESTS_ID_9 = 9, + AXI_READ_REQUESTS_ID_10 = 10, + AXI_READ_REQUESTS_ID_11 = 11, + AXI_READ_REQUESTS_ID_12 = 12, + AXI_READ_REQUESTS_ID_13 = 13, + AXI_READ_REQUESTS_ID_14 = 14, + AXI_READ_REQUESTS_ID_15 = 15, + AXI0_READ_REQUESTS_TOTAL = 16, + AXI1_READ_REQUESTS_TOTAL = 17, + AXI2_READ_REQUESTS_TOTAL = 18, + AXI3_READ_REQUESTS_TOTAL = 19, + AXI_READ_REQUESTS_TOTAL = 20, + AXI_WRITE_REQUESTS_ID_0 = 21, + AXI_WRITE_REQUESTS_ID_1 = 22, + AXI_WRITE_REQUESTS_ID_2 = 23, + AXI_WRITE_REQUESTS_ID_3 = 24, + AXI_WRITE_REQUESTS_ID_4 = 25, + AXI_WRITE_REQUESTS_ID_5 = 26, + AXI_WRITE_REQUESTS_ID_6 = 27, + AXI_WRITE_REQUESTS_ID_7 = 28, + AXI_WRITE_REQUESTS_ID_8 = 29, + AXI_WRITE_REQUESTS_ID_9 = 30, + AXI_WRITE_REQUESTS_ID_10 = 31, + AXI_WRITE_REQUESTS_ID_11 = 32, + AXI_WRITE_REQUESTS_ID_12 = 33, + AXI_WRITE_REQUESTS_ID_13 = 34, + AXI_WRITE_REQUESTS_ID_14 = 35, + AXI_WRITE_REQUESTS_ID_15 = 36, + AXI0_WRITE_REQUESTS_TOTAL = 37, + AXI1_WRITE_REQUESTS_TOTAL = 38, + AXI2_WRITE_REQUESTS_TOTAL = 39, + AXI3_WRITE_REQUESTS_TOTAL = 40, + AXI_WRITE_REQUESTS_TOTAL = 41, + AXI_TOTAL_REQUESTS = 42, + AXI_READ_DATA_BEATS_ID_0 = 43, + AXI_READ_DATA_BEATS_ID_1 = 44, + AXI_READ_DATA_BEATS_ID_2 = 45, + AXI_READ_DATA_BEATS_ID_3 = 46, + AXI_READ_DATA_BEATS_ID_4 = 47, + AXI_READ_DATA_BEATS_ID_5 = 48, + AXI_READ_DATA_BEATS_ID_6 = 49, + AXI_READ_DATA_BEATS_ID_7 = 50, + AXI_READ_DATA_BEATS_ID_8 = 51, + AXI_READ_DATA_BEATS_ID_9 = 52, + AXI_READ_DATA_BEATS_ID_10 = 53, + AXI_READ_DATA_BEATS_ID_11 = 54, + AXI_READ_DATA_BEATS_ID_12 = 55, + AXI_READ_DATA_BEATS_ID_13 = 56, + AXI_READ_DATA_BEATS_ID_14 = 57, + AXI_READ_DATA_BEATS_ID_15 = 58, + AXI0_READ_DATA_BEATS_TOTAL = 59, + AXI1_READ_DATA_BEATS_TOTAL = 60, + AXI2_READ_DATA_BEATS_TOTAL = 61, + AXI3_READ_DATA_BEATS_TOTAL = 62, + AXI_READ_DATA_BEATS_TOTAL = 63, + 
AXI_WRITE_DATA_BEATS_ID_0 = 64, + AXI_WRITE_DATA_BEATS_ID_1 = 65, + AXI_WRITE_DATA_BEATS_ID_2 = 66, + AXI_WRITE_DATA_BEATS_ID_3 = 67, + AXI_WRITE_DATA_BEATS_ID_4 = 68, + AXI_WRITE_DATA_BEATS_ID_5 = 69, + AXI_WRITE_DATA_BEATS_ID_6 = 70, + AXI_WRITE_DATA_BEATS_ID_7 = 71, + AXI_WRITE_DATA_BEATS_ID_8 = 72, + AXI_WRITE_DATA_BEATS_ID_9 = 73, + AXI_WRITE_DATA_BEATS_ID_10 = 74, + AXI_WRITE_DATA_BEATS_ID_11 = 75, + AXI_WRITE_DATA_BEATS_ID_12 = 76, + AXI_WRITE_DATA_BEATS_ID_13 = 77, + AXI_WRITE_DATA_BEATS_ID_14 = 78, + AXI_WRITE_DATA_BEATS_ID_15 = 79, + AXI0_WRITE_DATA_BEATS_TOTAL = 80, + AXI1_WRITE_DATA_BEATS_TOTAL = 81, + AXI2_WRITE_DATA_BEATS_TOTAL = 82, + AXI3_WRITE_DATA_BEATS_TOTAL = 83, + AXI_WRITE_DATA_BEATS_TOTAL = 84, + AXI_DATA_BEATS_TOTAL = 85, + CYCLES_HELD_OFF_ID_0 = 86, + CYCLES_HELD_OFF_ID_1 = 87, + CYCLES_HELD_OFF_ID_2 = 88, + CYCLES_HELD_OFF_ID_3 = 89, + CYCLES_HELD_OFF_ID_4 = 90, + CYCLES_HELD_OFF_ID_5 = 91, + CYCLES_HELD_OFF_ID_6 = 92, + CYCLES_HELD_OFF_ID_7 = 93, + CYCLES_HELD_OFF_ID_8 = 94, + CYCLES_HELD_OFF_ID_9 = 95, + CYCLES_HELD_OFF_ID_10 = 96, + CYCLES_HELD_OFF_ID_11 = 97, + CYCLES_HELD_OFF_ID_12 = 98, + CYCLES_HELD_OFF_ID_13 = 99, + CYCLES_HELD_OFF_ID_14 = 100, + CYCLES_HELD_OFF_ID_15 = 101, + AXI_READ_REQUEST_HELD_OFF = 102, + AXI_WRITE_REQUEST_HELD_OFF = 103, + AXI_REQUEST_HELD_OFF = 104, + AXI_WRITE_DATA_HELD_OFF = 105, + OCMEM_AXI_READ_REQUEST_HELD_OFF = 106, + OCMEM_AXI_WRITE_REQUEST_HELD_OFF = 107, + OCMEM_AXI_REQUEST_HELD_OFF = 108, + OCMEM_AXI_WRITE_DATA_HELD_OFF = 109, + ELAPSED_CYCLES_DDR = 110, + ELAPSED_CYCLES_OCMEM = 111, +}; + +enum a4xx_vfd_perfcounter_select { + VFD_UCHE_BYTE_FETCHED = 0, + VFD_UCHE_TRANS = 1, + VFD_FETCH_INSTRUCTIONS = 3, + VFD_BUSY_CYCLES = 5, + VFD_STALL_CYCLES_UCHE = 6, + VFD_STALL_CYCLES_HLSQ = 7, + VFD_STALL_CYCLES_VPC_BYPASS = 8, + VFD_STALL_CYCLES_VPC_ALLOC = 9, + VFD_MODE_0_FIBERS = 13, + VFD_MODE_1_FIBERS = 14, + VFD_MODE_2_FIBERS = 15, + VFD_MODE_3_FIBERS = 16, + VFD_MODE_4_FIBERS = 17, + VFD_BFIFO_STALL 
= 18, + VFD_NUM_VERTICES_TOTAL = 19, + VFD_PACKER_FULL = 20, + VFD_UCHE_REQUEST_FIFO_FULL = 21, + VFD_STARVE_CYCLES_PC = 22, + VFD_STARVE_CYCLES_UCHE = 23, +}; + +enum a4xx_vpc_perfcounter_select { + VPC_SP_LM_COMPONENTS = 2, + VPC_SP0_LM_BYTES = 3, + VPC_SP1_LM_BYTES = 4, + VPC_SP2_LM_BYTES = 5, + VPC_SP3_LM_BYTES = 6, + VPC_WORKING_CYCLES = 7, + VPC_STALL_CYCLES_LM = 8, + VPC_STARVE_CYCLES_RAS = 9, + VPC_STREAMOUT_CYCLES = 10, + VPC_UCHE_TRANSACTIONS = 12, + VPC_STALL_CYCLES_UCHE = 13, + VPC_BUSY_CYCLES = 14, + VPC_STARVE_CYCLES_SP = 15, +}; + +enum a4xx_vsc_perfcounter_select { + VSC_BUSY_CYCLES = 0, + VSC_WORKING_CYCLES = 1, + VSC_STALL_CYCLES_UCHE = 2, + VSC_STARVE_CYCLES_RAS = 3, + VSC_EOT_NUM = 4, +}; + enum a4xx_tex_filter { A4XX_TEX_NEAREST = 0, A4XX_TEX_LINEAR = 1, @@ -357,6 +897,12 @@ static inline uint32_t A4XX_CGC_HLSQ_EARLY_CYC(uint32_t val) #define REG_A4XX_RB_PERFCTR_RB_SEL_7 0x00000cce +#define REG_A4XX_RB_PERFCTR_CCU_SEL_0 0x00000ccf + +#define REG_A4XX_RB_PERFCTR_CCU_SEL_1 0x00000cd0 + +#define REG_A4XX_RB_PERFCTR_CCU_SEL_2 0x00000cd1 + #define REG_A4XX_RB_PERFCTR_CCU_SEL_3 0x00000cd2 #define REG_A4XX_RB_FRAME_BUFFER_DIMENSION 0x00000ce0 @@ -1070,6 +1616,380 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_TP_REG(uint32_t i0) { return 0x #define REG_A4XX_RBBM_PERFCTR_CP_0_LO 0x0000009c +#define REG_A4XX_RBBM_PERFCTR_CP_0_HI 0x0000009d + +#define REG_A4XX_RBBM_PERFCTR_CP_1_LO 0x0000009e + +#define REG_A4XX_RBBM_PERFCTR_CP_1_HI 0x0000009f + +#define REG_A4XX_RBBM_PERFCTR_CP_2_LO 0x000000a0 + +#define REG_A4XX_RBBM_PERFCTR_CP_2_HI 0x000000a1 + +#define REG_A4XX_RBBM_PERFCTR_CP_3_LO 0x000000a2 + +#define REG_A4XX_RBBM_PERFCTR_CP_3_HI 0x000000a3 + +#define REG_A4XX_RBBM_PERFCTR_CP_4_LO 0x000000a4 + +#define REG_A4XX_RBBM_PERFCTR_CP_4_HI 0x000000a5 + +#define REG_A4XX_RBBM_PERFCTR_CP_5_LO 0x000000a6 + +#define REG_A4XX_RBBM_PERFCTR_CP_5_HI 0x000000a7 + +#define REG_A4XX_RBBM_PERFCTR_CP_6_LO 0x000000a8 + +#define REG_A4XX_RBBM_PERFCTR_CP_6_HI 
0x000000a9 + +#define REG_A4XX_RBBM_PERFCTR_CP_7_LO 0x000000aa + +#define REG_A4XX_RBBM_PERFCTR_CP_7_HI 0x000000ab + +#define REG_A4XX_RBBM_PERFCTR_RBBM_0_LO 0x000000ac + +#define REG_A4XX_RBBM_PERFCTR_RBBM_0_HI 0x000000ad + +#define REG_A4XX_RBBM_PERFCTR_RBBM_1_LO 0x000000ae + +#define REG_A4XX_RBBM_PERFCTR_RBBM_1_HI 0x000000af + +#define REG_A4XX_RBBM_PERFCTR_RBBM_2_LO 0x000000b0 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_2_HI 0x000000b1 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_3_LO 0x000000b2 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_3_HI 0x000000b3 + +#define REG_A4XX_RBBM_PERFCTR_PC_0_LO 0x000000b4 + +#define REG_A4XX_RBBM_PERFCTR_PC_0_HI 0x000000b5 + +#define REG_A4XX_RBBM_PERFCTR_PC_1_LO 0x000000b6 + +#define REG_A4XX_RBBM_PERFCTR_PC_1_HI 0x000000b7 + +#define REG_A4XX_RBBM_PERFCTR_PC_2_LO 0x000000b8 + +#define REG_A4XX_RBBM_PERFCTR_PC_2_HI 0x000000b9 + +#define REG_A4XX_RBBM_PERFCTR_PC_3_LO 0x000000ba + +#define REG_A4XX_RBBM_PERFCTR_PC_3_HI 0x000000bb + +#define REG_A4XX_RBBM_PERFCTR_PC_4_LO 0x000000bc + +#define REG_A4XX_RBBM_PERFCTR_PC_4_HI 0x000000bd + +#define REG_A4XX_RBBM_PERFCTR_PC_5_LO 0x000000be + +#define REG_A4XX_RBBM_PERFCTR_PC_5_HI 0x000000bf + +#define REG_A4XX_RBBM_PERFCTR_PC_6_LO 0x000000c0 + +#define REG_A4XX_RBBM_PERFCTR_PC_6_HI 0x000000c1 + +#define REG_A4XX_RBBM_PERFCTR_PC_7_LO 0x000000c2 + +#define REG_A4XX_RBBM_PERFCTR_PC_7_HI 0x000000c3 + +#define REG_A4XX_RBBM_PERFCTR_VFD_0_LO 0x000000c4 + +#define REG_A4XX_RBBM_PERFCTR_VFD_0_HI 0x000000c5 + +#define REG_A4XX_RBBM_PERFCTR_VFD_1_LO 0x000000c6 + +#define REG_A4XX_RBBM_PERFCTR_VFD_1_HI 0x000000c7 + +#define REG_A4XX_RBBM_PERFCTR_VFD_2_LO 0x000000c8 + +#define REG_A4XX_RBBM_PERFCTR_VFD_2_HI 0x000000c9 + +#define REG_A4XX_RBBM_PERFCTR_VFD_3_LO 0x000000ca + +#define REG_A4XX_RBBM_PERFCTR_VFD_3_HI 0x000000cb + +#define REG_A4XX_RBBM_PERFCTR_VFD_4_LO 0x000000cc + +#define REG_A4XX_RBBM_PERFCTR_VFD_4_HI 0x000000cd + +#define REG_A4XX_RBBM_PERFCTR_VFD_5_LO 0x000000ce + +#define 
REG_A4XX_RBBM_PERFCTR_VFD_5_HI 0x000000cf + +#define REG_A4XX_RBBM_PERFCTR_VFD_6_LO 0x000000d0 + +#define REG_A4XX_RBBM_PERFCTR_VFD_6_HI 0x000000d1 + +#define REG_A4XX_RBBM_PERFCTR_VFD_7_LO 0x000000d2 + +#define REG_A4XX_RBBM_PERFCTR_VFD_7_HI 0x000000d3 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_LO 0x000000d4 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_HI 0x000000d5 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_LO 0x000000d6 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_HI 0x000000d7 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_LO 0x000000d8 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_HI 0x000000d9 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_LO 0x000000da + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_HI 0x000000db + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_LO 0x000000dc + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_HI 0x000000dd + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_LO 0x000000de + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_HI 0x000000df + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_LO 0x000000e0 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_HI 0x000000e1 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_LO 0x000000e2 + +#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_HI 0x000000e3 + +#define REG_A4XX_RBBM_PERFCTR_VPC_0_LO 0x000000e4 + +#define REG_A4XX_RBBM_PERFCTR_VPC_0_HI 0x000000e5 + +#define REG_A4XX_RBBM_PERFCTR_VPC_1_LO 0x000000e6 + +#define REG_A4XX_RBBM_PERFCTR_VPC_1_HI 0x000000e7 + +#define REG_A4XX_RBBM_PERFCTR_VPC_2_LO 0x000000e8 + +#define REG_A4XX_RBBM_PERFCTR_VPC_2_HI 0x000000e9 + +#define REG_A4XX_RBBM_PERFCTR_VPC_3_LO 0x000000ea + +#define REG_A4XX_RBBM_PERFCTR_VPC_3_HI 0x000000eb + +#define REG_A4XX_RBBM_PERFCTR_CCU_0_LO 0x000000ec + +#define REG_A4XX_RBBM_PERFCTR_CCU_0_HI 0x000000ed + +#define REG_A4XX_RBBM_PERFCTR_CCU_1_LO 0x000000ee + +#define REG_A4XX_RBBM_PERFCTR_CCU_1_HI 0x000000ef + +#define REG_A4XX_RBBM_PERFCTR_CCU_2_LO 0x000000f0 + +#define REG_A4XX_RBBM_PERFCTR_CCU_2_HI 0x000000f1 + +#define REG_A4XX_RBBM_PERFCTR_CCU_3_LO 0x000000f2 + +#define REG_A4XX_RBBM_PERFCTR_CCU_3_HI 0x000000f3 + +#define 
REG_A4XX_RBBM_PERFCTR_TSE_0_LO 0x000000f4 + +#define REG_A4XX_RBBM_PERFCTR_TSE_0_HI 0x000000f5 + +#define REG_A4XX_RBBM_PERFCTR_TSE_1_LO 0x000000f6 + +#define REG_A4XX_RBBM_PERFCTR_TSE_1_HI 0x000000f7 + +#define REG_A4XX_RBBM_PERFCTR_TSE_2_LO 0x000000f8 + +#define REG_A4XX_RBBM_PERFCTR_TSE_2_HI 0x000000f9 + +#define REG_A4XX_RBBM_PERFCTR_TSE_3_LO 0x000000fa + +#define REG_A4XX_RBBM_PERFCTR_TSE_3_HI 0x000000fb + +#define REG_A4XX_RBBM_PERFCTR_RAS_0_LO 0x000000fc + +#define REG_A4XX_RBBM_PERFCTR_RAS_0_HI 0x000000fd + +#define REG_A4XX_RBBM_PERFCTR_RAS_1_LO 0x000000fe + +#define REG_A4XX_RBBM_PERFCTR_RAS_1_HI 0x000000ff + +#define REG_A4XX_RBBM_PERFCTR_RAS_2_LO 0x00000100 + +#define REG_A4XX_RBBM_PERFCTR_RAS_2_HI 0x00000101 + +#define REG_A4XX_RBBM_PERFCTR_RAS_3_LO 0x00000102 + +#define REG_A4XX_RBBM_PERFCTR_RAS_3_HI 0x00000103 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_0_LO 0x00000104 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_0_HI 0x00000105 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_1_LO 0x00000106 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_1_HI 0x00000107 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_2_LO 0x00000108 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_2_HI 0x00000109 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_3_LO 0x0000010a + +#define REG_A4XX_RBBM_PERFCTR_UCHE_3_HI 0x0000010b + +#define REG_A4XX_RBBM_PERFCTR_UCHE_4_LO 0x0000010c + +#define REG_A4XX_RBBM_PERFCTR_UCHE_4_HI 0x0000010d + +#define REG_A4XX_RBBM_PERFCTR_UCHE_5_LO 0x0000010e + +#define REG_A4XX_RBBM_PERFCTR_UCHE_5_HI 0x0000010f + +#define REG_A4XX_RBBM_PERFCTR_UCHE_6_LO 0x00000110 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_6_HI 0x00000111 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_7_LO 0x00000112 + +#define REG_A4XX_RBBM_PERFCTR_UCHE_7_HI 0x00000113 + +#define REG_A4XX_RBBM_PERFCTR_TP_0_LO 0x00000114 + +#define REG_A4XX_RBBM_PERFCTR_TP_0_HI 0x00000115 + +#define REG_A4XX_RBBM_PERFCTR_TP_0_LO 0x00000114 + +#define REG_A4XX_RBBM_PERFCTR_TP_0_HI 0x00000115 + +#define REG_A4XX_RBBM_PERFCTR_TP_1_LO 0x00000116 + +#define 
REG_A4XX_RBBM_PERFCTR_TP_1_HI 0x00000117 + +#define REG_A4XX_RBBM_PERFCTR_TP_2_LO 0x00000118 + +#define REG_A4XX_RBBM_PERFCTR_TP_2_HI 0x00000119 + +#define REG_A4XX_RBBM_PERFCTR_TP_3_LO 0x0000011a + +#define REG_A4XX_RBBM_PERFCTR_TP_3_HI 0x0000011b + +#define REG_A4XX_RBBM_PERFCTR_TP_4_LO 0x0000011c + +#define REG_A4XX_RBBM_PERFCTR_TP_4_HI 0x0000011d + +#define REG_A4XX_RBBM_PERFCTR_TP_5_LO 0x0000011e + +#define REG_A4XX_RBBM_PERFCTR_TP_5_HI 0x0000011f + +#define REG_A4XX_RBBM_PERFCTR_TP_6_LO 0x00000120 + +#define REG_A4XX_RBBM_PERFCTR_TP_6_HI 0x00000121 + +#define REG_A4XX_RBBM_PERFCTR_TP_7_LO 0x00000122 + +#define REG_A4XX_RBBM_PERFCTR_TP_7_HI 0x00000123 + +#define REG_A4XX_RBBM_PERFCTR_SP_0_LO 0x00000124 + +#define REG_A4XX_RBBM_PERFCTR_SP_0_HI 0x00000125 + +#define REG_A4XX_RBBM_PERFCTR_SP_1_LO 0x00000126 + +#define REG_A4XX_RBBM_PERFCTR_SP_1_HI 0x00000127 + +#define REG_A4XX_RBBM_PERFCTR_SP_2_LO 0x00000128 + +#define REG_A4XX_RBBM_PERFCTR_SP_2_HI 0x00000129 + +#define REG_A4XX_RBBM_PERFCTR_SP_3_LO 0x0000012a + +#define REG_A4XX_RBBM_PERFCTR_SP_3_HI 0x0000012b + +#define REG_A4XX_RBBM_PERFCTR_SP_4_LO 0x0000012c + +#define REG_A4XX_RBBM_PERFCTR_SP_4_HI 0x0000012d + +#define REG_A4XX_RBBM_PERFCTR_SP_5_LO 0x0000012e + +#define REG_A4XX_RBBM_PERFCTR_SP_5_HI 0x0000012f + +#define REG_A4XX_RBBM_PERFCTR_SP_6_LO 0x00000130 + +#define REG_A4XX_RBBM_PERFCTR_SP_6_HI 0x00000131 + +#define REG_A4XX_RBBM_PERFCTR_SP_7_LO 0x00000132 + +#define REG_A4XX_RBBM_PERFCTR_SP_7_HI 0x00000133 + +#define REG_A4XX_RBBM_PERFCTR_SP_8_LO 0x00000134 + +#define REG_A4XX_RBBM_PERFCTR_SP_8_HI 0x00000135 + +#define REG_A4XX_RBBM_PERFCTR_SP_9_LO 0x00000136 + +#define REG_A4XX_RBBM_PERFCTR_SP_9_HI 0x00000137 + +#define REG_A4XX_RBBM_PERFCTR_SP_10_LO 0x00000138 + +#define REG_A4XX_RBBM_PERFCTR_SP_10_HI 0x00000139 + +#define REG_A4XX_RBBM_PERFCTR_SP_11_LO 0x0000013a + +#define REG_A4XX_RBBM_PERFCTR_SP_11_HI 0x0000013b + +#define REG_A4XX_RBBM_PERFCTR_RB_0_LO 0x0000013c + +#define 
REG_A4XX_RBBM_PERFCTR_RB_0_HI 0x0000013d + +#define REG_A4XX_RBBM_PERFCTR_RB_1_LO 0x0000013e + +#define REG_A4XX_RBBM_PERFCTR_RB_1_HI 0x0000013f + +#define REG_A4XX_RBBM_PERFCTR_RB_2_LO 0x00000140 + +#define REG_A4XX_RBBM_PERFCTR_RB_2_HI 0x00000141 + +#define REG_A4XX_RBBM_PERFCTR_RB_3_LO 0x00000142 + +#define REG_A4XX_RBBM_PERFCTR_RB_3_HI 0x00000143 + +#define REG_A4XX_RBBM_PERFCTR_RB_4_LO 0x00000144 + +#define REG_A4XX_RBBM_PERFCTR_RB_4_HI 0x00000145 + +#define REG_A4XX_RBBM_PERFCTR_RB_5_LO 0x00000146 + +#define REG_A4XX_RBBM_PERFCTR_RB_5_HI 0x00000147 + +#define REG_A4XX_RBBM_PERFCTR_RB_6_LO 0x00000148 + +#define REG_A4XX_RBBM_PERFCTR_RB_6_HI 0x00000149 + +#define REG_A4XX_RBBM_PERFCTR_RB_7_LO 0x0000014a + +#define REG_A4XX_RBBM_PERFCTR_RB_7_HI 0x0000014b + +#define REG_A4XX_RBBM_PERFCTR_VSC_0_LO 0x0000014c + +#define REG_A4XX_RBBM_PERFCTR_VSC_0_HI 0x0000014d + +#define REG_A4XX_RBBM_PERFCTR_VSC_1_LO 0x0000014e + +#define REG_A4XX_RBBM_PERFCTR_VSC_1_HI 0x0000014f + +#define REG_A4XX_RBBM_PERFCTR_PWR_0_LO 0x00000166 + +#define REG_A4XX_RBBM_PERFCTR_PWR_0_HI 0x00000167 + +#define REG_A4XX_RBBM_PERFCTR_PWR_1_LO 0x00000168 + +#define REG_A4XX_RBBM_PERFCTR_PWR_1_HI 0x00000169 + +#define REG_A4XX_RBBM_ALWAYSON_COUNTER_LO 0x0000016e + +#define REG_A4XX_RBBM_ALWAYSON_COUNTER_HI 0x0000016f + static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP(uint32_t i0) { return 0x00000068 + 0x1*i0; } static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP_REG(uint32_t i0) { return 0x00000068 + 0x1*i0; } @@ -1136,6 +2056,14 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0) #define REG_A4XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000175 +#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_0 0x00000176 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_1 0x00000177 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_2 0x00000178 + +#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_3 0x00000179 + #define REG_A4XX_RBBM_GPU_BUSY_MASKED 0x0000017a #define REG_A4XX_RBBM_INT_0_STATUS 0x0000017d @@ -1272,6 +2200,20 @@ 
static inline uint32_t REG_A4XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000240 #define REG_A4XX_CP_PERFCTR_CP_SEL_0 0x00000500 +#define REG_A4XX_CP_PERFCTR_CP_SEL_1 0x00000501 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_2 0x00000502 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_3 0x00000503 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_4 0x00000504 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_5 0x00000505 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_6 0x00000506 + +#define REG_A4XX_CP_PERFCTR_CP_SEL_7 0x00000507 + #define REG_A4XX_CP_PERFCOMBINER_SELECT 0x0000050b static inline uint32_t REG_A4XX_CP_SCRATCH(uint32_t i0) { return 0x00000578 + 0x1*i0; } @@ -1802,6 +2744,12 @@ static inline uint32_t A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) #define REG_A4XX_VPC_DEBUG_ECO_CONTROL 0x00000e64 +#define REG_A4XX_VPC_PERFCTR_VPC_SEL_0 0x00000e65 + +#define REG_A4XX_VPC_PERFCTR_VPC_SEL_1 0x00000e66 + +#define REG_A4XX_VPC_PERFCTR_VPC_SEL_2 0x00000e67 + #define REG_A4XX_VPC_PERFCTR_VPC_SEL_3 0x00000e68 #define REG_A4XX_VPC_ATTR 0x00002140 @@ -1914,6 +2862,20 @@ static inline uint32_t REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(uint32_t i0) { return 0 #define REG_A4XX_VFD_DEBUG_CONTROL 0x00000e40 +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_0 0x00000e43 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_1 0x00000e44 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_2 0x00000e45 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_3 0x00000e46 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_4 0x00000e47 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_5 0x00000e48 + +#define REG_A4XX_VFD_PERFCTR_VFD_SEL_6 0x00000e49 + #define REG_A4XX_VFD_PERFCTR_VFD_SEL_7 0x00000e4a #define REG_A4XX_VGT_CL_INITIATOR 0x000021d0 @@ -2070,6 +3032,20 @@ static inline uint32_t A4XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val) #define REG_A4XX_TPL1_TP_MODE_CONTROL 0x00000f03 +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_0 0x00000f04 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_1 0x00000f05 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_2 0x00000f06 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_3 0x00000f07 + +#define 
REG_A4XX_TPL1_PERFCTR_TP_SEL_4 0x00000f08 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_5 0x00000f09 + +#define REG_A4XX_TPL1_PERFCTR_TP_SEL_6 0x00000f0a + #define REG_A4XX_TPL1_PERFCTR_TP_SEL_7 0x00000f0b #define REG_A4XX_TPL1_TP_TEX_OFFSET 0x00002380 @@ -2124,8 +3100,20 @@ static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val) #define REG_A4XX_GRAS_PERFCTR_TSE_SEL_0 0x00000c88 +#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_1 0x00000c89 + +#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_2 0x00000c8a + #define REG_A4XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c8b +#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_0 0x00000c8c + +#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_1 0x00000c8d + +#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_2 0x00000c8e + +#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_3 0x00000c8f + #define REG_A4XX_GRAS_CL_CLIP_CNTL 0x00002000 #define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00008000 #define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000 @@ -2391,6 +3379,20 @@ static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y(uint32_t val) #define REG_A4XX_UCHE_CACHE_WAYS_VFD 0x00000e8c +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_0 0x00000e8e + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_1 0x00000e8f + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_2 0x00000e90 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_3 0x00000e91 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_4 0x00000e92 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_5 0x00000e93 + +#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_6 0x00000e94 + #define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000e95 #define REG_A4XX_HLSQ_TIMEOUT_THRESHOLD 0x00000e00 @@ -2401,6 +3403,22 @@ static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y(uint32_t val) #define REG_A4XX_HLSQ_PERF_PIPE_MASK 0x00000e0e +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_0 0x00000e06 + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_1 0x00000e07 + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_2 0x00000e08 + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_3 0x00000e09 + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_4 0x00000e0a + +#define 
REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_5 0x00000e0b + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_6 0x00000e0c + +#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_7 0x00000e0d + #define REG_A4XX_HLSQ_CONTROL_0_REG 0x000023c0 #define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000010 #define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 4 @@ -2655,6 +3673,18 @@ static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(uint32_t val) #define REG_A4XX_PC_PERFCTR_PC_SEL_0 0x00000d10 +#define REG_A4XX_PC_PERFCTR_PC_SEL_1 0x00000d11 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_2 0x00000d12 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_3 0x00000d13 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_4 0x00000d14 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_5 0x00000d15 + +#define REG_A4XX_PC_PERFCTR_PC_SEL_6 0x00000d16 + #define REG_A4XX_PC_PERFCTR_PC_SEL_7 0x00000d17 #define REG_A4XX_PC_BIN_BASE 0x000021c0 diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.h b/src/gallium/drivers/freedreno/a4xx/fd4_context.h index 074c5a752bf..0c1027d5804 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_context.h +++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.h @@ -49,6 +49,8 @@ struct fd4_context { /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We * could combine it with another allocation. + * + * (upper area used as scratch bo.. 
see fd4_query) */ struct fd_bo *vsc_size_mem; diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 4a3f1da30ed..72154bf286a 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -33,6 +33,7 @@ #include "util/u_format.h" #include "freedreno_resource.h" +#include "freedreno_query_hw.h" #include "fd4_emit.h" #include "fd4_blend.h" @@ -882,6 +883,8 @@ fd4_emit_restore(struct fd_context *ctx) OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1); OUT_RING(ring, 0x0); + fd_hw_query_enable(ctx, ring); + ctx->needs_rb_fbd = true; } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_query.c b/src/gallium/drivers/freedreno/a4xx/fd4_query.c index 4f69e0c1694..14a809431ac 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_query.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_query.c @@ -31,6 +31,7 @@ #include "freedreno_util.h" #include "fd4_query.h" +#include "fd4_context.h" #include "fd4_draw.h" #include "fd4_format.h" @@ -81,7 +82,12 @@ static uint64_t count_samples(const struct fd_rb_samp_ctrs *start, const struct fd_rb_samp_ctrs *end) { - return end->ctr[0] - start->ctr[0]; + uint64_t n = 0; + + for (unsigned i = 0; i < 16; i += 4) + n += end->ctr[i] - start->ctr[i]; + + return n / 2; } static void @@ -102,6 +108,127 @@ occlusion_predicate_accumulate_result(struct fd_context *ctx, result->b |= (n > 0); } +/* + * Time Elapsed Query: + * + * Note: we could in theory support timestamp queries, but they + * won't give sensible results for tilers. + */ + +static void +time_elapsed_enable(struct fd_context *ctx, struct fd_ringbuffer *ring) +{ + /* Right now, the assignment of countable to counter register is + * just hard coded. If we start exposing more countables than we + * have counters, we will need to be more clever. 
+ */ + fd_wfi(ctx, ring); + OUT_PKT0(ring, REG_A4XX_CP_PERFCTR_CP_SEL_0, 1); + OUT_RING(ring, CP_ALWAYS_COUNT); +} + +static struct fd_hw_sample * +time_elapsed_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring) +{ + struct fd_hw_sample *samp = fd_hw_sample_init(ctx, sizeof(uint64_t)); + + /* use unused part of vsc_size_mem as scratch space, to avoid + * extra allocation: + */ + struct fd_bo *scratch_bo = fd4_context(ctx)->vsc_size_mem; + const int sample_off = 128; + const int addr_off = sample_off + 8; + + debug_assert(ctx->screen->max_freq > 0); + + /* Basic issue is that we need to read counter value to a relative + * destination (with per-tile offset) rather than absolute dest + * addr. But there is no pm4 packet that can do that. This is + * where it would be *really* nice if we could write our own fw + * since afaict implementing the sort of packet we need would be + * trivial. + * + * Instead, we: + * (1) CP_REG_TO_MEM to do a 64b copy of counter to scratch buffer + * (2) CP_MEM_WRITE to write per-sample offset to scratch buffer + * (3) CP_REG_TO_MEM w/ accumulate flag to add the per-tile base + * address to the per-sample offset in the scratch buffer + * (4) CP_MEM_TO_REG to copy resulting address from steps #2 and #3 + * to CP_ME_NRT_ADDR + * (5) CP_MEM_TO_REG's to copy saved counter value from scratch + * buffer to CP_ME_NRT_DATA to trigger the write out to query + * result buffer + * + * Straightforward, right? + * + * Maybe could swap the order of things in the scratch buffer to + * put address first, and copy back to CP_ME_NRT_ADDR+DATA in one + * shot, but that's really just polishing a turd.. + */ + + fd_wfi(ctx, ring); + + /* copy sample counter _LO and _HI to scratch: */ + OUT_PKT3(ring, CP_REG_TO_MEM, 2); + OUT_RING(ring, CP_REG_TO_MEM_0_REG(REG_A4XX_RBBM_PERFCTR_CP_0_LO) | + CP_REG_TO_MEM_0_64B | + CP_REG_TO_MEM_0_CNT(2-1)); /* write 2 regs to mem */ + OUT_RELOC(ring, scratch_bo, sample_off, 0, 0); + + /* ok... 
here we really *would* like to use the CP_SET_CONSTANT + * mode which can add a constant to value in reg2 and write to + * reg1... *but* that only works for banked/context registers, + * and CP_ME_NRT_DATA isn't one of those.. so we need to do some + * CP math to the scratch buffer instead: + * + * (note first 8 bytes are counter value, use offset 0x8 for + * address calculation) + */ + + /* per-sample offset to scratch bo: */ + OUT_PKT3(ring, CP_MEM_WRITE, 2); + OUT_RELOC(ring, scratch_bo, addr_off, 0, 0); + OUT_RING(ring, samp->offset); + + /* now add to that the per-tile base: */ + OUT_PKT3(ring, CP_REG_TO_MEM, 2); + OUT_RING(ring, CP_REG_TO_MEM_0_REG(HW_QUERY_BASE_REG) | + CP_REG_TO_MEM_0_ACCUMULATE | + CP_REG_TO_MEM_0_CNT(1-1)); /* readback 1 regs */ + OUT_RELOC(ring, scratch_bo, addr_off, 0, 0); + + /* now copy that back to CP_ME_NRT_ADDR: */ + OUT_PKT3(ring, CP_MEM_TO_REG, 2); + OUT_RING(ring, REG_A4XX_CP_ME_NRT_ADDR); + OUT_RELOC(ring, scratch_bo, addr_off, 0, 0); + + /* and finally, copy sample from scratch buffer to CP_ME_NRT_DATA + * to trigger the write to result buffer + */ + OUT_PKT3(ring, CP_MEM_TO_REG, 2); + OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA); + OUT_RELOC(ring, scratch_bo, sample_off, 0, 0); + + /* and again to get the value of the _HI reg from scratch: */ + OUT_PKT3(ring, CP_MEM_TO_REG, 2); + OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA); + OUT_RELOC(ring, scratch_bo, sample_off + 0x4, 0, 0); + + /* Sigh.. 
*/ + + return samp; +} + +static void +time_elapsed_accumulate_result(struct fd_context *ctx, + const void *start, const void *end, + union pipe_query_result *result) +{ + uint64_t n = *(const uint64_t *)end - *(const uint64_t *)start; + /* max_freq is in Hz, convert cycle count to ns; split into quotient and remainder terms so that n * 1000000000 cannot wrap 64 bits for long periods (result is identical to n * 1000000000 / max_freq when that does not overflow): */ + result->u64 += (n / ctx->screen->max_freq) * 1000000000 + (n % ctx->screen->max_freq) * 1000000000 / ctx->screen->max_freq; +} + static const struct fd_hw_sample_provider occlusion_counter = { .query_type = PIPE_QUERY_OCCLUSION_COUNTER, .active = FD_STAGE_DRAW, @@ -116,8 +243,17 @@ static const struct fd_hw_sample_provider occlusion_predicate = { .accumulate_result = occlusion_predicate_accumulate_result, }; +static const struct fd_hw_sample_provider time_elapsed = { + .query_type = PIPE_QUERY_TIME_ELAPSED, + .active = FD_STAGE_DRAW, + .enable = time_elapsed_enable, + .get_sample = time_elapsed_get_sample, + .accumulate_result = time_elapsed_accumulate_result, +}; + void fd4_query_context_init(struct pipe_context *pctx) { fd_hw_query_register_provider(pctx, &occlusion_counter); fd_hw_query_register_provider(pctx, &occlusion_predicate); + fd_hw_query_register_provider(pctx, &time_elapsed); } diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h index f9c0e6aaa83..ac5343f1a78 100644 --- a/src/gallium/drivers/freedreno/adreno_common.xml.h +++ b/src/gallium/drivers/freedreno/adreno_common.xml.h @@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) -- 
/home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) -Copyright (C) 2013-2015 by the following authors: +Copyright (C) 2013-2016 by the following authors: - Rob Clark <[email protected]> (robclark) +- Ilia Mirkin <[email protected]> (imirkin) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h index c6741890c69..09b26a253f0 100644 --- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h +++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h @@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 
17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00) -- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21) +- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21) - /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00) -Copyright (C) 2013-2015 by the following authors: +Copyright (C) 2013-2016 by the following authors: - Rob Clark <[email protected]> (robclark) +- Ilia Mirkin <[email protected]> (imirkin) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -172,6 +173,11 @@ enum adreno_pm4_type3_packets { CP_UNKNOWN_1A = 26, CP_UNKNOWN_4E = 78, CP_WIDE_REG_WRITE = 116, + CP_SCRATCH_TO_REG = 77, + CP_REG_TO_SCRATCH = 74, + CP_WAIT_MEM_WRITES = 18, + CP_COND_REG_EXEC = 71, + CP_MEM_TO_REG = 66, IN_IB_PREFETCH_END = 23, IN_SUBBLK_PREFETCH = 31, IN_INSTR_PREFETCH = 32, @@ -503,5 +509,29 @@ static inline uint32_t CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS(uint32_t val) return ((val) << CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT) & CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK; } +#define REG_CP_REG_TO_MEM_0 0x00000000 +#define 
CP_REG_TO_MEM_0_REG__MASK 0x0000ffff +#define CP_REG_TO_MEM_0_REG__SHIFT 0 +static inline uint32_t CP_REG_TO_MEM_0_REG(uint32_t val) +{ + return ((val) << CP_REG_TO_MEM_0_REG__SHIFT) & CP_REG_TO_MEM_0_REG__MASK; +} +#define CP_REG_TO_MEM_0_CNT__MASK 0x3ff80000 +#define CP_REG_TO_MEM_0_CNT__SHIFT 19 +static inline uint32_t CP_REG_TO_MEM_0_CNT(uint32_t val) +{ + return ((val) << CP_REG_TO_MEM_0_CNT__SHIFT) & CP_REG_TO_MEM_0_CNT__MASK; +} +#define CP_REG_TO_MEM_0_64B 0x40000000 +#define CP_REG_TO_MEM_0_ACCUMULATE 0x80000000 + +#define REG_CP_REG_TO_MEM_1 0x00000001 +#define CP_REG_TO_MEM_1_DEST__MASK 0xffffffff +#define CP_REG_TO_MEM_1_DEST__SHIFT 0 +static inline uint32_t CP_REG_TO_MEM_1_DEST(uint32_t val) +{ + return ((val) << CP_REG_TO_MEM_1_DEST__SHIFT) & CP_REG_TO_MEM_1_DEST__MASK; +} + #endif /* ADRENO_PM4_XML */ diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index 9e7130ab915..85ce97c16b7 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -164,6 +164,9 @@ struct fd_context { */ struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS]; + /* which sample providers were active in the current batch: */ + uint32_t active_providers; + /* tracking for current stage, to know when to start/stop * any active queries: */ diff --git a/src/gallium/drivers/freedreno/freedreno_query.h b/src/gallium/drivers/freedreno/freedreno_query.h index c2c71da2790..1e4f45ffcd3 100644 --- a/src/gallium/drivers/freedreno/freedreno_query.h +++ b/src/gallium/drivers/freedreno/freedreno_query.h @@ -65,4 +65,16 @@ fd_query(struct pipe_query *pq) void fd_query_screen_init(struct pipe_screen *pscreen); void fd_query_context_init(struct pipe_context *pctx); +static inline bool +skip_begin_query(int type) +{ + switch (type) { + case PIPE_QUERY_TIMESTAMP: + case PIPE_QUERY_GPU_FINISHED: + return true; + default: + return false; + } +} + #endif /* 
FREEDRENO_QUERY_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.c b/src/gallium/drivers/freedreno/freedreno_query_hw.c index 027fdc9de23..2ac03f22b41 100644 --- a/src/gallium/drivers/freedreno/freedreno_query_hw.c +++ b/src/gallium/drivers/freedreno/freedreno_query_hw.c @@ -47,6 +47,8 @@ static int pidx(unsigned query_type) return 0; case PIPE_QUERY_OCCLUSION_PREDICATE: return 1; + case PIPE_QUERY_TIME_ELAPSED: + return 2; default: return -1; } @@ -89,7 +91,9 @@ static void resume_query(struct fd_context *ctx, struct fd_hw_query *hq, struct fd_ringbuffer *ring) { + int idx = pidx(hq->provider->query_type); assert(!hq->period); + ctx->active_providers |= (1 << idx); hq->period = util_slab_alloc(&ctx->sample_period_pool); list_inithead(&hq->period->list); hq->period->start = get_sample(ctx, ring, hq->base.type); @@ -101,7 +105,9 @@ static void pause_query(struct fd_context *ctx, struct fd_hw_query *hq, struct fd_ringbuffer *ring) { + int idx = pidx(hq->provider->query_type); assert(hq->period && !hq->period->end); + assert(ctx->active_providers & (1 << idx)); hq->period->end = get_sample(ctx, ring, hq->base.type); list_addtail(&hq->period->list, &hq->current_periods); hq->period = NULL; @@ -156,6 +162,12 @@ static void fd_hw_end_query(struct fd_context *ctx, struct fd_query *q) { struct fd_hw_query *hq = fd_hw_query(q); + /* there are a couple special cases, which don't have + * a matching ->begin_query(): + */ + if (skip_begin_query(q->type) && !q->active) { + fd_hw_begin_query(ctx, q); + } if (!q->active) return; if (is_active(hq, ctx->stage)) @@ -291,6 +303,8 @@ fd_hw_sample_init(struct fd_context *ctx, uint32_t size) struct fd_hw_sample *samp = util_slab_alloc(&ctx->sample_pool); pipe_reference_init(&samp->reference, 1); samp->size = size; + debug_assert(util_is_power_of_two(size)); + ctx->next_sample_offset = align(ctx->next_sample_offset, size); samp->offset = ctx->next_sample_offset; /* NOTE: util_slab_alloc() does not zero out the buffer: 
*/ samp->bo = NULL; @@ -318,7 +332,7 @@ prepare_sample(struct fd_hw_sample *samp, struct fd_bo *bo, assert(samp->tile_stride == tile_stride); return; } - samp->bo = bo; + samp->bo = fd_bo_ref(bo); samp->num_tiles = num_tiles; samp->tile_stride = tile_stride; } @@ -431,6 +445,23 @@ fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring, ctx->stage = stage; } +/* call the provider->enable() for all the hw queries that were active + * in the current batch. This sets up perfctr selector regs statically + * for the duration of the batch. + */ +void +fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring) +{ + for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) { + if (ctx->active_providers & (1 << idx)) { + assert(ctx->sample_providers[idx]); + if (ctx->sample_providers[idx]->enable) + ctx->sample_providers[idx]->enable(ctx, ring); + } + } + ctx->active_providers = 0; /* clear it for next frame */ +} + void fd_hw_query_register_provider(struct pipe_context *pctx, const struct fd_hw_sample_provider *provider) diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.h b/src/gallium/drivers/freedreno/freedreno_query_hw.h index 8f4b1f58ee5..8a5d114d806 100644 --- a/src/gallium/drivers/freedreno/freedreno_query_hw.h +++ b/src/gallium/drivers/freedreno/freedreno_query_hw.h @@ -76,6 +76,11 @@ struct fd_hw_sample_provider { /* stages applicable to the query type: */ enum fd_render_stage active; + /* Optional hook for enabling a counter. Guaranteed to happen + * at least once before the first ->get_sample() in a batch. 
+ */ + void (*enable)(struct fd_context *ctx, struct fd_ringbuffer *ring); + /* when a new sample is required, emit appropriate cmdstream * and return a sample object: */ @@ -144,6 +149,7 @@ void fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n, struct fd_ringbuffer *ring); void fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring, enum fd_render_stage stage); +void fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring); void fd_hw_query_register_provider(struct pipe_context *pctx, const struct fd_hw_sample_provider *provider); void fd_hw_query_init(struct pipe_context *pctx); diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 27f4d267438..2b3ecfe664e 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -298,12 +298,14 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return is_a3xx(screen) ? 1 : 0; /* Queries. 
*/ - case PIPE_CAP_QUERY_TIME_ELAPSED: case PIPE_CAP_QUERY_TIMESTAMP: case PIPE_CAP_QUERY_BUFFER_OBJECT: return 0; case PIPE_CAP_OCCLUSION_QUERY: return is_a3xx(screen) || is_a4xx(screen); + case PIPE_CAP_QUERY_TIME_ELAPSED: + /* only a4xx, requires new enough kernel so we know max_freq: */ + return (screen->max_freq > 0) && is_a4xx(screen); case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET: case PIPE_CAP_MIN_TEXEL_OFFSET: @@ -434,9 +436,12 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 16; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; } debug_printf("unknown shader param %d\n", param); @@ -534,6 +539,16 @@ fd_screen_create(struct fd_device *dev) } screen->device_id = val; + if (fd_pipe_get_param(screen->pipe, FD_MAX_FREQ, &val)) { + DBG("could not get gpu freq"); + /* this limits what performance related queries are + * supported but is not fatal + */ + screen->max_freq = 0; + } else { + screen->max_freq = val; + } + if (fd_pipe_get_param(screen->pipe, FD_GPU_ID, &val)) { DBG("could not get gpu-id"); goto fail; diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h index 8fb096a10dd..a81c7786390 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.h +++ b/src/gallium/drivers/freedreno/freedreno_screen.h @@ -56,6 +56,7 @@ struct fd_screen { uint32_t device_id; uint32_t gpu_id; /* 220, 305, etc */ uint32_t chip_id; /* coreid:8 majorrev:8 minorrev:8 patch:8 */ + uint32_t max_freq; uint32_t max_rts; /* max # of render targets */ void *compiler; /* currently unused for a2xx */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index ffa75775505..7a1812f2518 100644 --- 
a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -1365,7 +1365,6 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex) struct ir3_block *b = ctx->block; struct ir3_instruction **dst, *sam, *src0[12], *src1[4]; struct ir3_instruction **coord, *lod, *compare, *proj, **off, **ddx, **ddy; - struct ir3_instruction *const_off[4]; bool has_bias = false, has_lod = false, has_proj = false, has_off = false; unsigned i, coords, flags; unsigned nsrc0 = 0, nsrc1 = 0; diff --git a/src/gallium/drivers/ilo/ilo_gpgpu.c b/src/gallium/drivers/ilo/ilo_gpgpu.c index b7415901a88..ab165b6d43b 100644 --- a/src/gallium/drivers/ilo/ilo_gpgpu.c +++ b/src/gallium/drivers/ilo/ilo_gpgpu.c @@ -79,9 +79,7 @@ launch_grid(struct ilo_context *ilo, } static void -ilo_launch_grid(struct pipe_context *pipe, - const uint *block_layout, const uint *grid_layout, - uint32_t pc, const void *input) +ilo_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) { struct ilo_context *ilo = ilo_context(pipe); struct ilo_shader_state *cs = ilo->state_vector.cs; @@ -92,13 +90,13 @@ ilo_launch_grid(struct pipe_context *pipe, input_buf.buffer_size = ilo_shader_get_kernel_param(cs, ILO_KERNEL_CS_INPUT_SIZE); if (input_buf.buffer_size) { - u_upload_data(ilo->uploader, 0, input_buf.buffer_size, 16, input, + u_upload_data(ilo->uploader, 0, input_buf.buffer_size, 16, info->input, &input_buf.buffer_offset, &input_buf.buffer); } ilo_shader_cache_upload(ilo->shader_cache, &ilo->cp->builder); - launch_grid(ilo, block_layout, grid_layout, &input_buf, pc); + launch_grid(ilo, info->block, info->grid, &input_buf, info->pc); ilo_render_invalidate_hw(ilo->render); diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index 44d7c11af43..ef9da6b8315 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -136,6 +136,8 @@ ilo_get_shader_param(struct pipe_screen *screen, 
unsigned shader, return ILO_MAX_SAMPLER_VIEWS; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: return 1; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 8dc2d38e039..f8d2637cc6f 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -1851,7 +1851,7 @@ ilo_set_sampler_views(struct pipe_context *pipe, unsigned shader, static void ilo_set_shader_images(struct pipe_context *pipe, unsigned shader, unsigned start, unsigned count, - struct pipe_image_view **views) + struct pipe_image_view *views) { #if 0 struct ilo_state_vector *vec = &ilo_context(pipe)->state_vector; diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index d22e50777fa..9e56c962d2d 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -910,7 +910,9 @@ lp_rast_create( unsigned num_threads ) create_rast_threads(rast); /* for synchronizing rasterization threads */ - pipe_barrier_init( &rast->barrier, rast->num_threads ); + if (rast->num_threads > 0) { + pipe_barrier_init( &rast->barrier, rast->num_threads ); + } memset(lp_dummy_tile, 0, sizeof lp_dummy_tile); @@ -967,7 +969,9 @@ void lp_rast_destroy( struct lp_rasterizer *rast ) } /* for synchronizing rasterization threads */ - pipe_barrier_destroy( &rast->barrier ); + if (rast->num_threads > 0) { + pipe_barrier_destroy( &rast->barrier ); + } lp_scene_queue_destroy(rast->full_scenes); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 5ab297d7e1a..97146912704 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -169,8 +169,8 @@ struct lp_setup_context }; static inline void -scissor_planes_needed(boolean 
scis_planes[4], struct u_rect *bbox, - struct u_rect *scissor) +scissor_planes_needed(boolean scis_planes[4], const struct u_rect *bbox, + const struct u_rect *scissor) { /* left */ scis_planes[0] = (bbox->x0 < scissor->x0); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_line.c b/src/gallium/drivers/llvmpipe/lp_setup_line.c index af4e7900d3c..018130c3192 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_line.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_line.c @@ -719,7 +719,7 @@ try_setup_line( struct lp_setup_context *setup, */ if (nr_planes > 4) { /* why not just use draw_regions */ - struct u_rect *scissor = &setup->scissors[viewport_index]; + const struct u_rect *scissor = &setup->scissors[viewport_index]; struct lp_rast_plane *plane_s = &plane[4]; boolean s_planes[4]; scissor_planes_needed(s_planes, &bbox, scissor); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index cdb3d015dec..29aee726941 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -681,7 +681,7 @@ do_triangle_ccw(struct lp_setup_context *setup, */ if (nr_planes > 3) { /* why not just use draw_regions */ - struct u_rect *scissor = &setup->scissors[viewport_index]; + const struct u_rect *scissor = &setup->scissors[viewport_index]; struct lp_rast_plane *plane_s = &plane[3]; boolean s_planes[4]; scissor_planes_needed(s_planes, &bbox, scissor); diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index 31a93659647..43ffce63a25 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -60,6 +60,8 @@ NV30_C_SOURCES := \ nv30/nvfx_vertprog.c NV50_C_SOURCES := \ + nv50/g80_defs.xml.h \ + nv50/g80_texture.xml.h \ nv50/nv50_2d.xml.h \ nv50/nv50_3ddefs.xml.h \ nv50/nv50_3d.xml.h \ @@ -68,7 +70,6 @@ NV50_C_SOURCES := \ nv50/nv50_compute.xml.h \ nv50/nv50_context.c \ nv50/nv50_context.h \ 
- nv50/nv50_defs.xml.h \ nv50/nv50_formats.c \ nv50/nv50_miptree.c \ nv50/nv50_program.c \ @@ -93,7 +94,6 @@ NV50_C_SOURCES := \ nv50/nv50_state_validate.c \ nv50/nv50_surface.c \ nv50/nv50_tex.c \ - nv50/nv50_texture.xml.h \ nv50/nv50_transfer.c \ nv50/nv50_transfer.h \ nv50/nv50_vbo.c \ @@ -147,6 +147,7 @@ NVC0_CODEGEN_SOURCES := \ codegen/nv50_ir_target_nvc0.h NVC0_C_SOURCES := \ + nvc0/gm107_texture.xml.h \ nvc0/nvc0_3d.xml.h \ nvc0/nvc0_compute.c \ nvc0/nvc0_compute.h \ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index 9d7becf27d4..97ebed455b6 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -232,6 +232,8 @@ enum operation #define NV50_IR_SUBOP_SHFL_UP 1 #define NV50_IR_SUBOP_SHFL_DOWN 2 #define NV50_IR_SUBOP_SHFL_BFLY 3 +#define NV50_IR_SUBOP_LOAD_LOCKED 1 +#define NV50_IR_SUBOP_STORE_UNLOCKED 2 #define NV50_IR_SUBOP_MADSP_SD 0xffff // Yes, we could represent those with DataType. // Or put the type into operation and have a couple 1000 values in that enum. diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index 0c7cd1d8137..a78b3f954a4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -433,6 +433,10 @@ CodeEmitterGK110::emitForm_21(const Instruction *i, uint32_t opc2, srcId(i->src(s), s ? ((s == 2) ? 
42 : s1) : 10); break; default: + if (i->op == OP_SELP) { + assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE); + srcId(i->src(s), 42); + } // ignore here, can be predicate or flags, but must not be address break; } @@ -1045,7 +1049,7 @@ void CodeEmitterGK110::emitSELP(const Instruction *i) { emitForm_21(i, 0x250, 0x050); - if ((i->cc == CC_NOT_P) ^ (bool)(i->src(2).mod & Modifier(NV50_IR_MOD_NOT))) + if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 13; } @@ -1239,7 +1243,7 @@ CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask defId(i->def(0), 2); srcId(i->src(0), 10); - srcId(i->srcExists(1) ? i->src(1) : i->src(0), 23); + srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 23); if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT) code[1] |= 1 << 9; // dall diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index dee26225b7e..93c40d15e46 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -193,6 +193,8 @@ private: void emitNOP(); void emitKIL(); void emitOUT(); + + void emitMEMBAR(); }; /******************************************************************************* @@ -248,6 +250,8 @@ CodeEmitterGM107::emitSYS(int pos, const Value *val) case SV_INVOCATION_ID : id = 0x11; break; case SV_THREAD_KILL : id = 0x13; break; case SV_INVOCATION_INFO: id = 0x1d; break; + case SV_TID : id = 0x21 + val->reg.data.sv.index; break; + case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break; default: assert(!"invalid system value"); id = 0; @@ -1531,7 +1535,10 @@ CodeEmitterGM107::emitFSWZADD() emitRND (0x27); emitField(0x26, 1, insn->lanes); /* abused for .ndv */ emitField(0x1c, 8, insn->subOp); - emitGPR (0x14, insn->src(1)); + if (insn->predSrc != 1) + emitGPR (0x14, insn->src(1)); + else + emitGPR (0x14); emitGPR (0x08, insn->src(0)); 
emitGPR (0x00, insn->def(0)); } @@ -2327,22 +2334,34 @@ void CodeEmitterGM107::emitATOM() { unsigned dType, subOp; - switch (insn->dType) { - case TYPE_U32: dType = 0; break; - case TYPE_S32: dType = 1; break; - case TYPE_U64: dType = 2; break; - case TYPE_F32: dType = 3; break; - case TYPE_B128: dType = 4; break; - case TYPE_S64: dType = 5; break; - default: assert(!"unexpected dType"); dType = 0; break; - } - if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) - subOp = 8; - else - subOp = insn->subOp; - assert(insn->subOp != NV50_IR_SUBOP_ATOM_CAS); /* XXX */ - emitInsn (0xed000000); + if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) { + switch (insn->dType) { + case TYPE_U32: dType = 0; break; + case TYPE_U64: dType = 1; break; + default: assert(!"unexpected dType"); dType = 0; break; + } + subOp = 15; + + emitInsn (0xee000000); + } else { + switch (insn->dType) { + case TYPE_U32: dType = 0; break; + case TYPE_S32: dType = 1; break; + case TYPE_U64: dType = 2; break; + case TYPE_F32: dType = 3; break; + case TYPE_B128: dType = 4; break; + case TYPE_S64: dType = 5; break; + default: assert(!"unexpected dType"); dType = 0; break; + } + if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) + subOp = 8; + else + subOp = insn->subOp; + + emitInsn (0xed000000); + } + emitField(0x34, 4, subOp); emitField(0x31, 3, dType); emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8); @@ -2627,6 +2646,13 @@ CodeEmitterGM107::emitOUT() emitGPR (0x00, insn->def(0)); } +void +CodeEmitterGM107::emitMEMBAR() +{ + emitInsn (0xef980000); + emitField(0x08, 2, insn->subOp >> 2); +} + /******************************************************************************* * assembler front-end ******************************************************************************/ @@ -2926,6 +2952,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i) case OP_RESTART: emitOUT(); break; + case OP_MEMBAR: + emitMEMBAR(); + break; default: assert(!"invalid opcode"); emitNOP(); diff --git 
a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp index bc8354deba1..682a19d6d78 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp @@ -527,7 +527,8 @@ CodeEmitterNV50::emitForm_ADD(const Instruction *i) setSrcFileBits(i, NV50_OP_ENC_LONG_ALT); setSrc(i, 0, 0); - setSrc(i, 1, 2); + if (i->predSrc != 1) + setSrc(i, 1, 2); if (i->getIndirect(0, 0)) { assert(!i->getIndirect(1, 0)); @@ -840,7 +841,7 @@ CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp) emitForm_ADD(i); - if (!i->srcExists(1)) + if (!i->srcExists(1) || i->predSrc == 1) srcId(i->src(0), 32 + 14); } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index 8637db91521..0068da5cbb7 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -398,6 +398,11 @@ CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc) srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20); break; default: + if (i->op == OP_SELP) { + // OP_SELP is used to implement shared+atomics on Fermi. + assert(s == 2 && i->src(s).getFile() == FILE_PREDICATE); + srcId(i->src(s), 49); + } // ignore here, can be predicate or flags, but must not be address break; } @@ -1174,7 +1179,7 @@ void CodeEmitterNVC0::emitSELP(const Instruction *i) { emitForm_A(i, HEX64(20000000, 00000004)); - if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT)) + if (i->src(2).mod & Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 20; } @@ -1334,7 +1339,7 @@ CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask) defId(i->def(0), 14); srcId(i->src(0), 20); - srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26); + srcId((i->srcExists(1) && i->predSrc != 1) ? 
i->src(1) : i->src(0), 26); if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT) code[0] |= 1 << 9; // dall @@ -1773,7 +1778,16 @@ CodeEmitterNVC0::emitSTORE(const Instruction *i) switch (i->src(0).getFile()) { case FILE_MEMORY_GLOBAL: opc = 0x90000000; break; case FILE_MEMORY_LOCAL: opc = 0xc8000000; break; - case FILE_MEMORY_SHARED: opc = 0xc9000000; break; + case FILE_MEMORY_SHARED: + if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) { + if (targ->getChipset() >= NVISA_GK104_CHIPSET) + opc = 0xb8000000; + else + opc = 0xcc000000; + } else { + opc = 0xc9000000; + } + break; default: assert(!"invalid memory file"); opc = 0; @@ -1782,6 +1796,15 @@ CodeEmitterNVC0::emitSTORE(const Instruction *i) code[0] = 0x00000005; code[1] = opc; + if (targ->getChipset() >= NVISA_GK104_CHIPSET) { + // Unlocked store on shared memory can fail. + if (i->src(0).getFile() == FILE_MEMORY_SHARED && + i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) { + assert(i->defExists(0)); + defId(i->def(0), 8); + } + } + setAddressByFile(i->src(0)); srcId(i->src(1), 14); srcId(i->src(0).getIndirect(0), 20); @@ -1804,7 +1827,16 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i) switch (i->src(0).getFile()) { case FILE_MEMORY_GLOBAL: opc = 0x80000000; break; case FILE_MEMORY_LOCAL: opc = 0xc0000000; break; - case FILE_MEMORY_SHARED: opc = 0xc1000000; break; + case FILE_MEMORY_SHARED: + if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) { + if (targ->getChipset() >= NVISA_GK104_CHIPSET) + opc = 0xa8000000; + else + opc = 0xc4000000; + } else { + opc = 0xc1000000; + } + break; case FILE_MEMORY_CONST: if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) { emitMOV(i); // not sure if this is any better @@ -1820,6 +1852,13 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i) } code[1] = opc; + if (i->src(0).getFile() == FILE_MEMORY_SHARED) { + if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) { + assert(i->defExists(1)); + defId(i->def(1), 32 + 18); + } + } + defId(i->def(0), 14); setAddressByFile(i->src(0)); diff 
--git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 52ac198221d..d06e9efa463 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -374,6 +374,7 @@ static nv50_ir::DataFile translateFile(uint file) case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE; case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE; case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL; + case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL; case TGSI_FILE_SAMPLER: case TGSI_FILE_NULL: default: @@ -858,6 +859,11 @@ public: }; std::vector<Resource> resources; + struct MemoryFile { + bool shared; + }; + std::vector<MemoryFile> memoryFiles; + private: int inferSysValDirection(unsigned sn) const; bool scanDeclaration(const struct tgsi_full_declaration *); @@ -904,6 +910,7 @@ bool Source::scanSource() textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1); //resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1); tempArrayId.resize(scan.file_max[TGSI_FILE_TEMPORARY] + 1); + memoryFiles.resize(scan.file_max[TGSI_FILE_MEMORY] + 1); info->immd.bufSize = 0; @@ -1213,6 +1220,11 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) for (i = first; i <= last; ++i) textureViews[i].target = decl->SamplerView.Resource; break; + case TGSI_FILE_MEMORY: + for (i = first; i <= last; ++i) + memoryFiles[i].shared = decl->Declaration.Shared; + break; + case TGSI_FILE_NULL: case TGSI_FILE_TEMPORARY: for (i = first; i <= last; ++i) tempArrayId[i] = arrayId; @@ -1220,7 +1232,6 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) tempArrayInfo.insert(std::make_pair(arrayId, std::make_pair( first, last - first + 1))); break; - case TGSI_FILE_NULL: case TGSI_FILE_ADDRESS: case TGSI_FILE_CONSTANT: case TGSI_FILE_IMMEDIATE: @@ -1516,6 +1527,9 @@ Converter::makeSym(uint tgsiFile, int 
fileIdx, int idx, int c, uint32_t address) sym->reg.fileIndex = fileIdx; + if (tgsiFile == TGSI_FILE_MEMORY && code->memoryFiles[fileIdx].shared) + sym->setFile(FILE_MEMORY_SHARED); + if (idx >= 0) { if (sym->reg.file == FILE_SHADER_INPUT) sym->setOffset(info->in[idx].slot[c] * 4); @@ -1769,7 +1783,7 @@ Converter::acquireDst(int d, int c) int idx = dst.getIndex(0); int idx2d = dst.is2D() ? dst.getIndex(1) : 0; - if (dst.isMasked(c) || f == TGSI_FILE_BUFFER) + if (dst.isMasked(c) || f == TGSI_FILE_BUFFER || f == TGSI_FILE_MEMORY) return NULL; if (dst.isIndirect(0) || @@ -2239,7 +2253,8 @@ Converter::handleLOAD(Value *dst0[4]) int c; std::vector<Value *> off, src, ldv, def; - if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) { + if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER || + tgsi.getSrc(0).getFile() == TGSI_FILE_MEMORY) { for (c = 0; c < 4; ++c) { if (!dst0[c]) continue; @@ -2248,9 +2263,10 @@ Converter::handleLOAD(Value *dst0[4]) Symbol *sym; if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) { off = NULL; - sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(0, info) + 4 * c); + sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, + tgsi.getSrc(1).getValueU32(0, info) + 4 * c); } else { - sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c); + sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 4 * c); } Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off); @@ -2337,7 +2353,8 @@ Converter::handleSTORE() int c; std::vector<Value *> off, src, dummy; - if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER) { + if (tgsi.getDst(0).getFile() == TGSI_FILE_BUFFER || + tgsi.getDst(0).getFile() == TGSI_FILE_MEMORY) { for (c = 0; c < 4; ++c) { if (!(tgsi.getDst(0).getMask() & (1 << c))) continue; @@ -2346,11 +2363,11 @@ Converter::handleSTORE() Value *off; if (tgsi.getSrc(0).getFile() == TGSI_FILE_IMMEDIATE) { off = NULL; - sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, + sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, tgsi.getSrc(0).getValueU32(0, info) + 4 * c); } 
else { off = fetchSrc(0, 0); - sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 4 * c); + sym = makeSym(tgsi.getDst(0).getFile(), r, -1, c, 4 * c); } Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c)); @@ -2422,7 +2439,8 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) std::vector<Value *> defv; LValue *dst = getScratch(); - if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) { + if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER || + tgsi.getSrc(0).getFile() == TGSI_FILE_MEMORY) { for (int c = 0; c < 4; ++c) { if (!dst0[c]) continue; @@ -2431,9 +2449,10 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) Value *off = fetchSrc(1, c), *off2 = NULL; Value *sym; if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE) - sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, tgsi.getSrc(1).getValueU32(c, info)); + sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, + tgsi.getSrc(1).getValueU32(c, info)); else - sym = makeSym(TGSI_FILE_BUFFER, r, -1, c, 0); + sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 0); if (tgsi.getSrc(0).isIndirect(0)) off2 = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0); if (subOp == NV50_IR_SUBOP_ATOM_CAS) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index e7cb54bc426..d181f1574f1 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -1033,6 +1033,100 @@ NVC0LoweringPass::handleSUQ(Instruction *suq) return true; } +void +NVC0LoweringPass::handleSharedATOM(Instruction *atom) +{ + assert(atom->src(0).getFile() == FILE_MEMORY_SHARED); + + BasicBlock *currBB = atom->bb; + BasicBlock *tryLockAndSetBB = atom->bb->splitBefore(atom, false); + BasicBlock *joinBB = atom->bb->splitAfter(atom); + + bld.setPosition(currBB, true); + assert(!currBB->joinAt); + currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL); + + 
bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_ALWAYS, NULL); + currBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::TREE); + + bld.setPosition(tryLockAndSetBB, true); + + Instruction *ld = + bld.mkLoad(TYPE_U32, atom->getDef(0), + bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0), NULL); + ld->setDef(1, bld.getSSA(1, FILE_PREDICATE)); + ld->subOp = NV50_IR_SUBOP_LOAD_LOCKED; + + Value *stVal; + if (atom->subOp == NV50_IR_SUBOP_ATOM_EXCH) { + // Read the old value, and write the new one. + stVal = atom->getSrc(1); + } else if (atom->subOp == NV50_IR_SUBOP_ATOM_CAS) { + CmpInstruction *set = + bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE), + TYPE_U32, ld->getDef(0), atom->getSrc(1)); + set->setPredicate(CC_P, ld->getDef(1)); + + Instruction *selp = + bld.mkOp3(OP_SELP, TYPE_U32, bld.getSSA(), ld->getDef(0), + atom->getSrc(2), set->getDef(0)); + selp->src(2).mod = Modifier(NV50_IR_MOD_NOT); + selp->setPredicate(CC_P, ld->getDef(1)); + + stVal = selp->getDef(0); + } else { + operation op; + + switch (atom->subOp) { + case NV50_IR_SUBOP_ATOM_ADD: + op = OP_ADD; + break; + case NV50_IR_SUBOP_ATOM_AND: + op = OP_AND; + break; + case NV50_IR_SUBOP_ATOM_OR: + op = OP_OR; + break; + case NV50_IR_SUBOP_ATOM_XOR: + op = OP_XOR; + break; + case NV50_IR_SUBOP_ATOM_MIN: + op = OP_MIN; + break; + case NV50_IR_SUBOP_ATOM_MAX: + op = OP_MAX; + break; + default: + assert(0); + } + + Instruction *i = + bld.mkOp2(op, atom->dType, bld.getSSA(), ld->getDef(0), + atom->getSrc(1)); + i->setPredicate(CC_P, ld->getDef(1)); + + stVal = i->getDef(0); + } + + Instruction *st = + bld.mkStore(OP_STORE, TYPE_U32, + bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U32, 0), + NULL, stVal); + st->setPredicate(CC_P, ld->getDef(1)); + st->subOp = NV50_IR_SUBOP_STORE_UNLOCKED; + + // Loop until the lock is acquired. 
+ bld.mkFlow(OP_BRA, tryLockAndSetBB, CC_NOT_P, ld->getDef(1)); + tryLockAndSetBB->cfg.attach(&tryLockAndSetBB->cfg, Graph::Edge::BACK); + tryLockAndSetBB->cfg.attach(&joinBB->cfg, Graph::Edge::CROSS); + bld.mkFlow(OP_BRA, joinBB, CC_ALWAYS, NULL); + + bld.remove(atom); + + bld.setPosition(joinBB, false); + bld.mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1; +} + bool NVC0LoweringPass::handleATOM(Instruction *atom) { @@ -1044,8 +1138,8 @@ NVC0LoweringPass::handleATOM(Instruction *atom) sv = SV_LBASE; break; case FILE_MEMORY_SHARED: - sv = SV_SBASE; - break; + handleSharedATOM(atom); + return true; default: assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL); base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16); @@ -1072,6 +1166,11 @@ NVC0LoweringPass::handleATOM(Instruction *atom) bool NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl) { + if (cas->src(0).getFile() == FILE_MEMORY_SHARED) { + // ATOM_CAS and ATOM_EXCH are handled in handleSharedATOM(). + return false; + } + if (cas->subOp != NV50_IR_SUBOP_ATOM_CAS && cas->subOp != NV50_IR_SUBOP_ATOM_EXCH) return false; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 09ec7e69ddc..6eb8aff3036 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -105,6 +105,7 @@ protected: bool handleATOM(Instruction *); bool handleCasExch(Instruction *, bool needCctl); void handleSurfaceOpNVE4(TexInstruction *); + void handleSharedATOM(Instruction *); void checkPredicate(Instruction *); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 05b8db4a3d8..6192c0665e4 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -1539,6 +1539,7 @@ private: void 
handleCVT_CVT(Instruction *); void handleCVT_EXTBF(Instruction *); void handleSUCLAMP(Instruction *); + void handleNEG(Instruction *); BuildUtil bld; }; @@ -1634,6 +1635,9 @@ AlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp) if (src->getUniqueInsn() && src->getUniqueInsn()->bb != add->bb) return false; + if (src->getInsn()->saturate) + return false; + if (src->getInsn()->postFactor) return false; if (toOp == OP_SAD) { @@ -2011,6 +2015,34 @@ AlgebraicOpt::handleSUCLAMP(Instruction *insn) insn->setSrc(0, add->getSrc(s)); } +// NEG(AND(SET, 1)) -> SET +void +AlgebraicOpt::handleNEG(Instruction *i) { + Instruction *src = i->getSrc(0)->getInsn(); + ImmediateValue imm; + int b; + + if (isFloatType(i->sType) || !src || src->op != OP_AND) + return; + + if (src->src(0).getImmediate(imm)) + b = 1; + else if (src->src(1).getImmediate(imm)) + b = 0; + else + return; + + if (!imm.isInteger(1)) + return; + + Instruction *set = src->getSrc(b)->getInsn(); + if ((set->op == OP_SET || set->op == OP_SET_AND || + set->op == OP_SET_OR || set->op == OP_SET_XOR) && + !isFloatType(set->dType)) { + i->def(0).replace(set->getDef(0), false); + } +} + bool AlgebraicOpt::visit(BasicBlock *bb) { @@ -2048,6 +2080,9 @@ AlgebraicOpt::visit(BasicBlock *bb) case OP_SUCLAMP: handleSUCLAMP(i); break; + case OP_NEG: + handleNEG(i); + break; default: break; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index 47285a25c33..85f77047c5c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -198,6 +198,11 @@ static const char *atomSubOpStr[] = "add", "min", "max", "inc", "dec", "and", "or", "xor", "cas", "exch" }; +static const char *ldstSubOpStr[] = +{ + "", "lock", "unlock" +}; + static const char *DataTypeStr[] = { "-", @@ -537,6 +542,11 @@ void Instruction::print() const if (subOp < Elements(atomSubOpStr)) PRINT("%s ", 
atomSubOpStr[subOp]); break; + case OP_LOAD: + case OP_STORE: + if (subOp < Elements(ldstSubOpStr)) + PRINT("%s ", ldstSubOpStr[subOp]); + break; default: if (subOp) PRINT("(SUBOP:%u) ", subOp); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index de39be872e4..d877c253a17 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -968,6 +968,7 @@ GCRA::coalesce(ArrayList& insns) case 0xf0: case 0x100: case 0x110: + case 0x120: ret = doCoalesce(insns, JOIN_MASK_UNION); break; default: @@ -2231,6 +2232,7 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb) texConstraintNVE0(tex); break; case 0x110: + case 0x120: texConstraintGM107(tex); break; default: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index ae0a8bb61d1..89d3a08937f 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -143,6 +143,7 @@ Target *Target::create(unsigned int chipset) STATIC_ASSERT(Elements(operationClass) == OP_LAST + 1); switch (chipset & ~0xf) { case 0x110: + case 0x120: return getTargetGM107(chipset); case 0xc0: case 0xd0: diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index b62889119c5..5be7a3dab76 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -280,6 +280,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; @@ -324,6 +325,7 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, 
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; diff --git a/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h new file mode 100644 index 00000000000..5d40624bb9e --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h @@ -0,0 +1,279 @@ +#ifndef G80_DEFS_XML +#define G80_DEFS_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/envytools/envytools/ +git clone https://github.com/envytools/envytools.git + +The rules-ng-ng source files this header was generated from are: +- /home/skeggsb/git/envytools/rnndb/../rnndb/graph/g80_texture.xml ( 18837 bytes, from 2016-01-14 23:54:22) +- /home/skeggsb/git/envytools/rnndb/copyright.xml ( 6456 bytes, from 2015-09-10 02:57:40) +- /home/skeggsb/git/envytools/rnndb/nvchipsets.xml ( 2908 bytes, from 2016-02-02 23:45:00) +- /home/skeggsb/git/envytools/rnndb/g80_defs.xml ( 21739 bytes, from 2016-02-04 00:29:42) +- /home/skeggsb/git/envytools/rnndb/nv_defs.xml ( 5388 bytes, from 2016-01-14 23:54:22) + +Copyright (C) 2006-2016 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. 
<[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- Ilia Mirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission 
notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +#define G80_VSTATUS_IDLE 0x00000000 +#define G80_VSTATUS_BUSY 0x00000001 +#define G80_VSTATUS_UNK2 0x00000002 +#define G80_VSTATUS_WAITING 0x00000003 +#define G80_VSTATUS_BLOCKED 0x00000005 +#define G80_VSTATUS_FAULTED 0x00000006 +#define G80_VSTATUS_PAUSED 0x00000007 +#define G80_TIC_SOURCE_ZERO 0x00000000 +#define G80_TIC_SOURCE_R 0x00000002 +#define G80_TIC_SOURCE_G 0x00000003 +#define G80_TIC_SOURCE_B 0x00000004 +#define G80_TIC_SOURCE_A 0x00000005 +#define G80_TIC_SOURCE_ONE_INT 0x00000006 +#define G80_TIC_SOURCE_ONE_FLOAT 0x00000007 +#define G80_TIC_TYPE_SNORM 0x00000001 +#define G80_TIC_TYPE_UNORM 0x00000002 +#define G80_TIC_TYPE_SINT 0x00000003 +#define G80_TIC_TYPE_UINT 0x00000004 +#define G80_TIC_TYPE_SNORM_FORCE_FP16 0x00000005 +#define G80_TIC_TYPE_UNORM_FORCE_FP16 0x00000006 +#define G80_TIC_TYPE_FLOAT 0x00000007 +#define G80_SURFACE_FORMAT_BITMAP 0x0000001c +#define G80_SURFACE_FORMAT_UNK1D 0x0000001d +#define G80_SURFACE_FORMAT_RGBA32_FLOAT 0x000000c0 +#define G80_SURFACE_FORMAT_RGBA32_SINT 0x000000c1 +#define G80_SURFACE_FORMAT_RGBA32_UINT 0x000000c2 +#define G80_SURFACE_FORMAT_RGBX32_FLOAT 0x000000c3 +#define G80_SURFACE_FORMAT_RGBX32_SINT 0x000000c4 +#define G80_SURFACE_FORMAT_RGBX32_UINT 0x000000c5 +#define G80_SURFACE_FORMAT_RGBA16_UNORM 0x000000c6 +#define G80_SURFACE_FORMAT_RGBA16_SNORM 0x000000c7 +#define G80_SURFACE_FORMAT_RGBA16_SINT 0x000000c8 
+#define G80_SURFACE_FORMAT_RGBA16_UINT 0x000000c9 +#define G80_SURFACE_FORMAT_RGBA16_FLOAT 0x000000ca +#define G80_SURFACE_FORMAT_RG32_FLOAT 0x000000cb +#define G80_SURFACE_FORMAT_RG32_SINT 0x000000cc +#define G80_SURFACE_FORMAT_RG32_UINT 0x000000cd +#define G80_SURFACE_FORMAT_RGBX16_FLOAT 0x000000ce +#define G80_SURFACE_FORMAT_BGRA8_UNORM 0x000000cf +#define G80_SURFACE_FORMAT_BGRA8_SRGB 0x000000d0 +#define G80_SURFACE_FORMAT_RGB10_A2_UNORM 0x000000d1 +#define G80_SURFACE_FORMAT_RGB10_A2_UINT 0x000000d2 +#define G80_SURFACE_FORMAT_RGBA8_UNORM 0x000000d5 +#define G80_SURFACE_FORMAT_RGBA8_SRGB 0x000000d6 +#define G80_SURFACE_FORMAT_RGBA8_SNORM 0x000000d7 +#define G80_SURFACE_FORMAT_RGBA8_SINT 0x000000d8 +#define G80_SURFACE_FORMAT_RGBA8_UINT 0x000000d9 +#define G80_SURFACE_FORMAT_RG16_UNORM 0x000000da +#define G80_SURFACE_FORMAT_RG16_SNORM 0x000000db +#define G80_SURFACE_FORMAT_RG16_SINT 0x000000dc +#define G80_SURFACE_FORMAT_RG16_UINT 0x000000dd +#define G80_SURFACE_FORMAT_RG16_FLOAT 0x000000de +#define G80_SURFACE_FORMAT_BGR10_A2_UNORM 0x000000df +#define G80_SURFACE_FORMAT_R11G11B10_FLOAT 0x000000e0 +#define G80_SURFACE_FORMAT_R32_SINT 0x000000e3 +#define G80_SURFACE_FORMAT_R32_UINT 0x000000e4 +#define G80_SURFACE_FORMAT_R32_FLOAT 0x000000e5 +#define G80_SURFACE_FORMAT_BGRX8_UNORM 0x000000e6 +#define G80_SURFACE_FORMAT_BGRX8_SRGB 0x000000e7 +#define G80_SURFACE_FORMAT_B5G6R5_UNORM 0x000000e8 +#define G80_SURFACE_FORMAT_BGR5_A1_UNORM 0x000000e9 +#define G80_SURFACE_FORMAT_RG8_UNORM 0x000000ea +#define G80_SURFACE_FORMAT_RG8_SNORM 0x000000eb +#define G80_SURFACE_FORMAT_RG8_SINT 0x000000ec +#define G80_SURFACE_FORMAT_RG8_UINT 0x000000ed +#define G80_SURFACE_FORMAT_R16_UNORM 0x000000ee +#define G80_SURFACE_FORMAT_R16_SNORM 0x000000ef +#define G80_SURFACE_FORMAT_R16_SINT 0x000000f0 +#define G80_SURFACE_FORMAT_R16_UINT 0x000000f1 +#define G80_SURFACE_FORMAT_R16_FLOAT 0x000000f2 +#define G80_SURFACE_FORMAT_R8_UNORM 0x000000f3 +#define G80_SURFACE_FORMAT_R8_SNORM 
0x000000f4 +#define G80_SURFACE_FORMAT_R8_SINT 0x000000f5 +#define G80_SURFACE_FORMAT_R8_UINT 0x000000f6 +#define G80_SURFACE_FORMAT_A8_UNORM 0x000000f7 +#define G80_SURFACE_FORMAT_BGR5_X1_UNORM 0x000000f8 +#define G80_SURFACE_FORMAT_RGBX8_UNORM 0x000000f9 +#define G80_SURFACE_FORMAT_RGBX8_SRGB 0x000000fa +#define G80_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFB 0x000000fb +#define G80_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFC 0x000000fc +#define G80_SURFACE_FORMAT_BGRX8_UNORM_UNKFD 0x000000fd +#define G80_SURFACE_FORMAT_BGRX8_UNORM_UNKFE 0x000000fe +#define G80_SURFACE_FORMAT_Y32_UINT_UNKFF 0x000000ff +#define G80_ZETA_FORMAT_Z32_FLOAT 0x0000000a +#define G80_ZETA_FORMAT_Z16_UNORM 0x00000013 +#define G80_ZETA_FORMAT_S8_Z24_UNORM 0x00000014 +#define G80_ZETA_FORMAT_Z24_X8_UNORM 0x00000015 +#define G80_ZETA_FORMAT_Z24_S8_UNORM 0x00000016 +#define G80_ZETA_FORMAT_Z24_C8_UNORM 0x00000018 +#define G80_ZETA_FORMAT_Z32_S8_X24_FLOAT 0x00000019 +#define G80_ZETA_FORMAT_Z24_X8_S8_C8_X16_UNORM 0x0000001d +#define G80_ZETA_FORMAT_Z32_X8_C8_X16_FLOAT 0x0000001e +#define G80_ZETA_FORMAT_Z32_S8_C8_X16_FLOAT 0x0000001f +#define GK104_IMAGE_FORMAT_RGBA32_FLOAT 0x00000002 +#define GK104_IMAGE_FORMAT_RGBA32_SINT 0x00000003 +#define GK104_IMAGE_FORMAT_RGBA32_UINT 0x00000004 +#define GK104_IMAGE_FORMAT_RGBA16_UNORM 0x00000008 +#define GK104_IMAGE_FORMAT_RGBA16_SNORM 0x00000009 +#define GK104_IMAGE_FORMAT_RGBA16_SINT 0x0000000a +#define GK104_IMAGE_FORMAT_RGBA16_UINT 0x0000000b +#define GK104_IMAGE_FORMAT_RGBA16_FLOAT 0x0000000c +#define GK104_IMAGE_FORMAT_RG32_FLOAT 0x0000000d +#define GK104_IMAGE_FORMAT_RG32_SINT 0x0000000e +#define GK104_IMAGE_FORMAT_RG32_UINT 0x0000000f +#define GK104_IMAGE_FORMAT_RGB10_A2_UNORM 0x00000013 +#define GK104_IMAGE_FORMAT_RGB10_A2_UINT 0x00000015 +#define GK104_IMAGE_FORMAT_RGBA8_UNORM 0x00000018 +#define GK104_IMAGE_FORMAT_RGBA8_SNORM 0x0000001a +#define GK104_IMAGE_FORMAT_RGBA8_SINT 0x0000001b +#define GK104_IMAGE_FORMAT_RGBA8_UINT 0x0000001c +#define 
GK104_IMAGE_FORMAT_RG16_UNORM 0x0000001d +#define GK104_IMAGE_FORMAT_RG16_SNORM 0x0000001e +#define GK104_IMAGE_FORMAT_RG16_SINT 0x0000001f +#define GK104_IMAGE_FORMAT_RG16_UINT 0x00000020 +#define GK104_IMAGE_FORMAT_RG16_FLOAT 0x00000021 +#define GK104_IMAGE_FORMAT_R11G11B10_FLOAT 0x00000024 +#define GK104_IMAGE_FORMAT_R32_SINT 0x00000027 +#define GK104_IMAGE_FORMAT_R32_UINT 0x00000028 +#define GK104_IMAGE_FORMAT_R32_FLOAT 0x00000029 +#define GK104_IMAGE_FORMAT_RG8_UNORM 0x0000002e +#define GK104_IMAGE_FORMAT_RG8_SNORM 0x0000002f +#define GK104_IMAGE_FORMAT_RG8_SINT 0x00000030 +#define GK104_IMAGE_FORMAT_RG8_UINT 0x00000031 +#define GK104_IMAGE_FORMAT_R16_UNORM 0x00000032 +#define GK104_IMAGE_FORMAT_R16_SNORM 0x00000033 +#define GK104_IMAGE_FORMAT_R16_SINT 0x00000034 +#define GK104_IMAGE_FORMAT_R16_UINT 0x00000035 +#define GK104_IMAGE_FORMAT_R16_FLOAT 0x00000036 +#define GK104_IMAGE_FORMAT_R8_UNORM 0x00000037 +#define GK104_IMAGE_FORMAT_R8_SNORM 0x00000038 +#define GK104_IMAGE_FORMAT_R8_SINT 0x00000039 +#define GK104_IMAGE_FORMAT_R8_UINT 0x0000003a +#define G80_PGRAPH_DATA_ERROR_INVALID_OPERATION 0x00000003 +#define G80_PGRAPH_DATA_ERROR_INVALID_VALUE 0x00000004 +#define G80_PGRAPH_DATA_ERROR_INVALID_ENUM 0x00000005 +#define G80_PGRAPH_DATA_ERROR_INVALID_OBJECT 0x00000008 +#define G80_PGRAPH_DATA_ERROR_READ_ONLY_OBJECT 0x00000009 +#define G80_PGRAPH_DATA_ERROR_SUPERVISOR_OBJECT 0x0000000a +#define G80_PGRAPH_DATA_ERROR_INVALID_ADDRESS_ALIGNMENT 0x0000000b +#define G80_PGRAPH_DATA_ERROR_INVALID_BITFIELD 0x0000000c +#define G80_PGRAPH_DATA_ERROR_BEGIN_END_ACTIVE 0x0000000d +#define G80_PGRAPH_DATA_ERROR_SEMANTIC_COLOR_BACK_OVER_LIMIT 0x0000000e +#define G80_PGRAPH_DATA_ERROR_VIEWPORT_ID_NEEDS_GP 0x0000000f +#define G80_PGRAPH_DATA_ERROR_RT_DOUBLE_BIND 0x00000010 +#define G80_PGRAPH_DATA_ERROR_RT_TYPES_MISMATCH 0x00000011 +#define G80_PGRAPH_DATA_ERROR_RT_PITCH_WITH_ZETA 0x00000012 +#define G80_PGRAPH_DATA_ERROR_FP_TOO_FEW_REGS 0x00000015 +#define 
G80_PGRAPH_DATA_ERROR_ZETA_FORMAT_CSAA_MISMATCH 0x00000016 +#define G80_PGRAPH_DATA_ERROR_RT_PITCH_WITH_MSAA 0x00000017 +#define G80_PGRAPH_DATA_ERROR_FP_INTERPOLANT_START_OVER_LIMIT 0x00000018 +#define G80_PGRAPH_DATA_ERROR_SEMANTIC_LAYER_OVER_LIMIT 0x00000019 +#define G80_PGRAPH_DATA_ERROR_RT_INVALID_ALIGNMENT 0x0000001a +#define G80_PGRAPH_DATA_ERROR_SAMPLER_OVER_LIMIT 0x0000001b +#define G80_PGRAPH_DATA_ERROR_TEXTURE_OVER_LIMIT 0x0000001c +#define G80_PGRAPH_DATA_ERROR_GP_TOO_MANY_OUTPUTS 0x0000001e +#define G80_PGRAPH_DATA_ERROR_RT_BPP128_WITH_MS8 0x0000001f +#define G80_PGRAPH_DATA_ERROR_Z_OUT_OF_BOUNDS 0x00000021 +#define G80_PGRAPH_DATA_ERROR_XY_OUT_OF_BOUNDS 0x00000023 +#define G80_PGRAPH_DATA_ERROR_VP_ZERO_INPUTS 0x00000024 +#define G80_PGRAPH_DATA_ERROR_CP_MORE_PARAMS_THAN_SHARED 0x00000027 +#define G80_PGRAPH_DATA_ERROR_CP_NO_REG_SPACE_STRIPED 0x00000028 +#define G80_PGRAPH_DATA_ERROR_CP_NO_REG_SPACE_PACKED 0x00000029 +#define G80_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_WARPS 0x0000002a +#define G80_PGRAPH_DATA_ERROR_CP_BLOCK_SIZE_MISMATCH 0x0000002b +#define G80_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_LOCAL_WARPS 0x0000002c +#define G80_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_STACK_WARPS 0x0000002d +#define G80_PGRAPH_DATA_ERROR_CP_NO_BLOCKDIM_LATCH 0x0000002e +#define G80_PGRAPH_DATA_ERROR_ENG2D_FORMAT_MISMATCH 0x00000031 +#define G80_PGRAPH_DATA_ERROR_ENG2D_OPERATION_ILLEGAL_FOR_DST_FORMAT 0x00000033 +#define G80_PGRAPH_DATA_ERROR_ENG2D_FORMAT_MISMATCH_B 0x00000034 +#define G80_PGRAPH_DATA_ERROR_PRIMITIVE_ID_NEEDS_GP 0x0000003f +#define G80_PGRAPH_DATA_ERROR_SEMANTIC_VIEWPORT_OVER_LIMIT 0x00000044 +#define G80_PGRAPH_DATA_ERROR_SEMANTIC_COLOR_FRONT_OVER_LIMIT 0x00000045 +#define G80_PGRAPH_DATA_ERROR_LAYER_ID_NEEDS_GP 0x00000046 +#define G80_PGRAPH_DATA_ERROR_SEMANTIC_CLIP_OVER_LIMIT 0x00000047 +#define G80_PGRAPH_DATA_ERROR_SEMANTIC_PTSZ_OVER_LIMIT 0x00000048 +#define G80_PGRAPH_DATA_ERROR_M2MF_LINE_LENGTH_EXCEEDS_PITCH_IN 0x00000051 +#define 
G80_PGRAPH_DATA_ERROR_M2MF_LINE_LENGTH_EXCEEDS_PITCH_OUT 0x00000053 +#define G80_PGRAPH_DATA_ERROR_RT_PITCH_WITH_ZETA_GF100 0x00000098 +#define G80_PGRAPH_DATA_ERROR_ENG2D_UNALIGNED_PITCH_GF100 0x000000a5 +#define G80_CG_IDLE_TIMEOUT__MASK 0x0000003f +#define G80_CG_IDLE_TIMEOUT__SHIFT 0 +#define G80_CG_IDLE_TIMEOUT_ENABLE 0x00000040 +#define G80_CG_INTERFACE_REENABLE_TIME__MASK 0x000f0000 +#define G80_CG_INTERFACE_REENABLE_TIME__SHIFT 16 +#define G80_CG_THROTTLE_DUTY_M1__MASK 0x00f00000 +#define G80_CG_THROTTLE_DUTY_M1__SHIFT 20 +#define G80_CG_DELAY__MASK 0x0f000000 +#define G80_CG_DELAY__SHIFT 24 +#define G80_CG_CLOCK_THROTTLE_ENABLE 0x10000000 +#define G80_CG_THROTTLE_MODE__MASK 0x20000000 +#define G80_CG_THROTTLE_MODE__SHIFT 29 +#define G80_CG_THROTTLE_MODE_AUTO 0x00000000 +#define G80_CG_THROTTLE_MODE_MANUAL 0x20000000 +#define G80_CG_INTERFACE_THROTTLE_ENABLE 0x40000000 +#define G80_QUERY__SIZE 0x00000010 +#define G80_QUERY_COUNTER 0x00000000 + +#define G80_QUERY_RES 0x00000004 + +#define G80_QUERY_TIME 0x00000008 + + +#endif /* G80_DEFS_XML */ diff --git a/src/gallium/drivers/nouveau/nv50/g80_texture.xml.h b/src/gallium/drivers/nouveau/nv50/g80_texture.xml.h new file mode 100644 index 00000000000..542963ca452 --- /dev/null +++ b/src/gallium/drivers/nouveau/nv50/g80_texture.xml.h @@ -0,0 +1,451 @@ +#ifndef G80_TEXTURE_XML +#define G80_TEXTURE_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/envytools/envytools/ +git clone https://github.com/envytools/envytools.git + +The rules-ng-ng source files this header was generated from are: +- /home/skeggsb/git/envytools/rnndb/../rnndb/graph/g80_texture.xml ( 18837 bytes, from 2016-01-14 23:54:22) +- /home/skeggsb/git/envytools/rnndb/copyright.xml ( 6456 bytes, from 2015-09-10 02:57:40) +- /home/skeggsb/git/envytools/rnndb/nvchipsets.xml ( 2908 bytes, from 2016-02-02 23:45:00) +- /home/skeggsb/git/envytools/rnndb/g80_defs.xml ( 21739 bytes, from 2016-02-04 00:29:42) +- /home/skeggsb/git/envytools/rnndb/nv_defs.xml ( 5388 bytes, from 2016-01-14 23:54:22) + +Copyright (C) 2006-2016 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- Ilia Mirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- 
Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +#define G80_TSC_WRAP_WRAP 0x00000000 +#define G80_TSC_WRAP_MIRROR 0x00000001 +#define G80_TSC_WRAP_CLAMP_TO_EDGE 0x00000002 +#define G80_TSC_WRAP_BORDER 0x00000003 +#define G80_TSC_WRAP_CLAMP_OGL 0x00000004 +#define G80_TSC_WRAP_MIRROR_ONCE_CLAMP_TO_EDGE 0x00000005 +#define G80_TSC_WRAP_MIRROR_ONCE_BORDER 0x00000006 +#define G80_TSC_WRAP_MIRROR_ONCE_CLAMP_OGL 0x00000007 +#define G80_TIC__SIZE 0x00000020 +#define G80_TIC_0 0x00000000 +#define GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED__MASK 0x80000000 +#define GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED__SHIFT 31 +#define GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED_NO 0x00000000 +#define GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED_YES 0x80000000 +#define G84_TIC_0_PACK_COMPONENTS 0x40000000 +#define G80_TIC_0_W_SOURCE__MASK 0x38000000 +#define G80_TIC_0_W_SOURCE__SHIFT 27 +#define G80_TIC_0_Z_SOURCE__MASK 0x07000000 +#define G80_TIC_0_Z_SOURCE__SHIFT 24 +#define G80_TIC_0_Y_SOURCE__MASK 0x00e00000 +#define G80_TIC_0_Y_SOURCE__SHIFT 21 +#define G80_TIC_0_X_SOURCE__MASK 0x001c0000 +#define G80_TIC_0_X_SOURCE__SHIFT 18 +#define G80_TIC_0_A_DATA_TYPE__MASK 0x00038000 +#define G80_TIC_0_A_DATA_TYPE__SHIFT 15 +#define G80_TIC_0_B_DATA_TYPE__MASK 0x00007000 +#define G80_TIC_0_B_DATA_TYPE__SHIFT 12 +#define G80_TIC_0_G_DATA_TYPE__MASK 0x00000e00 +#define G80_TIC_0_G_DATA_TYPE__SHIFT 9 +#define G80_TIC_0_R_DATA_TYPE__MASK 0x000001c0 +#define G80_TIC_0_R_DATA_TYPE__SHIFT 6 +#define G80_TIC_0_COMPONENTS_SIZES__MASK 0x0000003f +#define G80_TIC_0_COMPONENTS_SIZES__SHIFT 0 +#define G80_TIC_0_COMPONENTS_SIZES_R32_G32_B32_A32 0x00000001 +#define GF100_TIC_0_COMPONENTS_SIZES_R32_G32_B32 0x00000002 +#define G80_TIC_0_COMPONENTS_SIZES_R16_G16_B16_A16 0x00000003 +#define G80_TIC_0_COMPONENTS_SIZES_R32_G32 0x00000004 +#define G80_TIC_0_COMPONENTS_SIZES_R32_B24G8 0x00000005 +#define G80_TIC_0_COMPONENTS_SIZES_X8B8G8R8 0x00000007 +#define G80_TIC_0_COMPONENTS_SIZES_A8B8G8R8 0x00000008 +#define G80_TIC_0_COMPONENTS_SIZES_A2B10G10R10 
0x00000009 +#define G80_TIC_0_COMPONENTS_SIZES_R16_G16 0x0000000c +#define G80_TIC_0_COMPONENTS_SIZES_G8R24 0x0000000d +#define G80_TIC_0_COMPONENTS_SIZES_G24R8 0x0000000e +#define G80_TIC_0_COMPONENTS_SIZES_R32 0x0000000f +#define G80_TIC_0_COMPONENTS_SIZES_A4B4G4R4 0x00000012 +#define G80_TIC_0_COMPONENTS_SIZES_A5B5G5R1 0x00000013 +#define G80_TIC_0_COMPONENTS_SIZES_A1B5G5R5 0x00000014 +#define G80_TIC_0_COMPONENTS_SIZES_B5G6R5 0x00000015 +#define G80_TIC_0_COMPONENTS_SIZES_B6G5R5 0x00000016 +#define G80_TIC_0_COMPONENTS_SIZES_G8R8 0x00000018 +#define G80_TIC_0_COMPONENTS_SIZES_R16 0x0000001b +#define G80_TIC_0_COMPONENTS_SIZES_Y8_VIDEO 0x0000001c +#define G80_TIC_0_COMPONENTS_SIZES_R8 0x0000001d +#define G80_TIC_0_COMPONENTS_SIZES_G4R4 0x0000001e +#define G80_TIC_0_COMPONENTS_SIZES_R1 0x0000001f +#define G80_TIC_0_COMPONENTS_SIZES_E5B9G9R9_SHAREDEXP 0x00000020 +#define G80_TIC_0_COMPONENTS_SIZES_BF10GF11RF11 0x00000021 +#define G80_TIC_0_COMPONENTS_SIZES_G8B8G8R8 0x00000022 +#define G80_TIC_0_COMPONENTS_SIZES_B8G8R8G8 0x00000023 +#define G80_TIC_0_COMPONENTS_SIZES_DXT1 0x00000024 +#define G80_TIC_0_COMPONENTS_SIZES_DXT23 0x00000025 +#define G80_TIC_0_COMPONENTS_SIZES_DXT45 0x00000026 +#define G80_TIC_0_COMPONENTS_SIZES_DXN1 0x00000027 +#define G80_TIC_0_COMPONENTS_SIZES_DXN2 0x00000028 +#define GF100_TIC_0_COMPONENTS_SIZES_BC6H_SF16 0x00000010 +#define GF100_TIC_0_COMPONENTS_SIZES_BC6H_UF16 0x00000011 +#define GF100_TIC_0_COMPONENTS_SIZES_BC7U 0x00000017 +#define GK20A_TIC_0_COMPONENTS_SIZES_ETC2_RGB 0x00000006 +#define GK20A_TIC_0_COMPONENTS_SIZES_ETC2_RGB_PTA 0x0000000a +#define GK20A_TIC_0_COMPONENTS_SIZES_ETC2_RGBA 0x0000000b +#define GK20A_TIC_0_COMPONENTS_SIZES_EAC 0x00000019 +#define GK20A_TIC_0_COMPONENTS_SIZES_EACX2 0x0000001a +#define G80_TIC_0_COMPONENTS_SIZES_Z24S8 0x00000029 +#define G80_TIC_0_COMPONENTS_SIZES_X8Z24 0x0000002a +#define G80_TIC_0_COMPONENTS_SIZES_S8Z24 0x0000002b +#define G80_TIC_0_COMPONENTS_SIZES_X4V4Z24__COV4R4V 0x0000002c 
+#define G80_TIC_0_COMPONENTS_SIZES_X4V4Z24__COV8R8V 0x0000002d +#define G80_TIC_0_COMPONENTS_SIZES_V8Z24__COV4R12V 0x0000002e +#define G80_TIC_0_COMPONENTS_SIZES_ZF32 0x0000002f +#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X24S8 0x00000030 +#define G80_TIC_0_COMPONENTS_SIZES_X8Z24_X20V4S8__COV4R4V 0x00000031 +#define G80_TIC_0_COMPONENTS_SIZES_X8Z24_X20V4S8__COV8R8V 0x00000032 +#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X20V4X8__COV4R4V 0x00000033 +#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X20V4X8__COV8R8V 0x00000034 +#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X20V4S8__COV4R4V 0x00000035 +#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X20V4S8__COV8R8V 0x00000036 +#define G80_TIC_0_COMPONENTS_SIZES_X8Z24_X16V8S8__COV4R12V 0x00000037 +#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X16V8X8__COV4R12V 0x00000038 +#define G80_TIC_0_COMPONENTS_SIZES_ZF32_X16V8S8__COV4R12V 0x00000039 +#define G200_TIC_0_COMPONENTS_SIZES_Z16 0x0000003a +#define G200_TIC_0_COMPONENTS_SIZES_V8Z24__COV8R24V 0x0000003b +#define G200_TIC_0_COMPONENTS_SIZES_X8Z24_X16V8S8__COV8R24V 0x0000003c +#define G200_TIC_0_COMPONENTS_SIZES_ZF32_X16V8X8__COV8R24V 0x0000003d +#define G200_TIC_0_COMPONENTS_SIZES_ZF32_X16V8S8__COV8R24V 0x0000003e +#define G80_TIC_0_COMPONENTS_SIZES__MASK 0x0000003f +#define G80_TIC_0_COMPONENTS_SIZES__SHIFT 0 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_4X4 0x00000000 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_5X4 0x00000010 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_5X5 0x00000001 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_6X5 0x00000011 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_6X6 0x00000002 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_8X5 0x00000015 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_8X6 0x00000012 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_8X8 0x00000004 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_10X5 0x00000016 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_10X6 0x00000017 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_10X8 0x00000013 +#define 
GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_10X10 0x00000005 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_12X10 0x00000014 +#define GK20A_TIC_0_COMPONENTS_SIZES_ASTC_2D_12X12 0x00000006 + +#define G80_TIC_1 0x00000004 +#define G80_TIC_1_OFFSET_LOWER__MASK 0xffffffff +#define G80_TIC_1_OFFSET_LOWER__SHIFT 0 + +#define G80_TIC_2 0x00000008 +#define G80_TIC_2_OFFSET_UPPER__MASK 0x000000ff +#define G80_TIC_2_OFFSET_UPPER__SHIFT 0 +#define G84_TIC_2_ANISO_SPREAD_MAX_LOG2_LSB__MASK 0x00000300 +#define G84_TIC_2_ANISO_SPREAD_MAX_LOG2_LSB__SHIFT 8 +#define G80_TIC_2_SRGB_CONVERSION 0x00000400 +#define G84_TIC_2_ANISO_SPREAD_MAX_LOG2_MSB 0x00000800 +#define G80_TIC_2_LOD_ANISO_QUALITY_2 0x00001000 +#define G80_TIC_2_COLOR_KEY_OP 0x00002000 +#define G80_TIC_2_TEXTURE_TYPE__MASK 0x0003c000 +#define G80_TIC_2_TEXTURE_TYPE__SHIFT 14 +#define G80_TIC_2_TEXTURE_TYPE_ONE_D 0x00000000 +#define G80_TIC_2_TEXTURE_TYPE_TWO_D 0x00004000 +#define G80_TIC_2_TEXTURE_TYPE_THREE_D 0x00008000 +#define G80_TIC_2_TEXTURE_TYPE_CUBEMAP 0x0000c000 +#define G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY 0x00010000 +#define G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY 0x00014000 +#define G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER 0x00018000 +#define G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP 0x0001c000 +#define G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY 0x00020000 +#define G80_TIC_2_LAYOUT__MASK 0x00040000 +#define G80_TIC_2_LAYOUT__SHIFT 18 +#define G80_TIC_2_LAYOUT_BLOCKLINEAR 0x00000000 +#define G80_TIC_2_LAYOUT_PITCH 0x00040000 +#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH__MASK 0x00380000 +#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH__SHIFT 19 +#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH__MIN 0x00000000 +#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH__MAX 0x00000000 +#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_ONE 0x00000000 +#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_TWO 0x00080000 +#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_FOUR 0x00100000 +#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_EIGHT 0x00180000 +#define G80_TIC_2_GOBS_PER_BLOCK_WIDTH_SIXTEEN 0x00200000 +#define 
G80_TIC_2_GOBS_PER_BLOCK_WIDTH_THIRTYTWO 0x00280000 +#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT__MASK 0x01c00000 +#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT__SHIFT 22 +#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_ONE 0x00000000 +#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_TWO 0x00400000 +#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_FOUR 0x00800000 +#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_EIGHT 0x00c00000 +#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_SIXTEEN 0x01000000 +#define G80_TIC_2_GOBS_PER_BLOCK_HEIGHT_THIRTYTWO 0x01400000 +#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH__MASK 0x0e000000 +#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH__SHIFT 25 +#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_ONE 0x00000000 +#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_TWO 0x02000000 +#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_FOUR 0x04000000 +#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_EIGHT 0x06000000 +#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_SIXTEEN 0x08000000 +#define G80_TIC_2_GOBS_PER_BLOCK_DEPTH_THIRTYTWO 0x0a000000 +#define G80_TIC_2_SECTOR_PROMOTION__MASK 0x30000000 +#define G80_TIC_2_SECTOR_PROMOTION__SHIFT 28 +#define G80_TIC_2_SECTOR_PROMOTION_NO_PROMOTION 0x00000000 +#define G80_TIC_2_SECTOR_PROMOTION_PROMOTE_TO_2_V 0x10000000 +#define G80_TIC_2_SECTOR_PROMOTION_PROMOTE_TO_2_H 0x20000000 +#define G80_TIC_2_SECTOR_PROMOTION_PROMOTE_TO_4 0x30000000 +#define G80_TIC_2_BORDER_SOURCE__MASK 0x40000000 +#define G80_TIC_2_BORDER_SOURCE__SHIFT 30 +#define G80_TIC_2_BORDER_SOURCE_TEXTURE 0x00000000 +#define G80_TIC_2_BORDER_SOURCE_COLOR 0x40000000 +#define G80_TIC_2_NORMALIZED_COORDS 0x80000000 + +#define G80_TIC_3 0x0000000c +#define G80_TIC_3_PITCH__MASK 0x000fffff +#define G80_TIC_3_PITCH__SHIFT 0 +#define G80_TIC_3_LOD_ANISO_QUALITY__MASK 0x00100000 +#define G80_TIC_3_LOD_ANISO_QUALITY__SHIFT 20 +#define G80_TIC_3_LOD_ANISO_QUALITY_LOW 0x00000000 +#define G80_TIC_3_LOD_ANISO_QUALITY_HIGH 0x00100000 +#define G80_TIC_3_LOD_ISO_QUALITY__MASK 0x00200000 +#define G80_TIC_3_LOD_ISO_QUALITY__SHIFT 21 +#define G80_TIC_3_LOD_ISO_QUALITY_LOW 
0x00000000 +#define G80_TIC_3_LOD_ISO_QUALITY_HIGH 0x00200000 +#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER__MASK 0x00c00000 +#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER__SHIFT 22 +#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER_NONE 0x00000000 +#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER_CONST_ONE 0x00400000 +#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER_CONST_TWO 0x00800000 +#define G80_TIC_3_ANISO_COARSE_SPREAD_MODIFIER_SQRT 0x00c00000 +#define G80_TIC_3_ANISO_SPREAD_SCALE__MASK 0x1f000000 +#define G80_TIC_3_ANISO_SPREAD_SCALE__SHIFT 24 +#define G80_TIC_3_USE_HEADER_OPT_CONTROL 0x20000000 +#define G84_TIC_3_ANISO_CLAMP_AT_MAX_LOD 0x40000000 +#define G84_TIC_3_ANISO_POW2 0x80000000 + +#define G80_TIC_4 0x00000010 +#define G80_TIC_4_WIDTH__MASK 0x3fffffff +#define G80_TIC_4_WIDTH__SHIFT 0 +#define G80_TIC_4_DEPTH_TEXTURE 0x40000000 +#define G84_TIC_4_USE_TEXTURE_HEADER_V2 0x80000000 + +#define G80_TIC_5 0x00000014 +#define G80_TIC_5_MAP_MIP_LEVEL__MASK 0xf0000000 +#define G80_TIC_5_MAP_MIP_LEVEL__SHIFT 28 +#define G80_TIC_5_DEPTH__MASK 0x0fff0000 +#define G80_TIC_5_DEPTH__SHIFT 16 +#define G80_TIC_5_HEIGHT__MASK 0x0000ffff +#define G80_TIC_5_HEIGHT__SHIFT 0 + +#define G80_TIC_6 0x00000018 +#define G80_TIC_6_TRILIN_OPT__MASK 0x0000001f +#define G80_TIC_6_TRILIN_OPT__SHIFT 0 +#define G80_TIC_6_MIP_LOD_BIAS__MASK 0x0003ffe0 +#define G80_TIC_6_MIP_LOD_BIAS__SHIFT 5 +#define G80_TIC_6_MIP_LOD_BIAS__RADIX 0x00000008 +#define G80_TIC_6_ANISO_BIAS__MASK 0x00780000 +#define G80_TIC_6_ANISO_BIAS__SHIFT 19 +#define G80_TIC_6_ANISO_BIAS__RADIX 0x00000004 +#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC__MASK 0x01800000 +#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC__SHIFT 23 +#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC_HALF 0x00000000 +#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC_ONE 0x00800000 +#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC_TWO 0x01000000 +#define G80_TIC_6_ANISO_FINE_SPREAD_FUNC_MAX 0x01800000 +#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC__MASK 0x06000000 +#define 
G80_TIC_6_ANISO_COARSE_SPREAD_FUNC__SHIFT 25 +#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC_HALF 0x00000000 +#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC_ONE 0x02000000 +#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC_TWO 0x04000000 +#define G80_TIC_6_ANISO_COARSE_SPREAD_FUNC_MAX 0x06000000 +#define G80_TIC_6_MAX_ANISOTROPY__MASK 0x38000000 +#define G80_TIC_6_MAX_ANISOTROPY__SHIFT 27 +#define G80_TIC_6_MAX_ANISOTROPY_1_TO_1 0x00000000 +#define G80_TIC_6_MAX_ANISOTROPY_2_TO_1 0x08000000 +#define G80_TIC_6_MAX_ANISOTROPY_4_TO_1 0x10000000 +#define G80_TIC_6_MAX_ANISOTROPY_6_TO_1 0x18000000 +#define G80_TIC_6_MAX_ANISOTROPY_8_TO_1 0x20000000 +#define G80_TIC_6_MAX_ANISOTROPY_10_TO_1 0x28000000 +#define G80_TIC_6_MAX_ANISOTROPY_12_TO_1 0x30000000 +#define G80_TIC_6_MAX_ANISOTROPY_16_TO_1 0x38000000 +#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER__MASK 0xc0000000 +#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER__SHIFT 30 +#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER_NONE 0x00000000 +#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER_CONST_ONE 0x40000000 +#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO 0x80000000 +#define G80_TIC_6_ANISO_FINE_SPREAD_MODIFIER_SQRT 0xc0000000 + +#define G80_TIC_7 0x0000001c +#define G80_TIC_7_COLOR_KEY_VALUE__MASK 0xffffffff +#define G80_TIC_7_COLOR_KEY_VALUE__SHIFT 0 + +#define G84_TIC_7 0x0000001c +#define G84_TIC_7_RES_VIEW_MIN_MIP_LEVEL__MASK 0x0000000f +#define G84_TIC_7_RES_VIEW_MIN_MIP_LEVEL__SHIFT 0 +#define G84_TIC_7_RES_VIEW_MAX_MIP_LEVEL__MASK 0x000000f0 +#define G84_TIC_7_RES_VIEW_MAX_MIP_LEVEL__SHIFT 4 +#define G84_TIC_7_HEIGHT_MSB 0x00000100 +#define G84_TIC_7_MULTI_SAMPLE_COUNT__MASK 0x0000f000 +#define G84_TIC_7_MULTI_SAMPLE_COUNT__SHIFT 12 +#define G84_TIC_7_MULTI_SAMPLE_COUNT_1X1 0x00000000 +#define G84_TIC_7_MULTI_SAMPLE_COUNT_2X1 0x00001000 +#define G84_TIC_7_MULTI_SAMPLE_COUNT_2X2 0x00002000 +#define G84_TIC_7_MULTI_SAMPLE_COUNT_4X2 0x00003000 +#define GT215_TIC_7_MULTI_SAMPLE_COUNT_4X2_D3D 0x00004000 +#define 
GT215_TIC_7_MULTI_SAMPLE_COUNT_2X1_D3D 0x00005000 +#define GF100_TIC_7_MULTI_SAMPLE_COUNT_4X4 0x00006000 +#define G84_TIC_7_MULTI_SAMPLE_COUNT_2X2_VC_4 0x00008000 +#define G84_TIC_7_MULTI_SAMPLE_COUNT_2X2_VC_12 0x00009000 +#define G84_TIC_7_MULTI_SAMPLE_COUNT_4X2_VC_8 0x0000a000 +#define GF100_TIC_7_MULTI_SAMPLE_COUNT_4X2_VC_24 0x0000b000 +#define G84_TIC_7_MIN_LOD_CLAMP__MASK 0x0fff0000 +#define G84_TIC_7_MIN_LOD_CLAMP__SHIFT 16 +#define G84_TIC_7_MIN_LOD_CLAMP__RADIX 0x00000008 +#define G84_TIC_7_DEPTH_MSB__MASK 0x70000000 +#define G84_TIC_7_DEPTH_MSB__SHIFT 28 + +#define G80_TSC__SIZE 0x00000020 +#define G80_TSC_0 0x00000000 +#define G80_TSC_0_ADDRESS_U__MASK 0x00000007 +#define G80_TSC_0_ADDRESS_U__SHIFT 0 +#define G80_TSC_0_ADDRESS_V__MASK 0x00000038 +#define G80_TSC_0_ADDRESS_V__SHIFT 3 +#define G80_TSC_0_ADDRESS_P__MASK 0x000001c0 +#define G80_TSC_0_ADDRESS_P__SHIFT 6 +#define G80_TSC_0_DEPTH_COMPARE 0x00000200 +#define G80_TSC_0_DEPTH_COMPARE_FUNC__MASK 0x00001c00 +#define G80_TSC_0_DEPTH_COMPARE_FUNC__SHIFT 10 +#define G80_TSC_0_DEPTH_COMPARE_FUNC_NEVER 0x00000000 +#define G80_TSC_0_DEPTH_COMPARE_FUNC_LESS 0x00000400 +#define G80_TSC_0_DEPTH_COMPARE_FUNC_EQUAL 0x00000800 +#define G80_TSC_0_DEPTH_COMPARE_FUNC_LEQUAL 0x00000c00 +#define G80_TSC_0_DEPTH_COMPARE_FUNC_GREATER 0x00001000 +#define G80_TSC_0_DEPTH_COMPARE_FUNC_NOTEQUAL 0x00001400 +#define G80_TSC_0_DEPTH_COMPARE_FUNC_GEQUAL 0x00001800 +#define G80_TSC_0_DEPTH_COMPARE_FUNC_ALWAYS 0x00001c00 +#define G80_TSC_0_SRGB_CONVERSION 0x00002000 +#define G80_TSC_0_FONT_FILTER_WIDTH__MASK 0x0001c000 +#define G80_TSC_0_FONT_FILTER_WIDTH__SHIFT 14 +#define G80_TSC_0_FONT_FILTER_HEIGHT__MASK 0x000e0000 +#define G80_TSC_0_FONT_FILTER_HEIGHT__SHIFT 17 +#define G80_TSC_0_MAX_ANISOTROPY__MASK 0x00700000 +#define G80_TSC_0_MAX_ANISOTROPY__SHIFT 20 +#define G80_TSC_0_MAX_ANISOTROPY_1_TO_1 0x00000000 +#define G80_TSC_0_MAX_ANISOTROPY_2_TO_1 0x00100000 +#define G80_TSC_0_MAX_ANISOTROPY_4_TO_1 0x00200000 +#define 
G80_TSC_0_MAX_ANISOTROPY_6_TO_1 0x00300000 +#define G80_TSC_0_MAX_ANISOTROPY_8_TO_1 0x00400000 +#define G80_TSC_0_MAX_ANISOTROPY_10_TO_1 0x00500000 +#define G80_TSC_0_MAX_ANISOTROPY_12_TO_1 0x00600000 +#define G80_TSC_0_MAX_ANISOTROPY_16_TO_1 0x00700000 + +#define G80_TSC_1 0x00000004 +#define G80_TSC_1_MAG_FILTER__MASK 0x00000003 +#define G80_TSC_1_MAG_FILTER__SHIFT 0 +#define G80_TSC_1_MAG_FILTER_NEAREST 0x00000001 +#define G80_TSC_1_MAG_FILTER_LINEAR 0x00000002 +#define G80_TSC_1_MIN_FILTER__MASK 0x00000030 +#define G80_TSC_1_MIN_FILTER__SHIFT 4 +#define G80_TSC_1_MIN_FILTER_NEAREST 0x00000010 +#define G80_TSC_1_MIN_FILTER_LINEAR 0x00000020 +#define G80_TSC_1_MIP_FILTER__MASK 0x000000c0 +#define G80_TSC_1_MIP_FILTER__SHIFT 6 +#define G80_TSC_1_MIP_FILTER_NONE 0x00000040 +#define G80_TSC_1_MIP_FILTER_NEAREST 0x00000080 +#define G80_TSC_1_MIP_FILTER_LINEAR 0x000000c0 +#define GK104_TSC_1_CUBEMAP_INTERFACE_FILTERING 0x00000200 +#define G80_TSC_1_MIP_LOD_BIAS__MASK 0x01fff000 +#define G80_TSC_1_MIP_LOD_BIAS__SHIFT 12 +#define G80_TSC_1_MIP_LOD_BIAS__RADIX 0x00000008 +#define GK104_TSC_1_FLOAT_COORD_NORMALIZATION__MASK 0x02000000 +#define GK104_TSC_1_FLOAT_COORD_NORMALIZATION__SHIFT 25 +#define GK104_TSC_1_FLOAT_COORD_NORMALIZATION_USE_HEADER_SETTING 0x00000000 +#define GK104_TSC_1_FLOAT_COORD_NORMALIZATION_FORCE_UNNORMALIZED_COORDS 0x02000000 +#define G80_TSC_1_TRILIN_OPT__MASK 0x7c000000 +#define G80_TSC_1_TRILIN_OPT__SHIFT 26 + +#define G80_TSC_2 0x00000008 +#define G80_TSC_2_MIN_LOD_CLAMP__MASK 0x00000fff +#define G80_TSC_2_MIN_LOD_CLAMP__SHIFT 0 +#define G80_TSC_2_MIN_LOD_CLAMP__RADIX 0x00000008 +#define G80_TSC_2_MAX_LOD_CLAMP__MASK 0x00fff000 +#define G80_TSC_2_MAX_LOD_CLAMP__SHIFT 12 +#define G80_TSC_2_MAX_LOD_CLAMP__RADIX 0x00000008 +#define G80_TSC_2_SRGB_BORDER_COLOR_R__MASK 0xff000000 +#define G80_TSC_2_SRGB_BORDER_COLOR_R__SHIFT 24 + +#define G80_TSC_3 0x0000000c +#define G80_TSC_3_SRGB_BORDER_COLOR_G__MASK 0x000ff000 +#define 
G80_TSC_3_SRGB_BORDER_COLOR_G__SHIFT 12 +#define G80_TSC_3_SRGB_BORDER_COLOR_B__MASK 0x0ff00000 +#define G80_TSC_3_SRGB_BORDER_COLOR_B__SHIFT 20 + +#define G80_TSC_4 0x00000010 +#define G80_TSC_4_BORDER_COLOR_R__MASK 0xffffffff +#define G80_TSC_4_BORDER_COLOR_R__SHIFT 0 + +#define G80_TSC_5 0x00000014 +#define G80_TSC_5_BORDER_COLOR_G__MASK 0xffffffff +#define G80_TSC_5_BORDER_COLOR_G__SHIFT 0 + +#define G80_TSC_6 0x00000018 +#define G80_TSC_6_BORDER_COLOR_B__MASK 0xffffffff +#define G80_TSC_6_BORDER_COLOR_B__SHIFT 0 + +#define G80_TSC_7 0x0000001c +#define G80_TSC_7_BORDER_COLOR_A__MASK 0xffffffff +#define G80_TSC_7_BORDER_COLOR_A__SHIFT 0 + + +#endif /* G80_TEXTURE_XML */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_compute.c b/src/gallium/drivers/nouveau/nv50/nv50_compute.c index 6d23fd66945..04488d6d0a6 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_compute.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_compute.c @@ -270,13 +270,11 @@ nv50_compute_find_symbol(struct nv50_context *nv50, uint32_t label) } void -nv50_launch_grid(struct pipe_context *pipe, - const uint *block_layout, const uint *grid_layout, - uint32_t label, const void *input) +nv50_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) { struct nv50_context *nv50 = nv50_context(pipe); struct nouveau_pushbuf *push = nv50->base.pushbuf; - unsigned block_size = block_layout[0] * block_layout[1] * block_layout[2]; + unsigned block_size = info->block[0] * info->block[1] * info->block[2]; struct nv50_program *cp = nv50->compprog; bool ret; @@ -286,10 +284,10 @@ nv50_launch_grid(struct pipe_context *pipe, return; } - nv50_compute_upload_input(nv50, input); + nv50_compute_upload_input(nv50, info->input); BEGIN_NV04(push, NV50_COMPUTE(CP_START_ID), 1); - PUSH_DATA (push, nv50_compute_find_symbol(nv50, label)); + PUSH_DATA (push, nv50_compute_find_symbol(nv50, info->pc)); BEGIN_NV04(push, NV50_COMPUTE(SHARED_SIZE), 1); PUSH_DATA (push, align(cp->cp.smem_size + cp->parm_size + 
0x10, 0x40)); @@ -298,14 +296,14 @@ nv50_launch_grid(struct pipe_context *pipe, /* grid/block setup */ BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_XY), 2); - PUSH_DATA (push, block_layout[1] << 16 | block_layout[0]); - PUSH_DATA (push, block_layout[2]); + PUSH_DATA (push, info->block[1] << 16 | info->block[0]); + PUSH_DATA (push, info->block[2]); BEGIN_NV04(push, NV50_COMPUTE(BLOCK_ALLOC), 1); PUSH_DATA (push, 1 << 16 | block_size); BEGIN_NV04(push, NV50_COMPUTE(BLOCKDIM_LATCH), 1); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_COMPUTE(GRIDDIM), 1); - PUSH_DATA (push, grid_layout[1] << 16 | grid_layout[0]); + PUSH_DATA (push, info->grid[1] << 16 | info->grid[0]); BEGIN_NV04(push, NV50_COMPUTE(GRIDID), 1); PUSH_DATA (push, 1); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index 342ec96d62c..2620d03b999 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -153,6 +153,7 @@ struct nv50_context { uint32_t textures_coherent[3]; struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS]; unsigned num_samplers[3]; + bool seamless_cube_map; uint8_t num_so_targets; uint8_t so_targets_dirty; @@ -322,7 +323,6 @@ nv98_video_buffer_create(struct pipe_context *pipe, /* nv50_compute.c */ void -nv50_launch_grid(struct pipe_context *, const uint *, const uint *, - uint32_t, const void *); +nv50_launch_grid(struct pipe_context *, const struct pipe_grid_info *); #endif diff --git a/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h deleted file mode 100644 index aad2a851691..00000000000 --- a/src/gallium/drivers/nouveau/nv50/nv50_defs.xml.h +++ /dev/null @@ -1,263 +0,0 @@ -#ifndef NV50_DEFS_XML -#define NV50_DEFS_XML - -/* Autogenerated file, DO NOT EDIT manually! 
- -This file was generated by the rules-ng-ng headergen tool in this git repository: -http://github.com/envytools/envytools/ -git clone https://github.com/envytools/envytools.git - -The rules-ng-ng source files this header was generated from are: -- rnndb/g80_defs.xml ( 18175 bytes, from 2014-09-25 06:32:11) -- rnndb/copyright.xml ( 6452 bytes, from 2013-05-14 03:57:49) -- rnndb/nvchipsets.xml ( 2759 bytes, from 2014-10-05 01:51:02) - -Copyright (C) 2006-2014 by the following authors: -- Artur Huillet <[email protected]> (ahuillet) -- Ben Skeggs (darktama, darktama_) -- B. R. <[email protected]> (koala_br) -- Carlos Martin <[email protected]> (carlosmn) -- Christoph Bumiller <[email protected]> (calim, chrisbmr) -- Dawid Gajownik <[email protected]> (gajownik) -- Dmitry Baryshkov -- Dmitry Eremin-Solenikov <[email protected]> (lumag) -- EdB <[email protected]> (edb_) -- Erik Waling <[email protected]> (erikwaling) -- Francisco Jerez <[email protected]> (curro) -- imirkin <[email protected]> (imirkin) -- jb17bsome <[email protected]> (jb17bsome) -- Jeremy Kolb <[email protected]> (kjeremy) -- Laurent Carlier <[email protected]> (lordheavy) -- Luca Barbieri <[email protected]> (lb, lb1) -- Maarten Maathuis <[email protected]> (stillunknown) -- Marcin Kościelnicki <[email protected]> (mwk, koriakin) -- Mark Carey <[email protected]> (careym) -- Matthieu Castet <[email protected]> (mat-c) -- nvidiaman <[email protected]> (nvidiaman) -- Patrice Mandin <[email protected]> (pmandin, pmdata) -- Pekka Paalanen <[email protected]> (pq, ppaalanen) -- Peter Popov <[email protected]> (ironpeter) -- Richard Hughes <[email protected]> (hughsient) -- Rudi Cilibrasi <[email protected]> (cilibrar) -- Serge Martin -- Simon Raffeiner -- Stephane Loeuillet <[email protected]> (leroutier) -- Stephane Marchesin <[email protected]> (marcheu) -- sturmflut <[email protected]> (sturmflut) -- Sylvain Munaut <[email protected]> -- Victor Stinner <[email protected]> (haypo) -- Wladmir van der 
Laan <[email protected]> (miathan6) -- Younes Manton <[email protected]> (ymanton) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - - -#define NV50_VSTATUS_IDLE 0x00000000 -#define NV50_VSTATUS_BUSY 0x00000001 -#define NV50_VSTATUS_UNK2 0x00000002 -#define NV50_VSTATUS_WAITING 0x00000003 -#define NV50_VSTATUS_BLOCKED 0x00000005 -#define NV50_VSTATUS_FAULTED 0x00000006 -#define NV50_VSTATUS_PAUSED 0x00000007 -#define NV50_SURFACE_FORMAT_BITMAP 0x0000001c -#define NV50_SURFACE_FORMAT_UNK1D 0x0000001d -#define NV50_SURFACE_FORMAT_RGBA32_FLOAT 0x000000c0 -#define NV50_SURFACE_FORMAT_RGBA32_SINT 0x000000c1 -#define NV50_SURFACE_FORMAT_RGBA32_UINT 0x000000c2 -#define NV50_SURFACE_FORMAT_RGBX32_FLOAT 0x000000c3 -#define NV50_SURFACE_FORMAT_RGBX32_SINT 0x000000c4 -#define NV50_SURFACE_FORMAT_RGBX32_UINT 0x000000c5 -#define NV50_SURFACE_FORMAT_RGBA16_UNORM 0x000000c6 -#define NV50_SURFACE_FORMAT_RGBA16_SNORM 0x000000c7 -#define NV50_SURFACE_FORMAT_RGBA16_SINT 0x000000c8 -#define NV50_SURFACE_FORMAT_RGBA16_UINT 0x000000c9 -#define NV50_SURFACE_FORMAT_RGBA16_FLOAT 0x000000ca -#define NV50_SURFACE_FORMAT_RG32_FLOAT 0x000000cb -#define NV50_SURFACE_FORMAT_RG32_SINT 0x000000cc -#define NV50_SURFACE_FORMAT_RG32_UINT 0x000000cd -#define NV50_SURFACE_FORMAT_RGBX16_FLOAT 0x000000ce -#define NV50_SURFACE_FORMAT_BGRA8_UNORM 0x000000cf -#define NV50_SURFACE_FORMAT_BGRA8_SRGB 0x000000d0 -#define NV50_SURFACE_FORMAT_RGB10_A2_UNORM 0x000000d1 -#define NV50_SURFACE_FORMAT_RGB10_A2_UINT 0x000000d2 -#define NV50_SURFACE_FORMAT_RGBA8_UNORM 0x000000d5 -#define NV50_SURFACE_FORMAT_RGBA8_SRGB 0x000000d6 -#define NV50_SURFACE_FORMAT_RGBA8_SNORM 0x000000d7 -#define NV50_SURFACE_FORMAT_RGBA8_SINT 0x000000d8 -#define NV50_SURFACE_FORMAT_RGBA8_UINT 0x000000d9 -#define NV50_SURFACE_FORMAT_RG16_UNORM 0x000000da -#define NV50_SURFACE_FORMAT_RG16_SNORM 0x000000db -#define NV50_SURFACE_FORMAT_RG16_SINT 0x000000dc -#define NV50_SURFACE_FORMAT_RG16_UINT 0x000000dd -#define NV50_SURFACE_FORMAT_RG16_FLOAT 0x000000de -#define NV50_SURFACE_FORMAT_BGR10_A2_UNORM 0x000000df -#define NV50_SURFACE_FORMAT_R11G11B10_FLOAT 0x000000e0 
-#define NV50_SURFACE_FORMAT_R32_SINT 0x000000e3 -#define NV50_SURFACE_FORMAT_R32_UINT 0x000000e4 -#define NV50_SURFACE_FORMAT_R32_FLOAT 0x000000e5 -#define NV50_SURFACE_FORMAT_BGRX8_UNORM 0x000000e6 -#define NV50_SURFACE_FORMAT_BGRX8_SRGB 0x000000e7 -#define NV50_SURFACE_FORMAT_B5G6R5_UNORM 0x000000e8 -#define NV50_SURFACE_FORMAT_BGR5_A1_UNORM 0x000000e9 -#define NV50_SURFACE_FORMAT_RG8_UNORM 0x000000ea -#define NV50_SURFACE_FORMAT_RG8_SNORM 0x000000eb -#define NV50_SURFACE_FORMAT_RG8_SINT 0x000000ec -#define NV50_SURFACE_FORMAT_RG8_UINT 0x000000ed -#define NV50_SURFACE_FORMAT_R16_UNORM 0x000000ee -#define NV50_SURFACE_FORMAT_R16_SNORM 0x000000ef -#define NV50_SURFACE_FORMAT_R16_SINT 0x000000f0 -#define NV50_SURFACE_FORMAT_R16_UINT 0x000000f1 -#define NV50_SURFACE_FORMAT_R16_FLOAT 0x000000f2 -#define NV50_SURFACE_FORMAT_R8_UNORM 0x000000f3 -#define NV50_SURFACE_FORMAT_R8_SNORM 0x000000f4 -#define NV50_SURFACE_FORMAT_R8_SINT 0x000000f5 -#define NV50_SURFACE_FORMAT_R8_UINT 0x000000f6 -#define NV50_SURFACE_FORMAT_A8_UNORM 0x000000f7 -#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM 0x000000f8 -#define NV50_SURFACE_FORMAT_RGBX8_UNORM 0x000000f9 -#define NV50_SURFACE_FORMAT_RGBX8_SRGB 0x000000fa -#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFB 0x000000fb -#define NV50_SURFACE_FORMAT_BGR5_X1_UNORM_UNKFC 0x000000fc -#define NV50_SURFACE_FORMAT_BGRX8_UNORM_UNKFD 0x000000fd -#define NV50_SURFACE_FORMAT_BGRX8_UNORM_UNKFE 0x000000fe -#define NV50_SURFACE_FORMAT_Y32_UINT_UNKFF 0x000000ff -#define NV50_ZETA_FORMAT_Z32_FLOAT 0x0000000a -#define NV50_ZETA_FORMAT_Z16_UNORM 0x00000013 -#define NV50_ZETA_FORMAT_S8_Z24_UNORM 0x00000014 -#define NV50_ZETA_FORMAT_Z24_X8_UNORM 0x00000015 -#define NV50_ZETA_FORMAT_Z24_S8_UNORM 0x00000016 -#define NV50_ZETA_FORMAT_Z24_C8_UNORM 0x00000018 -#define NV50_ZETA_FORMAT_Z32_S8_X24_FLOAT 0x00000019 -#define NV50_ZETA_FORMAT_Z24_X8_S8_C8_X16_UNORM 0x0000001d -#define NV50_ZETA_FORMAT_Z32_X8_C8_X16_FLOAT 0x0000001e -#define 
NV50_ZETA_FORMAT_Z32_S8_C8_X16_FLOAT 0x0000001f -#define NVE4_IMAGE_FORMAT_RGBA32_FLOAT 0x00000002 -#define NVE4_IMAGE_FORMAT_RGBA32_SINT 0x00000003 -#define NVE4_IMAGE_FORMAT_RGBA32_UINT 0x00000004 -#define NVE4_IMAGE_FORMAT_RGBA16_UNORM 0x00000008 -#define NVE4_IMAGE_FORMAT_RGBA16_SNORM 0x00000009 -#define NVE4_IMAGE_FORMAT_RGBA16_SINT 0x0000000a -#define NVE4_IMAGE_FORMAT_RGBA16_UINT 0x0000000b -#define NVE4_IMAGE_FORMAT_RGBA16_FLOAT 0x0000000c -#define NVE4_IMAGE_FORMAT_RG32_FLOAT 0x0000000d -#define NVE4_IMAGE_FORMAT_RG32_SINT 0x0000000e -#define NVE4_IMAGE_FORMAT_RG32_UINT 0x0000000f -#define NVE4_IMAGE_FORMAT_RGB10_A2_UNORM 0x00000013 -#define NVE4_IMAGE_FORMAT_RGB10_A2_UINT 0x00000015 -#define NVE4_IMAGE_FORMAT_RGBA8_UNORM 0x00000018 -#define NVE4_IMAGE_FORMAT_RGBA8_SNORM 0x0000001a -#define NVE4_IMAGE_FORMAT_RGBA8_SINT 0x0000001b -#define NVE4_IMAGE_FORMAT_RGBA8_UINT 0x0000001c -#define NVE4_IMAGE_FORMAT_RG16_UNORM 0x0000001d -#define NVE4_IMAGE_FORMAT_RG16_SNORM 0x0000001e -#define NVE4_IMAGE_FORMAT_RG16_SINT 0x0000001f -#define NVE4_IMAGE_FORMAT_RG16_UINT 0x00000020 -#define NVE4_IMAGE_FORMAT_RG16_FLOAT 0x00000021 -#define NVE4_IMAGE_FORMAT_R11G11B10_FLOAT 0x00000024 -#define NVE4_IMAGE_FORMAT_R32_SINT 0x00000027 -#define NVE4_IMAGE_FORMAT_R32_UINT 0x00000028 -#define NVE4_IMAGE_FORMAT_R32_FLOAT 0x00000029 -#define NVE4_IMAGE_FORMAT_RG8_UNORM 0x0000002e -#define NVE4_IMAGE_FORMAT_RG8_SNORM 0x0000002f -#define NVE4_IMAGE_FORMAT_RG8_SINT 0x00000030 -#define NVE4_IMAGE_FORMAT_RG8_UINT 0x00000031 -#define NVE4_IMAGE_FORMAT_R16_UNORM 0x00000032 -#define NVE4_IMAGE_FORMAT_R16_SNORM 0x00000033 -#define NVE4_IMAGE_FORMAT_R16_SINT 0x00000034 -#define NVE4_IMAGE_FORMAT_R16_UINT 0x00000035 -#define NVE4_IMAGE_FORMAT_R16_FLOAT 0x00000036 -#define NVE4_IMAGE_FORMAT_R8_UNORM 0x00000037 -#define NVE4_IMAGE_FORMAT_R8_SNORM 0x00000038 -#define NVE4_IMAGE_FORMAT_R8_SINT 0x00000039 -#define NVE4_IMAGE_FORMAT_R8_UINT 0x0000003a -#define 
NV50_PGRAPH_DATA_ERROR_INVALID_OPERATION 0x00000003 -#define NV50_PGRAPH_DATA_ERROR_INVALID_VALUE 0x00000004 -#define NV50_PGRAPH_DATA_ERROR_INVALID_ENUM 0x00000005 -#define NV50_PGRAPH_DATA_ERROR_INVALID_OBJECT 0x00000008 -#define NV50_PGRAPH_DATA_ERROR_READ_ONLY_OBJECT 0x00000009 -#define NV50_PGRAPH_DATA_ERROR_SUPERVISOR_OBJECT 0x0000000a -#define NV50_PGRAPH_DATA_ERROR_INVALID_ADDRESS_ALIGNMENT 0x0000000b -#define NV50_PGRAPH_DATA_ERROR_INVALID_BITFIELD 0x0000000c -#define NV50_PGRAPH_DATA_ERROR_BEGIN_END_ACTIVE 0x0000000d -#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_COLOR_BACK_OVER_LIMIT 0x0000000e -#define NV50_PGRAPH_DATA_ERROR_VIEWPORT_ID_NEEDS_GP 0x0000000f -#define NV50_PGRAPH_DATA_ERROR_RT_DOUBLE_BIND 0x00000010 -#define NV50_PGRAPH_DATA_ERROR_RT_TYPES_MISMATCH 0x00000011 -#define NV50_PGRAPH_DATA_ERROR_RT_LINEAR_WITH_ZETA 0x00000012 -#define NV50_PGRAPH_DATA_ERROR_FP_TOO_FEW_REGS 0x00000015 -#define NV50_PGRAPH_DATA_ERROR_ZETA_FORMAT_CSAA_MISMATCH 0x00000016 -#define NV50_PGRAPH_DATA_ERROR_RT_LINEAR_WITH_MSAA 0x00000017 -#define NV50_PGRAPH_DATA_ERROR_FP_INTERPOLANT_START_OVER_LIMIT 0x00000018 -#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_LAYER_OVER_LIMIT 0x00000019 -#define NV50_PGRAPH_DATA_ERROR_RT_INVALID_ALIGNMENT 0x0000001a -#define NV50_PGRAPH_DATA_ERROR_SAMPLER_OVER_LIMIT 0x0000001b -#define NV50_PGRAPH_DATA_ERROR_TEXTURE_OVER_LIMIT 0x0000001c -#define NV50_PGRAPH_DATA_ERROR_GP_TOO_MANY_OUTPUTS 0x0000001e -#define NV50_PGRAPH_DATA_ERROR_RT_BPP128_WITH_MS8 0x0000001f -#define NV50_PGRAPH_DATA_ERROR_Z_OUT_OF_BOUNDS 0x00000021 -#define NV50_PGRAPH_DATA_ERROR_XY_OUT_OF_BOUNDS 0x00000023 -#define NV50_PGRAPH_DATA_ERROR_VP_ZERO_INPUTS 0x00000024 -#define NV50_PGRAPH_DATA_ERROR_CP_MORE_PARAMS_THAN_SHARED 0x00000027 -#define NV50_PGRAPH_DATA_ERROR_CP_NO_REG_SPACE_STRIPED 0x00000028 -#define NV50_PGRAPH_DATA_ERROR_CP_NO_REG_SPACE_PACKED 0x00000029 -#define NV50_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_WARPS 0x0000002a -#define NV50_PGRAPH_DATA_ERROR_CP_BLOCK_SIZE_MISMATCH 
0x0000002b -#define NV50_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_LOCAL_WARPS 0x0000002c -#define NV50_PGRAPH_DATA_ERROR_CP_NOT_ENOUGH_STACK_WARPS 0x0000002d -#define NV50_PGRAPH_DATA_ERROR_CP_NO_BLOCKDIM_LATCH 0x0000002e -#define NV50_PGRAPH_DATA_ERROR_ENG2D_FORMAT_MISMATCH 0x00000031 -#define NV50_PGRAPH_DATA_ERROR_ENG2D_OPERATION_ILLEGAL_FOR_DST_FORMAT 0x00000033 -#define NV50_PGRAPH_DATA_ERROR_ENG2D_FORMAT_MISMATCH_B 0x00000034 -#define NV50_PGRAPH_DATA_ERROR_PRIMITIVE_ID_NEEDS_GP 0x0000003f -#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_VIEWPORT_OVER_LIMIT 0x00000044 -#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_COLOR_FRONT_OVER_LIMIT 0x00000045 -#define NV50_PGRAPH_DATA_ERROR_LAYER_ID_NEEDS_GP 0x00000046 -#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_CLIP_OVER_LIMIT 0x00000047 -#define NV50_PGRAPH_DATA_ERROR_SEMANTIC_PTSZ_OVER_LIMIT 0x00000048 -#define NV50_PGRAPH_DATA_ERROR_M2MF_LINE_LENGTH_EXCEEDS_PITCH_IN 0x00000051 -#define NV50_PGRAPH_DATA_ERROR_M2MF_LINE_LENGTH_EXCEEDS_PITCH_OUT 0x00000053 -#define NV50_PGRAPH_DATA_ERROR_RT_LINEAR_WITH_ZETA_GF100 0x00000098 -#define NV50_PGRAPH_DATA_ERROR_ENG2D_UNALIGNED_PITCH_GF100 0x000000a5 -#define NV50_CG_IDLE_TIMEOUT__MASK 0x0000003f -#define NV50_CG_IDLE_TIMEOUT__SHIFT 0 -#define NV50_CG_IDLE_TIMEOUT_ENABLE 0x00000040 -#define NV50_CG_INTERFACE_REENABLE_TIME__MASK 0x000f0000 -#define NV50_CG_INTERFACE_REENABLE_TIME__SHIFT 16 -#define NV50_CG_THROTTLE_DUTY_M1__MASK 0x00f00000 -#define NV50_CG_THROTTLE_DUTY_M1__SHIFT 20 -#define NV50_CG_DELAY__MASK 0x0f000000 -#define NV50_CG_DELAY__SHIFT 24 -#define NV50_CG_CLOCK_THROTTLE_ENABLE 0x10000000 -#define NV50_CG_THROTTLE_MODE__MASK 0x20000000 -#define NV50_CG_THROTTLE_MODE__SHIFT 29 -#define NV50_CG_THROTTLE_MODE_AUTO 0x00000000 -#define NV50_CG_THROTTLE_MODE_MANUAL 0x20000000 -#define NV50_CG_INTERFACE_THROTTLE_ENABLE 0x40000000 -#define NV50_QUERY__SIZE 0x00000010 -#define NV50_QUERY_COUNTER 0x00000000 - -#define NV50_QUERY_RES 0x00000004 - -#define NV50_QUERY_TIME 0x00000008 - - -#endif /* 
NV50_DEFS_XML */ diff --git a/src/gallium/drivers/nouveau/nv50/nv50_formats.c b/src/gallium/drivers/nouveau/nv50/nv50_formats.c index 49a93bf1d91..717067cf2f7 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c @@ -27,8 +27,8 @@ # include "nv50/nv50_screen.h" # include "nv50/nv50_3d.xml.h" #endif -#include "nv50/nv50_texture.xml.h" -#include "nv50/nv50_defs.xml.h" +#include "nv50/g80_texture.xml.h" +#include "nv50/g80_defs.xml.h" #include "pipe/p_defines.h" @@ -39,10 +39,8 @@ * C: render target (color), blendable only on nvc0 * D: scanout/display target, blendable * Z: depth/stencil - * V: vertex fetch * I: image / surface, implies T */ -#define U_V PIPE_BIND_VERTEX_BUFFER #define U_T PIPE_BIND_SAMPLER_VIEW #define U_I PIPE_BIND_SHADER_BUFFER | PIPE_BIND_SHADER_IMAGE | PIPE_BIND_COMPUTE_RESOURCE #define U_TR PIPE_BIND_RENDER_TARGET | U_T @@ -51,38 +49,273 @@ #define U_IB PIPE_BIND_BLENDABLE | U_IR #define U_TD PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET | U_TB #define U_TZ PIPE_BIND_DEPTH_STENCIL | U_T -#define U_TV U_V | U_T -#define U_TRV U_V | U_TR -#define U_IRV U_V | U_IR -#define U_TBV U_V | U_TB -#define U_IBV U_V | U_IB -#define U_TDV U_V | U_TD #if NOUVEAU_DRIVER == 0xc0 # define U_TC U_TB # define U_IC U_IB -# define U_TCV U_TBV -# define U_ICV U_IBV # define U_t U_T -# define U_tV U_TV #else # define U_TC U_TR # define U_IC U_IR -# define U_TCV U_TRV -# define U_ICV U_IRV # define U_t 0 -# define U_tV U_V #endif -#define NV50_ZETA_FORMAT_NONE 0 -#define NV50_SURFACE_FORMAT_NONE 0 +#define G80_ZETA_FORMAT_NONE 0 +#define G80_SURFACE_FORMAT_NONE 0 -/* for vertex buffers: */ -#define NV50_TIC_0_FMT_8_8_8 NV50_TIC_0_FMT_8_8_8_8 -#define NV50_TIC_0_FMT_16_16_16 NV50_TIC_0_FMT_16_16_16_16 -#define NV50_TIC_0_FMT_32_32_32 NVC0_TIC_0_FMT_32_32_32 -#define NV50_TIC_0_FMT_BPTC NVC0_TIC_0_FMT_BPTC -#define NV50_TIC_0_FMT_BPTC_FLOAT NVC0_TIC_0_FMT_BPTC_FLOAT -#define NV50_TIC_0_FMT_BPTC_UFLOAT 
NVC0_TIC_0_FMT_BPTC_UFLOAT +#define SF_A(sz) G80_TIC_0_COMPONENTS_SIZES_##sz +#define SF_B(sz) G200_TIC_0_COMPONENTS_SIZES_##sz +#define SF_C(sz) GF100_TIC_0_COMPONENTS_SIZES_##sz +#define SF(c, pf, sf, r, g, b, a, t0, t1, t2, t3, sz, u) \ + [PIPE_FORMAT_##pf] = { \ + sf, { \ + SF_##c(sz), \ + G80_TIC_TYPE_##t0, \ + G80_TIC_TYPE_##t1, \ + G80_TIC_TYPE_##t2, \ + G80_TIC_TYPE_##t3, \ + G80_TIC_SOURCE_##r, \ + G80_TIC_SOURCE_##g, \ + G80_TIC_SOURCE_##b, \ + G80_TIC_SOURCE_##a, \ + }, U_##u \ + } + +#define C4(c, p, n, r, g, b, a, t, s, u) \ + SF(c, p, G80_SURFACE_FORMAT_##n, r, g, b, a, t, t, t, t, s, u) + +#define ZX(c, p, n, r, g, b, a, t, s, u) \ + SF(c, p, G80_ZETA_FORMAT_##n, \ + r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u) +#define ZS(c, p, n, r, g, b, a, t, s, u) \ + SF(c, p, G80_ZETA_FORMAT_##n, \ + r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u) +#define SZ(c, p, n, r, g, b, a, t, s, u) \ + SF(c, p, G80_ZETA_FORMAT_##n, \ + r, g, b, ONE_FLOAT, UINT, t, UINT, UINT, s, u) +#define SX(c, p, r, s, u) \ + SF(c, p, G80_ZETA_FORMAT_NONE, \ + r, r, r, r, UINT, UINT, UINT, UINT, s, u) + +#define F3(c, p, n, r, g, b, a, t, s, u) \ + C4(c, p, n, r, g, b, ONE_FLOAT, t, s, u) +#define I3(c, p, n, r, g, b, a, t, s, u) \ + C4(c, p, n, r, g, b, ONE_INT, t, s, u) + +#define F2(c, p, n, r, g, b, a, t, s, u) \ + C4(c, p, n, r, g, ZERO, ONE_FLOAT, t, s, u) +#define I2(c, p, n, r, g, b, a, t, s, u) \ + C4(c, p, n, r, g, ZERO, ONE_INT, t, s, u) + +#define F1(c, p, n, r, g, b, a, t, s, u) \ + C4(c, p, n, r, ZERO, ZERO, ONE_FLOAT, t, s, u) +#define I1(c, p, n, r, g, b, a, t, s, u) \ + C4(c, p, n, r, ZERO, ZERO, ONE_INT, t, s, u) + +#define A1(c, p, n, r, g, b, a, t, s, u) \ + C4(c, p, n, ZERO, ZERO, ZERO, a, t, s, u) + +#if NOUVEAU_DRIVER == 0xc0 +const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = +#else +const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = +#endif +{ + C4(A, B8G8R8A8_UNORM, BGRA8_UNORM, B, G, R, A, UNORM, A8B8G8R8, TD), + F3(A, 
B8G8R8X8_UNORM, BGRX8_UNORM, B, G, R, xx, UNORM, A8B8G8R8, TD), + C4(A, B8G8R8A8_SRGB, BGRA8_SRGB, B, G, R, A, UNORM, A8B8G8R8, TD), + F3(A, B8G8R8X8_SRGB, BGRX8_SRGB, B, G, R, xx, UNORM, A8B8G8R8, TD), + C4(A, R8G8B8A8_UNORM, RGBA8_UNORM, R, G, B, A, UNORM, A8B8G8R8, IB), + F3(A, R8G8B8X8_UNORM, RGBX8_UNORM, R, G, B, xx, UNORM, A8B8G8R8, TB), + C4(A, R8G8B8A8_SRGB, RGBA8_SRGB, R, G, B, A, UNORM, A8B8G8R8, TB), + F3(A, R8G8B8X8_SRGB, RGBX8_SRGB, R, G, B, xx, UNORM, A8B8G8R8, TB), + + ZX(B, Z16_UNORM, Z16_UNORM, R, R, R, xx, UNORM, Z16, TZ), + ZX(A, Z32_FLOAT, Z32_FLOAT, R, R, R, xx, FLOAT, ZF32, TZ), + ZX(A, Z24X8_UNORM, Z24_X8_UNORM, R, R, R, xx, UNORM, X8Z24, TZ), + SZ(A, X8Z24_UNORM, S8_Z24_UNORM, G, G, G, xx, UNORM, Z24S8, TZ), + ZS(A, Z24_UNORM_S8_UINT, Z24_S8_UNORM, R, R, R, xx, UNORM, S8Z24, TZ), + SZ(A, S8_UINT_Z24_UNORM, S8_Z24_UNORM, G, G, G, xx, UNORM, Z24S8, TZ), + ZS(A, Z32_FLOAT_S8X24_UINT, Z32_S8_X24_FLOAT, R, R, R, xx, FLOAT, ZF32_X24S8, TZ), + + SX(A, S8_UINT, R, R8, T), + SX(A, X24S8_UINT, G, S8Z24, T), + SX(A, S8X24_UINT, R, Z24S8, T), + SX(A, X32_S8X24_UINT, G, ZF32_X24S8, T), + + F3(A, B5G6R5_UNORM, B5G6R5_UNORM, B, G, R, xx, UNORM, B5G6R5, TD), + C4(A, B5G5R5A1_UNORM, BGR5_A1_UNORM, B, G, R, A, UNORM, A1B5G5R5, TD), + F3(A, B5G5R5X1_UNORM, BGR5_X1_UNORM, B, G, R, xx, UNORM, A1B5G5R5, TD), + C4(A, B4G4R4A4_UNORM, NONE, B, G, R, A, UNORM, A4B4G4R4, T), + F3(A, B4G4R4X4_UNORM, NONE, B, G, R, xx, UNORM, A4B4G4R4, T), + F3(A, R9G9B9E5_FLOAT, NONE, R, G, B, xx, FLOAT, E5B9G9R9_SHAREDEXP, T), + + C4(A, R10G10B10A2_UNORM, RGB10_A2_UNORM, R, G, B, A, UNORM, A2B10G10R10, IB), + C4(A, B10G10R10A2_UNORM, BGR10_A2_UNORM, B, G, R, A, UNORM, A2B10G10R10, TD), + C4(A, R10G10B10A2_SNORM, NONE, R, G, B, A, SNORM, A2B10G10R10, T), + C4(A, B10G10R10A2_SNORM, NONE, B, G, R, A, SNORM, A2B10G10R10, T), + C4(A, R10G10B10A2_UINT, RGB10_A2_UINT, R, G, B, A, UINT, A2B10G10R10, TR), + C4(A, B10G10R10A2_UINT, RGB10_A2_UINT, B, G, R, A, UINT, A2B10G10R10, T), + + F3(A, 
R11G11B10_FLOAT, R11G11B10_FLOAT, R, G, B, xx, FLOAT, BF10GF11RF11, IB), + + F3(A, L8_UNORM, R8_UNORM, R, R, R, xx, UNORM, R8, TB), + F3(A, L8_SRGB, R8_UNORM, R, R, R, xx, UNORM, R8, TB), + F3(A, L8_SNORM, R8_SNORM, R, R, R, xx, SNORM, R8, TC), + I3(A, L8_SINT, R8_SINT, R, R, R, xx, SINT, R8, TR), + I3(A, L8_UINT, R8_UINT, R, R, R, xx, UINT, R8, TR), + F3(A, L16_UNORM, R16_UNORM, R, R, R, xx, UNORM, R16, TC), + F3(A, L16_SNORM, R16_SNORM, R, R, R, xx, SNORM, R16, TC), + F3(A, L16_FLOAT, R16_FLOAT, R, R, R, xx, FLOAT, R16, TB), + I3(A, L16_SINT, R16_SINT, R, R, R, xx, SINT, R16, TR), + I3(A, L16_UINT, R16_UINT, R, R, R, xx, UINT, R16, TR), + F3(A, L32_FLOAT, R32_FLOAT, R, R, R, xx, FLOAT, R32, TB), + I3(A, L32_SINT, R32_SINT, R, R, R, xx, SINT, R32, TR), + I3(A, L32_UINT, R32_UINT, R, R, R, xx, UINT, R32, TR), + + C4(A, I8_UNORM, R8_UNORM, R, R, R, R, UNORM, R8, TR), + C4(A, I8_SNORM, R8_SNORM, R, R, R, R, SNORM, R8, TR), + C4(A, I8_SINT, R8_SINT, R, R, R, R, SINT, R8, TR), + C4(A, I8_UINT, R8_UINT, R, R, R, R, UINT, R8, TR), + C4(A, I16_UNORM, R16_UNORM, R, R, R, R, UNORM, R16, TR), + C4(A, I16_SNORM, R16_SNORM, R, R, R, R, SNORM, R16, TR), + C4(A, I16_FLOAT, R16_FLOAT, R, R, R, R, FLOAT, R16, TR), + C4(A, I16_SINT, R16_SINT, R, R, R, R, SINT, R16, TR), + C4(A, I16_UINT, R16_UINT, R, R, R, R, UINT, R16, TR), + C4(A, I32_FLOAT, R32_FLOAT, R, R, R, R, FLOAT, R32, TR), + C4(A, I32_SINT, R32_SINT, R, R, R, R, SINT, R32, TR), + C4(A, I32_UINT, R32_UINT, R, R, R, R, UINT, R32, TR), + + A1(A, A8_UNORM, A8_UNORM, xx, xx, xx, R, UNORM, R8, TB), + A1(A, A8_SNORM, R8_SNORM, xx, xx, xx, R, SNORM, R8, T), + A1(A, A8_SINT, R8_SINT, xx, xx, xx, R, SINT, R8, T), + A1(A, A8_UINT, R8_UINT, xx, xx, xx, R, UINT, R8, T), + A1(A, A16_UNORM, R16_UNORM, xx, xx, xx, R, UNORM, R16, T), + A1(A, A16_SNORM, R16_SNORM, xx, xx, xx, R, SNORM, R16, T), + A1(A, A16_FLOAT, R16_FLOAT, xx, xx, xx, R, FLOAT, R16, T), + A1(A, A16_SINT, R16_SINT, xx, xx, xx, R, SINT, R16, T), + A1(A, A16_UINT, R16_UINT, 
xx, xx, xx, R, UINT, R16, T), + A1(A, A32_FLOAT, R32_FLOAT, xx, xx, xx, R, FLOAT, R32, T), + A1(A, A32_SINT, R32_SINT, xx, xx, xx, R, SINT, R32, T), + A1(A, A32_UINT, R32_UINT, xx, xx, xx, R, UINT, R32, T), + + C4(A, L4A4_UNORM, NONE, R, R, R, G, UNORM, G4R4, T), + C4(A, L8A8_UNORM, RG8_UNORM, R, R, R, G, UNORM, G8R8, T), + C4(A, L8A8_SNORM, RG8_SNORM, R, R, R, G, SNORM, G8R8, T), + C4(A, L8A8_SRGB, RG8_UNORM, R, R, R, G, UNORM, G8R8, T), + C4(A, L8A8_SINT, RG8_SINT, R, R, R, G, SINT, G8R8, T), + C4(A, L8A8_UINT, RG8_UINT, R, R, R, G, UINT, G8R8, T), + C4(A, L16A16_UNORM, RG16_UNORM, R, R, R, G, UNORM, R16_G16, T), + C4(A, L16A16_SNORM, RG16_SNORM, R, R, R, G, SNORM, R16_G16, T), + C4(A, L16A16_FLOAT, RG16_FLOAT, R, R, R, G, FLOAT, R16_G16, T), + C4(A, L16A16_SINT, RG16_SINT, R, R, R, G, SINT, R16_G16, T), + C4(A, L16A16_UINT, RG16_UINT, R, R, R, G, UINT, R16_G16, T), + C4(A, L32A32_FLOAT, RG32_FLOAT, R, R, R, G, FLOAT, R32_G32, T), + C4(A, L32A32_SINT, RG32_SINT, R, R, R, G, SINT, R32_G32, T), + C4(A, L32A32_UINT, RG32_UINT, R, R, R, G, UINT, R32_G32, T), + + F3(A, DXT1_RGB, NONE, R, G, B, xx, UNORM, DXT1, T), + F3(A, DXT1_SRGB, NONE, R, G, B, xx, UNORM, DXT1, T), + C4(A, DXT1_RGBA, NONE, R, G, B, A, UNORM, DXT1, T), + C4(A, DXT1_SRGBA, NONE, R, G, B, A, UNORM, DXT1, T), + C4(A, DXT3_RGBA, NONE, R, G, B, A, UNORM, DXT23, T), + C4(A, DXT3_SRGBA, NONE, R, G, B, A, UNORM, DXT23, T), + C4(A, DXT5_RGBA, NONE, R, G, B, A, UNORM, DXT45, T), + C4(A, DXT5_SRGBA, NONE, R, G, B, A, UNORM, DXT45, T), + + F1(A, RGTC1_UNORM, NONE, R, xx, xx, xx, UNORM, DXN1, T), + F1(A, RGTC1_SNORM, NONE, R, xx, xx, xx, SNORM, DXN1, T), + F2(A, RGTC2_UNORM, NONE, R, G, xx, xx, UNORM, DXN2, T), + F2(A, RGTC2_SNORM, NONE, R, G, xx, xx, SNORM, DXN2, T), + F3(A, LATC1_UNORM, NONE, R, R, R, xx, UNORM, DXN1, T), + F3(A, LATC1_SNORM, NONE, R, R, R, xx, SNORM, DXN1, T), + C4(A, LATC2_UNORM, NONE, R, R, R, G, UNORM, DXN2, T), + C4(A, LATC2_SNORM, NONE, R, R, R, G, SNORM, DXN2, T), + + C4(C, 
BPTC_RGBA_UNORM, NONE, R, G, B, A, UNORM, BC7U, t), + C4(C, BPTC_SRGBA, NONE, R, G, B, A, UNORM, BC7U, t), + F3(C, BPTC_RGB_FLOAT, NONE, R, G, B, xx, FLOAT, BC6H_SF16, t), + F3(C, BPTC_RGB_UFLOAT, NONE, R, G, B, xx, FLOAT, BC6H_UF16, t), + + C4(A, R32G32B32A32_FLOAT, RGBA32_FLOAT, R, G, B, A, FLOAT, R32_G32_B32_A32, IB), + C4(A, R32G32B32A32_UNORM, NONE, R, G, B, A, UNORM, R32_G32_B32_A32, T), + C4(A, R32G32B32A32_SNORM, NONE, R, G, B, A, SNORM, R32_G32_B32_A32, T), + C4(A, R32G32B32A32_SINT, RGBA32_SINT, R, G, B, A, SINT, R32_G32_B32_A32, IR), + C4(A, R32G32B32A32_UINT, RGBA32_UINT, R, G, B, A, UINT, R32_G32_B32_A32, IR), + F3(A, R32G32B32X32_FLOAT, RGBX32_FLOAT, R, G, B, xx, FLOAT, R32_G32_B32_A32, TB), + I3(A, R32G32B32X32_SINT, RGBX32_SINT, R, G, B, xx, SINT, R32_G32_B32_A32, TR), + I3(A, R32G32B32X32_UINT, RGBX32_UINT, R, G, B, xx, UINT, R32_G32_B32_A32, TR), + + F3(C, R32G32B32_FLOAT, NONE, R, G, B, xx, FLOAT, R32_G32_B32, t), + I3(C, R32G32B32_SINT, NONE, R, G, B, xx, SINT, R32_G32_B32, t), + I3(C, R32G32B32_UINT, NONE, R, G, B, xx, UINT, R32_G32_B32, t), + + F2(A, R32G32_FLOAT, RG32_FLOAT, R, G, xx, xx, FLOAT, R32_G32, IB), + F2(A, R32G32_UNORM, NONE, R, G, xx, xx, UNORM, R32_G32, T), + F2(A, R32G32_SNORM, NONE, R, G, xx, xx, SNORM, R32_G32, T), + I2(A, R32G32_SINT, RG32_SINT, R, G, xx, xx, SINT, R32_G32, IR), + I2(A, R32G32_UINT, RG32_UINT, R, G, xx, xx, UINT, R32_G32, IR), + + F1(A, R32_FLOAT, R32_FLOAT, R, xx, xx, xx, FLOAT, R32, IB), + F1(A, R32_UNORM, NONE, R, xx, xx, xx, UNORM, R32, T), + F1(A, R32_SNORM, NONE, R, xx, xx, xx, SNORM, R32, T), + I1(A, R32_SINT, R32_SINT, R, xx, xx, xx, SINT, R32, IR), + I1(A, R32_UINT, R32_UINT, R, xx, xx, xx, UINT, R32, IR), + + C4(A, R16G16B16A16_FLOAT, RGBA16_FLOAT, R, G, B, A, FLOAT, R16_G16_B16_A16, IB), + C4(A, R16G16B16A16_UNORM, RGBA16_UNORM, R, G, B, A, UNORM, R16_G16_B16_A16, IC), + C4(A, R16G16B16A16_SNORM, RGBA16_SNORM, R, G, B, A, SNORM, R16_G16_B16_A16, IC), + C4(A, R16G16B16A16_SINT, RGBA16_SINT, R, G, B, 
A, SINT, R16_G16_B16_A16, IR), + C4(A, R16G16B16A16_UINT, RGBA16_UINT, R, G, B, A, UINT, R16_G16_B16_A16, IR), + F3(A, R16G16B16X16_FLOAT, RGBX16_FLOAT, R, G, B, xx, FLOAT, R16_G16_B16_A16, TB), + F3(A, R16G16B16X16_UNORM, RGBA16_UNORM, R, G, B, xx, UNORM, R16_G16_B16_A16, T), + F3(A, R16G16B16X16_SNORM, RGBA16_SNORM, R, G, B, xx, SNORM, R16_G16_B16_A16, T), + I3(A, R16G16B16X16_SINT, RGBA16_SINT, R, G, B, xx, SINT, R16_G16_B16_A16, T), + I3(A, R16G16B16X16_UINT, RGBA16_UINT, R, G, B, xx, UINT, R16_G16_B16_A16, T), + + F2(A, R16G16_FLOAT, RG16_FLOAT, R, G, xx, xx, FLOAT, R16_G16, IB), + F2(A, R16G16_UNORM, RG16_UNORM, R, G, xx, xx, UNORM, R16_G16, IC), + F2(A, R16G16_SNORM, RG16_SNORM, R, G, xx, xx, SNORM, R16_G16, IC), + I2(A, R16G16_SINT, RG16_SINT, R, G, xx, xx, SINT, R16_G16, IR), + I2(A, R16G16_UINT, RG16_UINT, R, G, xx, xx, UINT, R16_G16, IR), + + F1(A, R16_FLOAT, R16_FLOAT, R, xx, xx, xx, FLOAT, R16, IB), + F1(A, R16_UNORM, R16_UNORM, R, xx, xx, xx, UNORM, R16, IC), + F1(A, R16_SNORM, R16_SNORM, R, xx, xx, xx, SNORM, R16, IC), + I1(A, R16_SINT, R16_SINT, R, xx, xx, xx, SINT, R16, IR), + I1(A, R16_UINT, R16_UINT, R, xx, xx, xx, UINT, R16, IR), + + C4(A, R8G8B8A8_SNORM, RGBA8_SNORM, R, G, B, A, SNORM, A8B8G8R8, IC), + C4(A, R8G8B8A8_SINT, RGBA8_SINT, R, G, B, A, SINT, A8B8G8R8, IR), + C4(A, R8G8B8A8_UINT, RGBA8_UINT, R, G, B, A, UINT, A8B8G8R8, IR), + F3(A, R8G8B8X8_SNORM, RGBA8_SNORM, R, G, B, xx, SNORM, A8B8G8R8, T), + I3(A, R8G8B8X8_SINT, RGBA8_SINT, R, G, B, xx, SINT, A8B8G8R8, T), + I3(A, R8G8B8X8_UINT, RGBA8_UINT, R, G, B, xx, UINT, A8B8G8R8, T), + + F2(A, R8G8_UNORM, RG8_UNORM, R, G, xx, xx, UNORM, G8R8, IB), + F2(A, R8G8_SNORM, RG8_SNORM, R, G, xx, xx, SNORM, G8R8, IC), + I2(A, R8G8_SINT, RG8_SINT, R, G, xx, xx, SINT, G8R8, IR), + I2(A, R8G8_UINT, RG8_UINT, R, G, xx, xx, UINT, G8R8, IR), + + F1(A, R8_UNORM, R8_UNORM, R, xx, xx, xx, UNORM, R8, IB), + F1(A, R8_SNORM, R8_SNORM, R, xx, xx, xx, SNORM, R8, IC), + I1(A, R8_SINT, R8_SINT, R, xx, xx, xx, SINT, 
R8, IR), + I1(A, R8_UINT, R8_UINT, R, xx, xx, xx, UINT, R8, IR), + + F3(A, R8G8_B8G8_UNORM, NONE, R, G, B, xx, UNORM, G8B8G8R8, T), + F3(A, G8R8_B8R8_UNORM, NONE, G, R, B, xx, UNORM, G8B8G8R8, T), + F3(A, G8R8_G8B8_UNORM, NONE, R, G, B, xx, UNORM, B8G8R8G8, T), + F3(A, R8G8_R8B8_UNORM, NONE, G, R, B, xx, UNORM, B8G8R8G8, T), + + F1(A, R1_UNORM, BITMAP, R, xx, xx, xx, UNORM, R1, T), + + C4(A, R4A4_UNORM, NONE, R, ZERO, ZERO, G, UNORM, G4R4, T), + C4(A, R8A8_UNORM, NONE, R, ZERO, ZERO, G, UNORM, G8R8, T), + C4(A, A4R4_UNORM, NONE, G, ZERO, ZERO, R, UNORM, G4R4, T), + C4(A, A8R8_UNORM, NONE, G, ZERO, ZERO, R, UNORM, G8R8, T), + + SF(A, R8SG8SB8UX8U_NORM, 0, R, G, B, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, A8B8G8R8, T), + SF(A, R5SG5SB6U_NORM, 0, R, G, B, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, B6G5R5, T), +}; #if NOUVEAU_DRIVER == 0xc0 # define NVXX_3D_VAF_SIZE(s) NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_##s @@ -92,353 +325,138 @@ # define NVXX_3D_VAF_TYPE(t) NV50_3D_VERTEX_ARRAY_ATTRIB_TYPE_##t #endif -#define TBLENT_A_(pf, sf, r, g, b, a, t0, t1, t2, t3, sz, u, br) \ - [PIPE_FORMAT_##pf] = { \ - sf, \ - (NV50_TIC_MAP_##r << NV50_TIC_0_MAPR__SHIFT) | \ - (NV50_TIC_MAP_##g << NV50_TIC_0_MAPG__SHIFT) | \ - (NV50_TIC_MAP_##b << NV50_TIC_0_MAPB__SHIFT) | \ - (NV50_TIC_MAP_##a << NV50_TIC_0_MAPA__SHIFT) | \ - (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \ - (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \ - (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \ - (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \ - NV50_TIC_0_FMT_##sz, \ - NVXX_3D_VAF_SIZE(sz) | \ - NVXX_3D_VAF_TYPE(t0) | (br << 31), \ - U_##u \ - } - -#define TBLENT_B_(pf, sf, r, g, b, a, t0, t1, t2, t3, sz, u) \ +#define VF_x(pf, type, size, bgra) 0 +#define VF_A(pf, type, size, bgra) \ + NVXX_3D_VAF_SIZE(size) | NVXX_3D_VAF_TYPE(type) | (bgra << 31) +#define VF(c, pf, type, size, bgra) \ [PIPE_FORMAT_##pf] = { \ - sf, \ - (NV50_TIC_MAP_##r << NV50_TIC_0_MAPR__SHIFT) | \ - (NV50_TIC_MAP_##g << 
NV50_TIC_0_MAPG__SHIFT) | \ - (NV50_TIC_MAP_##b << NV50_TIC_0_MAPB__SHIFT) | \ - (NV50_TIC_MAP_##a << NV50_TIC_0_MAPA__SHIFT) | \ - (NV50_TIC_TYPE_##t0 << NV50_TIC_0_TYPE0__SHIFT) | \ - (NV50_TIC_TYPE_##t1 << NV50_TIC_0_TYPE1__SHIFT) | \ - (NV50_TIC_TYPE_##t2 << NV50_TIC_0_TYPE2__SHIFT) | \ - (NV50_TIC_TYPE_##t3 << NV50_TIC_0_TYPE3__SHIFT) | \ - NV50_TIC_0_FMT_##sz, 0, U_##u \ + VF_##c(pf, type, size, bgra), \ + PIPE_BIND_VERTEX_BUFFER \ } -#define C4A(p, n, r, g, b, a, t, s, u, br) \ - TBLENT_A_(p, NV50_SURFACE_FORMAT_##n, r, g, b, a, t, t, t, t, s, u, br) -#define C4B(p, n, r, g, b, a, t, s, u) \ - TBLENT_B_(p, NV50_SURFACE_FORMAT_##n, r, g, b, a, t, t, t, t, s, u) - -#define ZXB(p, n, r, g, b, a, t, s, u) \ - TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \ - r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u) -#define ZSB(p, n, r, g, b, a, t, s, u) \ - TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \ - r, g, b, ONE_FLOAT, t, UINT, UINT, UINT, s, u) -#define SZB(p, n, r, g, b, a, t, s, u) \ - TBLENT_B_(p, NV50_ZETA_FORMAT_##n, \ - r, g, b, ONE_FLOAT, UINT, t, UINT, UINT, s, u) -#define SXB(p, r, s, u) \ - TBLENT_B_(p, NV50_ZETA_FORMAT_NONE, \ - r, r, r, r, UINT, UINT, UINT, UINT, s, u) - -#define F3A(p, n, r, g, b, a, t, s, u) \ - C4A(p, n, r, g, b, ONE_FLOAT, t, s, u, 0) -#define I3A(p, n, r, g, b, a, t, s, u) \ - C4A(p, n, r, g, b, ONE_INT, t, s, u, 0) -#define F3B(p, n, r, g, b, a, t, s, u) \ - C4B(p, n, r, g, b, ONE_FLOAT, t, s, u) -#define I3B(p, n, r, g, b, a, t, s, u) \ - C4B(p, n, r, g, b, ONE_INT, t, s, u) - -#define F2A(p, n, r, g, b, a, t, s, u) \ - C4A(p, n, r, g, ZERO, ONE_FLOAT, t, s, u, 0) -#define I2A(p, n, r, g, b, a, t, s, u) \ - C4A(p, n, r, g, ZERO, ONE_INT, t, s, u, 0) -#define F2B(p, n, r, g, b, a, t, s, u) \ - C4B(p, n, r, g, ZERO, ONE_FLOAT, t, s, u) -#define I2B(p, n, r, g, b, a, t, s, u) \ - C4B(p, n, r, g, ZERO, ONE_INT, t, s, u) - -#define F1A(p, n, r, g, b, a, t, s, u) \ - C4A(p, n, r, ZERO, ZERO, ONE_FLOAT, t, s, u, 0) -#define I1A(p, n, r, g, b, a, t, s, u) \ 
- C4A(p, n, r, ZERO, ZERO, ONE_INT, t, s, u, 0) -#define F1B(p, n, r, g, b, a, t, s, u) \ - C4B(p, n, r, ZERO, ZERO, ONE_FLOAT, t, s, u) -#define I1B(p, n, r, g, b, a, t, s, u) \ - C4B(p, n, r, ZERO, ZERO, ONE_INT, t, s, u) - -#define A1B(p, n, r, g, b, a, t, s, u) \ - C4B(p, n, ZERO, ZERO, ZERO, a, t, s, u) - #if NOUVEAU_DRIVER == 0xc0 -const struct nvc0_format nvc0_format_table[PIPE_FORMAT_COUNT] = +const struct nvc0_vertex_format nvc0_vertex_format[PIPE_FORMAT_COUNT] = #else -const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] = +const struct nv50_vertex_format nv50_vertex_format[PIPE_FORMAT_COUNT] = #endif { - C4A(B8G8R8A8_UNORM, BGRA8_UNORM, C2, C1, C0, C3, UNORM, 8_8_8_8, TDV, 1), - F3A(B8G8R8X8_UNORM, BGRX8_UNORM, C2, C1, C0, xx, UNORM, 8_8_8_8, TD), - C4A(B8G8R8A8_SRGB, BGRA8_SRGB, C2, C1, C0, C3, UNORM, 8_8_8_8, TD, 1), - F3A(B8G8R8X8_SRGB, BGRX8_SRGB, C2, C1, C0, xx, UNORM, 8_8_8_8, TD), - C4A(R8G8B8A8_UNORM, RGBA8_UNORM, C0, C1, C2, C3, UNORM, 8_8_8_8, IBV, 0), - F3A(R8G8B8X8_UNORM, RGBX8_UNORM, C0, C1, C2, xx, UNORM, 8_8_8_8, TB), - C4A(R8G8B8A8_SRGB, RGBA8_SRGB, C0, C1, C2, C3, UNORM, 8_8_8_8, TB, 0), - F3B(R8G8B8X8_SRGB, RGBX8_SRGB, C0, C1, C2, xx, UNORM, 8_8_8_8, TB), - - ZXB(Z16_UNORM, Z16_UNORM, C0, C0, C0, xx, UNORM, Z16, TZ), - ZXB(Z32_FLOAT, Z32_FLOAT, C0, C0, C0, xx, FLOAT, Z32, TZ), - ZXB(Z24X8_UNORM, Z24_X8_UNORM, C0, C0, C0, xx, UNORM, Z24_X8, TZ), - SZB(X8Z24_UNORM, S8_Z24_UNORM, C1, C1, C1, xx, UNORM, S8_Z24, TZ), - ZSB(Z24_UNORM_S8_UINT, Z24_S8_UNORM, C0, C0, C0, xx, UNORM, Z24_S8, TZ), - SZB(S8_UINT_Z24_UNORM, S8_Z24_UNORM, C1, C1, C1, xx, UNORM, S8_Z24, TZ), - ZSB(Z32_FLOAT_S8X24_UINT, Z32_S8_X24_FLOAT, C0, C0, C0, xx, FLOAT, - Z32_S8_X24, TZ), - - SXB(S8_UINT, C0, 8, T), - SXB(X24S8_UINT, C1, Z24_S8, T), - SXB(S8X24_UINT, C0, S8_Z24, T), - SXB(X32_S8X24_UINT, C1, Z32_S8_X24, T), - - F3B(B5G6R5_UNORM, B5G6R5_UNORM, C2, C1, C0, xx, UNORM, 5_6_5, TD), - C4B(B5G5R5A1_UNORM, BGR5_A1_UNORM, C2, C1, C0, C3, UNORM, 5_5_5_1, TD), - 
F3B(B5G5R5X1_UNORM, BGR5_X1_UNORM, C2, C1, C0, xx, UNORM, 5_5_5_1, TD), - C4B(B4G4R4A4_UNORM, NONE, C2, C1, C0, C3, UNORM, 4_4_4_4, T), - F3B(B4G4R4X4_UNORM, NONE, C2, C1, C0, xx, UNORM, 4_4_4_4, T), - F3B(R9G9B9E5_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 9_9_9_E5, T), - - C4A(R10G10B10A2_UNORM, RGB10_A2_UNORM, C0, C1, C2, C3, UNORM, 10_10_10_2, - IBV, 0), - C4A(B10G10R10A2_UNORM, BGR10_A2_UNORM, C2, C1, C0, C3, UNORM, 10_10_10_2, - TDV, 1), - C4A(R10G10B10A2_SNORM, NONE, C0, C1, C2, C3, SNORM, 10_10_10_2, TV, 0), - C4A(B10G10R10A2_SNORM, NONE, C2, C1, C0, C3, SNORM, 10_10_10_2, TV, 1), - C4A(R10G10B10A2_UINT, RGB10_A2_UINT, C0, C1, C2, C3, UINT, 10_10_10_2, TRV, 0), - C4A(B10G10R10A2_UINT, RGB10_A2_UINT, C2, C1, C0, C3, UINT, 10_10_10_2, TV, 0), - - F3A(R11G11B10_FLOAT, R11G11B10_FLOAT, C0, C1, C2, xx, FLOAT, 11_11_10, IBV), - - F3B(L8_UNORM, R8_UNORM, C0, C0, C0, xx, UNORM, 8, TB), - F3B(L8_SRGB, R8_UNORM, C0, C0, C0, xx, UNORM, 8, TB), - F3B(L8_SNORM, R8_SNORM, C0, C0, C0, xx, SNORM, 8, TC), - I3B(L8_SINT, R8_SINT, C0, C0, C0, xx, SINT, 8, TR), - I3B(L8_UINT, R8_UINT, C0, C0, C0, xx, UINT, 8, TR), - F3B(L16_UNORM, R16_UNORM, C0, C0, C0, xx, UNORM, 16, TC), - F3B(L16_SNORM, R16_SNORM, C0, C0, C0, xx, SNORM, 16, TC), - F3B(L16_FLOAT, R16_FLOAT, C0, C0, C0, xx, FLOAT, 16, TB), - I3B(L16_SINT, R16_SINT, C0, C0, C0, xx, SINT, 16, TR), - I3B(L16_UINT, R16_UINT, C0, C0, C0, xx, UINT, 16, TR), - F3B(L32_FLOAT, R32_FLOAT, C0, C0, C0, xx, FLOAT, 32, TB), - I3B(L32_SINT, R32_SINT, C0, C0, C0, xx, SINT, 32, TR), - I3B(L32_UINT, R32_UINT, C0, C0, C0, xx, UINT, 32, TR), - - C4B(I8_UNORM, R8_UNORM, C0, C0, C0, C0, UNORM, 8, TR), - C4B(I8_SNORM, R8_SNORM, C0, C0, C0, C0, SNORM, 8, TR), - C4B(I8_SINT, R8_SINT, C0, C0, C0, C0, SINT, 8, TR), - C4B(I8_UINT, R8_UINT, C0, C0, C0, C0, UINT, 8, TR), - C4B(I16_UNORM, R16_UNORM, C0, C0, C0, C0, UNORM, 16, TR), - C4B(I16_SNORM, R16_SNORM, C0, C0, C0, C0, SNORM, 16, TR), - C4B(I16_FLOAT, R16_FLOAT, C0, C0, C0, C0, FLOAT, 16, TR), - 
C4B(I16_SINT, R16_SINT, C0, C0, C0, C0, SINT, 16, TR), - C4B(I16_UINT, R16_UINT, C0, C0, C0, C0, UINT, 16, TR), - C4B(I32_FLOAT, R32_FLOAT, C0, C0, C0, C0, FLOAT, 32, TR), - C4B(I32_SINT, R32_SINT, C0, C0, C0, C0, SINT, 32, TR), - C4B(I32_UINT, R32_UINT, C0, C0, C0, C0, UINT, 32, TR), - - A1B(A8_UNORM, A8_UNORM, xx, xx, xx, C0, UNORM, 8, TB), - A1B(A8_SNORM, R8_SNORM, xx, xx, xx, C0, SNORM, 8, T), - A1B(A8_SINT, R8_SINT, xx, xx, xx, C0, SINT, 8, T), - A1B(A8_UINT, R8_UINT, xx, xx, xx, C0, UINT, 8, T), - A1B(A16_UNORM, R16_UNORM, xx, xx, xx, C0, UNORM, 16, T), - A1B(A16_SNORM, R16_SNORM, xx, xx, xx, C0, SNORM, 16, T), - A1B(A16_FLOAT, R16_FLOAT, xx, xx, xx, C0, FLOAT, 16, T), - A1B(A16_SINT, R16_SINT, xx, xx, xx, C0, SINT, 16, T), - A1B(A16_UINT, R16_UINT, xx, xx, xx, C0, UINT, 16, T), - A1B(A32_FLOAT, R32_FLOAT, xx, xx, xx, C0, FLOAT, 32, T), - A1B(A32_SINT, R32_SINT, xx, xx, xx, C0, SINT, 32, T), - A1B(A32_UINT, R32_UINT, xx, xx, xx, C0, UINT, 32, T), - - C4B(L4A4_UNORM, NONE, C0, C0, C0, C1, UNORM, 4_4, T), - C4B(L8A8_UNORM, RG8_UNORM, C0, C0, C0, C1, UNORM, 8_8, T), - C4B(L8A8_SNORM, RG8_SNORM, C0, C0, C0, C1, SNORM, 8_8, T), - C4B(L8A8_SRGB, RG8_UNORM, C0, C0, C0, C1, UNORM, 8_8, T), - C4B(L8A8_SINT, RG8_SINT, C0, C0, C0, C1, SINT, 8_8, T), - C4B(L8A8_UINT, RG8_UINT, C0, C0, C0, C1, UINT, 8_8, T), - C4B(L16A16_UNORM, RG16_UNORM, C0, C0, C0, C1, UNORM, 16_16, T), - C4B(L16A16_SNORM, RG16_SNORM, C0, C0, C0, C1, SNORM, 16_16, T), - C4B(L16A16_FLOAT, RG16_FLOAT, C0, C0, C0, C1, FLOAT, 16_16, T), - C4B(L16A16_SINT, RG16_SINT, C0, C0, C0, C1, SINT, 16_16, T), - C4B(L16A16_UINT, RG16_UINT, C0, C0, C0, C1, UINT, 16_16, T), - C4B(L32A32_FLOAT, RG32_FLOAT, C0, C0, C0, C1, FLOAT, 32_32, T), - C4B(L32A32_SINT, RG32_SINT, C0, C0, C0, C1, SINT, 32_32, T), - C4B(L32A32_UINT, RG32_UINT, C0, C0, C0, C1, UINT, 32_32, T), - - F3B(DXT1_RGB, NONE, C0, C1, C2, xx, UNORM, DXT1, T), - F3B(DXT1_SRGB, NONE, C0, C1, C2, xx, UNORM, DXT1, T), - C4B(DXT1_RGBA, NONE, C0, C1, C2, C3, UNORM, 
DXT1, T), - C4B(DXT1_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT1, T), - C4B(DXT3_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT3, T), - C4B(DXT3_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT3, T), - C4B(DXT5_RGBA, NONE, C0, C1, C2, C3, UNORM, DXT5, T), - C4B(DXT5_SRGBA, NONE, C0, C1, C2, C3, UNORM, DXT5, T), - - F1B(RGTC1_UNORM, NONE, C0, xx, xx, xx, UNORM, RGTC1, T), - F1B(RGTC1_SNORM, NONE, C0, xx, xx, xx, SNORM, RGTC1, T), - F2B(RGTC2_UNORM, NONE, C0, C1, xx, xx, UNORM, RGTC2, T), - F2B(RGTC2_SNORM, NONE, C0, C1, xx, xx, SNORM, RGTC2, T), - F3B(LATC1_UNORM, NONE, C0, C0, C0, xx, UNORM, RGTC1, T), - F3B(LATC1_SNORM, NONE, C0, C0, C0, xx, SNORM, RGTC1, T), - C4B(LATC2_UNORM, NONE, C0, C0, C0, C1, UNORM, RGTC2, T), - C4B(LATC2_SNORM, NONE, C0, C0, C0, C1, SNORM, RGTC2, T), - - C4B(BPTC_RGBA_UNORM, NONE, C0, C1, C2, C3, UNORM, BPTC, t), - C4B(BPTC_SRGBA, NONE, C0, C1, C2, C3, UNORM, BPTC, t), - F3B(BPTC_RGB_FLOAT, NONE, C0, C1, C2, xx, FLOAT, BPTC_FLOAT, t), - F3B(BPTC_RGB_UFLOAT, NONE, C0, C1, C2, xx, FLOAT, BPTC_UFLOAT, t), - - C4A(R32G32B32A32_FLOAT, RGBA32_FLOAT, C0, C1, C2, C3, FLOAT, 32_32_32_32, - IBV, 0), - C4A(R32G32B32A32_UNORM, NONE, C0, C1, C2, C3, UNORM, 32_32_32_32, TV, 0), - C4A(R32G32B32A32_SNORM, NONE, C0, C1, C2, C3, SNORM, 32_32_32_32, TV, 0), - C4A(R32G32B32A32_SINT, RGBA32_SINT, C0, C1, C2, C3, SINT, 32_32_32_32, - IRV, 0), - C4A(R32G32B32A32_UINT, RGBA32_UINT, C0, C1, C2, C3, UINT, 32_32_32_32, - IRV, 0), - F3B(R32G32B32X32_FLOAT, RGBX32_FLOAT, C0, C1, C2, xx, FLOAT, 32_32_32_32, TB), - I3B(R32G32B32X32_SINT, RGBX32_SINT, C0, C1, C2, xx, SINT, 32_32_32_32, TR), - I3B(R32G32B32X32_UINT, RGBX32_UINT, C0, C1, C2, xx, UINT, 32_32_32_32, TR), - - F2A(R32G32_FLOAT, RG32_FLOAT, C0, C1, xx, xx, FLOAT, 32_32, IBV), - F2A(R32G32_UNORM, NONE, C0, C1, xx, xx, UNORM, 32_32, TV), - F2A(R32G32_SNORM, NONE, C0, C1, xx, xx, SNORM, 32_32, TV), - I2A(R32G32_SINT, RG32_SINT, C0, C1, xx, xx, SINT, 32_32, IRV), - I2A(R32G32_UINT, RG32_UINT, C0, C1, xx, xx, UINT, 32_32, IRV), - - 
F1A(R32_FLOAT, R32_FLOAT, C0, xx, xx, xx, FLOAT, 32, IBV), - F1A(R32_UNORM, NONE, C0, xx, xx, xx, UNORM, 32, TV), - F1A(R32_SNORM, NONE, C0, xx, xx, xx, SNORM, 32, TV), - I1A(R32_SINT, R32_SINT, C0, xx, xx, xx, SINT, 32, IRV), - I1A(R32_UINT, R32_UINT, C0, xx, xx, xx, UINT, 32, IRV), - - C4A(R16G16B16A16_FLOAT, RGBA16_FLOAT, C0, C1, C2, C3, FLOAT, 16_16_16_16, - IBV, 0), - C4A(R16G16B16A16_UNORM, RGBA16_UNORM, C0, C1, C2, C3, UNORM, 16_16_16_16, - ICV, 0), - C4A(R16G16B16A16_SNORM, RGBA16_SNORM, C0, C1, C2, C3, SNORM, 16_16_16_16, - ICV, 0), - C4A(R16G16B16A16_SINT, RGBA16_SINT, C0, C1, C2, C3, SINT, 16_16_16_16, - IRV, 0), - C4A(R16G16B16A16_UINT, RGBA16_UINT, C0, C1, C2, C3, UINT, 16_16_16_16, - IRV, 0), - F3B(R16G16B16X16_FLOAT, RGBX16_FLOAT, C0, C1, C2, xx, FLOAT, 16_16_16_16, TB), - F3B(R16G16B16X16_UNORM, RGBA16_UNORM, C0, C1, C2, xx, UNORM, 16_16_16_16, T), - F3B(R16G16B16X16_SNORM, RGBA16_SNORM, C0, C1, C2, xx, SNORM, 16_16_16_16, T), - I3B(R16G16B16X16_SINT, RGBA16_SINT, C0, C1, C2, xx, SINT, 16_16_16_16, T), - I3B(R16G16B16X16_UINT, RGBA16_UINT, C0, C1, C2, xx, UINT, 16_16_16_16, T), - - F2A(R16G16_FLOAT, RG16_FLOAT, C0, C1, xx, xx, FLOAT, 16_16, IBV), - F2A(R16G16_UNORM, RG16_UNORM, C0, C1, xx, xx, UNORM, 16_16, ICV), - F2A(R16G16_SNORM, RG16_SNORM, C0, C1, xx, xx, SNORM, 16_16, ICV), - I2A(R16G16_SINT, RG16_SINT, C0, C1, xx, xx, SINT, 16_16, IRV), - I2A(R16G16_UINT, RG16_UINT, C0, C1, xx, xx, UINT, 16_16, IRV), - - F1A(R16_FLOAT, R16_FLOAT, C0, xx, xx, xx, FLOAT, 16, IBV), - F1A(R16_UNORM, R16_UNORM, C0, xx, xx, xx, UNORM, 16, ICV), - F1A(R16_SNORM, R16_SNORM, C0, xx, xx, xx, SNORM, 16, ICV), - I1A(R16_SINT, R16_SINT, C0, xx, xx, xx, SINT, 16, IRV), - I1A(R16_UINT, R16_UINT, C0, xx, xx, xx, UINT, 16, IRV), - - C4A(R8G8B8A8_SNORM, RGBA8_SNORM, C0, C1, C2, C3, SNORM, 8_8_8_8, ICV, 0), - C4A(R8G8B8A8_SINT, RGBA8_SINT, C0, C1, C2, C3, SINT, 8_8_8_8, IRV, 0), - C4A(R8G8B8A8_UINT, RGBA8_UINT, C0, C1, C2, C3, UINT, 8_8_8_8, IRV, 0), - F3B(R8G8B8X8_SNORM, 
RGBA8_SNORM, C0, C1, C2, xx, SNORM, 8_8_8_8, T), - I3B(R8G8B8X8_SINT, RGBA8_SINT, C0, C1, C2, xx, SINT, 8_8_8_8, T), - I3B(R8G8B8X8_UINT, RGBA8_UINT, C0, C1, C2, xx, UINT, 8_8_8_8, T), - - F2A(R8G8_UNORM, RG8_UNORM, C0, C1, xx, xx, UNORM, 8_8, IBV), - F2A(R8G8_SNORM, RG8_SNORM, C0, C1, xx, xx, SNORM, 8_8, ICV), - I2A(R8G8_SINT, RG8_SINT, C0, C1, xx, xx, SINT, 8_8, IRV), - I2A(R8G8_UINT, RG8_UINT, C0, C1, xx, xx, UINT, 8_8, IRV), - - F1A(R8_UNORM, R8_UNORM, C0, xx, xx, xx, UNORM, 8, IBV), - F1A(R8_SNORM, R8_SNORM, C0, xx, xx, xx, SNORM, 8, ICV), - I1A(R8_SINT, R8_SINT, C0, xx, xx, xx, SINT, 8, IRV), - I1A(R8_UINT, R8_UINT, C0, xx, xx, xx, UINT, 8, IRV), - - F3B(R8G8_B8G8_UNORM, NONE, C0, C1, C2, xx, UNORM, U8_YA8_V8_YB8, T), - F3B(G8R8_B8R8_UNORM, NONE, C1, C0, C2, xx, UNORM, U8_YA8_V8_YB8, T), - F3B(G8R8_G8B8_UNORM, NONE, C0, C1, C2, xx, UNORM, YA8_U8_YB8_V8, T), - F3B(R8G8_R8B8_UNORM, NONE, C1, C0, C2, xx, UNORM, YA8_U8_YB8_V8, T), - - F1B(R1_UNORM, BITMAP, C0, xx, xx, xx, UNORM, BITMAP, T), - - C4B(R4A4_UNORM, NONE, C0, ZERO, ZERO, C1, UNORM, 4_4, T), - C4B(R8A8_UNORM, NONE, C0, ZERO, ZERO, C1, UNORM, 8_8, T), - C4B(A4R4_UNORM, NONE, C1, ZERO, ZERO, C0, UNORM, 4_4, T), - C4B(A8R8_UNORM, NONE, C1, ZERO, ZERO, C0, UNORM, 8_8, T), - - TBLENT_B_(R8SG8SB8UX8U_NORM, 0, - C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 8_8_8_8, T), - TBLENT_B_(R5SG5SB6U_NORM, 0, - C0, C1, C2, ONE_FLOAT, SNORM, SNORM, UNORM, UNORM, 5_5_6, T), - - /* vertex-only formats: */ - - C4A(R32G32B32A32_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 32_32_32_32, V, 0), - C4A(R32G32B32A32_USCALED, NONE, C0, C1, C2, C3, USCALED, 32_32_32_32, V, 0), - F3A(R32G32B32_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, tV), - F3A(R32G32B32_UNORM, NONE, C0, C1, C2, xx, UNORM, 32_32_32, V), - F3A(R32G32B32_SNORM, NONE, C0, C1, C2, xx, SNORM, 32_32_32, V), - I3A(R32G32B32_SINT, NONE, C0, C1, C2, xx, SINT, 32_32_32, tV), - I3A(R32G32B32_UINT, NONE, C0, C1, C2, xx, UINT, 32_32_32, tV), - F3A(R32G32B32_SSCALED, NONE, 
C0, C1, C2, xx, SSCALED, 32_32_32, V), - F3A(R32G32B32_USCALED, NONE, C0, C1, C2, xx, USCALED, 32_32_32, V), - F2A(R32G32_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 32_32, V), - F2A(R32G32_USCALED, NONE, C0, C1, xx, xx, USCALED, 32_32, V), - F1A(R32_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 32, V), - F1A(R32_USCALED, NONE, C0, xx, xx, xx, USCALED, 32, V), - - C4A(R16G16B16A16_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 16_16_16_16, V, 0), - C4A(R16G16B16A16_USCALED, NONE, C0, C1, C2, C3, USCALED, 16_16_16_16, V, 0), - F3A(R16G16B16_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 16_16_16, V), - F3A(R16G16B16_UNORM, NONE, C0, C1, C2, xx, UNORM, 16_16_16, V), - F3A(R16G16B16_SNORM, NONE, C0, C1, C2, xx, SNORM, 16_16_16, V), - I3A(R16G16B16_SINT, NONE, C0, C1, C2, xx, SINT, 16_16_16, V), - I3A(R16G16B16_UINT, NONE, C0, C1, C2, xx, UINT, 16_16_16, V), - F3A(R16G16B16_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 16_16_16, V), - F3A(R16G16B16_USCALED, NONE, C0, C1, C2, xx, USCALED, 16_16_16, V), - F2A(R16G16_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 16_16, V), - F2A(R16G16_USCALED, NONE, C0, C1, xx, xx, USCALED, 16_16, V), - F1A(R16_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 16, V), - F1A(R16_USCALED, NONE, C0, xx, xx, xx, USCALED, 16, V), - - C4A(R10G10B10A2_USCALED, NONE, C0, C1, C2, C3, USCALED, 10_10_10_2, V, 0), - C4A(R10G10B10A2_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 10_10_10_2, V, 0), - C4A(B10G10R10A2_USCALED, NONE, C0, C1, C2, C3, USCALED, 10_10_10_2, V, 1), - C4A(B10G10R10A2_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 10_10_10_2, V, 1), - - C4A(R8G8B8A8_SSCALED, NONE, C0, C1, C2, C3, SSCALED, 8_8_8_8, V, 0), - C4A(R8G8B8A8_USCALED, NONE, C0, C1, C2, C3, USCALED, 8_8_8_8, V, 0), - F3A(R8G8B8_UNORM, NONE, C0, C1, C2, xx, UNORM, 8_8_8, V), - F3A(R8G8B8_SNORM, NONE, C0, C1, C2, xx, SNORM, 8_8_8, V), - I2A(R8G8B8_SINT, NONE, C0, C1, C2, xx, SINT, 8_8_8, V), - I2A(R8G8B8_UINT, NONE, C0, C1, C2, xx, UINT, 8_8_8, V), - F3A(R8G8B8_SSCALED, NONE, C0, C1, C2, xx, SSCALED, 8_8_8, V), - F3A(R8G8B8_USCALED, 
NONE, C0, C1, C2, xx, USCALED, 8_8_8, V), - F2A(R8G8_SSCALED, NONE, C0, C1, xx, xx, SSCALED, 8_8, V), - F2A(R8G8_USCALED, NONE, C0, C1, xx, xx, USCALED, 8_8, V), - F1A(R8_SSCALED, NONE, C0, xx, xx, xx, SSCALED, 8, V), - F1A(R8_USCALED, NONE, C0, xx, xx, xx, USCALED, 8, V), + VF(A, B8G8R8A8_UNORM, UNORM, 8_8_8_8, 1), + VF(A, R8G8B8A8_UNORM, UNORM, 8_8_8_8, 0), + + VF(A, R10G10B10A2_UNORM, UNORM, 10_10_10_2, 0), + VF(A, B10G10R10A2_UNORM, UNORM, 10_10_10_2, 1), + VF(A, R10G10B10A2_SNORM, SNORM, 10_10_10_2, 0), + VF(A, B10G10R10A2_SNORM, SNORM, 10_10_10_2, 1), + VF(A, R10G10B10A2_UINT, UINT, 10_10_10_2, 0), + VF(A, B10G10R10A2_UINT, UINT, 10_10_10_2, 0), + + VF(A, R11G11B10_FLOAT, FLOAT, 11_11_10, 0), + + VF(A, R32G32B32A32_FLOAT, FLOAT, 32_32_32_32, 0), + VF(A, R32G32B32A32_UNORM, UNORM, 32_32_32_32, 0), + VF(A, R32G32B32A32_SNORM, SNORM, 32_32_32_32, 0), + VF(A, R32G32B32A32_SINT, SINT, 32_32_32_32, 0), + VF(A, R32G32B32A32_UINT, UINT, 32_32_32_32, 0), + + VF(A, R32G32_FLOAT, FLOAT, 32_32, 0), + VF(A, R32G32_UNORM, UNORM, 32_32, 0), + VF(A, R32G32_SNORM, SNORM, 32_32, 0), + VF(A, R32G32_SINT, SINT, 32_32, 0), + VF(A, R32G32_UINT, UINT, 32_32, 0), + + VF(A, R32_FLOAT, FLOAT, 32, 0), + VF(A, R32_UNORM, UNORM, 32, 0), + VF(A, R32_SNORM, SNORM, 32, 0), + VF(A, R32_SINT, SINT, 32, 0), + VF(A, R32_UINT, UINT, 32, 0), + + VF(A, R16G16B16A16_FLOAT, FLOAT, 16_16_16_16, 0), + VF(A, R16G16B16A16_UNORM, UNORM, 16_16_16_16, 0), + VF(A, R16G16B16A16_SNORM, SNORM, 16_16_16_16, 0), + VF(A, R16G16B16A16_SINT, SINT, 16_16_16_16, 0), + VF(A, R16G16B16A16_UINT, UINT, 16_16_16_16, 0), + + VF(A, R16G16_FLOAT, FLOAT, 16_16, 0), + VF(A, R16G16_UNORM, UNORM, 16_16, 0), + VF(A, R16G16_SNORM, SNORM, 16_16, 0), + VF(A, R16G16_SINT, SINT, 16_16, 0), + VF(A, R16G16_UINT, UINT, 16_16, 0), + + VF(A, R16_FLOAT, FLOAT, 16, 0), + VF(A, R16_UNORM, UNORM, 16, 0), + VF(A, R16_SNORM, SNORM, 16, 0), + VF(A, R16_SINT, SINT, 16, 0), + VF(A, R16_UINT, UINT, 16, 0), + + VF(A, R8G8B8A8_SNORM, SNORM, 8_8_8_8, 
0), + VF(A, R8G8B8A8_SINT, SINT, 8_8_8_8, 0), + VF(A, R8G8B8A8_UINT, UINT, 8_8_8_8, 0), + + VF(A, R8G8_UNORM, UNORM, 8_8, 0), + VF(A, R8G8_SNORM, SNORM, 8_8, 0), + VF(A, R8G8_SINT, SINT, 8_8, 0), + VF(A, R8G8_UINT, UINT, 8_8, 0), + + VF(A, R8_UNORM, UNORM, 8, 0), + VF(A, R8_SNORM, SNORM, 8, 0), + VF(A, R8_SINT, SINT, 8, 0), + VF(A, R8_UINT, UINT, 8, 0), + + VF(A, R32G32B32A32_SSCALED, SSCALED, 32_32_32_32, 0), + VF(A, R32G32B32A32_USCALED, USCALED, 32_32_32_32, 0), + VF(A, R32G32B32_FLOAT, FLOAT, 32_32_32, 0), + VF(A, R32G32B32_UNORM, UNORM, 32_32_32, 0), + VF(A, R32G32B32_SNORM, SNORM, 32_32_32, 0), + VF(A, R32G32B32_SINT, SINT, 32_32_32, 0), + VF(A, R32G32B32_UINT, UINT, 32_32_32, 0), + VF(A, R32G32B32_SSCALED, SSCALED, 32_32_32, 0), + VF(A, R32G32B32_USCALED, USCALED, 32_32_32, 0), + VF(A, R32G32_SSCALED, SSCALED, 32_32, 0), + VF(A, R32G32_USCALED, USCALED, 32_32, 0), + VF(A, R32_SSCALED, SSCALED, 32, 0), + VF(A, R32_USCALED, USCALED, 32, 0), + + VF(A, R16G16B16A16_SSCALED, SSCALED, 16_16_16_16, 0), + VF(A, R16G16B16A16_USCALED, USCALED, 16_16_16_16, 0), + VF(A, R16G16B16_FLOAT, FLOAT, 16_16_16, 0), + VF(A, R16G16B16_UNORM, UNORM, 16_16_16, 0), + VF(A, R16G16B16_SNORM, SNORM, 16_16_16, 0), + VF(A, R16G16B16_SINT, SINT, 16_16_16, 0), + VF(A, R16G16B16_UINT, UINT, 16_16_16, 0), + VF(A, R16G16B16_SSCALED, SSCALED, 16_16_16, 0), + VF(A, R16G16B16_USCALED, USCALED, 16_16_16, 0), + VF(A, R16G16_SSCALED, SSCALED, 16_16, 0), + VF(A, R16G16_USCALED, USCALED, 16_16, 0), + VF(A, R16_SSCALED, SSCALED, 16, 0), + VF(A, R16_USCALED, USCALED, 16, 0), + + VF(A, R10G10B10A2_USCALED, USCALED, 10_10_10_2, 0), + VF(A, R10G10B10A2_SSCALED, SSCALED, 10_10_10_2, 0), + VF(A, B10G10R10A2_USCALED, USCALED, 10_10_10_2, 1), + VF(A, B10G10R10A2_SSCALED, SSCALED, 10_10_10_2, 1), + + VF(A, R8G8B8A8_SSCALED, SSCALED, 8_8_8_8, 0), + VF(A, R8G8B8A8_USCALED, USCALED, 8_8_8_8, 0), + VF(A, R8G8B8_UNORM, UNORM, 8_8_8, 0), + VF(A, R8G8B8_SNORM, SNORM, 8_8_8, 0), + VF(A, R8G8B8_SINT, SINT, 8_8_8, 0), + 
VF(A, R8G8B8_UINT, UINT, 8_8_8, 0), + VF(A, R8G8B8_SSCALED, SSCALED, 8_8_8, 0), + VF(A, R8G8B8_USCALED, USCALED, 8_8_8, 0), + VF(A, R8G8_SSCALED, SSCALED, 8_8, 0), + VF(A, R8G8_USCALED, USCALED, 8_8, 0), + VF(A, R8_SSCALED, SSCALED, 8, 0), + VF(A, R8_USCALED, USCALED, 8, 0), /* FIXED types: not supported natively, converted on VBO push */ - C4B(R32G32B32A32_FIXED, NONE, C0, C1, C2, C3, FLOAT, 32_32_32_32, V), - F3B(R32G32B32_FIXED, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, V), - F2B(R32G32_FIXED, NONE, C0, C1, xx, xx, FLOAT, 32_32, V), - F1B(R32_FIXED, NONE, C0, xx, xx, xx, FLOAT, 32, V), + VF(x, R32G32B32A32_FIXED, xx, xx, xx), + VF(x, R32G32B32_FIXED, xx, xx, xx), + VF(x, R32G32_FIXED, xx, xx, xx), + VF(x, R32_FIXED, xx, xx, xx), - C4B(R64G64B64A64_FLOAT, NONE, C0, C1, C2, C3, FLOAT, 32_32_32_32, V), - F3B(R64G64B64_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 32_32_32, V), - F2B(R64G64_FLOAT, NONE, C0, C1, xx, xx, FLOAT, 32_32, V), - F1B(R64_FLOAT, NONE, C0, xx, xx, xx, FLOAT, 32, V), + VF(x, R64G64B64A64_FLOAT, xx, xx, xx), + VF(x, R64G64B64_FLOAT, xx, xx, xx), + VF(x, R64G64_FLOAT, xx, xx, xx), + VF(x, R64_FLOAT, xx, xx, xx), }; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c index 79c7023b2d4..be19c0fdc85 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c @@ -218,11 +218,12 @@ nv50_hw_sm_end_query(struct nv50_context *nv50, struct nv50_hw_query *hq) struct pipe_context *pipe = &nv50->base.pipe; struct nouveau_pushbuf *push = nv50->base.pushbuf; struct nv50_hw_sm_query *hsq = nv50_hw_sm_query(hq); + struct pipe_grid_info info = {}; uint32_t mask; uint32_t input[3]; const uint block[3] = { 32, 1, 1 }; const uint grid[3] = { screen->MPsInTP, screen->TPs, 1 }; - int c; + int c, i; if (unlikely(!screen->pm.prog)) { struct nv50_program *prog = CALLOC_STRUCT(nv50_program); @@ -262,7 +263,14 @@ nv50_hw_sm_end_query(struct nv50_context 
*nv50, struct nv50_hw_query *hq) pipe->bind_compute_state(pipe, screen->pm.prog); input[0] = hq->bo->offset + hq->base_offset; input[1] = hq->sequence; - pipe->launch_grid(pipe, block, grid, 0, input); + + for (i = 0; i < 3; i++) { + info.block[i] = block[i]; + info.grid[i] = grid[i]; + } + info.pc = 0; + info.input = input; + pipe->launch_grid(pipe, &info); nouveau_bufctx_reset(nv50->bufctx_cp, NV50_BIND_CP_QUERY); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 14d0085975b..8d11dd7bf21 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -72,7 +72,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, PIPE_BIND_TRANSFER_WRITE | PIPE_BIND_SHARED); - return (nv50_format_table[format].usage & bindings) == bindings; + return (( nv50_format_table[format].usage | + nv50_vertex_format[format].usage) & bindings) == bindings; } static int @@ -263,8 +264,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_VERTEX: case PIPE_SHADER_GEOMETRY: case PIPE_SHADER_FRAGMENT: - case PIPE_SHADER_COMPUTE: break; + case PIPE_SHADER_COMPUTE: default: return 0; } @@ -315,6 +316,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_SUPPORTED_IRS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; @@ -562,7 +565,7 @@ nv50_screen_init_hwctx(struct nv50_screen *screen) if (screen->tesla->oclass >= NVA0_3D_CLASS) { BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1); - PUSH_DATA (push, NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP); + PUSH_DATA (push, 0); } BEGIN_NV04(push, NV50_3D(SCREEN_Y_CONTROL), 1); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.h 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.h index 2a4983d1020..cce92f0dd5e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h @@ -50,6 +50,7 @@ struct nv50_graph_state { uint8_t num_samplers[3]; uint8_t prim_size; uint16_t scissor; + bool seamless_cube_map; }; struct nv50_screen { @@ -156,12 +157,27 @@ nv50_resource_validate(struct nv04_resource *res, uint32_t flags) struct nv50_format { uint32_t rt; - uint32_t tic; + struct { + unsigned format:6; + unsigned type_r:3; + unsigned type_g:3; + unsigned type_b:3; + unsigned type_a:3; + unsigned src_x:3; + unsigned src_y:3; + unsigned src_z:3; + unsigned src_w:3; + } tic; + uint32_t usage; +}; + +struct nv50_vertex_format { uint32_t vtx; uint32_t usage; }; extern const struct nv50_format nv50_format_table[]; +extern const struct nv50_vertex_format nv50_vertex_format[]; static inline void nv50_screen_tic_unlock(struct nv50_screen *screen, struct nv50_tic_entry *tic) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index cb040439139..6a09808807a 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -21,6 +21,7 @@ */ #include "pipe/p_defines.h" +#include "util/u_framebuffer.h" #include "util/u_helpers.h" #include "util/u_inlines.h" #include "util/u_transfer.h" @@ -33,7 +34,7 @@ #include "nv50/nv50_query_hw.h" #include "nv50/nv50_3d.xml.h" -#include "nv50/nv50_texture.xml.h" +#include "nv50/g80_texture.xml.h" #include "nouveau_gldefs.h" @@ -437,24 +438,29 @@ nv50_zsa_state_delete(struct pipe_context *pipe, void *hwcso) /* ====================== SAMPLERS AND TEXTURES ================================ */ -#define NV50_TSC_WRAP_CASE(n) \ - case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n - static inline unsigned nv50_tsc_wrap_mode(unsigned wrap) { switch (wrap) { - NV50_TSC_WRAP_CASE(REPEAT); - NV50_TSC_WRAP_CASE(MIRROR_REPEAT); - 
NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE); - NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER); - NV50_TSC_WRAP_CASE(CLAMP); - NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE); - NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER); - NV50_TSC_WRAP_CASE(MIRROR_CLAMP); + case PIPE_TEX_WRAP_REPEAT: + return G80_TSC_WRAP_WRAP; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return G80_TSC_WRAP_MIRROR; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return G80_TSC_WRAP_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return G80_TSC_WRAP_BORDER; + case PIPE_TEX_WRAP_CLAMP: + return G80_TSC_WRAP_CLAMP_OGL; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return G80_TSC_WRAP_MIRROR_ONCE_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return G80_TSC_WRAP_MIRROR_ONCE_BORDER; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return G80_TSC_WRAP_MIRROR_ONCE_CLAMP_OGL; default: NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); - return NV50_TSC_WRAP_REPEAT; + return G80_TSC_WRAP_WRAP; } } @@ -474,42 +480,44 @@ nv50_sampler_state_create(struct pipe_context *pipe, switch (cso->mag_img_filter) { case PIPE_TEX_FILTER_LINEAR: - so->tsc[1] = NV50_TSC_1_MAGF_LINEAR; + so->tsc[1] = G80_TSC_1_MAG_FILTER_LINEAR; break; case PIPE_TEX_FILTER_NEAREST: default: - so->tsc[1] = NV50_TSC_1_MAGF_NEAREST; + so->tsc[1] = G80_TSC_1_MAG_FILTER_NEAREST; break; } switch (cso->min_img_filter) { case PIPE_TEX_FILTER_LINEAR: - so->tsc[1] |= NV50_TSC_1_MINF_LINEAR; + so->tsc[1] |= G80_TSC_1_MIN_FILTER_LINEAR; break; case PIPE_TEX_FILTER_NEAREST: default: - so->tsc[1] |= NV50_TSC_1_MINF_NEAREST; + so->tsc[1] |= G80_TSC_1_MIN_FILTER_NEAREST; break; } switch (cso->min_mip_filter) { case PIPE_TEX_MIPFILTER_LINEAR: - so->tsc[1] |= NV50_TSC_1_MIPF_LINEAR; + so->tsc[1] |= G80_TSC_1_MIP_FILTER_LINEAR; break; case PIPE_TEX_MIPFILTER_NEAREST: - so->tsc[1] |= NV50_TSC_1_MIPF_NEAREST; + so->tsc[1] |= G80_TSC_1_MIP_FILTER_NEAREST; break; case PIPE_TEX_MIPFILTER_NONE: default: - so->tsc[1] |= NV50_TSC_1_MIPF_NONE; + so->tsc[1] |= G80_TSC_1_MIP_FILTER_NONE; break; } if 
(nouveau_screen(pipe->screen)->class_3d >= NVE4_3D_CLASS) { if (cso->seamless_cube_map) - so->tsc[1] |= NVE4_TSC_1_CUBE_SEAMLESS; + so->tsc[1] |= GK104_TSC_1_CUBEMAP_INTERFACE_FILTERING; if (!cso->normalized_coords) - so->tsc[1] |= NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS; + so->tsc[1] |= GK104_TSC_1_FLOAT_COORD_NORMALIZATION_FORCE_UNNORMALIZED_COORDS; + } else { + so->seamless_cube_map = cso->seamless_cube_map; } if (cso->max_anisotropy >= 16) @@ -521,10 +529,10 @@ nv50_sampler_state_create(struct pipe_context *pipe, so->tsc[0] |= (cso->max_anisotropy >> 1) << 20; if (cso->max_anisotropy >= 4) - so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_35; + so->tsc[1] |= 6 << G80_TSC_1_TRILIN_OPT__SHIFT; else if (cso->max_anisotropy >= 2) - so->tsc[1] |= NV50_TSC_1_UNKN_ANISO_15; + so->tsc[1] |= 4 << G80_TSC_1_TRILIN_OPT__SHIFT; } if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { @@ -936,21 +944,10 @@ nv50_set_framebuffer_state(struct pipe_context *pipe, const struct pipe_framebuffer_state *fb) { struct nv50_context *nv50 = nv50_context(pipe); - unsigned i; nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_FB); - for (i = 0; i < fb->nr_cbufs; ++i) - pipe_surface_reference(&nv50->framebuffer.cbufs[i], fb->cbufs[i]); - for (; i < nv50->framebuffer.nr_cbufs; ++i) - pipe_surface_reference(&nv50->framebuffer.cbufs[i], NULL); - - nv50->framebuffer.nr_cbufs = fb->nr_cbufs; - - nv50->framebuffer.width = fb->width; - nv50->framebuffer.height = fb->height; - - pipe_surface_reference(&nv50->framebuffer.zsbuf, fb->zsbuf); + util_copy_framebuffer_state(&nv50->framebuffer, fb); nv50->dirty |= NV50_NEW_FRAMEBUFFER; } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c index 4af969997f2..55369781606 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c @@ -2,7 +2,6 @@ #include "util/u_format.h" #include "nv50/nv50_context.h" -#include 
"nv50/nv50_defs.xml.h" static inline void nv50_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h index e0793bb6ec4..6bc451450b1 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj_tex.h @@ -7,6 +7,7 @@ struct nv50_tsc_entry { int id; uint32_t tsc[8]; + bool seamless_cube_map; }; static inline struct nv50_tsc_entry * diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c index c69fa5abb98..4db73cb7fef 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c @@ -37,8 +37,8 @@ #include "nv50/nv50_context.h" #include "nv50/nv50_resource.h" -#include "nv50/nv50_defs.xml.h" -#include "nv50/nv50_texture.xml.h" +#include "nv50/g80_defs.xml.h" +#include "nv50/g80_texture.xml.h" /* these are used in nv50_blit.h */ #define NV50_ENG2D_SUPPORTED_FORMATS 0xff0843e080608409ULL @@ -64,15 +64,15 @@ nv50_2d_format(enum pipe_format format, bool dst, bool dst_src_equal) switch (util_format_get_blocksize(format)) { case 1: - return NV50_SURFACE_FORMAT_R8_UNORM; + return G80_SURFACE_FORMAT_R8_UNORM; case 2: - return NV50_SURFACE_FORMAT_R16_UNORM; + return G80_SURFACE_FORMAT_R16_UNORM; case 4: - return NV50_SURFACE_FORMAT_BGRA8_UNORM; + return G80_SURFACE_FORMAT_BGRA8_UNORM; case 8: - return NV50_SURFACE_FORMAT_RGBA16_FLOAT; + return G80_SURFACE_FORMAT_RGBA16_FLOAT; case 16: - return NV50_SURFACE_FORMAT_RGBA32_FLOAT; + return G80_SURFACE_FORMAT_RGBA32_FLOAT; default: return 0; } @@ -628,7 +628,7 @@ nv50_clear_buffer_push(struct pipe_context *pipe, offset &= ~0xff; BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2); - PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_2D(DST_PITCH), 5); PUSH_DATA (push, 
262144); @@ -638,7 +638,7 @@ nv50_clear_buffer_push(struct pipe_context *pipe, PUSH_DATA (push, buf->address + offset); BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2); PUSH_DATA (push, 0); - PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM); BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10); PUSH_DATA (push, size); PUSH_DATA (push, 1); @@ -997,12 +997,14 @@ nv50_blitter_make_sampler(struct nv50_blitter *blit) blit->sampler[0].id = -1; - blit->sampler[0].tsc[0] = NV50_TSC_0_SRGB_CONVERSION_ALLOWED | - (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPS__SHIFT) | - (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPT__SHIFT) | - (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPR__SHIFT); + blit->sampler[0].tsc[0] = G80_TSC_0_SRGB_CONVERSION | + (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_U__SHIFT) | + (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_V__SHIFT) | + (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_P__SHIFT); blit->sampler[0].tsc[1] = - NV50_TSC_1_MAGF_NEAREST | NV50_TSC_1_MINF_NEAREST | NV50_TSC_1_MIPF_NONE; + G80_TSC_1_MAG_FILTER_NEAREST | + G80_TSC_1_MIN_FILTER_NEAREST | + G80_TSC_1_MIP_FILTER_NONE; /* clamp to edge, min/max lod = 0, bilinear filtering */ @@ -1010,7 +1012,9 @@ nv50_blitter_make_sampler(struct nv50_blitter *blit) blit->sampler[1].tsc[0] = blit->sampler[0].tsc[0]; blit->sampler[1].tsc[1] = - NV50_TSC_1_MAGF_LINEAR | NV50_TSC_1_MINF_LINEAR | NV50_TSC_1_MIPF_NONE; + G80_TSC_1_MAG_FILTER_LINEAR | + G80_TSC_1_MIN_FILTER_LINEAR | + G80_TSC_1_MIP_FILTER_NONE; } unsigned diff --git a/src/gallium/drivers/nouveau/nv50/nv50_tex.c b/src/gallium/drivers/nouveau/nv50/nv50_tex.c index c3f433608df..4b69c3bd504 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_tex.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c @@ -22,32 +22,24 @@ #include "nv50/nv50_context.h" #include "nv50/nv50_resource.h" -#include "nv50/nv50_texture.xml.h" -#include "nv50/nv50_defs.xml.h" +#include "nv50/g80_texture.xml.h" +#include 
"nv50/g80_defs.xml.h" #include "util/u_format.h" -#define NV50_TIC_0_SWIZZLE__MASK \ - (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \ - NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK) - static inline uint32_t -nv50_tic_swizzle(uint32_t tc, unsigned swz, bool tex_int) +nv50_tic_swizzle(const struct nv50_format *fmt, unsigned swz, bool tex_int) { switch (swz) { - case PIPE_SWIZZLE_RED: - return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT; - case PIPE_SWIZZLE_GREEN: - return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT; - case PIPE_SWIZZLE_BLUE: - return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT; - case PIPE_SWIZZLE_ALPHA: - return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT; + case PIPE_SWIZZLE_RED : return fmt->tic.src_x; + case PIPE_SWIZZLE_GREEN: return fmt->tic.src_y; + case PIPE_SWIZZLE_BLUE : return fmt->tic.src_z; + case PIPE_SWIZZLE_ALPHA: return fmt->tic.src_w; case PIPE_SWIZZLE_ONE: - return tex_int ? NV50_TIC_MAP_ONE_INT : NV50_TIC_MAP_ONE_FLOAT; + return tex_int ? 
G80_TIC_SOURCE_ONE_INT : G80_TIC_SOURCE_ONE_FLOAT; case PIPE_SWIZZLE_ZERO: default: - return NV50_TIC_MAP_ZERO; + return G80_TIC_SOURCE_ZERO; } } @@ -73,6 +65,7 @@ nv50_create_texture_view(struct pipe_context *pipe, { const uint32_t class_3d = nouveau_context(pipe)->screen->class_3d; const struct util_format_description *desc; + const struct nv50_format *fmt; uint64_t addr; uint32_t *tic; uint32_t swz[4]; @@ -100,19 +93,23 @@ nv50_create_texture_view(struct pipe_context *pipe, /* TIC[0] */ - tic[0] = nv50_format_table[view->pipe.format].tic; + fmt = &nv50_format_table[view->pipe.format]; tex_int = util_format_is_pure_integer(view->pipe.format); - swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int); - swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int); - swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int); - swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int); - tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) | - (swz[0] << NV50_TIC_0_MAPR__SHIFT) | - (swz[1] << NV50_TIC_0_MAPG__SHIFT) | - (swz[2] << NV50_TIC_0_MAPB__SHIFT) | - (swz[3] << NV50_TIC_0_MAPA__SHIFT); + swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int); + swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int); + swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int); + swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int); + tic[0] = (fmt->tic.format << G80_TIC_0_COMPONENTS_SIZES__SHIFT) | + (fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) | + (fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) | + (fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) | + (fmt->tic.type_a << G80_TIC_0_A_DATA_TYPE__SHIFT) | + (swz[0] << G80_TIC_0_X_SOURCE__SHIFT) | + (swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) | + (swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) | + (swz[3] << G80_TIC_0_W_SOURCE__SHIFT); addr = mt->base.address; @@ -124,24 +121,24 @@ nv50_create_texture_view(struct pipe_context *pipe, depth = view->pipe.u.tex.last_layer - 
view->pipe.u.tex.first_layer + 1; } - tic[2] = 0x10001000 | NV50_TIC_2_NO_BORDER; + tic[2] = 0x10001000 | G80_TIC_2_BORDER_SOURCE_COLOR; if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - tic[2] |= NV50_TIC_2_COLORSPACE_SRGB; + tic[2] |= G80_TIC_2_SRGB_CONVERSION; if (!(flags & NV50_TEXVIEW_SCALED_COORDS)) - tic[2] |= NV50_TIC_2_NORMALIZED_COORDS; + tic[2] |= G80_TIC_2_NORMALIZED_COORDS; if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) { if (target == PIPE_BUFFER) { addr += view->pipe.u.buf.first_element * desc->block.bits / 8; - tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_BUFFER; + tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER; tic[3] = 0; tic[4] = /* width */ view->pipe.u.buf.last_element - view->pipe.u.buf.first_element + 1; tic[5] = 0; } else { - tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_RECT; + tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP; tic[3] = mt->level[0].pitch; tic[4] = mt->base.base.width0; tic[5] = (1 << 16) | (mt->base.base.height0); @@ -162,34 +159,34 @@ nv50_create_texture_view(struct pipe_context *pipe, switch (target) { case PIPE_TEXTURE_1D: - tic[2] |= NV50_TIC_2_TARGET_1D; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D; break; case PIPE_TEXTURE_2D: - tic[2] |= NV50_TIC_2_TARGET_2D; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D; break; case PIPE_TEXTURE_RECT: - tic[2] |= NV50_TIC_2_TARGET_RECT; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP; break; case PIPE_TEXTURE_3D: - tic[2] |= NV50_TIC_2_TARGET_3D; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_THREE_D; break; case PIPE_TEXTURE_CUBE: depth /= 6; - tic[2] |= NV50_TIC_2_TARGET_CUBE; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBEMAP; break; case PIPE_TEXTURE_1D_ARRAY: - tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY; break; case PIPE_TEXTURE_2D_ARRAY: - tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY; break; case PIPE_TEXTURE_CUBE_ARRAY: depth /= 6; - tic[2] |= 
NV50_TIC_2_TARGET_CUBE_ARRAY; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY; break; case PIPE_BUFFER: assert(0); /* should be linear and handled above ! */ - tic[2] |= NV50_TIC_2_TARGET_BUFFER | NV50_TIC_2_LINEAR; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER | G80_TIC_2_LAYOUT_PITCH; break; default: unreachable("unexpected/invalid texture target"); @@ -202,9 +199,9 @@ nv50_create_texture_view(struct pipe_context *pipe, tic[5] = (mt->base.base.height0 << mt->ms_y) & 0xffff; tic[5] |= depth << 16; if (class_3d > NV50_3D_CLASS) - tic[5] |= mt->base.base.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT; + tic[5] |= mt->base.base.last_level << G80_TIC_5_MAP_MIP_LEVEL__SHIFT; else - tic[5] |= view->pipe.u.tex.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT; + tic[5] |= view->pipe.u.tex.last_level << G80_TIC_5_MAP_MIP_LEVEL__SHIFT; tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000; /* sampling points */ @@ -213,9 +210,9 @@ nv50_create_texture_view(struct pipe_context *pipe, else tic[7] = 0; - if (unlikely(!(tic[2] & NV50_TIC_2_NORMALIZED_COORDS))) + if (unlikely(!(tic[2] & G80_TIC_2_NORMALIZED_COORDS))) if (mt->base.base.last_level) - tic[5] &= ~NV50_TIC_5_LAST_LEVEL__MASK; + tic[5] &= ~G80_TIC_5_MAP_MIP_LEVEL__MASK; return &view->pipe; } @@ -265,7 +262,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s) tic->id = nv50_screen_tic_alloc(nv50->screen, tic); BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2); - PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_2D(DST_PITCH), 5); PUSH_DATA (push, 262144); @@ -275,7 +272,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s) PUSH_DATA (push, txc->offset); BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2); PUSH_DATA (push, 0); - PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM); BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10); PUSH_DATA (push, 32); PUSH_DATA (push, 1); @@ -364,6 +361,7 @@ 
nv50_validate_tsc(struct nv50_context *nv50, int s) PUSH_DATA (push, (i << 4) | 0); continue; } + nv50->seamless_cube_map = tsc->seamless_cube_map; if (tsc->id < 0) { tsc->id = nv50_screen_tsc_alloc(nv50->screen, tsc); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h deleted file mode 100644 index a2b9921f647..00000000000 --- a/src/gallium/drivers/nouveau/nv50/nv50_texture.xml.h +++ /dev/null @@ -1,306 +0,0 @@ -#ifndef NV50_TEXTURE_XML -#define NV50_TEXTURE_XML - -/* Autogenerated file, DO NOT EDIT manually! - -This file was generated by the rules-ng-ng headergen tool in this git repository: -http://github.com/envytools/envytools/ -git clone https://github.com/envytools/envytools.git - -The rules-ng-ng source files this header was generated from are: -- rnndb/graph/g80_texture.xml ( 8881 bytes, from 2014-09-25 06:32:11) -- rnndb/copyright.xml ( 6452 bytes, from 2013-05-14 03:57:49) -- rnndb/nvchipsets.xml ( 2759 bytes, from 2014-10-05 01:51:02) -- rnndb/g80_defs.xml ( 18175 bytes, from 2014-09-25 06:32:11) - -Copyright (C) 2006-2014 by the following authors: -- Artur Huillet <[email protected]> (ahuillet) -- Ben Skeggs (darktama, darktama_) -- B. R. 
<[email protected]> (koala_br) -- Carlos Martin <[email protected]> (carlosmn) -- Christoph Bumiller <[email protected]> (calim, chrisbmr) -- Dawid Gajownik <[email protected]> (gajownik) -- Dmitry Baryshkov -- Dmitry Eremin-Solenikov <[email protected]> (lumag) -- EdB <[email protected]> (edb_) -- Erik Waling <[email protected]> (erikwaling) -- Francisco Jerez <[email protected]> (curro) -- imirkin <[email protected]> (imirkin) -- jb17bsome <[email protected]> (jb17bsome) -- Jeremy Kolb <[email protected]> (kjeremy) -- Laurent Carlier <[email protected]> (lordheavy) -- Luca Barbieri <[email protected]> (lb, lb1) -- Maarten Maathuis <[email protected]> (stillunknown) -- Marcin Kościelnicki <[email protected]> (mwk, koriakin) -- Mark Carey <[email protected]> (careym) -- Matthieu Castet <[email protected]> (mat-c) -- nvidiaman <[email protected]> (nvidiaman) -- Patrice Mandin <[email protected]> (pmandin, pmdata) -- Pekka Paalanen <[email protected]> (pq, ppaalanen) -- Peter Popov <[email protected]> (ironpeter) -- Richard Hughes <[email protected]> (hughsient) -- Rudi Cilibrasi <[email protected]> (cilibrar) -- Serge Martin -- Simon Raffeiner -- Stephane Loeuillet <[email protected]> (leroutier) -- Stephane Marchesin <[email protected]> (marcheu) -- sturmflut <[email protected]> (sturmflut) -- Sylvain Munaut <[email protected]> -- Victor Stinner <[email protected]> (haypo) -- Wladmir van der Laan <[email protected]> (miathan6) -- Younes Manton <[email protected]> (ymanton) - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice 
(including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - - -#define NV50_TIC_MAP_ZERO 0x00000000 -#define NV50_TIC_MAP_C0 0x00000002 -#define NV50_TIC_MAP_C1 0x00000003 -#define NV50_TIC_MAP_C2 0x00000004 -#define NV50_TIC_MAP_C3 0x00000005 -#define NV50_TIC_MAP_ONE_INT 0x00000006 -#define NV50_TIC_MAP_ONE_FLOAT 0x00000007 -#define NV50_TIC_TYPE_SNORM 0x00000001 -#define NV50_TIC_TYPE_UNORM 0x00000002 -#define NV50_TIC_TYPE_SINT 0x00000003 -#define NV50_TIC_TYPE_UINT 0x00000004 -#define NV50_TIC_TYPE_SSCALED 0x00000005 -#define NV50_TIC_TYPE_USCALED 0x00000006 -#define NV50_TIC_TYPE_FLOAT 0x00000007 -#define NV50_TSC_WRAP_REPEAT 0x00000000 -#define NV50_TSC_WRAP_MIRROR_REPEAT 0x00000001 -#define NV50_TSC_WRAP_CLAMP_TO_EDGE 0x00000002 -#define NV50_TSC_WRAP_CLAMP_TO_BORDER 0x00000003 -#define NV50_TSC_WRAP_CLAMP 0x00000004 -#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_EDGE 0x00000005 -#define NV50_TSC_WRAP_MIRROR_CLAMP_TO_BORDER 0x00000006 -#define NV50_TSC_WRAP_MIRROR_CLAMP 0x00000007 -#define NV50_TIC__SIZE 0x00000020 -#define NV50_TIC_0 0x00000000 -#define NV50_TIC_0_MAPA__MASK 0x38000000 -#define NV50_TIC_0_MAPA__SHIFT 27 -#define NV50_TIC_0_MAPB__MASK 0x07000000 -#define NV50_TIC_0_MAPB__SHIFT 24 -#define NV50_TIC_0_MAPG__MASK 0x00e00000 -#define NV50_TIC_0_MAPG__SHIFT 21 -#define NV50_TIC_0_MAPR__MASK 0x001c0000 -#define NV50_TIC_0_MAPR__SHIFT 18 -#define NV50_TIC_0_TYPE3__MASK 0x00038000 -#define NV50_TIC_0_TYPE3__SHIFT 15 -#define 
NV50_TIC_0_TYPE2__MASK 0x00007000 -#define NV50_TIC_0_TYPE2__SHIFT 12 -#define NV50_TIC_0_TYPE1__MASK 0x00000e00 -#define NV50_TIC_0_TYPE1__SHIFT 9 -#define NV50_TIC_0_TYPE0__MASK 0x000001c0 -#define NV50_TIC_0_TYPE0__SHIFT 6 -#define NV50_TIC_0_FMT__MASK 0x0000003f -#define NV50_TIC_0_FMT__SHIFT 0 -#define NV50_TIC_0_FMT_32_32_32_32 0x00000001 -#define NVC0_TIC_0_FMT_32_32_32 0x00000002 -#define NV50_TIC_0_FMT_16_16_16_16 0x00000003 -#define NV50_TIC_0_FMT_32_32 0x00000004 -#define NV50_TIC_0_FMT_32_8_X24 0x00000005 -#define NV50_TIC_0_FMT_8_8_8_8 0x00000008 -#define NV50_TIC_0_FMT_10_10_10_2 0x00000009 -#define NV50_TIC_0_FMT_16_16 0x0000000c -#define NV50_TIC_0_FMT_24_8 0x0000000d -#define NV50_TIC_0_FMT_8_24 0x0000000e -#define NV50_TIC_0_FMT_32 0x0000000f -#define NVC0_TIC_0_FMT_BPTC_FLOAT 0x00000010 -#define NVC0_TIC_0_FMT_BPTC_UFLOAT 0x00000011 -#define NV50_TIC_0_FMT_4_4_4_4 0x00000012 -#define NV50_TIC_0_FMT_1_5_5_5 0x00000013 -#define NV50_TIC_0_FMT_5_5_5_1 0x00000014 -#define NV50_TIC_0_FMT_5_6_5 0x00000015 -#define NV50_TIC_0_FMT_5_5_6 0x00000016 -#define NVC0_TIC_0_FMT_BPTC 0x00000017 -#define NV50_TIC_0_FMT_8_8 0x00000018 -#define NV50_TIC_0_FMT_16 0x0000001b -#define NV50_TIC_0_FMT_8 0x0000001d -#define NV50_TIC_0_FMT_4_4 0x0000001e -#define NV50_TIC_0_FMT_BITMAP 0x0000001f -#define NV50_TIC_0_FMT_9_9_9_E5 0x00000020 -#define NV50_TIC_0_FMT_11_11_10 0x00000021 -#define NV50_TIC_0_FMT_U8_YA8_V8_YB8 0x00000022 -#define NV50_TIC_0_FMT_YA8_U8_YB8_V8 0x00000023 -#define NV50_TIC_0_FMT_DXT1 0x00000024 -#define NV50_TIC_0_FMT_DXT3 0x00000025 -#define NV50_TIC_0_FMT_DXT5 0x00000026 -#define NV50_TIC_0_FMT_RGTC1 0x00000027 -#define NV50_TIC_0_FMT_RGTC2 0x00000028 -#define NV50_TIC_0_FMT_S8_Z24 0x00000029 -#define NV50_TIC_0_FMT_Z24_X8 0x0000002a -#define NV50_TIC_0_FMT_Z24_S8 0x0000002b -#define NV50_TIC_0_FMT_Z24_C8_MS4_CS4 0x0000002c -#define NV50_TIC_0_FMT_Z24_C8_MS8_CS8 0x0000002d -#define NV50_TIC_0_FMT_Z24_C8_MS4_CS12 0x0000002e -#define 
NV50_TIC_0_FMT_Z32 0x0000002f -#define NV50_TIC_0_FMT_Z32_S8_X24 0x00000030 -#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS4_CS4 0x00000031 -#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS8_CS8 0x00000032 -#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS4_CS4 0x00000033 -#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS8_CS8 0x00000034 -#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS4_CS4 0x00000035 -#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS8_CS8 0x00000036 -#define NV50_TIC_0_FMT_Z24_X8_S8_C8_X16_MS4_CS12 0x00000037 -#define NV50_TIC_0_FMT_Z32_X8_C8_X16_MS4_CS12 0x00000038 -#define NV50_TIC_0_FMT_Z32_S8_C8_X16_MS4_CS12 0x00000039 -#define NV50_TIC_0_FMT_Z16 0x0000003a - -#define NV50_TIC_1 0x00000004 -#define NV50_TIC_1_OFFSET_LOW__MASK 0xffffffff -#define NV50_TIC_1_OFFSET_LOW__SHIFT 0 - -#define NV50_TIC_2 0x00000008 -#define NV50_TIC_2_OFFSET_HIGH__MASK 0x000000ff -#define NV50_TIC_2_OFFSET_HIGH__SHIFT 0 -#define NV50_TIC_2_COLORSPACE_SRGB 0x00000400 -#define NV50_TIC_2_TARGET__MASK 0x0003c000 -#define NV50_TIC_2_TARGET__SHIFT 14 -#define NV50_TIC_2_TARGET_1D 0x00000000 -#define NV50_TIC_2_TARGET_2D 0x00004000 -#define NV50_TIC_2_TARGET_3D 0x00008000 -#define NV50_TIC_2_TARGET_CUBE 0x0000c000 -#define NV50_TIC_2_TARGET_1D_ARRAY 0x00010000 -#define NV50_TIC_2_TARGET_2D_ARRAY 0x00014000 -#define NV50_TIC_2_TARGET_BUFFER 0x00018000 -#define NV50_TIC_2_TARGET_RECT 0x0001c000 -#define NV50_TIC_2_TARGET_CUBE_ARRAY 0x00020000 -#define NV50_TIC_2_LINEAR 0x00040000 -#define NV50_TIC_2_TILE_MODE_X__MASK 0x00380000 -#define NV50_TIC_2_TILE_MODE_X__SHIFT 19 -#define NV50_TIC_2_TILE_MODE_Y__MASK 0x01c00000 -#define NV50_TIC_2_TILE_MODE_Y__SHIFT 22 -#define NV50_TIC_2_TILE_MODE_Z__MASK 0x0e000000 -#define NV50_TIC_2_TILE_MODE_Z__SHIFT 25 -#define NV50_TIC_2_2D_UNK0258__MASK 0x30000000 -#define NV50_TIC_2_2D_UNK0258__SHIFT 28 -#define NV50_TIC_2_NO_BORDER 0x40000000 -#define NV50_TIC_2_NORMALIZED_COORDS 0x80000000 - -#define NV50_TIC_3 0x0000000c -#define NV50_TIC_3_PITCH__MASK 0xffffffff -#define 
NV50_TIC_3_PITCH__SHIFT 0 - -#define NV50_TIC_4 0x00000010 -#define NV50_TIC_4_WIDTH__MASK 0xffffffff -#define NV50_TIC_4_WIDTH__SHIFT 0 - -#define NV50_TIC_5 0x00000014 -#define NV50_TIC_5_LAST_LEVEL__MASK 0xf0000000 -#define NV50_TIC_5_LAST_LEVEL__SHIFT 28 -#define NV50_TIC_5_DEPTH__MASK 0x0fff0000 -#define NV50_TIC_5_DEPTH__SHIFT 16 -#define NV50_TIC_5_HEIGHT__MASK 0x0000ffff -#define NV50_TIC_5_HEIGHT__SHIFT 0 - -#define NV50_TIC_7 0x0000001c -#define NV50_TIC_7_BASE_LEVEL__MASK 0x0000000f -#define NV50_TIC_7_BASE_LEVEL__SHIFT 0 -#define NV50_TIC_7_MAX_LEVEL__MASK 0x000000f0 -#define NV50_TIC_7_MAX_LEVEL__SHIFT 4 -#define NV50_TIC_7_MS_MODE__MASK 0x0000f000 -#define NV50_TIC_7_MS_MODE__SHIFT 12 -#define NV50_TIC_7_MS_MODE_MS1 0x00000000 -#define NV50_TIC_7_MS_MODE_MS2 0x00001000 -#define NV50_TIC_7_MS_MODE_MS4 0x00002000 -#define NV50_TIC_7_MS_MODE_MS8 0x00003000 -#define NVA3_TIC_7_MS_MODE_MS8_ALT 0x00004000 -#define NVA3_TIC_7_MS_MODE_MS2_ALT 0x00005000 -#define NVC0_TIC_7_MS_MODE_UNK6 0x00006000 -#define NV50_TIC_7_MS_MODE_MS4_CS4 0x00008000 -#define NV50_TIC_7_MS_MODE_MS4_CS12 0x00009000 -#define NV50_TIC_7_MS_MODE_MS8_CS8 0x0000a000 -#define NVC0_TIC_7_MS_MODE_MS8_CS24 0x0000b000 - -#define NV50_TSC__SIZE 0x00000020 -#define NV50_TSC_0 0x00000000 -#define NV50_TSC_0_WRAPS__MASK 0x00000007 -#define NV50_TSC_0_WRAPS__SHIFT 0 -#define NV50_TSC_0_WRAPT__MASK 0x00000038 -#define NV50_TSC_0_WRAPT__SHIFT 3 -#define NV50_TSC_0_WRAPR__MASK 0x000001c0 -#define NV50_TSC_0_WRAPR__SHIFT 6 -#define NV50_TSC_0_SHADOW_COMPARE_ENABLE 0x00000200 -#define NV50_TSC_0_SHADOW_COMPARE_FUNC__MASK 0x00001c00 -#define NV50_TSC_0_SHADOW_COMPARE_FUNC__SHIFT 10 -#define NV50_TSC_0_SRGB_CONVERSION_ALLOWED 0x00002000 -#define NV50_TSC_0_BOX_S__MASK 0x0001c000 -#define NV50_TSC_0_BOX_S__SHIFT 14 -#define NV50_TSC_0_BOX_T__MASK 0x000e0000 -#define NV50_TSC_0_BOX_T__SHIFT 17 -#define NV50_TSC_0_ANISOTROPY_MASK__MASK 0x00700000 -#define NV50_TSC_0_ANISOTROPY_MASK__SHIFT 20 - -#define 
NV50_TSC_1 0x00000004 -#define NV50_TSC_1_UNKN_ANISO_15 0x10000000 -#define NV50_TSC_1_UNKN_ANISO_35 0x18000000 -#define NV50_TSC_1_MAGF__MASK 0x00000003 -#define NV50_TSC_1_MAGF__SHIFT 0 -#define NV50_TSC_1_MAGF_NEAREST 0x00000001 -#define NV50_TSC_1_MAGF_LINEAR 0x00000002 -#define NV50_TSC_1_MINF__MASK 0x00000030 -#define NV50_TSC_1_MINF__SHIFT 4 -#define NV50_TSC_1_MINF_NEAREST 0x00000010 -#define NV50_TSC_1_MINF_LINEAR 0x00000020 -#define NV50_TSC_1_MIPF__MASK 0x000000c0 -#define NV50_TSC_1_MIPF__SHIFT 6 -#define NV50_TSC_1_MIPF_NONE 0x00000040 -#define NV50_TSC_1_MIPF_NEAREST 0x00000080 -#define NV50_TSC_1_MIPF_LINEAR 0x000000c0 -#define NVE4_TSC_1_CUBE_SEAMLESS 0x00000200 -#define NV50_TSC_1_LOD_BIAS__MASK 0x01fff000 -#define NV50_TSC_1_LOD_BIAS__SHIFT 12 -#define NVE4_TSC_1_FORCE_NONNORMALIZED_COORDS 0x02000000 - -#define NV50_TSC_2 0x00000008 -#define NV50_TSC_2_MIN_LOD__MASK 0x00000fff -#define NV50_TSC_2_MIN_LOD__SHIFT 0 -#define NV50_TSC_2_MAX_LOD__MASK 0x00fff000 -#define NV50_TSC_2_MAX_LOD__SHIFT 12 -#define NV50_TSC_2_BORDER_COLOR_SRGB_RED__MASK 0xff000000 -#define NV50_TSC_2_BORDER_COLOR_SRGB_RED__SHIFT 24 - -#define NV50_TSC_3 0x0000000c -#define NV50_TSC_3_BORDER_COLOR_SRGB_GREEN__MASK 0x000ff000 -#define NV50_TSC_3_BORDER_COLOR_SRGB_GREEN__SHIFT 12 -#define NV50_TSC_3_BORDER_COLOR_SRGB_BLUE__MASK 0x0ff00000 -#define NV50_TSC_3_BORDER_COLOR_SRGB_BLUE__SHIFT 20 - -#define NV50_TSC_4 0x00000010 -#define NV50_TSC_4_BORDER_COLOR_RED__MASK 0xffffffff -#define NV50_TSC_4_BORDER_COLOR_RED__SHIFT 0 - -#define NV50_TSC_5 0x00000014 -#define NV50_TSC_5_BORDER_COLOR_GREEN__MASK 0xffffffff -#define NV50_TSC_5_BORDER_COLOR_GREEN__SHIFT 0 - -#define NV50_TSC_6 0x00000018 -#define NV50_TSC_6_BORDER_COLOR_BLUE__MASK 0xffffffff -#define NV50_TSC_6_BORDER_COLOR_BLUE__SHIFT 0 - -#define NV50_TSC_7 0x0000001c -#define NV50_TSC_7_BORDER_COLOR_ALPHA__MASK 0xffffffff -#define NV50_TSC_7_BORDER_COLOR_ALPHA__SHIFT 0 - - -#endif /* NV50_TEXTURE_XML */ diff --git 
a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c index 9a3fd1e705f..86a8c159469 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c @@ -3,7 +3,7 @@ #include "nv50/nv50_context.h" -#include "nv50/nv50_defs.xml.h" +#include "nv50/g80_defs.xml.h" struct nv50_transfer { struct pipe_transfer base; @@ -163,7 +163,7 @@ nv50_sifc_linear_u8(struct nouveau_context *nv, offset &= ~0xff; BEGIN_NV04(push, NV50_2D(DST_FORMAT), 2); - PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM); PUSH_DATA (push, 1); BEGIN_NV04(push, NV50_2D(DST_PITCH), 5); PUSH_DATA (push, 262144); @@ -173,7 +173,7 @@ nv50_sifc_linear_u8(struct nouveau_context *nv, PUSH_DATA (push, dst->offset + offset); BEGIN_NV04(push, NV50_2D(SIFC_BITMAP_ENABLE), 2); PUSH_DATA (push, 0); - PUSH_DATA (push, NV50_SURFACE_FORMAT_R8_UNORM); + PUSH_DATA (push, G80_SURFACE_FORMAT_R8_UNORM); BEGIN_NV04(push, NV50_2D(SIFC_WIDTH), 10); PUSH_DATA (push, size); PUSH_DATA (push, 1); diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index 5369d5207ee..6f60445d8d2 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -76,7 +76,7 @@ nv50_vertex_state_create(struct pipe_context *pipe, enum pipe_format fmt = ve->src_format; so->element[i].pipe = elements[i]; - so->element[i].state = nv50_format_table[fmt].vtx; + so->element[i].state = nv50_vertex_format[fmt].vtx; if (!so->element[i].state) { switch (util_format_get_nr_components(fmt)) { @@ -89,7 +89,7 @@ nv50_vertex_state_create(struct pipe_context *pipe, FREE(so); return NULL; } - so->element[i].state = nv50_format_table[fmt].vtx; + so->element[i].state = nv50_vertex_format[fmt].vtx; so->need_conversion = true; pipe_debug_message(&nouveau_context(pipe)->debug, FALLBACK, "Converting vertex element %d, no hw format %s", 
@@ -816,6 +816,13 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) PUSH_DATA (push, 0x20); } + if (nv50->screen->base.class_3d >= NVA0_3D_CLASS && + nv50->seamless_cube_map != nv50->state.seamless_cube_map) { + nv50->state.seamless_cube_map = nv50->seamless_cube_map; + BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1); + PUSH_DATA (push, nv50->seamless_cube_map ? NVA0_3D_TEX_MISC_SEAMLESS_CUBE_MAP : 0); + } + if (nv50->vbo_fifo) { nv50_push_vbo(nv50, info); push->kick_notify = nv50_default_kick_notify; diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h index 0a0e187dc02..3479c343261 100644 --- a/src/gallium/drivers/nouveau/nv_object.xml.h +++ b/src/gallium/drivers/nouveau/nv_object.xml.h @@ -192,6 +192,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVF0_3D_CLASS 0x0000a197 #define NVEA_3D_CLASS 0x0000a297 #define GM107_3D_CLASS 0x0000b097 +#define GM200_3D_CLASS 0x0000b197 #define NV50_2D_CLASS 0x0000502d #define NVC0_2D_CLASS 0x0000902d #define NV50_COMPUTE_CLASS 0x000050c0 @@ -200,6 +201,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC8_COMPUTE_CLASS 0x000092c0 #define NVE4_COMPUTE_CLASS 0x0000a0c0 #define NVF0_COMPUTE_CLASS 0x0000a1c0 +#define GM107_COMPUTE_CLASS 0x0000b0c0 #define NV84_CRYPT_CLASS 0x000074c1 #define BLOB_NVC0_PCOPY1_CLASS 0x000090b8 #define BLOB_NVC0_PCOPY0_CLASS 0x000090b5 diff --git a/src/gallium/drivers/nouveau/nvc0/gm107_texture.xml.h b/src/gallium/drivers/nouveau/nvc0/gm107_texture.xml.h new file mode 100644 index 00000000000..a4bc3805f26 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/gm107_texture.xml.h @@ -0,0 +1,365 @@ +#ifndef GM107_TEXTURE_XML +#define GM107_TEXTURE_XML + +/* Autogenerated file, DO NOT EDIT manually! 
+ +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://github.com/envytools/envytools/ +git clone https://github.com/envytools/envytools.git + +The rules-ng-ng source files this header was generated from are: +- /home/skeggsb/git/envytools/rnndb/../rnndb/graph/gm107_texture.xml ( 22057 bytes, from 2016-02-12 03:01:43) +- /home/skeggsb/git/envytools/rnndb/copyright.xml ( 6456 bytes, from 2015-09-10 02:57:40) +- /home/skeggsb/git/envytools/rnndb/nvchipsets.xml ( 2908 bytes, from 2016-02-04 22:19:11) +- /home/skeggsb/git/envytools/rnndb/g80_defs.xml ( 21739 bytes, from 2016-02-04 00:29:42) + +Copyright (C) 2006-2016 by the following authors: +- Artur Huillet <[email protected]> (ahuillet) +- Ben Skeggs (darktama, darktama_) +- B. R. <[email protected]> (koala_br) +- Carlos Martin <[email protected]> (carlosmn) +- Christoph Bumiller <[email protected]> (calim, chrisbmr) +- Dawid Gajownik <[email protected]> (gajownik) +- Dmitry Baryshkov +- Dmitry Eremin-Solenikov <[email protected]> (lumag) +- EdB <[email protected]> (edb_) +- Erik Waling <[email protected]> (erikwaling) +- Francisco Jerez <[email protected]> (curro) +- Ilia Mirkin <[email protected]> (imirkin) +- jb17bsome <[email protected]> (jb17bsome) +- Jeremy Kolb <[email protected]> (kjeremy) +- Laurent Carlier <[email protected]> (lordheavy) +- Luca Barbieri <[email protected]> (lb, lb1) +- Maarten Maathuis <[email protected]> (stillunknown) +- Marcin Kościelnicki <[email protected]> (mwk, koriakin) +- Mark Carey <[email protected]> (careym) +- Matthieu Castet <[email protected]> (mat-c) +- nvidiaman <[email protected]> (nvidiaman) +- Patrice Mandin <[email protected]> (pmandin, pmdata) +- Pekka Paalanen <[email protected]> (pq, ppaalanen) +- Peter Popov <[email protected]> (ironpeter) +- Richard Hughes <[email protected]> (hughsient) +- Rudi Cilibrasi <[email protected]> (cilibrar) +- Serge Martin +- Simon Raffeiner +- Stephane Loeuillet <[email protected]> (leroutier) +- 
Stephane Marchesin <[email protected]> (marcheu) +- sturmflut <[email protected]> (sturmflut) +- Sylvain Munaut <[email protected]> +- Victor Stinner <[email protected]> (haypo) +- Wladmir van der Laan <[email protected]> (miathan6) +- Younes Manton <[email protected]> (ymanton) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +#define GM107_TIC2__SIZE 0x00000020 +#define GM107_TIC2_0 0x00000000 +#define GM107_TIC2_0_COMPONENTS_SIZES__MASK 0x0000007f +#define GM107_TIC2_0_COMPONENTS_SIZES__SHIFT 0 +#define GM107_TIC2_0_COMPONENTS_SIZES_R32_G32_B32_A32 0x00000001 +#define GM107_TIC2_0_COMPONENTS_SIZES_R32_G32_B32 0x00000002 +#define GM107_TIC2_0_COMPONENTS_SIZES_R16_G16_B16_A16 0x00000003 +#define GM107_TIC2_0_COMPONENTS_SIZES_R32_G32 0x00000004 +#define GM107_TIC2_0_COMPONENTS_SIZES_R32_B24G8 0x00000005 +#define GM107_TIC2_0_COMPONENTS_SIZES_X8B8G8R8 0x00000007 +#define GM107_TIC2_0_COMPONENTS_SIZES_A8B8G8R8 0x00000008 +#define GM107_TIC2_0_COMPONENTS_SIZES_A2B10G10R10 0x00000009 +#define GM107_TIC2_0_COMPONENTS_SIZES_R16_G16 0x0000000c +#define GM107_TIC2_0_COMPONENTS_SIZES_G8R24 0x0000000d +#define GM107_TIC2_0_COMPONENTS_SIZES_G24R8 0x0000000e +#define GM107_TIC2_0_COMPONENTS_SIZES_R32 0x0000000f +#define GM107_TIC2_0_COMPONENTS_SIZES_A4B4G4R4 0x00000012 +#define GM107_TIC2_0_COMPONENTS_SIZES_A5B5G5R1 0x00000013 +#define GM107_TIC2_0_COMPONENTS_SIZES_A1B5G5R5 0x00000014 +#define GM107_TIC2_0_COMPONENTS_SIZES_B5G6R5 0x00000015 +#define GM107_TIC2_0_COMPONENTS_SIZES_B6G5R5 0x00000016 +#define GM107_TIC2_0_COMPONENTS_SIZES_G8R8 0x00000018 +#define GM107_TIC2_0_COMPONENTS_SIZES_R16 0x0000001b +#define GM107_TIC2_0_COMPONENTS_SIZES_Y8_VIDEO 0x0000001c +#define GM107_TIC2_0_COMPONENTS_SIZES_R8 0x0000001d +#define GM107_TIC2_0_COMPONENTS_SIZES_G4R4 0x0000001e +#define GM107_TIC2_0_COMPONENTS_SIZES_R1 0x0000001f +#define GM107_TIC2_0_COMPONENTS_SIZES_E5B9G9R9_SHAREDEXP 0x00000020 +#define GM107_TIC2_0_COMPONENTS_SIZES_BF10GF11RF11 0x00000021 +#define GM107_TIC2_0_COMPONENTS_SIZES_G8B8G8R8 0x00000022 +#define GM107_TIC2_0_COMPONENTS_SIZES_B8G8R8G8 0x00000023 +#define GM107_TIC2_0_COMPONENTS_SIZES_DXT1 0x00000024 +#define GM107_TIC2_0_COMPONENTS_SIZES_DXT23 0x00000025 +#define GM107_TIC2_0_COMPONENTS_SIZES_DXT45 0x00000026 +#define GM107_TIC2_0_COMPONENTS_SIZES_DXN1 0x00000027 +#define 
GM107_TIC2_0_COMPONENTS_SIZES_DXN2 0x00000028 +#define GM107_TIC2_0_COMPONENTS_SIZES_BC6H_SF16 0x00000010 +#define GM107_TIC2_0_COMPONENTS_SIZES_BC6H_UF16 0x00000011 +#define GM107_TIC2_0_COMPONENTS_SIZES_BC7U 0x00000017 +#define GM107_TIC2_0_COMPONENTS_SIZES_ETC2_RGB 0x00000006 +#define GM107_TIC2_0_COMPONENTS_SIZES_ETC2_RGB_PTA 0x0000000a +#define GM107_TIC2_0_COMPONENTS_SIZES_ETC2_RGBA 0x0000000b +#define GM107_TIC2_0_COMPONENTS_SIZES_EAC 0x00000019 +#define GM107_TIC2_0_COMPONENTS_SIZES_EACX2 0x0000001a +#define GM107_TIC2_0_COMPONENTS_SIZES_Z24S8 0x00000029 +#define GM107_TIC2_0_COMPONENTS_SIZES_X8Z24 0x0000002a +#define GM107_TIC2_0_COMPONENTS_SIZES_S8Z24 0x0000002b +#define GM107_TIC2_0_COMPONENTS_SIZES_X4V4Z24__COV4R4V 0x0000002c +#define GM107_TIC2_0_COMPONENTS_SIZES_X4V4Z24__COV8R8V 0x0000002d +#define GM107_TIC2_0_COMPONENTS_SIZES_V8Z24__COV4R12V 0x0000002e +#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32 0x0000002f +#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X24S8 0x00000030 +#define GM107_TIC2_0_COMPONENTS_SIZES_X8Z24_X20V4S8__COV4R4V 0x00000031 +#define GM107_TIC2_0_COMPONENTS_SIZES_X8Z24_X20V4S8__COV8R8V 0x00000032 +#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X20V4X8__COV4R4V 0x00000033 +#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X20V4X8__COV8R8V 0x00000034 +#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X20V4S8__COV4R4V 0x00000035 +#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X20V4S8__COV8R8V 0x00000036 +#define GM107_TIC2_0_COMPONENTS_SIZES_X8Z24_X16V8S8__COV4R12V 0x00000037 +#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X16V8X8__COV4R12V 0x00000038 +#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X16V8S8__COV4R12V 0x00000039 +#define GM107_TIC2_0_COMPONENTS_SIZES_Z16 0x0000003a +#define GM107_TIC2_0_COMPONENTS_SIZES_V8Z24__COV8R24V 0x0000003b +#define GM107_TIC2_0_COMPONENTS_SIZES_X8Z24_X16V8S8__COV8R24V 0x0000003c +#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X16V8X8__COV8R24V 0x0000003d +#define GM107_TIC2_0_COMPONENTS_SIZES_ZF32_X16V8S8__COV8R24V 0x0000003e +#define 
GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_4X4 0x00000040 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_5X4 0x00000050 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_5X5 0x00000041 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_6X5 0x00000051 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_6X6 0x00000042 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_8X5 0x00000055 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_8X6 0x00000052 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_8X8 0x00000044 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_10X5 0x00000056 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_10X6 0x00000057 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_10X8 0x00000053 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_10X10 0x00000045 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_12X10 0x00000054 +#define GM107_TIC2_0_COMPONENTS_SIZES_ASTC_2D_12X12 0x00000046 +#define GM107_TIC2_0_R_DATA_TYPE__MASK 0x00000380 +#define GM107_TIC2_0_R_DATA_TYPE__SHIFT 7 +#define GM107_TIC2_0_G_DATA_TYPE__MASK 0x00001c00 +#define GM107_TIC2_0_G_DATA_TYPE__SHIFT 10 +#define GM107_TIC2_0_B_DATA_TYPE__MASK 0x0000e000 +#define GM107_TIC2_0_B_DATA_TYPE__SHIFT 13 +#define GM107_TIC2_0_A_DATA_TYPE__MASK 0x00070000 +#define GM107_TIC2_0_A_DATA_TYPE__SHIFT 16 +#define GM107_TIC2_0_X_SOURCE__MASK 0x00380000 +#define GM107_TIC2_0_X_SOURCE__SHIFT 19 +#define GM107_TIC2_0_Y_SOURCE__MASK 0x01c00000 +#define GM107_TIC2_0_Y_SOURCE__SHIFT 22 +#define GM107_TIC2_0_Z_SOURCE__MASK 0x0e000000 +#define GM107_TIC2_0_Z_SOURCE__SHIFT 25 +#define GM107_TIC2_0_W_SOURCE__MASK 0x70000000 +#define GM107_TIC2_0_W_SOURCE__SHIFT 28 +#define GM107_TIC2_0_PACK_COMPONENTS 0x80000000 + +#define GM107_TIC2_1 0x00000004 +#define GM107_TIC2_1_ADDRESS_BITS_31_TO_0__MASK 0xffffffff +#define GM107_TIC2_1_ADDRESS_BITS_31_TO_0__SHIFT 0 +#define GM107_TIC2_1_ADDRESS_BITS_31_TO_5__MASK 0xffffffe0 +#define GM107_TIC2_1_ADDRESS_BITS_31_TO_5__SHIFT 5 +#define GM107_TIC2_1_ADDRESS_BITS_31_TO_5__SHR 5 +#define 
GM107_TIC2_1_GOB_DEPTH_OFFSET__MASK 0x00000060 +#define GM107_TIC2_1_GOB_DEPTH_OFFSET__SHIFT 5 +#define GM107_TIC2_1_ADDRESS_BITS_31_TO_9__MASK 0xfffffe00 +#define GM107_TIC2_1_ADDRESS_BITS_31_TO_9__SHIFT 9 +#define GM107_TIC2_1_ADDRESS_BITS_31_TO_9__SHR 9 + +#define GM107_TIC2_2 0x00000008 +#define GM107_TIC2_2_ADDRESS_BITS_47_TO_32__MASK 0x0000ffff +#define GM107_TIC2_2_ADDRESS_BITS_47_TO_32__SHIFT 0 +#define GM107_TIC2_2_HEADER_VERSION__MASK 0x00e00000 +#define GM107_TIC2_2_HEADER_VERSION__SHIFT 21 +#define GM107_TIC2_2_HEADER_VERSION_ONE_D_BUFFER 0x00000000 +#define GM107_TIC2_2_HEADER_VERSION_PITCH_COLORKEY 0x00200000 +#define GM107_TIC2_2_HEADER_VERSION_PITCH 0x00400000 +#define GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR 0x00600000 +#define GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR_COLORKEY 0x00800000 +#define GM107_TIC2_2_RESOURCE_VIEW_COHERENCY_HASH__MASK 0x1e000000 +#define GM107_TIC2_2_RESOURCE_VIEW_COHERENCY_HASH__SHIFT 25 + +#define GM107_TIC2_3 0x0000000c +#define GM107_TIC2_3_WIDTH_MINUS_ONE_BITS_31_TO_16__MASK 0x0000ffff +#define GM107_TIC2_3_WIDTH_MINUS_ONE_BITS_31_TO_16__SHIFT 0 +#define GM107_TIC2_3_PITCH_BITS_20_TO_5__MASK 0x0000ffff +#define GM107_TIC2_3_PITCH_BITS_20_TO_5__SHIFT 0 +#define GM107_TIC2_3_PITCH_BITS_20_TO_5__SHR 5 +#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH__MASK 0x00000007 +#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH__SHIFT 0 +#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH__MIN 0x00000000 +#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH__MAX 0x00000000 +#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_ONE 0x00000000 +#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_TWO 0x00000001 +#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_FOUR 0x00000002 +#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_EIGHT 0x00000003 +#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_SIXTEEN 0x00000004 +#define GM107_TIC2_3_GOBS_PER_BLOCK_WIDTH_THIRTYTWO 0x00000005 +#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT__MASK 0x00000038 +#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT__SHIFT 3 +#define 
GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_ONE 0x00000000 +#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_TWO 0x00000008 +#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_FOUR 0x00000010 +#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_EIGHT 0x00000018 +#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_SIXTEEN 0x00000020 +#define GM107_TIC2_3_GOBS_PER_BLOCK_HEIGHT_THIRTYTWO 0x00000028 +#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH__MASK 0x000001c0 +#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH__SHIFT 6 +#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_ONE 0x00000000 +#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_TWO 0x00000040 +#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_FOUR 0x00000080 +#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_EIGHT 0x000000c0 +#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_SIXTEEN 0x00000100 +#define GM107_TIC2_3_GOBS_PER_BLOCK_DEPTH_THIRTYTWO 0x00000140 +#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS__MASK 0x00001c00 +#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS__SHIFT 10 +#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_ONE 0x00000000 +#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_TWO 0x00000400 +#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_FOUR 0x00000800 +#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_EIGHT 0x00000c00 +#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_SIXTEEN 0x00001000 +#define GM107_TIC2_3_TILE_WIDTH_IN_GOBS_THIRTYTWO 0x00001400 +#define GM107_TIC2_3_GOB_3D 0x00002000 +#define GM107_TIC2_3_LOD_ANISO_QUALITY_2 0x00010000 +#define GM107_TIC2_3_LOD_ANISO_QUALITY__MASK 0x00020000 +#define GM107_TIC2_3_LOD_ANISO_QUALITY__SHIFT 17 +#define GM107_TIC2_3_LOD_ANISO_QUALITY_LOW 0x00000000 +#define GM107_TIC2_3_LOD_ANISO_QUALITY_HIGH 0x00020000 +#define GM107_TIC2_3_LOD_ISO_QUALITY__MASK 0x00040000 +#define GM107_TIC2_3_LOD_ISO_QUALITY__SHIFT 18 +#define GM107_TIC2_3_LOD_ISO_QUALITY_LOW 0x00000000 +#define GM107_TIC2_3_LOD_ISO_QUALITY_HIGH 0x00040000 +#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER__MASK 0x00180000 +#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER__SHIFT 19 +#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER_NONE 
0x00000000 +#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER_CONST_ONE 0x00080000 +#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER_CONST_TWO 0x00100000 +#define GM107_TIC2_3_ANISO_COARSE_SPREAD_MODIFIER_SQRT 0x00180000 +#define GM107_TIC2_3_ANISO_SPREAD_SCALE__MASK 0x03e00000 +#define GM107_TIC2_3_ANISO_SPREAD_SCALE__SHIFT 21 +#define GM107_TIC2_3_USE_HEADER_OPT_CONTROL 0x04000000 +#define GM107_TIC2_3_DEPTH_TEXTURE 0x08000000 +#define GM107_TIC2_3_MAX_MIP_LEVEL__MASK 0xf0000000 +#define GM107_TIC2_3_MAX_MIP_LEVEL__SHIFT 28 + +#define GM107_TIC2_4 0x00000010 +#define GM107_TIC2_4_WIDTH_MINUS_ONE_BITS_15_TO_0__MASK 0x0000ffff +#define GM107_TIC2_4_WIDTH_MINUS_ONE_BITS_15_TO_0__SHIFT 0 +#define GM107_TIC2_4_WIDTH_MINUS_ONE__MASK 0x0000ffff +#define GM107_TIC2_4_WIDTH_MINUS_ONE__SHIFT 0 +#define GM107_TIC2_4_ANISO_SPREAD_MAX_LOG2__MASK 0x00380000 +#define GM107_TIC2_4_ANISO_SPREAD_MAX_LOG2__SHIFT 19 +#define GM107_TIC2_4_SRGB_CONVERSION 0x00400000 +#define GM107_TIC2_4_TEXTURE_TYPE__MASK 0x07800000 +#define GM107_TIC2_4_TEXTURE_TYPE__SHIFT 23 +#define GM107_TIC2_4_TEXTURE_TYPE_ONE_D 0x00000000 +#define GM107_TIC2_4_TEXTURE_TYPE_TWO_D 0x00800000 +#define GM107_TIC2_4_TEXTURE_TYPE_THREE_D 0x01000000 +#define GM107_TIC2_4_TEXTURE_TYPE_CUBEMAP 0x01800000 +#define GM107_TIC2_4_TEXTURE_TYPE_ONE_D_ARRAY 0x02000000 +#define GM107_TIC2_4_TEXTURE_TYPE_TWO_D_ARRAY 0x02800000 +#define GM107_TIC2_4_TEXTURE_TYPE_ONE_D_BUFFER 0x03000000 +#define GM107_TIC2_4_TEXTURE_TYPE_TWO_D_NO_MIPMAP 0x03800000 +#define GM107_TIC2_4_TEXTURE_TYPE_CUBE_ARRAY 0x04000000 +#define GM107_TIC2_4_SECTOR_PROMOTION__MASK 0x18000000 +#define GM107_TIC2_4_SECTOR_PROMOTION__SHIFT 27 +#define GM107_TIC2_4_SECTOR_PROMOTION_NO_PROMOTION 0x00000000 +#define GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_V 0x08000000 +#define GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_H 0x10000000 +#define GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_4 0x18000000 +#define GM107_TIC2_4_BORDER_SIZE__MASK 0xe0000000 +#define 
GM107_TIC2_4_BORDER_SIZE__SHIFT 29 +#define GM107_TIC2_4_BORDER_SIZE_ONE 0x00000000 +#define GM107_TIC2_4_BORDER_SIZE_TWO 0x20000000 +#define GM107_TIC2_4_BORDER_SIZE_FOUR 0x40000000 +#define GM107_TIC2_4_BORDER_SIZE_EIGHT 0x60000000 +#define GM107_TIC2_4_BORDER_SIZE_SAMPLER_COLOR 0xe0000000 + +#define GM107_TIC2_5 0x00000014 +#define GM107_TIC2_5_HEIGHT_MINUS_ONE__MASK 0x0000ffff +#define GM107_TIC2_5_HEIGHT_MINUS_ONE__SHIFT 0 +#define GM107_TIC2_5_DEPTH_MINUS_ONE__MASK 0x3fff0000 +#define GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT 16 +#define GM107_TIC2_5_NORMALIZED_COORDS 0x80000000 + +#define GM107_TIC2_6 0x00000018 +#define GM107_TIC2_6_COLOR_KEY_OP 0x00000001 +#define GM107_TIC2_6_TRILIN_OPT__MASK 0x0000003e +#define GM107_TIC2_6_TRILIN_OPT__SHIFT 1 +#define GM107_TIC2_6_MIP_LOD_BIAS__MASK 0x0007ffc0 +#define GM107_TIC2_6_MIP_LOD_BIAS__SHIFT 6 +#define GM107_TIC2_6_MIP_LOD_BIAS__RADIX 0x00000008 +#define GM107_TIC2_6_ANISO_BIAS__MASK 0x00780000 +#define GM107_TIC2_6_ANISO_BIAS__SHIFT 19 +#define GM107_TIC2_6_ANISO_BIAS__RADIX 0x00000004 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC__MASK 0x01800000 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC__SHIFT 23 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_HALF 0x00000000 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_ONE 0x00800000 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_TWO 0x01000000 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_MAX 0x01800000 +#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC__MASK 0x06000000 +#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC__SHIFT 25 +#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_HALF 0x00000000 +#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_ONE 0x02000000 +#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_TWO 0x04000000 +#define GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_MAX 0x06000000 +#define GM107_TIC2_6_MAX_ANISOTROPY__MASK 0x38000000 +#define GM107_TIC2_6_MAX_ANISOTROPY__SHIFT 27 +#define GM107_TIC2_6_MAX_ANISOTROPY_1_TO_1 0x00000000 +#define GM107_TIC2_6_MAX_ANISOTROPY_2_TO_1 0x08000000 +#define 
GM107_TIC2_6_MAX_ANISOTROPY_4_TO_1 0x10000000 +#define GM107_TIC2_6_MAX_ANISOTROPY_6_TO_1 0x18000000 +#define GM107_TIC2_6_MAX_ANISOTROPY_8_TO_1 0x20000000 +#define GM107_TIC2_6_MAX_ANISOTROPY_10_TO_1 0x28000000 +#define GM107_TIC2_6_MAX_ANISOTROPY_12_TO_1 0x30000000 +#define GM107_TIC2_6_MAX_ANISOTROPY_16_TO_1 0x38000000 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER__MASK 0xc0000000 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER__SHIFT 30 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_NONE 0x00000000 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_ONE 0x40000000 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO 0x80000000 +#define GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_SQRT 0xc0000000 + +#define GM107_TIC2_7 0x0000001c +#define GM107_TIC2_7_COLOR_KEY_VALUE__MASK 0xffffffff +#define GM107_TIC2_7_COLOR_KEY_VALUE__SHIFT 0 +#define GM107_TIC2_7_RES_VIEW_MIN_MIP_LEVEL__MASK 0x0000000f +#define GM107_TIC2_7_RES_VIEW_MIN_MIP_LEVEL__SHIFT 0 +#define GM107_TIC2_7_RES_VIEW_MAX_MIP_LEVEL__MASK 0x000000f0 +#define GM107_TIC2_7_RES_VIEW_MAX_MIP_LEVEL__SHIFT 4 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT__MASK 0x00000f00 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT__SHIFT 8 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_1X1 0x00000000 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X1 0x00000100 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X2 0x00000200 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X2 0x00000300 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X2_D3D 0x00000400 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X1_D3D 0x00000500 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X4 0x00000600 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X2_VC_4 0x00000800 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_2X2_VC_12 0x00000900 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X2_VC_8 0x00000a00 +#define GM107_TIC2_7_MULTI_SAMPLE_COUNT_4X2_VC_24 0x00000b00 +#define GM107_TIC2_7_MIN_LOD_CLAMP__MASK 0x00fff000 +#define GM107_TIC2_7_MIN_LOD_CLAMP__SHIFT 12 +#define GM107_TIC2_7_MIN_LOD_CLAMP__RADIX 0x00000008 + + 
+#endif /* GM107_TEXTURE_XML */ diff --git a/src/gallium/drivers/nouveau/nvc0/mme/Makefile b/src/gallium/drivers/nouveau/nvc0/mme/Makefile index 1c0f5835973..52fb0a54812 100644 --- a/src/gallium/drivers/nouveau/nvc0/mme/Makefile +++ b/src/gallium/drivers/nouveau/nvc0/mme/Makefile @@ -1,5 +1,5 @@ ENVYAS?=envyas -TARGETS=com9097.mme.h +TARGETS=com9097.mme.h com90c0.mme.h all: $(TARGETS) diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme new file mode 100644 index 00000000000..a9233ad8015 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme @@ -0,0 +1,24 @@ +/* NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT + * + * arg = num_groups_x + * parm[0] = num_groups_y + * parm[1] = num_groups_z + */ +.section #mme90c0_launch_grid_indirect + parm $r2 maddr 0x108e /* GRIDDIM_YX */ + braz $r1 #fail + parm $r3 + braz annul $r2 #fail + braz annul $r3 #fail + send (extrinsrt $r1 $r2 0x0 0x10 0x10) /* num_groups_y << 16 | num_groups_x */ + send $r3 + maddrsend 0xa7 /* COMPUTE_BEGIN */ + maddrsend 0x282 /* UNKA08 */ + maddr 0xda /* LAUNCH */ + send 0x1000 + maddrsend 0x281 /* COMPUTE_END */ + exit maddr 0xd8 /* UNK360 */ + send 0x1 +fail: + exit + nop diff --git a/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h new file mode 100644 index 00000000000..1dc06e5e690 --- /dev/null +++ b/src/gallium/drivers/nouveau/nvc0/mme/com90c0.mme.h @@ -0,0 +1,19 @@ +uint32_t mme90c0_launch_grid_indirect[] = { + 0x04238251, + 0x00034807, + 0x00000301, +/* 0x000e: fail */ + 0x0002d027, + 0x00029827, + 0x84008842, + 0x00001841, + 0x0029c071, + 0x00a08071, + 0x00368021, + 0x04000041, + 0x00a04071, + 0x003600a1, + 0x00004041, + 0x00000091, + 0x00000011, +}; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c index 71804343138..0f1265f5db5 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c +++ 
b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c @@ -59,53 +59,63 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen, if (ret) return ret; - BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1); + BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->compute->oclass); /* hardware limit */ - BEGIN_NVC0(push, NVC0_COMPUTE(MP_LIMIT), 1); + BEGIN_NVC0(push, NVC0_CP(MP_LIMIT), 1); PUSH_DATA (push, screen->mp_count); - BEGIN_NVC0(push, NVC0_COMPUTE(CALL_LIMIT_LOG), 1); + BEGIN_NVC0(push, NVC0_CP(CALL_LIMIT_LOG), 1); PUSH_DATA (push, 0xf); - BEGIN_NVC0(push, SUBC_COMPUTE(0x02a0), 1); + BEGIN_NVC0(push, SUBC_CP(0x02a0), 1); PUSH_DATA (push, 0x8000); /* global memory setup */ - BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1); + BEGIN_NVC0(push, SUBC_CP(0x02c4), 1); PUSH_DATA (push, 0); - BEGIN_NIC0(push, NVC0_COMPUTE(GLOBAL_BASE), 0x100); + BEGIN_NIC0(push, NVC0_CP(GLOBAL_BASE), 0x100); for (i = 0; i <= 0xff; i++) PUSH_DATA (push, (0xc << 28) | (i << 16) | i); - BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1); + BEGIN_NVC0(push, SUBC_CP(0x02c4), 1); PUSH_DATA (push, 1); /* local memory and cstack setup */ - BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVC0_CP(TEMP_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->tls->offset); PUSH_DATA (push, screen->tls->offset); - BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_SIZE_HIGH), 2); + BEGIN_NVC0(push, NVC0_CP(TEMP_SIZE_HIGH), 2); PUSH_DATAh(push, screen->tls->size); PUSH_DATA (push, screen->tls->size); - BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1); + BEGIN_NVC0(push, NVC0_CP(WARP_TEMP_ALLOC), 1); PUSH_DATA (push, 0); - BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1); - PUSH_DATA (push, 1 << 24); + BEGIN_NVC0(push, NVC0_CP(LOCAL_BASE), 1); + PUSH_DATA (push, 0xff << 24); /* shared memory setup */ - BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1); + BEGIN_NVC0(push, NVC0_CP(CACHE_SPLIT), 1); PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1); - BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 
1); - PUSH_DATA (push, 2 << 24); - BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1); + BEGIN_NVC0(push, NVC0_CP(SHARED_BASE), 1); + PUSH_DATA (push, 0xfe << 24); + BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 1); PUSH_DATA (push, 0); /* code segment setup */ - BEGIN_NVC0(push, NVC0_COMPUTE(CODE_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->text->offset); PUSH_DATA (push, screen->text->offset); - /* TODO: textures & samplers */ + /* textures */ + BEGIN_NVC0(push, NVC0_CP(TIC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset); + PUSH_DATA (push, screen->txc->offset); + PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1); + + /* samplers */ + BEGIN_NVC0(push, NVC0_CP(TSC_ADDRESS_HIGH), 3); + PUSH_DATAh(push, screen->txc->offset + 65536); + PUSH_DATA (push, screen->txc->offset + 65536); + PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1); return 0; } @@ -130,7 +140,7 @@ nvc0_compute_validate_program(struct nvc0_context *nvc0) if (likely(prog->code_size)) { if (nvc0_program_upload_code(nvc0, prog)) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; - BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); + BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE); return true; } @@ -138,13 +148,149 @@ nvc0_compute_validate_program(struct nvc0_context *nvc0) return false; } +static void +nvc0_compute_validate_samplers(struct nvc0_context *nvc0) +{ + bool need_flush = nvc0_validate_tsc(nvc0, 5); + if (need_flush) { + BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TSC_FLUSH), 1); + PUSH_DATA (nvc0->base.pushbuf, 0); + } +} + +static void +nvc0_compute_validate_textures(struct nvc0_context *nvc0) +{ + bool need_flush = nvc0_validate_tic(nvc0, 5); + if (need_flush) { + BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(TIC_FLUSH), 1); + PUSH_DATA (nvc0->base.pushbuf, 0); + } +} + +static void +nvc0_compute_validate_constbufs(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + const int s = 5; + + while 
(nvc0->constbuf_dirty[s]) { + int i = ffs(nvc0->constbuf_dirty[s]) - 1; + nvc0->constbuf_dirty[s] &= ~(1 << i); + + if (nvc0->constbuf[s][i].user) { + struct nouveau_bo *bo = nvc0->screen->uniform_bo; + const unsigned base = s << 16; + const unsigned size = nvc0->constbuf[s][0].size; + assert(i == 0); /* we really only want OpenGL uniforms here */ + assert(nvc0->constbuf[s][0].u.data); + + if (nvc0->state.uniform_buffer_bound[s] < size) { + nvc0->state.uniform_buffer_bound[s] = align(size, 0x100); + + BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); + PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]); + PUSH_DATAh(push, bo->offset + base); + PUSH_DATA (push, bo->offset + base); + BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); + PUSH_DATA (push, (0 << 8) | 1); + } + nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base), + base, nvc0->state.uniform_buffer_bound[s], + 0, (size + 3) / 4, + nvc0->constbuf[s][0].u.data); + } else { + struct nv04_resource *res = + nv04_resource(nvc0->constbuf[s][i].u.buf); + if (res) { + BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); + PUSH_DATA (push, nvc0->constbuf[s][i].size); + PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset); + PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset); + BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); + PUSH_DATA (push, (i << 8) | 1); + + BCTX_REFN(nvc0->bufctx_cp, CP_CB(i), res, RD); + + res->cb_bindings[s] |= 1 << i; + } else { + BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); + PUSH_DATA (push, (i << 8) | 0); + } + if (i == 0) + nvc0->state.uniform_buffer_bound[s] = 0; + } + } + + BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); + PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB); +} + +static void +nvc0_compute_validate_driverconst(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_screen *screen = nvc0->screen; + + BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); + PUSH_DATA (push, 1024); + PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (5 << 10)); + PUSH_DATA (push, 
screen->uniform_bo->offset + (6 << 16) + (5 << 10)); + BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); + PUSH_DATA (push, (15 << 8) | 1); + + nvc0->dirty_3d |= NVC0_NEW_3D_DRIVERCONST; +} + +static void +nvc0_compute_validate_buffers(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + const int s = 5; + int i; + + BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); + PUSH_DATA (push, 1024); + PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10)); + PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10)); + BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS); + PUSH_DATA (push, 512); + + for (i = 0; i < NVC0_MAX_BUFFERS; i++) { + if (nvc0->buffers[s][i].buffer) { + struct nv04_resource *res = + nv04_resource(nvc0->buffers[s][i].buffer); + PUSH_DATA (push, res->address + nvc0->buffers[s][i].buffer_offset); + PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset); + PUSH_DATA (push, nvc0->buffers[s][i].buffer_size); + PUSH_DATA (push, 0); + BCTX_REFN(nvc0->bufctx_cp, CP_BUF, res, RDWR); + } else { + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + PUSH_DATA (push, 0); + } + } +} + static bool nvc0_compute_state_validate(struct nvc0_context *nvc0) { if (!nvc0_compute_validate_program(nvc0)) return false; - - /* TODO: textures, samplers, surfaces, global memory buffers */ + if (nvc0->dirty_cp & NVC0_NEW_CP_CONSTBUF) + nvc0_compute_validate_constbufs(nvc0); + if (nvc0->dirty_cp & NVC0_NEW_CP_DRIVERCONST) + nvc0_compute_validate_driverconst(nvc0); + if (nvc0->dirty_cp & NVC0_NEW_CP_BUFFERS) + nvc0_compute_validate_buffers(nvc0); + if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES) + nvc0_compute_validate_textures(nvc0); + if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS) + nvc0_compute_validate_samplers(nvc0); + + /* TODO: surfaces, global memory buffers */ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, false); @@ -166,32 +312,29 @@ nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input) 
struct nvc0_program *cp = nvc0->compprog; if (cp->parm_size) { - BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3); + BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3); PUSH_DATA (push, align(cp->parm_size, 0x100)); PUSH_DATAh(push, screen->parm->offset); PUSH_DATA (push, screen->parm->offset); - BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1); + BEGIN_NVC0(push, NVC0_CP(CB_BIND), 1); PUSH_DATA (push, (0 << 8) | 1); /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */ - BEGIN_1IC0(push, NVC0_COMPUTE(CB_POS), 1 + cp->parm_size / 4); + BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + cp->parm_size / 4); PUSH_DATA (push, 0); PUSH_DATAp(push, input, cp->parm_size / 4); - BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); + BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB); } } void -nvc0_launch_grid(struct pipe_context *pipe, - const uint *block_layout, const uint *grid_layout, - uint32_t label, - const void *input) +nvc0_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) { struct nvc0_context *nvc0 = nvc0_context(pipe); struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct nvc0_program *cp = nvc0->compprog; - unsigned s, i; + unsigned s; int ret; ret = !nvc0_compute_state_validate(nvc0); @@ -200,59 +343,69 @@ nvc0_launch_grid(struct pipe_context *pipe, return; } - nvc0_compute_upload_input(nvc0, input); + nvc0_compute_upload_input(nvc0, info->input); - BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1); - PUSH_DATA (push, nvc0_program_symbol_offset(cp, label)); + BEGIN_NVC0(push, NVC0_CP(CP_START_ID), 1); + PUSH_DATA (push, nvc0_program_symbol_offset(cp, info->pc)); - BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3); + BEGIN_NVC0(push, NVC0_CP(LOCAL_POS_ALLOC), 3); PUSH_DATA (push, align(cp->cp.lmem_size, 0x10)); PUSH_DATA (push, 0); PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */ - BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3); + BEGIN_NVC0(push, NVC0_CP(SHARED_SIZE), 3); PUSH_DATA (push, align(cp->cp.smem_size, 0x100)); - PUSH_DATA 
(push, block_layout[0] * block_layout[1] * block_layout[2]); + PUSH_DATA (push, info->block[0] * info->block[1] * info->block[2]); PUSH_DATA (push, cp->num_barriers); - BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1); + BEGIN_NVC0(push, NVC0_CP(CP_GPR_ALLOC), 1); PUSH_DATA (push, cp->num_gprs); - /* grid/block setup */ - BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2); - PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]); - PUSH_DATA (push, grid_layout[2]); - BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2); - PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]); - PUSH_DATA (push, block_layout[2]); - /* launch preliminary setup */ - BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1); + BEGIN_NVC0(push, NVC0_CP(GRIDID), 1); PUSH_DATA (push, 0x1); - BEGIN_NVC0(push, SUBC_COMPUTE(0x036c), 1); + BEGIN_NVC0(push, SUBC_CP(0x036c), 1); PUSH_DATA (push, 0); - BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1); + BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8); - /* kernel launching */ - BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1); - PUSH_DATA (push, 0); - BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1); - PUSH_DATA (push, 0); - BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1); - PUSH_DATA (push, 0x1000); - BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1); - PUSH_DATA (push, 0); - BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1); - PUSH_DATA (push, 0x1); + /* block setup */ + BEGIN_NVC0(push, NVC0_CP(BLOCKDIM_YX), 2); + PUSH_DATA (push, (info->block[1] << 16) | info->block[0]); + PUSH_DATA (push, info->block[2]); + + if (unlikely(info->indirect)) { + struct nv04_resource *res = nv04_resource(info->indirect); + uint32_t offset = res->offset + info->indirect_offset; + unsigned macro = NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT; + + nouveau_pushbuf_space(push, 16, 0, 1); + PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain); + PUSH_DATA(push, NVC0_FIFO_PKHDR_1I(1, macro, 3)); + nouveau_pushbuf_data(push, res->bo, offset, + 
NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4); + } else { + /* grid setup */ + BEGIN_NVC0(push, NVC0_CP(GRIDDIM_YX), 2); + PUSH_DATA (push, (info->grid[1] << 16) | info->grid[0]); + PUSH_DATA (push, info->grid[2]); + + /* kernel launching */ + BEGIN_NVC0(push, NVC0_CP(COMPUTE_BEGIN), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, SUBC_CP(0x0a08), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, NVC0_CP(LAUNCH), 1); + PUSH_DATA (push, 0x1000); + BEGIN_NVC0(push, NVC0_CP(COMPUTE_END), 1); + PUSH_DATA (push, 0); + BEGIN_NVC0(push, SUBC_CP(0x0360), 1); + PUSH_DATA (push, 0x1); + } - /* rebind all the 3D constant buffers - * (looks like binding a CB on COMPUTE clobbers 3D state) */ - nvc0->dirty |= NVC0_NEW_CONSTBUF; + /* Invalidate all 3D constbufs because they are aliased with COMPUTE. */ + nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF; for (s = 0; s < 5; s++) { - for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; i++) - if (nvc0->constbuf[s][i].u.buf) - nvc0->constbuf_dirty[s] |= 1 << i; + nvc0->constbuf_dirty[s] |= nvc0->constbuf_valid[s]; + nvc0->state.uniform_buffer_bound[s] = 0; } - memset(nvc0->state.uniform_buffer_bound, 0, - sizeof(nvc0->state.uniform_buffer_bound)); } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h index 168a6d1bee2..a23f7f39dda 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h @@ -1,7 +1,6 @@ #ifndef NVC0_COMPUTE_H #define NVC0_COMPUTE_H -#include "nv50/nv50_defs.xml.h" #include "nvc0/nvc0_compute.xml.h" bool diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c index 547b8f5d309..007cccfd10b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c @@ -194,8 +194,8 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) { if (nvc0->framebuffer.cbufs[i] && 
nvc0->framebuffer.cbufs[i]->texture == res) { - nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); + nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB); if (!--ref) return ref; } @@ -204,8 +204,8 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, if (res->bind & PIPE_BIND_DEPTH_STENCIL) { if (nvc0->framebuffer.zsbuf && nvc0->framebuffer.zsbuf->texture == res) { - nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); + nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB); if (!--ref) return ref; } @@ -214,16 +214,16 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, if (res->target == PIPE_BUFFER) { for (i = 0; i < nvc0->num_vtxbufs; ++i) { if (nvc0->vtxbuf[i].buffer == res) { - nvc0->dirty |= NVC0_NEW_ARRAYS; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX); + nvc0->dirty_3d |= NVC0_NEW_3D_ARRAYS; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX); if (!--ref) return ref; } } if (nvc0->idxbuf.buffer == res) { - nvc0->dirty |= NVC0_NEW_IDXBUF; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_IDX); + nvc0->dirty_3d |= NVC0_NEW_3D_IDXBUF; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_IDX); if (!--ref) return ref; } @@ -233,35 +233,45 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx, if (nvc0->textures[s][i] && nvc0->textures[s][i]->texture == res) { nvc0->textures_dirty[s] |= 1 << i; - nvc0->dirty |= NVC0_NEW_TEXTURES; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i)); + nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i)); if (!--ref) return ref; } } } - for (s = 0; s < 5; ++s) { + for (s = 0; s < 6; ++s) { for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; ++i) { if (!(nvc0->constbuf_valid[s] & (1 << i))) continue; if (!nvc0->constbuf[s][i].user && nvc0->constbuf[s][i].u.buf == 
res) { - nvc0->dirty |= NVC0_NEW_CONSTBUF; nvc0->constbuf_dirty[s] |= 1 << i; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i)); + if (unlikely(s == 5)) { + nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF; + nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_CB(i)); + } else { + nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_CB(s, i)); + } if (!--ref) return ref; } } } - for (s = 0; s < 5; ++s) { + for (s = 0; s < 6; ++s) { for (i = 0; i < NVC0_MAX_BUFFERS; ++i) { if (nvc0->buffers[s][i].buffer == res) { nvc0->buffers_dirty[s] |= 1 << i; - nvc0->dirty |= NVC0_NEW_BUFFERS; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_BUF); + if (unlikely(s == 5)) { + nvc0->dirty_cp |= NVC0_NEW_CP_BUFFERS; + nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_BUF); + } else { + nvc0->dirty_3d |= NVC0_NEW_3D_BUFFERS; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_BUF); + } if (!--ref) return ref; } @@ -342,7 +352,12 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) if (!nvc0->tcp_empty) goto out_err; /* set the empty tctl prog on next draw in case one is never set */ - nvc0->dirty |= NVC0_NEW_TCTLPROG; + nvc0->dirty_3d |= NVC0_NEW_3D_TCTLPROG; + + /* Do not bind the COMPUTE driver constbuf at screen initialization because + * CBs are aliased between 3D and COMPUTE, but make sure it will be bound if + * a grid is launched later. */ + nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST; /* now that there are no more opportunities for errors, set the current * context if there isn't already one. 
@@ -358,11 +373,12 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) flags = NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RD; - BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->text); - BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniform_bo); - BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->txc); + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->text); + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->uniform_bo); + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->txc); if (screen->compute) { BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->text); + BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->uniform_bo); BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->txc); BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->parm); } @@ -370,13 +386,13 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags) flags = NV_VRAM_DOMAIN(&screen->base) | NOUVEAU_BO_RDWR; if (screen->poly_cache) - BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->poly_cache); + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->poly_cache); if (screen->compute) BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->tls); flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR; - BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->fence.bo); + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_SCREEN, flags, screen->fence.bo); BCTX_REFN_bo(nvc0->bufctx, FENCE, flags, screen->fence.bo); if (screen->compute) BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->fence.bo); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index 4a6ea867e85..d3e3a818910 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -29,34 +29,35 @@ #include "nvc0/nve4_p2mf.xml.h" #include "nvc0/nvc0_macros.h" -/* NOTE: must keep NVC0_NEW_...PROG in consecutive bits in this order */ -#define NVC0_NEW_BLEND (1 << 0) -#define 
NVC0_NEW_RASTERIZER (1 << 1) -#define NVC0_NEW_ZSA (1 << 2) -#define NVC0_NEW_VERTPROG (1 << 3) -#define NVC0_NEW_TCTLPROG (1 << 4) -#define NVC0_NEW_TEVLPROG (1 << 5) -#define NVC0_NEW_GMTYPROG (1 << 6) -#define NVC0_NEW_FRAGPROG (1 << 7) -#define NVC0_NEW_BLEND_COLOUR (1 << 8) -#define NVC0_NEW_STENCIL_REF (1 << 9) -#define NVC0_NEW_CLIP (1 << 10) -#define NVC0_NEW_SAMPLE_MASK (1 << 11) -#define NVC0_NEW_FRAMEBUFFER (1 << 12) -#define NVC0_NEW_STIPPLE (1 << 13) -#define NVC0_NEW_SCISSOR (1 << 14) -#define NVC0_NEW_VIEWPORT (1 << 15) -#define NVC0_NEW_ARRAYS (1 << 16) -#define NVC0_NEW_VERTEX (1 << 17) -#define NVC0_NEW_CONSTBUF (1 << 18) -#define NVC0_NEW_TEXTURES (1 << 19) -#define NVC0_NEW_SAMPLERS (1 << 20) -#define NVC0_NEW_TFB_TARGETS (1 << 21) -#define NVC0_NEW_IDXBUF (1 << 22) -#define NVC0_NEW_SURFACES (1 << 23) -#define NVC0_NEW_MIN_SAMPLES (1 << 24) -#define NVC0_NEW_TESSFACTOR (1 << 25) -#define NVC0_NEW_BUFFERS (1 << 26) +/* NOTE: must keep NVC0_NEW_3D_...PROG in consecutive bits in this order */ +#define NVC0_NEW_3D_BLEND (1 << 0) +#define NVC0_NEW_3D_RASTERIZER (1 << 1) +#define NVC0_NEW_3D_ZSA (1 << 2) +#define NVC0_NEW_3D_VERTPROG (1 << 3) +#define NVC0_NEW_3D_TCTLPROG (1 << 4) +#define NVC0_NEW_3D_TEVLPROG (1 << 5) +#define NVC0_NEW_3D_GMTYPROG (1 << 6) +#define NVC0_NEW_3D_FRAGPROG (1 << 7) +#define NVC0_NEW_3D_BLEND_COLOUR (1 << 8) +#define NVC0_NEW_3D_STENCIL_REF (1 << 9) +#define NVC0_NEW_3D_CLIP (1 << 10) +#define NVC0_NEW_3D_SAMPLE_MASK (1 << 11) +#define NVC0_NEW_3D_FRAMEBUFFER (1 << 12) +#define NVC0_NEW_3D_STIPPLE (1 << 13) +#define NVC0_NEW_3D_SCISSOR (1 << 14) +#define NVC0_NEW_3D_VIEWPORT (1 << 15) +#define NVC0_NEW_3D_ARRAYS (1 << 16) +#define NVC0_NEW_3D_VERTEX (1 << 17) +#define NVC0_NEW_3D_CONSTBUF (1 << 18) +#define NVC0_NEW_3D_TEXTURES (1 << 19) +#define NVC0_NEW_3D_SAMPLERS (1 << 20) +#define NVC0_NEW_3D_TFB_TARGETS (1 << 21) +#define NVC0_NEW_3D_IDXBUF (1 << 22) +#define NVC0_NEW_3D_SURFACES (1 << 23) +#define 
NVC0_NEW_3D_MIN_SAMPLES (1 << 24) +#define NVC0_NEW_3D_TESSFACTOR (1 << 25) +#define NVC0_NEW_3D_BUFFERS (1 << 26) +#define NVC0_NEW_3D_DRIVERCONST (1 << 27) #define NVC0_NEW_CP_PROGRAM (1 << 0) #define NVC0_NEW_CP_SURFACES (1 << 1) @@ -64,20 +65,22 @@ #define NVC0_NEW_CP_SAMPLERS (1 << 3) #define NVC0_NEW_CP_CONSTBUF (1 << 4) #define NVC0_NEW_CP_GLOBALS (1 << 5) +#define NVC0_NEW_CP_DRIVERCONST (1 << 6) +#define NVC0_NEW_CP_BUFFERS (1 << 7) /* 3d bufctx (during draw_vbo, blit_3d) */ -#define NVC0_BIND_FB 0 -#define NVC0_BIND_VTX 1 -#define NVC0_BIND_VTX_TMP 2 -#define NVC0_BIND_IDX 3 -#define NVC0_BIND_TEX(s, i) ( 4 + 32 * (s) + (i)) -#define NVC0_BIND_CB(s, i) (164 + 16 * (s) + (i)) -#define NVC0_BIND_TFB 244 -#define NVC0_BIND_SUF 245 -#define NVC0_BIND_BUF 246 -#define NVC0_BIND_SCREEN 247 -#define NVC0_BIND_TLS 249 -#define NVC0_BIND_3D_COUNT 250 +#define NVC0_BIND_3D_FB 0 +#define NVC0_BIND_3D_VTX 1 +#define NVC0_BIND_3D_VTX_TMP 2 +#define NVC0_BIND_3D_IDX 3 +#define NVC0_BIND_3D_TEX(s, i) ( 4 + 32 * (s) + (i)) +#define NVC0_BIND_3D_CB(s, i) (164 + 16 * (s) + (i)) +#define NVC0_BIND_3D_TFB 244 +#define NVC0_BIND_3D_SUF 245 +#define NVC0_BIND_3D_BUF 246 +#define NVC0_BIND_3D_SCREEN 247 +#define NVC0_BIND_3D_TLS 249 +#define NVC0_BIND_3D_COUNT 250 /* compute bufctx (during launch_grid) */ #define NVC0_BIND_CP_CB(i) ( 0 + (i)) @@ -87,7 +90,8 @@ #define NVC0_BIND_CP_DESC 50 #define NVC0_BIND_CP_SCREEN 51 #define NVC0_BIND_CP_QUERY 52 -#define NVC0_BIND_CP_COUNT 53 +#define NVC0_BIND_CP_BUF 53 +#define NVC0_BIND_CP_COUNT 54 /* bufctx for other operations */ #define NVC0_BIND_2D 0 @@ -114,7 +118,7 @@ struct nvc0_context { const struct nv50_m2mf_rect *src, uint32_t nblocksx, uint32_t nblocksy); - uint32_t dirty; + uint32_t dirty_3d; /* dirty flags for 3d state */ uint32_t dirty_cp; /* dirty flags for compute state */ struct nvc0_graph_state state; @@ -157,6 +161,7 @@ struct nvc0_context { struct nv50_tsc_entry *samplers[6][PIPE_MAX_SAMPLERS]; unsigned 
num_samplers[6]; uint16_t samplers_dirty[6]; + bool seamless_cube_map; uint32_t tex_handles[6][PIPE_MAX_SAMPLERS]; /* for nve4 */ @@ -267,6 +272,8 @@ extern void nvc0_clear(struct pipe_context *, unsigned buffers, extern void nvc0_init_surface_functions(struct nvc0_context *); /* nvc0_tex.c */ +bool nvc0_validate_tic(struct nvc0_context *nvc0, int s); +bool nvc0_validate_tsc(struct nvc0_context *nvc0, int s); bool nve4_validate_tsc(struct nvc0_context *nvc0, int s); void nvc0_validate_textures(struct nvc0_context *); void nvc0_validate_samplers(struct nvc0_context *); @@ -331,11 +338,9 @@ nvc0_video_buffer_create(struct pipe_context *pipe, void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *); /* nve4_compute.c */ -void nve4_launch_grid(struct pipe_context *, - const uint *, const uint *, uint32_t, const void *); +void nve4_launch_grid(struct pipe_context *, const struct pipe_grid_info *); /* nvc0_compute.c */ -void nvc0_launch_grid(struct pipe_context *, - const uint *, const uint *, uint32_t, const void *); +void nvc0_launch_grid(struct pipe_context *, const struct pipe_grid_info *); #endif diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h index 49e176cbd49..eeacc714f3e 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_macros.h @@ -35,4 +35,6 @@ #define NVC0_3D_MACRO_QUERY_BUFFER_WRITE 0x00003858 +#define NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT 0x00003860 + #endif /* __NVC0_MACROS_H__ */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 93f211bd5fc..bc884d6c08f 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -544,6 +544,9 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, info->io.texBindBase = NVE4_CP_INPUT_TEX(0); info->io.suInfoBase = NVE4_CP_INPUT_SUF(0); 
info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0); + } else { + info->io.resInfoCBSlot = 15; + info->io.suInfoBase = 512; } info->io.msInfoCBSlot = 0; info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c index 721857edecc..f5f9bb39fd9 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c @@ -846,15 +846,15 @@ nve4_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) /* configure and reset the counter(s) */ if (d == 0) - BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1); + BEGIN_NVC0(push, NVE4_CP(MP_PM_A_SIGSEL(c & 3)), 1); else - BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_B_SIGSEL(c & 3)), 1); + BEGIN_NVC0(push, NVE4_CP(MP_PM_B_SIGSEL(c & 3)), 1); PUSH_DATA (push, cfg->ctr[i].sig_sel); - BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SRCSEL(c)), 1); + BEGIN_NVC0(push, NVE4_CP(MP_PM_SRCSEL(c)), 1); PUSH_DATA (push, cfg->ctr[i].src_sel + 0x2108421 * (c & 3)); - BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 1); + BEGIN_NVC0(push, NVE4_CP(MP_PM_FUNC(c)), 1); PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode); - BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SET(c)), 1); + BEGIN_NVC0(push, NVE4_CP(MP_PM_SET(c)), 1); PUSH_DATA (push, 0); } return true; @@ -917,13 +917,13 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) mask_sel &= cfg->ctr[i].src_mask; /* configure and reset the counter(s) */ - BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SIGSEL(c)), 1); + BEGIN_NVC0(push, NVC0_CP(MP_PM_SIGSEL(c)), 1); PUSH_DATA (push, cfg->ctr[i].sig_sel); - BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SRCSEL(c)), 1); + BEGIN_NVC0(push, NVC0_CP(MP_PM_SRCSEL(c)), 1); PUSH_DATA (push, cfg->ctr[i].src_sel | mask_sel); - BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 1); + BEGIN_NVC0(push, NVC0_CP(MP_PM_OP(c)), 1); PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode); - 
BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SET(c)), 1); + BEGIN_NVC0(push, NVC0_CP(MP_PM_SET(c)), 1); PUSH_DATA (push, 0); } return true; @@ -937,11 +937,12 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) struct nouveau_pushbuf *push = nvc0->base.pushbuf; const bool is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS; struct nvc0_hw_sm_query *hsq = nvc0_hw_sm_query(hq); + struct pipe_grid_info info = {}; uint32_t mask; uint32_t input[3]; const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 }; const uint grid[3] = { screen->mp_count, screen->gpc_count, 1 }; - unsigned c; + unsigned c, i; if (unlikely(!screen->pm.prog)) { struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program); @@ -965,9 +966,9 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) for (c = 0; c < 8; ++c) if (screen->pm.mp_counter[c]) { if (is_nve4) { - IMMED_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 0); + IMMED_NVC0(push, NVE4_CP(MP_PM_FUNC(c)), 0); } else { - IMMED_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 0); + IMMED_NVC0(push, NVC0_CP(MP_PM_OP(c)), 0); } } /* release counters for this query */ @@ -983,13 +984,20 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) hq->bo); PUSH_SPACE(push, 1); - IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0); + IMMED_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 0); pipe->bind_compute_state(pipe, screen->pm.prog); input[0] = (hq->bo->offset + hq->base_offset); input[1] = (hq->bo->offset + hq->base_offset) >> 32; input[2] = hq->sequence; - pipe->launch_grid(pipe, block, grid, 0, input); + + for (i = 0; i < 3; i++) { + info.block[i] = block[i]; + info.grid[i] = grid[i]; + } + info.pc = 0; + info.input = input; + pipe->launch_grid(pipe, &info); nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY); @@ -1010,9 +1018,9 @@ nvc0_hw_sm_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq) break; mask |= 1 << hsq->ctr[i]; if (is_nve4) { - BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(hsq->ctr[i])), 1); + 
BEGIN_NVC0(push, NVE4_CP(MP_PM_FUNC(hsq->ctr[i])), 1); } else { - BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(hsq->ctr[i])), 1); + BEGIN_NVC0(push, NVC0_CP(MP_PM_OP(hsq->ctr[i])), 1); } PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode); } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index d368fda707d..998e9ea47ef 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -36,6 +36,7 @@ #include "nvc0/nvc0_screen.h" #include "nvc0/mme/com9097.mme.h" +#include "nvc0/mme/com90c0.mme.h" static boolean nvc0_screen_is_format_supported(struct pipe_screen *pscreen, @@ -61,7 +62,8 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen, PIPE_BIND_TRANSFER_WRITE | PIPE_BIND_SHARED); - return (nvc0_format_table[format].usage & bindings) == bindings; + return (( nvc0_format_table[format].usage | + nvc0_vertex_format[format].usage) & bindings) == bindings; } static int @@ -196,6 +198,8 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: return (class_3d >= NVE4_3D_CLASS) ? 1 : 0; case PIPE_CAP_COMPUTE: + if (debug_get_bool_option("NVF0_COMPUTE", false)) + return 1; return (class_3d <= NVE4_3D_CLASS) ? 1 : 0; case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER: return nouveau_screen(pscreen)->vram_domain & NOUVEAU_BO_VRAM ? 
1 : 0; @@ -262,8 +266,9 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 0; break; case PIPE_SHADER_COMPUTE: - if (class_3d > NVE4_3D_CLASS) - return 0; + if (!debug_get_bool_option("NVF0_COMPUTE", false)) + if (class_3d > NVE4_3D_CLASS) + return 0; break; default: return 0; @@ -272,6 +277,10 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, switch (param) { case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + if (class_3d >= NVE4_3D_CLASS) + return 0; + return 1 << PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_MAX_INSTRUCTIONS: case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS: case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS: @@ -336,6 +345,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 16; /* XXX not sure if more are really safe */ case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: + return 0; default: NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param); return 0; @@ -598,6 +609,9 @@ nvc0_screen_init_compute(struct nvc0_screen *screen) case 0xf0: case 0x100: case 0x110: + if (debug_get_bool_option("NVF0_COMPUTE", false)) + return nve4_screen_compute_setup(screen, screen->base.pushbuf); + case 0x120: return 0; default: return -1; @@ -660,6 +674,7 @@ nvc0_screen_create(struct nouveau_device *dev) case 0xf0: case 0x100: case 0x110: + case 0x120: break; default: return NULL; @@ -728,6 +743,7 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATA (push, screen->nvsw->handle); switch (dev->chipset & ~0xf) { + case 0x120: case 0x110: case 0x100: case 0xf0: @@ -779,6 +795,9 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATA (push, screen->fence.bo->offset + 16); switch (dev->chipset & ~0xf) { + case 0x120: + obj_class = GM200_3D_CLASS; + break; case 0x110: obj_class = GM107_3D_CLASS; break; @@ -860,8 +879,7 @@ nvc0_screen_create(struct nouveau_device *dev) BEGIN_NVC0(push, 
NVC0_3D(SHADE_MODEL), 1); PUSH_DATA (push, NVC0_3D_SHADE_MODEL_SMOOTH); if (screen->eng3d->oclass < NVE4_3D_CLASS) { - BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1); - PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP); + IMMED_NVC0(push, NVC0_3D(TEX_MISC), 0); } else { BEGIN_NVC0(push, NVE4_3D(TEX_CB_INDEX), 1); PUSH_DATA (push, 15); @@ -887,7 +905,7 @@ nvc0_screen_create(struct nouveau_device *dev) */ nouveau_heap_init(&screen->text_heap, 0, (1 << 20) - 0x100); - ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 6 << 16, NULL, + ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 1 << 12, 7 << 16, NULL, &screen->uniform_bo); if (ret) goto fail; @@ -899,8 +917,8 @@ nvc0_screen_create(struct nouveau_device *dev) /* auxiliary constants (6 user clip planes, base instance id) */ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 1024); - PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 10)); - PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 10)); + PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (i << 10)); + PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (i << 10)); BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1); PUSH_DATA (push, (15 << 4) | 1); if (screen->eng3d->oclass >= NVE4_3D_CLASS) { @@ -920,8 +938,8 @@ nvc0_screen_create(struct nouveau_device *dev) /* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 256); - PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10)); - PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10)); + PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (6 << 10)); + PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (6 << 10)); BEGIN_1IC0(push, NVC0_3D(CB_POS), 5); PUSH_DATA (push, 0); PUSH_DATAf(push, 0.0f); @@ -929,8 +947,8 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATAf(push, 0.0f); PUSH_DATAf(push, 0.0f); BEGIN_NVC0(push, 
NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2); - PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 10)); - PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 10)); + PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (6 << 10)); + PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (6 << 10)); if (screen->base.drm->version >= 0x01000101) { ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value); @@ -988,6 +1006,14 @@ nvc0_screen_create(struct nouveau_device *dev) PUSH_DATAh(push, screen->txc->offset); PUSH_DATA (push, screen->txc->offset); PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1); + if (screen->eng3d->oclass >= GM107_3D_CLASS) { + screen->tic.maxwell = true; + if (screen->eng3d->oclass == GM107_3D_CLASS) { + screen->tic.maxwell = + debug_get_bool_option("NOUVEAU_MAXWELL_TIC", true); + IMMED_NVC0(push, SUBC_3D(0x0f10), screen->tic.maxwell); + } + } BEGIN_NVC0(push, NVC0_3D(TSC_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->txc->offset + 65536); @@ -1051,6 +1077,7 @@ nvc0_screen_create(struct nouveau_device *dev) MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count); MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count); MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write); + MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect); BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1); PUSH_DATA (push, 1); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h index 1a56177815c..8487abcf999 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h @@ -51,8 +51,9 @@ struct nvc0_graph_state { uint8_t c14_bound; /* whether immediate array constbuf is bound */ uint8_t clip_enable; uint32_t clip_mode; - uint32_t uniform_buffer_bound[5]; + uint32_t uniform_buffer_bound[6]; struct nvc0_transform_feedback_state *tfb; + bool 
seamless_cube_map; }; struct nvc0_screen { @@ -83,6 +84,7 @@ struct nvc0_screen { void **entries; int next; uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32]; + bool maxwell; } tic; struct { @@ -164,12 +166,27 @@ nvc0_resource_validate(struct nv04_resource *res, uint32_t flags) struct nvc0_format { uint32_t rt; - uint32_t tic; + struct { + unsigned format:7; + unsigned type_r:3; + unsigned type_g:3; + unsigned type_b:3; + unsigned type_a:3; + unsigned src_x:3; + unsigned src_y:3; + unsigned src_z:3; + unsigned src_w:3; + } tic; + uint32_t usage; +}; + +struct nvc0_vertex_format { uint32_t vtx; uint32_t usage; }; extern const struct nvc0_format nvc0_format_table[]; +extern const struct nvc0_vertex_format nvc0_vertex_format[]; static inline void nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nv50_tic_entry *tic) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c index 382a18ef153..2f46c436a4c 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -37,11 +37,11 @@ nvc0_program_update_context_state(struct nvc0_context *nvc0, if (prog && prog->need_tls) { const uint32_t flags = NV_VRAM_DOMAIN(&nvc0->screen->base) | NOUVEAU_BO_RDWR; if (!nvc0->state.tls_required) - BCTX_REFN_bo(nvc0->bufctx_3d, TLS, flags, nvc0->screen->tls); + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_TLS, flags, nvc0->screen->tls); nvc0->state.tls_required |= 1 << stage; } else { if (nvc0->state.tls_required == (1 << stage)) - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TLS); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TLS); nvc0->state.tls_required &= ~(1 << stage); } @@ -152,7 +152,7 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0) NVC0_3D_SHADE_MODEL_SMOOTH); } - if (fp->mem && !(nvc0->dirty & NVC0_NEW_FRAGPROG)) { + if (fp->mem && !(nvc0->dirty_3d & NVC0_NEW_3D_FRAGPROG)) { return; } @@ -292,9 +292,9 @@ nvc0_tfb_validate(struct nvc0_context *nvc0) 
} nvc0->state.tfb = tfb; - if (!(nvc0->dirty & NVC0_NEW_TFB_TARGETS)) + if (!(nvc0->dirty_3d & NVC0_NEW_3D_TFB_TARGETS)) return; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TFB); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TFB); for (b = 0; b < nvc0->num_tfbbufs; ++b) { struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]); @@ -310,7 +310,7 @@ nvc0_tfb_validate(struct nvc0_context *nvc0) buf = nv04_resource(targ->pipe.buffer); - BCTX_REFN(nvc0->bufctx_3d, TFB, buf, WR); + BCTX_REFN(nvc0->bufctx_3d, 3D_TFB, buf, WR); if (!(nvc0->tfbbuf_dirty & (1 << b))) continue; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index cf3d3497c78..7ccce9ff6bf 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -21,6 +21,7 @@ */ #include "pipe/p_defines.h" +#include "util/u_framebuffer.h" #include "util/u_helpers.h" #include "util/u_inlines.h" #include "util/u_transfer.h" @@ -32,7 +33,6 @@ #include "nvc0/nvc0_query_hw.h" #include "nvc0/nvc0_3d.xml.h" -#include "nv50/nv50_texture.xml.h" #include "nouveau_gldefs.h" @@ -186,7 +186,7 @@ nvc0_blend_state_bind(struct pipe_context *pipe, void *hwcso) struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->blend = hwcso; - nvc0->dirty |= NVC0_NEW_BLEND; + nvc0->dirty_3d |= NVC0_NEW_3D_BLEND; } static void @@ -315,7 +315,7 @@ nvc0_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->rast = hwcso; - nvc0->dirty |= NVC0_NEW_RASTERIZER; + nvc0->dirty_3d |= NVC0_NEW_3D_RASTERIZER; } static void @@ -393,7 +393,7 @@ nvc0_zsa_state_bind(struct pipe_context *pipe, void *hwcso) struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->zsa = hwcso; - nvc0->dirty |= NVC0_NEW_ZSA; + nvc0->dirty_3d |= NVC0_NEW_3D_ZSA; } static void @@ -449,7 +449,7 @@ nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, int s, nvc0->num_samplers[s] = nr; - nvc0->dirty |= 
NVC0_NEW_SAMPLERS; + nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS; } static void @@ -566,7 +566,7 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s, } if (old) { - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i)); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i)); nvc0_screen_tic_unlock(nvc0->screen, old); } @@ -576,7 +576,7 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s, for (i = nr; i < nvc0->num_textures[s]; ++i) { struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]); if (old) { - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i)); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i)); nvc0_screen_tic_unlock(nvc0->screen, old); pipe_sampler_view_reference(&nvc0->textures[s][i], NULL); } @@ -584,7 +584,7 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s, nvc0->num_textures[s] = nr; - nvc0->dirty |= NVC0_NEW_TEXTURES; + nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES; } static void @@ -594,7 +594,7 @@ nvc0_stage_set_sampler_views_range(struct nvc0_context *nvc0, const unsigned s, { struct nouveau_bufctx *bctx = (s == 5) ? nvc0->bufctx_cp : nvc0->bufctx_3d; const unsigned end = start + nr; - const unsigned bin = (s == 5) ? NVC0_BIND_CP_TEX(0) : NVC0_BIND_TEX(s, 0); + const unsigned bin = (s == 5) ? 
NVC0_BIND_CP_TEX(0) : NVC0_BIND_3D_TEX(s, 0); int last_valid = -1; unsigned i; @@ -733,7 +733,7 @@ nvc0_vp_state_bind(struct pipe_context *pipe, void *hwcso) struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->vertprog = hwcso; - nvc0->dirty |= NVC0_NEW_VERTPROG; + nvc0->dirty_3d |= NVC0_NEW_3D_VERTPROG; } static void * @@ -749,7 +749,7 @@ nvc0_fp_state_bind(struct pipe_context *pipe, void *hwcso) struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->fragprog = hwcso; - nvc0->dirty |= NVC0_NEW_FRAGPROG; + nvc0->dirty_3d |= NVC0_NEW_3D_FRAGPROG; } static void * @@ -765,7 +765,7 @@ nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso) struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->gmtyprog = hwcso; - nvc0->dirty |= NVC0_NEW_GMTYPROG; + nvc0->dirty_3d |= NVC0_NEW_3D_GMTYPROG; } static void * @@ -781,7 +781,7 @@ nvc0_tcp_state_bind(struct pipe_context *pipe, void *hwcso) struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->tctlprog = hwcso; - nvc0->dirty |= NVC0_NEW_TCTLPROG; + nvc0->dirty_3d |= NVC0_NEW_3D_TCTLPROG; } static void * @@ -797,7 +797,7 @@ nvc0_tep_state_bind(struct pipe_context *pipe, void *hwcso) struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->tevlprog = hwcso; - nvc0->dirty |= NVC0_NEW_TEVLPROG; + nvc0->dirty_3d |= NVC0_NEW_3D_TEVLPROG; } static void * @@ -839,7 +839,9 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, const unsigned i = index; if (unlikely(shader == PIPE_SHADER_COMPUTE)) { - assert(!cb || !cb->user_buffer); + if (nvc0->constbuf[s][i].user) + nvc0->constbuf[s][i].u.buf = NULL; + else if (nvc0->constbuf[s][i].u.buf) nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_CB(i)); @@ -849,9 +851,9 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, nvc0->constbuf[s][i].u.buf = NULL; else if (nvc0->constbuf[s][i].u.buf) - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i)); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_CB(s, i)); - nvc0->dirty |= 
NVC0_NEW_CONSTBUF; + nvc0->dirty_3d |= NVC0_NEW_3D_CONSTBUF; } nvc0->constbuf_dirty[s] |= 1 << i; @@ -891,7 +893,7 @@ nvc0_set_blend_color(struct pipe_context *pipe, struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->blend_colour = *bcol; - nvc0->dirty |= NVC0_NEW_BLEND_COLOUR; + nvc0->dirty_3d |= NVC0_NEW_3D_BLEND_COLOUR; } static void @@ -901,7 +903,7 @@ nvc0_set_stencil_ref(struct pipe_context *pipe, struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->stencil_ref = *sr; - nvc0->dirty |= NVC0_NEW_STENCIL_REF; + nvc0->dirty_3d |= NVC0_NEW_3D_STENCIL_REF; } static void @@ -912,7 +914,7 @@ nvc0_set_clip_state(struct pipe_context *pipe, memcpy(nvc0->clip.ucp, clip->ucp, sizeof(clip->ucp)); - nvc0->dirty |= NVC0_NEW_CLIP; + nvc0->dirty_3d |= NVC0_NEW_3D_CLIP; } static void @@ -921,7 +923,7 @@ nvc0_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->sample_mask = sample_mask; - nvc0->dirty |= NVC0_NEW_SAMPLE_MASK; + nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLE_MASK; } static void @@ -931,7 +933,7 @@ nvc0_set_min_samples(struct pipe_context *pipe, unsigned min_samples) if (nvc0->min_samples != min_samples) { nvc0->min_samples = min_samples; - nvc0->dirty |= NVC0_NEW_MIN_SAMPLES; + nvc0->dirty_3d |= NVC0_NEW_3D_MIN_SAMPLES; } } @@ -940,23 +942,12 @@ nvc0_set_framebuffer_state(struct pipe_context *pipe, const struct pipe_framebuffer_state *fb) { struct nvc0_context *nvc0 = nvc0_context(pipe); - unsigned i; - - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); - for (i = 0; i < fb->nr_cbufs; ++i) - pipe_surface_reference(&nvc0->framebuffer.cbufs[i], fb->cbufs[i]); - for (; i < nvc0->framebuffer.nr_cbufs; ++i) - pipe_surface_reference(&nvc0->framebuffer.cbufs[i], NULL); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB); - nvc0->framebuffer.nr_cbufs = fb->nr_cbufs; + util_copy_framebuffer_state(&nvc0->framebuffer, fb); - nvc0->framebuffer.width = fb->width; - nvc0->framebuffer.height = fb->height; - - 
pipe_surface_reference(&nvc0->framebuffer.zsbuf, fb->zsbuf); - - nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; + nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER; } static void @@ -966,7 +957,7 @@ nvc0_set_polygon_stipple(struct pipe_context *pipe, struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->stipple = *stipple; - nvc0->dirty |= NVC0_NEW_STIPPLE; + nvc0->dirty_3d |= NVC0_NEW_3D_STIPPLE; } static void @@ -984,7 +975,7 @@ nvc0_set_scissor_states(struct pipe_context *pipe, continue; nvc0->scissors[start_slot + i] = scissor[i]; nvc0->scissors_dirty |= 1 << (start_slot + i); - nvc0->dirty |= NVC0_NEW_SCISSOR; + nvc0->dirty_3d |= NVC0_NEW_3D_SCISSOR; } } @@ -1003,7 +994,7 @@ nvc0_set_viewport_states(struct pipe_context *pipe, continue; nvc0->viewports[start_slot + i] = vpt[i]; nvc0->viewports_dirty |= 1 << (start_slot + i); - nvc0->dirty |= NVC0_NEW_VIEWPORT; + nvc0->dirty_3d |= NVC0_NEW_3D_VIEWPORT; } } @@ -1017,7 +1008,7 @@ nvc0_set_tess_state(struct pipe_context *pipe, memcpy(nvc0->default_tess_outer, default_tess_outer, 4 * sizeof(float)); memcpy(nvc0->default_tess_inner, default_tess_inner, 2 * sizeof(float)); - nvc0->dirty |= NVC0_NEW_TESSFACTOR; + nvc0->dirty_3d |= NVC0_NEW_3D_TESSFACTOR; } static void @@ -1028,8 +1019,8 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe, struct nvc0_context *nvc0 = nvc0_context(pipe); unsigned i; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX); - nvc0->dirty |= NVC0_NEW_ARRAYS; + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX); + nvc0->dirty_3d |= NVC0_NEW_3D_ARRAYS; util_set_vertex_buffers_count(nvc0->vtxbuf, &nvc0->num_vtxbufs, vb, start_slot, count); @@ -1071,20 +1062,20 @@ nvc0_set_index_buffer(struct pipe_context *pipe, struct nvc0_context *nvc0 = nvc0_context(pipe); if (nvc0->idxbuf.buffer) - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_IDX); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_IDX); if (ib) { pipe_resource_reference(&nvc0->idxbuf.buffer, ib->buffer); nvc0->idxbuf.index_size = ib->index_size; 
if (ib->buffer) { nvc0->idxbuf.offset = ib->offset; - nvc0->dirty |= NVC0_NEW_IDXBUF; + nvc0->dirty_3d |= NVC0_NEW_3D_IDXBUF; } else { nvc0->idxbuf.user_buffer = ib->user_buffer; - nvc0->dirty &= ~NVC0_NEW_IDXBUF; + nvc0->dirty_3d &= ~NVC0_NEW_3D_IDXBUF; } } else { - nvc0->dirty &= ~NVC0_NEW_IDXBUF; + nvc0->dirty_3d &= ~NVC0_NEW_3D_IDXBUF; pipe_resource_reference(&nvc0->idxbuf.buffer, NULL); } } @@ -1095,7 +1086,7 @@ nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso) struct nvc0_context *nvc0 = nvc0_context(pipe); nvc0->vertex = hwcso; - nvc0->dirty |= NVC0_NEW_VERTEX; + nvc0->dirty_3d |= NVC0_NEW_3D_VERTEX; } static struct pipe_stream_output_target * @@ -1194,7 +1185,7 @@ nvc0_set_transform_feedback_targets(struct pipe_context *pipe, nvc0->num_tfbbufs = num_targets; if (nvc0->tfbbuf_dirty) - nvc0->dirty |= NVC0_NEW_TFB_TARGETS; + nvc0->dirty_3d |= NVC0_NEW_3D_TFB_TARGETS; } static void @@ -1223,7 +1214,7 @@ nvc0_bind_surfaces_range(struct nvc0_context *nvc0, const unsigned t, nvc0->surfaces_dirty[t] |= mask; if (t == 0) - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_SUF); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_SUF); else nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF); } @@ -1241,7 +1232,7 @@ nvc0_set_compute_resources(struct pipe_context *pipe, static void nvc0_set_shader_images(struct pipe_context *pipe, unsigned shader, unsigned start_slot, unsigned count, - struct pipe_image_view **views) + struct pipe_image_view *views) { } @@ -1254,7 +1245,7 @@ nvc0_bind_buffers_range(struct nvc0_context *nvc0, const unsigned t, const unsigned mask = ((1 << nr) - 1) << start; unsigned i; - assert(t < 5); + assert(t < 6); if (pbuffers) { for (i = start; i < end; ++i) { @@ -1274,7 +1265,11 @@ nvc0_bind_buffers_range(struct nvc0_context *nvc0, const unsigned t, } nvc0->buffers_dirty[t] |= mask; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_BUF); + if (t == 5) + nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_BUF); + else + 
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_BUF); + } static void @@ -1286,7 +1281,10 @@ nvc0_set_shader_buffers(struct pipe_context *pipe, const unsigned s = nvc0_shader_stage(shader); nvc0_bind_buffers_range(nvc0_context(pipe), s, start, nr, buffers); - nvc0_context(pipe)->dirty |= NVC0_NEW_BUFFERS; + if (s == 5) + nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_BUFFERS; + else + nvc0_context(pipe)->dirty_3d |= NVC0_NEW_3D_BUFFERS; } static inline void diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index e0d8ab01776..18e79e36b85 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -3,7 +3,6 @@ #include "util/u_math.h" #include "nvc0/nvc0_context.h" -#include "nv50/nv50_defs.xml.h" #if 0 static void @@ -77,7 +76,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0) unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1; bool serialize = false; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB); BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1); PUSH_DATA (push, (076543210 << 4) | fb->nr_cbufs); @@ -142,7 +141,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0) res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING; /* only register for writing, otherwise we'd always serialize here */ - BCTX_REFN(nvc0->bufctx_3d, FB, res, WR); + BCTX_REFN(nvc0->bufctx_3d, 3D_FB, res, WR); } if (fb->zsbuf) { @@ -173,7 +172,7 @@ nvc0_validate_fb(struct nvc0_context *nvc0) mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING; mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING; - BCTX_REFN(nvc0->bufctx_3d, FB, &mt->base, WR); + BCTX_REFN(nvc0->bufctx_3d, 3D_FB, &mt->base, WR); } else { BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1); PUSH_DATA (push, 0); @@ -184,8 +183,8 @@ nvc0_validate_fb(struct nvc0_context *nvc0) ms = 1 << ms_mode; BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 1024); - 
PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 10)); - PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (4 << 10)); + PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (4 << 10)); + PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (4 << 10)); BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms); PUSH_DATA (push, 256 + 128); for (i = 0; i < ms; i++) { @@ -240,7 +239,7 @@ nvc0_validate_scissor(struct nvc0_context *nvc0) int i; struct nouveau_pushbuf *push = nvc0->base.pushbuf; - if (!(nvc0->dirty & NVC0_NEW_SCISSOR) && + if (!(nvc0->dirty_3d & NVC0_NEW_3D_SCISSOR) && nvc0->rast->pipe.scissor == nvc0->state.scissor) return; @@ -318,8 +317,8 @@ nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s) BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 1024); - PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 10)); - PUSH_DATA (push, bo->offset + (5 << 16) + (s << 10)); + PUSH_DATAh(push, bo->offset + (6 << 16) + (s << 10)); + PUSH_DATA (push, bo->offset + (6 << 16) + (s << 10)); BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1); PUSH_DATA (push, 256); PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4); @@ -368,7 +367,7 @@ nvc0_validate_clip(struct nvc0_context *nvc0) if (clip_enable && vp->vp.num_ucps < PIPE_MAX_CLIP_PLANES) nvc0_check_program_ucps(nvc0, vp, clip_enable); - if (nvc0->dirty & (NVC0_NEW_CLIP | (NVC0_NEW_VERTPROG << stage))) + if (nvc0->dirty_3d & (NVC0_NEW_3D_CLIP | (NVC0_NEW_3D_VERTPROG << stage))) if (vp->vp.num_ucps > 0 && vp->vp.num_ucps <= PIPE_MAX_CLIP_PLANES) nvc0_upload_uclip_planes(nvc0, stage); @@ -455,7 +454,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1); PUSH_DATA (push, (i << 4) | 1); - BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD); + BCTX_REFN(nvc0->bufctx_3d, 3D_CB(s, i), res, RD); nvc0->cb_dirty = 1; /* Force cache flush for UBO. 
*/ res->cb_bindings[s] |= 1 << i; @@ -468,6 +467,11 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0) } } } + + /* Invalidate all COMPUTE constbufs because they are aliased with 3D. */ + nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF; + nvc0->constbuf_dirty[5] |= nvc0->constbuf_valid[5]; + nvc0->state.uniform_buffer_bound[5] = 0; } static void @@ -479,8 +483,8 @@ nvc0_validate_buffers(struct nvc0_context *nvc0) for (s = 0; s < 5; s++) { BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 1024); - PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (s << 10)); - PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (s << 10)); + PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10)); + PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (s << 10)); BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 4 * NVC0_MAX_BUFFERS); PUSH_DATA (push, 512); for (i = 0; i < NVC0_MAX_BUFFERS; i++) { @@ -491,7 +495,7 @@ nvc0_validate_buffers(struct nvc0_context *nvc0) PUSH_DATAh(push, res->address + nvc0->buffers[s][i].buffer_offset); PUSH_DATA (push, nvc0->buffers[s][i].buffer_size); PUSH_DATA (push, 0); - BCTX_REFN(nvc0->bufctx_3d, BUF, res, RDWR); + BCTX_REFN(nvc0->bufctx_3d, 3D_BUF, res, RDWR); } else { PUSH_DATA (push, 0); PUSH_DATA (push, 0); @@ -536,6 +540,25 @@ nvc0_validate_min_samples(struct nvc0_context *nvc0) IMMED_NVC0(push, NVC0_3D(SAMPLE_SHADING), samples); } +static void +nvc0_validate_driverconst(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_screen *screen = nvc0->screen; + int i; + + for (i = 0; i < 5; ++i) { + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, 1024); + PUSH_DATAh(push, screen->uniform_bo->offset + (6 << 16) + (i << 10)); + PUSH_DATA (push, screen->uniform_bo->offset + (6 << 16) + (i << 10)); + BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1); + PUSH_DATA (push, (15 << 4) | 1); + } + + nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST; +} + void 
nvc0_validate_global_residents(struct nvc0_context *nvc0, struct nouveau_bufctx *bctx, int bin) @@ -629,35 +652,37 @@ nvc0_switch_pipe_context(struct nvc0_context *ctx_to) else ctx_to->state = ctx_to->screen->save_state; - ctx_to->dirty = ~0; + ctx_to->dirty_3d = ~0; + ctx_to->dirty_cp = ~0; ctx_to->viewports_dirty = ~0; ctx_to->scissors_dirty = ~0; - for (s = 0; s < 5; ++s) { + for (s = 0; s < 6; ++s) { ctx_to->samplers_dirty[s] = ~0; ctx_to->textures_dirty[s] = ~0; ctx_to->constbuf_dirty[s] = (1 << NVC0_MAX_PIPE_CONSTBUFS) - 1; + ctx_to->buffers_dirty[s] = ~0; } /* Reset tfb as the shader that owns it may have been deleted. */ ctx_to->state.tfb = NULL; if (!ctx_to->vertex) - ctx_to->dirty &= ~(NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS); + ctx_to->dirty_3d &= ~(NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS); if (!ctx_to->idxbuf.buffer) - ctx_to->dirty &= ~NVC0_NEW_IDXBUF; + ctx_to->dirty_3d &= ~NVC0_NEW_3D_IDXBUF; if (!ctx_to->vertprog) - ctx_to->dirty &= ~NVC0_NEW_VERTPROG; + ctx_to->dirty_3d &= ~NVC0_NEW_3D_VERTPROG; if (!ctx_to->fragprog) - ctx_to->dirty &= ~NVC0_NEW_FRAGPROG; + ctx_to->dirty_3d &= ~NVC0_NEW_3D_FRAGPROG; if (!ctx_to->blend) - ctx_to->dirty &= ~NVC0_NEW_BLEND; + ctx_to->dirty_3d &= ~NVC0_NEW_3D_BLEND; if (!ctx_to->rast) - ctx_to->dirty &= ~(NVC0_NEW_RASTERIZER | NVC0_NEW_SCISSOR); + ctx_to->dirty_3d &= ~(NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_SCISSOR); if (!ctx_to->zsa) - ctx_to->dirty &= ~NVC0_NEW_ZSA; + ctx_to->dirty_3d &= ~NVC0_NEW_3D_ZSA; ctx_to->screen->cur_ctx = ctx_to; } @@ -666,40 +691,41 @@ static struct state_validate { void (*func)(struct nvc0_context *); uint32_t states; } validate_list[] = { - { nvc0_validate_fb, NVC0_NEW_FRAMEBUFFER }, - { nvc0_validate_blend, NVC0_NEW_BLEND }, - { nvc0_validate_zsa, NVC0_NEW_ZSA }, - { nvc0_validate_sample_mask, NVC0_NEW_SAMPLE_MASK }, - { nvc0_validate_rasterizer, NVC0_NEW_RASTERIZER }, - { nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR }, - { nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF }, - { 
nvc0_validate_stipple, NVC0_NEW_STIPPLE }, - { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_RASTERIZER }, - { nvc0_validate_viewport, NVC0_NEW_VIEWPORT }, - { nvc0_vertprog_validate, NVC0_NEW_VERTPROG }, - { nvc0_tctlprog_validate, NVC0_NEW_TCTLPROG }, - { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG }, - { nvc0_validate_tess_state, NVC0_NEW_TESSFACTOR }, - { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG }, - { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG | NVC0_NEW_RASTERIZER }, - { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA | - NVC0_NEW_RASTERIZER }, - { nvc0_validate_derived_2, NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER }, - { nvc0_validate_derived_3, NVC0_NEW_BLEND | NVC0_NEW_FRAMEBUFFER }, - { nvc0_validate_clip, NVC0_NEW_CLIP | NVC0_NEW_RASTERIZER | - NVC0_NEW_VERTPROG | - NVC0_NEW_TEVLPROG | - NVC0_NEW_GMTYPROG }, - { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF }, - { nvc0_validate_textures, NVC0_NEW_TEXTURES }, - { nvc0_validate_samplers, NVC0_NEW_SAMPLERS }, - { nve4_set_tex_handles, NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS }, - { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS }, - { nvc0_validate_surfaces, NVC0_NEW_SURFACES }, - { nvc0_validate_buffers, NVC0_NEW_BUFFERS }, - { nvc0_idxbuf_validate, NVC0_NEW_IDXBUF }, - { nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG }, - { nvc0_validate_min_samples, NVC0_NEW_MIN_SAMPLES }, + { nvc0_validate_fb, NVC0_NEW_3D_FRAMEBUFFER }, + { nvc0_validate_blend, NVC0_NEW_3D_BLEND }, + { nvc0_validate_zsa, NVC0_NEW_3D_ZSA }, + { nvc0_validate_sample_mask, NVC0_NEW_3D_SAMPLE_MASK }, + { nvc0_validate_rasterizer, NVC0_NEW_3D_RASTERIZER }, + { nvc0_validate_blend_colour, NVC0_NEW_3D_BLEND_COLOUR }, + { nvc0_validate_stencil_ref, NVC0_NEW_3D_STENCIL_REF }, + { nvc0_validate_stipple, NVC0_NEW_3D_STIPPLE }, + { nvc0_validate_scissor, NVC0_NEW_3D_SCISSOR | NVC0_NEW_3D_RASTERIZER }, + { nvc0_validate_viewport, NVC0_NEW_3D_VIEWPORT }, + { nvc0_vertprog_validate, NVC0_NEW_3D_VERTPROG }, + { 
nvc0_tctlprog_validate, NVC0_NEW_3D_TCTLPROG }, + { nvc0_tevlprog_validate, NVC0_NEW_3D_TEVLPROG }, + { nvc0_validate_tess_state, NVC0_NEW_3D_TESSFACTOR }, + { nvc0_gmtyprog_validate, NVC0_NEW_3D_GMTYPROG }, + { nvc0_fragprog_validate, NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_RASTERIZER }, + { nvc0_validate_derived_1, NVC0_NEW_3D_FRAGPROG | NVC0_NEW_3D_ZSA | + NVC0_NEW_3D_RASTERIZER }, + { nvc0_validate_derived_2, NVC0_NEW_3D_ZSA | NVC0_NEW_3D_FRAMEBUFFER }, + { nvc0_validate_derived_3, NVC0_NEW_3D_BLEND | NVC0_NEW_3D_FRAMEBUFFER }, + { nvc0_validate_clip, NVC0_NEW_3D_CLIP | NVC0_NEW_3D_RASTERIZER | + NVC0_NEW_3D_VERTPROG | + NVC0_NEW_3D_TEVLPROG | + NVC0_NEW_3D_GMTYPROG }, + { nvc0_constbufs_validate, NVC0_NEW_3D_CONSTBUF }, + { nvc0_validate_textures, NVC0_NEW_3D_TEXTURES }, + { nvc0_validate_samplers, NVC0_NEW_3D_SAMPLERS }, + { nve4_set_tex_handles, NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS }, + { nvc0_vertex_arrays_validate, NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS }, + { nvc0_validate_surfaces, NVC0_NEW_3D_SURFACES }, + { nvc0_validate_buffers, NVC0_NEW_3D_BUFFERS }, + { nvc0_idxbuf_validate, NVC0_NEW_3D_IDXBUF }, + { nvc0_tfb_validate, NVC0_NEW_3D_TFB_TARGETS | NVC0_NEW_3D_GMTYPROG }, + { nvc0_validate_min_samples, NVC0_NEW_3D_MIN_SAMPLES }, + { nvc0_validate_driverconst, NVC0_NEW_3D_DRIVERCONST }, }; bool @@ -712,7 +738,7 @@ nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask) if (nvc0->screen->cur_ctx != nvc0) nvc0_switch_pipe_context(nvc0); - state_mask = nvc0->dirty & mask; + state_mask = nvc0->dirty_3d & mask; if (state_mask) { for (i = 0; i < ARRAY_SIZE(validate_list); ++i) { @@ -721,7 +747,7 @@ nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask) if (state_mask & validate->states) validate->func(nvc0); } - nvc0->dirty &= ~state_mask; + nvc0->dirty_3d &= ~state_mask; nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, false); } diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index 
e3843ca1bf1..49577969d3d 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -34,8 +34,8 @@ #include "nvc0/nvc0_context.h" #include "nvc0/nvc0_resource.h" -#include "nv50/nv50_defs.xml.h" -#include "nv50/nv50_texture.xml.h" +#include "nv50/g80_defs.xml.h" +#include "nv50/g80_texture.xml.h" /* these are used in nv50_blit.h */ #define NV50_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL @@ -54,7 +54,7 @@ nvc0_2d_format(enum pipe_format format, bool dst, bool dst_src_equal) /* A8_UNORM is treated as I8_UNORM as far as the 2D engine is concerned. */ if (!dst && unlikely(format == PIPE_FORMAT_I8_UNORM) && !dst_src_equal) - return NV50_SURFACE_FORMAT_A8_UNORM; + return G80_SURFACE_FORMAT_A8_UNORM; /* Hardware values for color formats range from 0xc0 to 0xff, * but the 2D engine doesn't support all of them. @@ -65,15 +65,15 @@ nvc0_2d_format(enum pipe_format format, bool dst, bool dst_src_equal) switch (util_format_get_blocksize(format)) { case 1: - return NV50_SURFACE_FORMAT_R8_UNORM; + return G80_SURFACE_FORMAT_R8_UNORM; case 2: - return NV50_SURFACE_FORMAT_RG8_UNORM; + return G80_SURFACE_FORMAT_RG8_UNORM; case 4: - return NV50_SURFACE_FORMAT_BGRA8_UNORM; + return G80_SURFACE_FORMAT_BGRA8_UNORM; case 8: - return NV50_SURFACE_FORMAT_RGBA16_UNORM; + return G80_SURFACE_FORMAT_RGBA16_UNORM; case 16: - return NV50_SURFACE_FORMAT_RGBA32_FLOAT; + return G80_SURFACE_FORMAT_RGBA32_FLOAT; default: assert(0); return 0; @@ -353,7 +353,7 @@ nvc0_clear_render_target(struct pipe_context *pipe, IMMED_NVC0(push, NVC0_3D(COND_MODE), nvc0->cond_condmode); - nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; + nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER; } static void @@ -609,7 +609,7 @@ nvc0_clear_buffer(struct pipe_context *pipe, data, data_size); } - nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; + nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER; } static void @@ -678,7 +678,7 @@ nvc0_clear_depth_stencil(struct pipe_context *pipe, IMMED_NVC0(push, 
NVC0_3D(COND_MODE), nvc0->cond_condmode); - nvc0->dirty |= NVC0_NEW_FRAMEBUFFER; + nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER; } void @@ -693,7 +693,7 @@ nvc0_clear(struct pipe_context *pipe, unsigned buffers, uint32_t mode = 0; /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */ - if (!nvc0_state_validate(nvc0, NVC0_NEW_FRAMEBUFFER)) + if (!nvc0_state_validate(nvc0, NVC0_NEW_3D_FRAMEBUFFER)) return; if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) { @@ -793,7 +793,7 @@ struct nvc0_blitctx struct pipe_sampler_view *texture[2]; struct nv50_tsc_entry *sampler[2]; unsigned min_samples; - uint32_t dirty; + uint32_t dirty_3d; } saved; struct nvc0_rasterizer_stateobj rast; }; @@ -871,12 +871,14 @@ nvc0_blitter_make_sampler(struct nvc0_blitter *blit) blit->sampler[0].id = -1; - blit->sampler[0].tsc[0] = NV50_TSC_0_SRGB_CONVERSION_ALLOWED | - (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPS__SHIFT) | - (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPT__SHIFT) | - (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPR__SHIFT); + blit->sampler[0].tsc[0] = G80_TSC_0_SRGB_CONVERSION | + (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_U__SHIFT) | + (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_V__SHIFT) | + (G80_TSC_WRAP_CLAMP_TO_EDGE << G80_TSC_0_ADDRESS_P__SHIFT); blit->sampler[0].tsc[1] = - NV50_TSC_1_MAGF_NEAREST | NV50_TSC_1_MINF_NEAREST | NV50_TSC_1_MIPF_NONE; + G80_TSC_1_MAG_FILTER_NEAREST | + G80_TSC_1_MIN_FILTER_NEAREST | + G80_TSC_1_MIP_FILTER_NONE; /* clamp to edge, min/max lod = 0, bilinear filtering */ @@ -884,7 +886,9 @@ nvc0_blitter_make_sampler(struct nvc0_blitter *blit) blit->sampler[1].tsc[0] = blit->sampler[0].tsc[0]; blit->sampler[1].tsc[1] = - NV50_TSC_1_MAGF_LINEAR | NV50_TSC_1_MINF_LINEAR | NV50_TSC_1_MIPF_NONE; + G80_TSC_1_MAG_FILTER_LINEAR | + G80_TSC_1_MIN_FILTER_LINEAR | + G80_TSC_1_MIP_FILTER_NONE; } static void @@ -1081,19 +1085,19 @@ nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx) nvc0->min_samples = 1; - ctx->saved.dirty = nvc0->dirty; 
+ ctx->saved.dirty_3d = nvc0->dirty_3d; nvc0->textures_dirty[4] |= 3; nvc0->samplers_dirty[4] |= 3; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 0)); - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 1)); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 0)); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 1)); - nvc0->dirty = NVC0_NEW_FRAMEBUFFER | NVC0_NEW_MIN_SAMPLES | - NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG | - NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG | - NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS; + nvc0->dirty_3d = NVC0_NEW_3D_FRAMEBUFFER | NVC0_NEW_3D_MIN_SAMPLES | + NVC0_NEW_3D_VERTPROG | NVC0_NEW_3D_FRAGPROG | + NVC0_NEW_3D_TCTLPROG | NVC0_NEW_3D_TEVLPROG | NVC0_NEW_3D_GMTYPROG | + NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS; } static void @@ -1141,20 +1145,20 @@ nvc0_blitctx_post_blit(struct nvc0_blitctx *blit) nvc0->base.pipe.render_condition(&nvc0->base.pipe, nvc0->cond_query, nvc0->cond_cond, nvc0->cond_mode); - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP); - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB); - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 0)); - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 1)); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX_TMP); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_FB); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 0)); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(4, 1)); nouveau_scratch_done(&nvc0->base); - nvc0->dirty = blit->saved.dirty | - (NVC0_NEW_FRAMEBUFFER | NVC0_NEW_SCISSOR | NVC0_NEW_SAMPLE_MASK | - NVC0_NEW_RASTERIZER | NVC0_NEW_ZSA | NVC0_NEW_BLEND | - NVC0_NEW_VIEWPORT | - NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS | - NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG | - NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG | - NVC0_NEW_TFB_TARGETS | NVC0_NEW_VERTEX | 
NVC0_NEW_ARRAYS); + nvc0->dirty_3d = blit->saved.dirty_3d | + (NVC0_NEW_3D_FRAMEBUFFER | NVC0_NEW_3D_SCISSOR | NVC0_NEW_3D_SAMPLE_MASK | + NVC0_NEW_3D_RASTERIZER | NVC0_NEW_3D_ZSA | NVC0_NEW_3D_BLEND | + NVC0_NEW_3D_VIEWPORT | + NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS | + NVC0_NEW_3D_VERTPROG | NVC0_NEW_3D_FRAGPROG | + NVC0_NEW_3D_TCTLPROG | NVC0_NEW_3D_TEVLPROG | NVC0_NEW_3D_GMTYPROG | + NVC0_NEW_3D_TFB_TARGETS | NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS); nvc0->scissors_dirty |= 1; nvc0->viewports_dirty |= 1; @@ -1263,7 +1267,8 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) return; } - BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, vtxbuf_bo); + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, + NOUVEAU_BO_GART | NOUVEAU_BO_RD, vtxbuf_bo); nouveau_pushbuf_validate(push); BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(0)), 4); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c index 7223f5aecfb..53332400a4f 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c @@ -22,35 +22,29 @@ #include "nvc0/nvc0_context.h" #include "nvc0/nvc0_resource.h" -#include "nv50/nv50_texture.xml.h" -#include "nv50/nv50_defs.xml.h" +#include "nvc0/gm107_texture.xml.h" +#include "nvc0/nvc0_compute.xml.h" +#include "nv50/g80_texture.xml.h" +#include "nv50/g80_defs.xml.h" #include "util/u_format.h" #define NVE4_TIC_ENTRY_INVALID 0x000fffff #define NVE4_TSC_ENTRY_INVALID 0xfff00000 -#define NV50_TIC_0_SWIZZLE__MASK \ - (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \ - NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK) - static inline uint32_t -nv50_tic_swizzle(uint32_t tc, unsigned swz, bool tex_int) +nv50_tic_swizzle(const struct nvc0_format *fmt, unsigned swz, bool tex_int) { switch (swz) { - case PIPE_SWIZZLE_RED: - return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT; - case PIPE_SWIZZLE_GREEN: - return (tc & 
NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT; - case PIPE_SWIZZLE_BLUE: - return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT; - case PIPE_SWIZZLE_ALPHA: - return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT; + case PIPE_SWIZZLE_RED : return fmt->tic.src_x; + case PIPE_SWIZZLE_GREEN: return fmt->tic.src_y; + case PIPE_SWIZZLE_BLUE : return fmt->tic.src_z; + case PIPE_SWIZZLE_ALPHA: return fmt->tic.src_w; case PIPE_SWIZZLE_ONE: - return tex_int ? NV50_TIC_MAP_ONE_INT : NV50_TIC_MAP_ONE_FLOAT; + return tex_int ? G80_TIC_SOURCE_ONE_INT : G80_TIC_SOURCE_ONE_FLOAT; case PIPE_SWIZZLE_ZERO: default: - return NV50_TIC_MAP_ZERO; + return G80_TIC_SOURCE_ZERO; } } @@ -67,14 +61,15 @@ nvc0_create_sampler_view(struct pipe_context *pipe, return nvc0_create_texture_view(pipe, res, templ, flags, templ->target); } -struct pipe_sampler_view * -nvc0_create_texture_view(struct pipe_context *pipe, - struct pipe_resource *texture, - const struct pipe_sampler_view *templ, - uint32_t flags, - enum pipe_texture_target target) +static struct pipe_sampler_view * +gm107_create_texture_view(struct pipe_context *pipe, + struct pipe_resource *texture, + const struct pipe_sampler_view *templ, + uint32_t flags, + enum pipe_texture_target target) { const struct util_format_description *desc; + const struct nvc0_format *fmt; uint64_t address; uint32_t *tic; uint32_t swz[4]; @@ -101,45 +96,224 @@ nvc0_create_texture_view(struct pipe_context *pipe, tic = &view->tic[0]; desc = util_format_description(view->pipe.format); + tex_int = util_format_is_pure_integer(view->pipe.format); + + fmt = &nvc0_format_table[view->pipe.format]; + swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int); + swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int); + swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int); + swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int); + + tic[0] = fmt->tic.format << GM107_TIC2_0_COMPONENTS_SIZES__SHIFT; + tic[0] |= fmt->tic.type_r 
<< GM107_TIC2_0_R_DATA_TYPE__SHIFT; + tic[0] |= fmt->tic.type_g << GM107_TIC2_0_G_DATA_TYPE__SHIFT; + tic[0] |= fmt->tic.type_b << GM107_TIC2_0_B_DATA_TYPE__SHIFT; + tic[0] |= fmt->tic.type_a << GM107_TIC2_0_A_DATA_TYPE__SHIFT; + tic[0] |= swz[0] << GM107_TIC2_0_X_SOURCE__SHIFT; + tic[0] |= swz[1] << GM107_TIC2_0_Y_SOURCE__SHIFT; + tic[0] |= swz[2] << GM107_TIC2_0_Z_SOURCE__SHIFT; + tic[0] |= swz[3] << GM107_TIC2_0_W_SOURCE__SHIFT; + + address = mt->base.address; + + tic[3] = GM107_TIC2_3_LOD_ANISO_QUALITY_2; + tic[4] = GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_V; + tic[4] |= GM107_TIC2_4_BORDER_SIZE_SAMPLER_COLOR; + + if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) + tic[4] |= GM107_TIC2_4_SRGB_CONVERSION; + + if (!(flags & NV50_TEXVIEW_SCALED_COORDS)) + tic[5] = GM107_TIC2_5_NORMALIZED_COORDS; + else + tic[5] = 0; + + /* check for linear storage type */ + if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) { + if (texture->target == PIPE_BUFFER) { + assert(!(tic[5] & GM107_TIC2_5_NORMALIZED_COORDS)); + width = view->pipe.u.buf.last_element - view->pipe.u.buf.first_element; + address += + view->pipe.u.buf.first_element * desc->block.bits / 8; + tic[2] = GM107_TIC2_2_HEADER_VERSION_ONE_D_BUFFER; + tic[3] |= width >> 16; + tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_BUFFER; + tic[4] |= width & 0xffff; + } else { + assert(!(mt->level[0].pitch & 0x1f)); + /* must be 2D texture without mip maps */ + tic[2] = GM107_TIC2_2_HEADER_VERSION_PITCH; + tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_NO_MIPMAP; + tic[3] |= mt->level[0].pitch >> 5; + tic[4] |= mt->base.base.width0 - 1; + tic[5] |= 0 << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT; + tic[5] |= mt->base.base.height0 - 1; + } + tic[1] = address; + tic[2] |= address >> 32; + tic[6] = 0; + tic[7] = 0; + return &view->pipe; + } + + tic[2] = GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR; + tic[3] |= + ((mt->level[0].tile_mode & 0x0f0) >> 4 << 3) | + ((mt->level[0].tile_mode & 0xf00) >> 8 << 6); + + depth = 
MAX2(mt->base.base.array_size, mt->base.base.depth0); - tic[0] = nvc0_format_table[view->pipe.format].tic; + if (mt->base.base.array_size > 1) { + /* there doesn't seem to be a base layer field in TIC */ + address += view->pipe.u.tex.first_layer * mt->layer_stride; + depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1; + } + tic[1] = address; + tic[2] |= address >> 32; + + switch (target) { + case PIPE_TEXTURE_1D: + tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D; + break; + case PIPE_TEXTURE_2D: + tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D; + break; + case PIPE_TEXTURE_RECT: + tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D; + break; + case PIPE_TEXTURE_3D: + tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_THREE_D; + break; + case PIPE_TEXTURE_CUBE: + depth /= 6; + tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBEMAP; + break; + case PIPE_TEXTURE_1D_ARRAY: + tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_ARRAY; + break; + case PIPE_TEXTURE_2D_ARRAY: + tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_ARRAY; + break; + case PIPE_TEXTURE_CUBE_ARRAY: + depth /= 6; + tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBE_ARRAY; + break; + default: + unreachable("unexpected/invalid texture target"); + } + + tic[3] |= (flags & NV50_TEXVIEW_FILTER_MSAA8) ? + GM107_TIC2_3_USE_HEADER_OPT_CONTROL : + GM107_TIC2_3_LOD_ANISO_QUALITY_HIGH | + GM107_TIC2_3_LOD_ISO_QUALITY_HIGH; + + if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) { + width = mt->base.base.width0 << mt->ms_x; + height = mt->base.base.height0 << mt->ms_y; + } else { + width = mt->base.base.width0; + height = mt->base.base.height0; + } + + tic[4] |= width - 1; + + tic[5] |= (height - 1) & 0xffff; + tic[5] |= (depth - 1) << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT; + tic[3] |= mt->base.base.last_level << GM107_TIC2_3_MAX_MIP_LEVEL__SHIFT; + + /* sampling points: (?) 
*/ + if ((flags & NV50_TEXVIEW_ACCESS_RESOLVE) && mt->ms_x > 1) { + tic[6] = GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO; + tic[6] |= GM107_TIC2_6_MAX_ANISOTROPY_2_TO_1; + } else { + tic[6] = GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_TWO; + tic[6] |= GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_ONE; + } + + tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level; + tic[7] |= mt->ms_mode << GM107_TIC2_7_MULTI_SAMPLE_COUNT__SHIFT; + + return &view->pipe; +} + +static struct pipe_sampler_view * +gf100_create_texture_view(struct pipe_context *pipe, + struct pipe_resource *texture, + const struct pipe_sampler_view *templ, + uint32_t flags, + enum pipe_texture_target target) +{ + const struct util_format_description *desc; + const struct nvc0_format *fmt; + uint64_t address; + uint32_t *tic; + uint32_t swz[4]; + uint32_t width, height; + uint32_t depth; + struct nv50_tic_entry *view; + struct nv50_miptree *mt; + bool tex_int; + + view = MALLOC_STRUCT(nv50_tic_entry); + if (!view) + return NULL; + mt = nv50_miptree(texture); + + view->pipe = *templ; + view->pipe.reference.count = 1; + view->pipe.texture = NULL; + view->pipe.context = pipe; + + view->id = -1; + + pipe_resource_reference(&view->pipe.texture, texture); + + tic = &view->tic[0]; + + desc = util_format_description(view->pipe.format); + + fmt = &nvc0_format_table[view->pipe.format]; tex_int = util_format_is_pure_integer(view->pipe.format); - swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int); - swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int); - swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int); - swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int); - tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) | - (swz[0] << NV50_TIC_0_MAPR__SHIFT) | - (swz[1] << NV50_TIC_0_MAPG__SHIFT) | - (swz[2] << NV50_TIC_0_MAPB__SHIFT) | - (swz[3] << NV50_TIC_0_MAPA__SHIFT); + swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int); + swz[1] = 
nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int); + swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int); + swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int); + tic[0] = (fmt->tic.format << G80_TIC_0_COMPONENTS_SIZES__SHIFT) | + (fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) | + (fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) | + (fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) | + (fmt->tic.type_a << G80_TIC_0_A_DATA_TYPE__SHIFT) | + (swz[0] << G80_TIC_0_X_SOURCE__SHIFT) | + (swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) | + (swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) | + (swz[3] << G80_TIC_0_W_SOURCE__SHIFT); address = mt->base.address; - tic[2] = 0x10001000 | NV50_TIC_2_NO_BORDER; + tic[2] = 0x10001000 | G80_TIC_2_BORDER_SOURCE_COLOR; if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) - tic[2] |= NV50_TIC_2_COLORSPACE_SRGB; + tic[2] |= G80_TIC_2_SRGB_CONVERSION; if (!(flags & NV50_TEXVIEW_SCALED_COORDS)) - tic[2] |= NV50_TIC_2_NORMALIZED_COORDS; + tic[2] |= G80_TIC_2_NORMALIZED_COORDS; /* check for linear storage type */ if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) { if (texture->target == PIPE_BUFFER) { - assert(!(tic[2] & NV50_TIC_2_NORMALIZED_COORDS)); + assert(!(tic[2] & G80_TIC_2_NORMALIZED_COORDS)); address += view->pipe.u.buf.first_element * desc->block.bits / 8; - tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_BUFFER; + tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER; tic[3] = 0; tic[4] = /* width */ view->pipe.u.buf.last_element - view->pipe.u.buf.first_element + 1; tic[5] = 0; } else { /* must be 2D texture without mip maps */ - tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_RECT; + tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP; tic[3] = mt->level[0].pitch; tic[4] = mt->base.base.width0; tic[5] = (1 << 16) | mt->base.base.height0; @@ -167,30 +341,30 @@ nvc0_create_texture_view(struct pipe_context *pipe, switch (target) { case PIPE_TEXTURE_1D: - tic[2] |= 
NV50_TIC_2_TARGET_1D; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D; break; case PIPE_TEXTURE_2D: - tic[2] |= NV50_TIC_2_TARGET_2D; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D; break; case PIPE_TEXTURE_RECT: - tic[2] |= NV50_TIC_2_TARGET_2D; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D; break; case PIPE_TEXTURE_3D: - tic[2] |= NV50_TIC_2_TARGET_3D; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_THREE_D; break; case PIPE_TEXTURE_CUBE: depth /= 6; - tic[2] |= NV50_TIC_2_TARGET_CUBE; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBEMAP; break; case PIPE_TEXTURE_1D_ARRAY: - tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY; break; case PIPE_TEXTURE_2D_ARRAY: - tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY; break; case PIPE_TEXTURE_CUBE_ARRAY: depth /= 6; - tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY; + tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY; break; default: unreachable("unexpected/invalid texture target"); @@ -224,6 +398,18 @@ nvc0_create_texture_view(struct pipe_context *pipe, return &view->pipe; } +struct pipe_sampler_view * +nvc0_create_texture_view(struct pipe_context *pipe, + struct pipe_resource *texture, + const struct pipe_sampler_view *templ, + uint32_t flags, + enum pipe_texture_target target) +{ + if (nvc0_context(pipe)->screen->tic.maxwell) + return gm107_create_texture_view(pipe, texture, templ, flags, target); + return gf100_create_texture_view(pipe, texture, templ, flags, target); +} + static void nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic, struct nv04_resource *res) @@ -244,7 +430,7 @@ nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic, tic->tic[2] |= address >> 32; } -static bool +bool nvc0_validate_tic(struct nvc0_context *nvc0, int s) { uint32_t commands[32]; @@ -285,7 +471,10 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s) need_flush = true; } else if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { - BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1); + if 
(unlikely(s == 5)) + BEGIN_NVC0(push, NVC0_CP(TEX_CACHE_CTL), 1); + else + BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1); PUSH_DATA (push, (tic->id << 4) | 1); NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1); } @@ -298,7 +487,10 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s) continue; commands[n++] = (tic->id << 9) | (i << 1) | 1; - BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD); + if (unlikely(s == 5)) + BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD); + else + BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD); } for (; i < nvc0->state.num_textures[s]; ++i) commands[n++] = (i << 1) | 0; @@ -306,7 +498,10 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s) nvc0->state.num_textures[s] = nvc0->num_textures[s]; if (n) { - BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n); + if (unlikely(s == 5)) + BEGIN_NIC0(push, NVC0_CP(BIND_TIC), n); + else + BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n); PUSH_DATAp(push, commands, n); } nvc0->textures_dirty[s] = 0; @@ -362,7 +557,7 @@ nve4_validate_tic(struct nvc0_context *nvc0, unsigned s) nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID; nvc0->tex_handles[s][i] |= tic->id; if (dirty) - BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD); + BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD); } for (; i < nvc0->state.num_textures[s]; ++i) { nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; @@ -392,7 +587,7 @@ void nvc0_validate_textures(struct nvc0_context *nvc0) } } -static bool +bool nvc0_validate_tsc(struct nvc0_context *nvc0, int s) { uint32_t commands[16]; @@ -410,6 +605,7 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s) commands[n++] = (i << 4) | 0; continue; } + nvc0->seamless_cube_map = tsc->seamless_cube_map; if (tsc->id < 0) { tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc); @@ -428,7 +624,10 @@ nvc0_validate_tsc(struct nvc0_context *nvc0, int s) nvc0->state.num_samplers[s] = nvc0->num_samplers[s]; if (n) { - BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n); + if (unlikely(s == 5)) + BEGIN_NIC0(push, 
NVC0_CP(BIND_TSC), n); + else + BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n); PUSH_DATAp(push, commands, n); } nvc0->samplers_dirty[s] = 0; @@ -513,7 +712,7 @@ nve4_set_tex_handles(struct nvc0_context *nvc0) if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) return; - address = nvc0->screen->uniform_bo->offset + (5 << 16); + address = nvc0->screen->uniform_bo->offset + (6 << 16); for (s = 0; s < 5; ++s, address += (1 << 10)) { uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s]; @@ -686,45 +885,45 @@ nvc0_validate_surfaces(struct nvc0_context *nvc0) static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] = { - [PIPE_FORMAT_R32G32B32A32_FLOAT] = NVE4_IMAGE_FORMAT_RGBA32_FLOAT, - [PIPE_FORMAT_R32G32B32A32_SINT] = NVE4_IMAGE_FORMAT_RGBA32_SINT, - [PIPE_FORMAT_R32G32B32A32_UINT] = NVE4_IMAGE_FORMAT_RGBA32_UINT, - [PIPE_FORMAT_R16G16B16A16_FLOAT] = NVE4_IMAGE_FORMAT_RGBA16_FLOAT, - [PIPE_FORMAT_R16G16B16A16_UNORM] = NVE4_IMAGE_FORMAT_RGBA16_UNORM, - [PIPE_FORMAT_R16G16B16A16_SNORM] = NVE4_IMAGE_FORMAT_RGBA16_SNORM, - [PIPE_FORMAT_R16G16B16A16_SINT] = NVE4_IMAGE_FORMAT_RGBA16_SINT, - [PIPE_FORMAT_R16G16B16A16_UINT] = NVE4_IMAGE_FORMAT_RGBA16_UINT, - [PIPE_FORMAT_R8G8B8A8_UNORM] = NVE4_IMAGE_FORMAT_RGBA8_UNORM, - [PIPE_FORMAT_R8G8B8A8_SNORM] = NVE4_IMAGE_FORMAT_RGBA8_SNORM, - [PIPE_FORMAT_R8G8B8A8_SINT] = NVE4_IMAGE_FORMAT_RGBA8_SINT, - [PIPE_FORMAT_R8G8B8A8_UINT] = NVE4_IMAGE_FORMAT_RGBA8_UINT, - [PIPE_FORMAT_R11G11B10_FLOAT] = NVE4_IMAGE_FORMAT_R11G11B10_FLOAT, - [PIPE_FORMAT_R10G10B10A2_UNORM] = NVE4_IMAGE_FORMAT_RGB10_A2_UNORM, -/* [PIPE_FORMAT_R10G10B10A2_UINT] = NVE4_IMAGE_FORMAT_RGB10_A2_UINT, */ - [PIPE_FORMAT_R32G32_FLOAT] = NVE4_IMAGE_FORMAT_RG32_FLOAT, - [PIPE_FORMAT_R32G32_SINT] = NVE4_IMAGE_FORMAT_RG32_SINT, - [PIPE_FORMAT_R32G32_UINT] = NVE4_IMAGE_FORMAT_RG32_UINT, - [PIPE_FORMAT_R16G16_FLOAT] = NVE4_IMAGE_FORMAT_RG16_FLOAT, - [PIPE_FORMAT_R16G16_UNORM] = NVE4_IMAGE_FORMAT_RG16_UNORM, - [PIPE_FORMAT_R16G16_SNORM] = NVE4_IMAGE_FORMAT_RG16_SNORM, 
- [PIPE_FORMAT_R16G16_SINT] = NVE4_IMAGE_FORMAT_RG16_SINT, - [PIPE_FORMAT_R16G16_UINT] = NVE4_IMAGE_FORMAT_RG16_UINT, - [PIPE_FORMAT_R8G8_UNORM] = NVE4_IMAGE_FORMAT_RG8_UNORM, - [PIPE_FORMAT_R8G8_SNORM] = NVE4_IMAGE_FORMAT_RG8_SNORM, - [PIPE_FORMAT_R8G8_SINT] = NVE4_IMAGE_FORMAT_RG8_SINT, - [PIPE_FORMAT_R8G8_UINT] = NVE4_IMAGE_FORMAT_RG8_UINT, - [PIPE_FORMAT_R32_FLOAT] = NVE4_IMAGE_FORMAT_R32_FLOAT, - [PIPE_FORMAT_R32_SINT] = NVE4_IMAGE_FORMAT_R32_SINT, - [PIPE_FORMAT_R32_UINT] = NVE4_IMAGE_FORMAT_R32_UINT, - [PIPE_FORMAT_R16_FLOAT] = NVE4_IMAGE_FORMAT_R16_FLOAT, - [PIPE_FORMAT_R16_UNORM] = NVE4_IMAGE_FORMAT_R16_UNORM, - [PIPE_FORMAT_R16_SNORM] = NVE4_IMAGE_FORMAT_R16_SNORM, - [PIPE_FORMAT_R16_SINT] = NVE4_IMAGE_FORMAT_R16_SINT, - [PIPE_FORMAT_R16_UINT] = NVE4_IMAGE_FORMAT_R16_UINT, - [PIPE_FORMAT_R8_UNORM] = NVE4_IMAGE_FORMAT_R8_UNORM, - [PIPE_FORMAT_R8_SNORM] = NVE4_IMAGE_FORMAT_R8_SNORM, - [PIPE_FORMAT_R8_SINT] = NVE4_IMAGE_FORMAT_R8_SINT, - [PIPE_FORMAT_R8_UINT] = NVE4_IMAGE_FORMAT_R8_UINT, + [PIPE_FORMAT_R32G32B32A32_FLOAT] = GK104_IMAGE_FORMAT_RGBA32_FLOAT, + [PIPE_FORMAT_R32G32B32A32_SINT] = GK104_IMAGE_FORMAT_RGBA32_SINT, + [PIPE_FORMAT_R32G32B32A32_UINT] = GK104_IMAGE_FORMAT_RGBA32_UINT, + [PIPE_FORMAT_R16G16B16A16_FLOAT] = GK104_IMAGE_FORMAT_RGBA16_FLOAT, + [PIPE_FORMAT_R16G16B16A16_UNORM] = GK104_IMAGE_FORMAT_RGBA16_UNORM, + [PIPE_FORMAT_R16G16B16A16_SNORM] = GK104_IMAGE_FORMAT_RGBA16_SNORM, + [PIPE_FORMAT_R16G16B16A16_SINT] = GK104_IMAGE_FORMAT_RGBA16_SINT, + [PIPE_FORMAT_R16G16B16A16_UINT] = GK104_IMAGE_FORMAT_RGBA16_UINT, + [PIPE_FORMAT_R8G8B8A8_UNORM] = GK104_IMAGE_FORMAT_RGBA8_UNORM, + [PIPE_FORMAT_R8G8B8A8_SNORM] = GK104_IMAGE_FORMAT_RGBA8_SNORM, + [PIPE_FORMAT_R8G8B8A8_SINT] = GK104_IMAGE_FORMAT_RGBA8_SINT, + [PIPE_FORMAT_R8G8B8A8_UINT] = GK104_IMAGE_FORMAT_RGBA8_UINT, + [PIPE_FORMAT_R11G11B10_FLOAT] = GK104_IMAGE_FORMAT_R11G11B10_FLOAT, + [PIPE_FORMAT_R10G10B10A2_UNORM] = GK104_IMAGE_FORMAT_RGB10_A2_UNORM, +/* [PIPE_FORMAT_R10G10B10A2_UINT] = 
GK104_IMAGE_FORMAT_RGB10_A2_UINT, */ + [PIPE_FORMAT_R32G32_FLOAT] = GK104_IMAGE_FORMAT_RG32_FLOAT, + [PIPE_FORMAT_R32G32_SINT] = GK104_IMAGE_FORMAT_RG32_SINT, + [PIPE_FORMAT_R32G32_UINT] = GK104_IMAGE_FORMAT_RG32_UINT, + [PIPE_FORMAT_R16G16_FLOAT] = GK104_IMAGE_FORMAT_RG16_FLOAT, + [PIPE_FORMAT_R16G16_UNORM] = GK104_IMAGE_FORMAT_RG16_UNORM, + [PIPE_FORMAT_R16G16_SNORM] = GK104_IMAGE_FORMAT_RG16_SNORM, + [PIPE_FORMAT_R16G16_SINT] = GK104_IMAGE_FORMAT_RG16_SINT, + [PIPE_FORMAT_R16G16_UINT] = GK104_IMAGE_FORMAT_RG16_UINT, + [PIPE_FORMAT_R8G8_UNORM] = GK104_IMAGE_FORMAT_RG8_UNORM, + [PIPE_FORMAT_R8G8_SNORM] = GK104_IMAGE_FORMAT_RG8_SNORM, + [PIPE_FORMAT_R8G8_SINT] = GK104_IMAGE_FORMAT_RG8_SINT, + [PIPE_FORMAT_R8G8_UINT] = GK104_IMAGE_FORMAT_RG8_UINT, + [PIPE_FORMAT_R32_FLOAT] = GK104_IMAGE_FORMAT_R32_FLOAT, + [PIPE_FORMAT_R32_SINT] = GK104_IMAGE_FORMAT_R32_SINT, + [PIPE_FORMAT_R32_UINT] = GK104_IMAGE_FORMAT_R32_UINT, + [PIPE_FORMAT_R16_FLOAT] = GK104_IMAGE_FORMAT_R16_FLOAT, + [PIPE_FORMAT_R16_UNORM] = GK104_IMAGE_FORMAT_R16_UNORM, + [PIPE_FORMAT_R16_SNORM] = GK104_IMAGE_FORMAT_R16_SNORM, + [PIPE_FORMAT_R16_SINT] = GK104_IMAGE_FORMAT_R16_SINT, + [PIPE_FORMAT_R16_UINT] = GK104_IMAGE_FORMAT_R16_UINT, + [PIPE_FORMAT_R8_UNORM] = GK104_IMAGE_FORMAT_R8_UNORM, + [PIPE_FORMAT_R8_SNORM] = GK104_IMAGE_FORMAT_R8_SNORM, + [PIPE_FORMAT_R8_SINT] = GK104_IMAGE_FORMAT_R8_SINT, + [PIPE_FORMAT_R8_UINT] = GK104_IMAGE_FORMAT_R8_UINT, }; /* Auxiliary format description values for surface instructions. 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c index 279c7e93cc8..24d23d29bbf 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c @@ -3,8 +3,6 @@ #include "nvc0/nvc0_context.h" -#include "nv50/nv50_defs.xml.h" - struct nvc0_transfer { struct pipe_transfer base; struct nv50_m2mf_rect rect[2]; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c index 032b3c125cf..647aa10ec35 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c @@ -80,7 +80,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe, enum pipe_format fmt = ve->src_format; so->element[i].pipe = elements[i]; - so->element[i].state = nvc0_format_table[fmt].vtx; + so->element[i].state = nvc0_vertex_format[fmt].vtx; if (!so->element[i].state) { switch (util_format_get_nr_components(fmt)) { @@ -93,7 +93,7 @@ nvc0_vertex_state_create(struct pipe_context *pipe, FREE(so); return NULL; } - so->element[i].state = nvc0_format_table[fmt].vtx; + so->element[i].state = nvc0_vertex_format[fmt].vtx; so->need_conversion = true; pipe_debug_message(&nouveau_context(pipe)->debug, FALLBACK, "Converting vertex element %d, no hw format %s", @@ -222,7 +222,7 @@ static inline void nvc0_release_user_vbufs(struct nvc0_context *nvc0) { if (nvc0->vbo_user) { - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX_TMP); nouveau_scratch_done(&nvc0->base); } } @@ -257,7 +257,7 @@ nvc0_update_user_vbufs(struct nvc0_context *nvc0) address[b] = nouveau_scratch_data(&nvc0->base, vb->user_buffer, base, size, &bo); if (bo) - BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, bo_flags, bo); + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, bo_flags, bo); NOUVEAU_DRV_STAT(&nvc0->screen->base, user_buffer_upload_bytes, size); } @@ -292,7 +292,7 @@ 
nvc0_update_user_vbufs_shared(struct nvc0_context *nvc0) address = nouveau_scratch_data(&nvc0->base, nvc0->vtxbuf[b].user_buffer, base, size, &bo); if (bo) - BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, bo_flags, bo); + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, bo_flags, bo); BEGIN_1IC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_SELECT), 5); PUSH_DATA (push, b); @@ -368,7 +368,7 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0) if (!(refd & (1 << b))) { refd |= 1 << b; - BCTX_REFN(nvc0->bufctx_3d, VTX, res, RD); + BCTX_REFN(nvc0->bufctx_3d, 3D_VTX, res, RD); } } if (nvc0->vbo_user) @@ -412,7 +412,7 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0) PUSH_DATAh(push, buf->address + limit); PUSH_DATA (push, buf->address + limit); - BCTX_REFN(nvc0->bufctx_3d, VTX, buf, RD); + BCTX_REFN(nvc0->bufctx_3d, 3D_VTX, buf, RD); } /* If there are more elements than buffers, we might not have unset * fetching on the later elements. @@ -435,7 +435,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) uint8_t vbo_mode; bool update_vertex; - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX); assert(vertex); if (unlikely(vertex->need_conversion) || @@ -446,7 +446,7 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0) } const_vbos = vbo_mode ? 
0 : nvc0->constant_vbos; - update_vertex = (nvc0->dirty & NVC0_NEW_VERTEX) || + update_vertex = (nvc0->dirty_3d & NVC0_NEW_3D_VERTEX) || (const_vbos != nvc0->state.constant_vbos) || (vbo_mode != nvc0->state.vbo_mode); @@ -537,7 +537,7 @@ nvc0_idxbuf_validate(struct nvc0_context *nvc0) PUSH_DATA (push, buf->address + buf->base.width0 - 1); PUSH_DATA (push, nvc0->idxbuf.index_size >> 1); - BCTX_REFN(nvc0->bufctx_3d, IDX, buf, RD); + BCTX_REFN(nvc0->bufctx_3d, 3D_IDX, buf, RD); } #define NVC0_PRIM_GL_CASE(n) \ @@ -833,8 +833,10 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info) /* Queue things up to let the macros write params to the driver constbuf */ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 512); - PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9)); - PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9)); + PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9)); + PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9)); + BEGIN_NVC0(push, NVC0_3D(CB_POS), 1); + PUSH_DATA (push, 256 + 128); if (info->indexed) { assert(nvc0->idxbuf.buffer); @@ -947,12 +949,12 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->indexed && (nvc0->vb_elt_limit >= (info->count * 2)); /* Check whether we want to switch vertex-submission mode. 
*/ - if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_ARRAYS | NVC0_NEW_VERTEX))) { + if (nvc0->vbo_user && !(nvc0->dirty_3d & (NVC0_NEW_3D_ARRAYS | NVC0_NEW_3D_VERTEX))) { if (nvc0->vbo_push_hint != !!nvc0->state.vbo_mode) if (nvc0->state.vbo_mode != 3) - nvc0->dirty |= NVC0_NEW_ARRAYS; + nvc0->dirty_3d |= NVC0_NEW_3D_ARRAYS; - if (!(nvc0->dirty & NVC0_NEW_ARRAYS) && nvc0->state.vbo_mode == 0) { + if (!(nvc0->dirty_3d & NVC0_NEW_3D_ARRAYS) && nvc0->state.vbo_mode == 0) { if (nvc0->vertex->shared_slots) nvc0_update_user_vbufs_shared(nvc0); else @@ -973,8 +975,8 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) PUSH_SPACE(push, 9); BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); PUSH_DATA (push, 512); - PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9)); - PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (5 << 16) + (0 << 9)); + PUSH_DATAh(push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9)); + PUSH_DATA (push, nvc0->screen->uniform_bo->offset + (6 << 16) + (0 << 9)); if (!info->indirect) { BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 3); PUSH_DATA (push, 256 + 128); @@ -984,6 +986,14 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) } } + if (nvc0->screen->base.class_3d < NVE4_3D_CLASS && + nvc0->seamless_cube_map != nvc0->state.seamless_cube_map) { + nvc0->state.seamless_cube_map = nvc0->seamless_cube_map; + PUSH_SPACE(push, 1); + IMMED_NVC0(push, NVC0_3D(TEX_MISC), + nvc0->seamless_cube_map ? 
NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP : 0); + } + push->kick_notify = nvc0_draw_vbo_kick_notify; for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) { diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c index 9c19ba20a7e..20b6742d8d7 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c @@ -225,7 +225,7 @@ nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count) PUSH_DATAh(push, va + size - 1); PUSH_DATA (push, va + size - 1); - BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, bo); nouveau_pushbuf_validate(push); @@ -554,7 +554,7 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; ++ctx.instance_id; } - nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP); + nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_VTX_TMP); nouveau_scratch_done(&nvc0->base); } while (inst_count); @@ -629,7 +629,7 @@ nvc0_push_upload_vertex_ids(struct push_context *ctx, data = (uint32_t *)nouveau_scratch_get(&nvc0->base, info->count * index_size, &va, &bo); - BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, + BCTX_REFN_bo(nvc0->bufctx_3d, 3D_VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD, bo); nouveau_pushbuf_validate(push); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h index 79abe78b77a..4d07546c310 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h @@ -50,9 +50,9 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags) #define NVC0_3D(n) SUBC_3D(NVC0_3D_##n) #define NVE4_3D(n) SUBC_3D(NVE4_3D_##n) -#define SUBC_COMPUTE(m) 1, (m) -#define NVC0_COMPUTE(n) SUBC_COMPUTE(NVC0_COMPUTE_##n) -#define 
NVE4_COMPUTE(n) SUBC_COMPUTE(NVE4_COMPUTE_##n) +#define SUBC_CP(m) 1, (m) +#define NVC0_CP(n) SUBC_CP(NVC0_COMPUTE_##n) +#define NVE4_CP(n) SUBC_CP(NVE4_COMPUTE_##n) #define SUBC_M2MF(m) 2, (m) #define SUBC_P2MF(m) 2, (m) diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c index d3e5676873e..652bc6d83d6 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -39,7 +39,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, { struct nouveau_device *dev = screen->base.device; struct nouveau_object *chan = screen->base.channel; - unsigned i; + int i; int ret; uint32_t obj_class; @@ -51,6 +51,9 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, case 0xe0: obj_class = NVE4_COMPUTE_CLASS; /* GK104 */ break; + case 0x110: + obj_class = GM107_COMPUTE_CLASS; + break; default: NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset); return -1; @@ -68,21 +71,21 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, if (ret) return ret; - BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1); + BEGIN_NVC0(push, SUBC_CP(NV01_SUBCHAN_OBJECT), 1); PUSH_DATA (push, screen->compute->oclass); - BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVE4_CP(TEMP_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->tls->offset); PUSH_DATA (push, screen->tls->offset); /* No idea why there are 2. Divide size by 2 to be safe. * Actually this might be per-MP TEMP size and looks like I'm only using * 2 MPs instead of all 8. 
*/ - BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(0)), 3); + BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(0)), 3); PUSH_DATAh(push, screen->tls->size / screen->mp_count); PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff); PUSH_DATA (push, 0xff); - BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(1)), 3); + BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3); PUSH_DATAh(push, screen->tls->size / screen->mp_count); PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff); PUSH_DATA (push, 0xff); @@ -92,52 +95,53 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, * FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be * accessible. We cannot prevent that at the moment, so expect failure. */ - BEGIN_NVC0(push, NVE4_COMPUTE(LOCAL_BASE), 1); + BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1); PUSH_DATA (push, 1 << 24); - BEGIN_NVC0(push, NVE4_COMPUTE(SHARED_BASE), 1); + BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1); PUSH_DATA (push, 2 << 24); - BEGIN_NVC0(push, NVE4_COMPUTE(CODE_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->text->offset); PUSH_DATA (push, screen->text->offset); - BEGIN_NVC0(push, SUBC_COMPUTE(0x0310), 1); + BEGIN_NVC0(push, SUBC_CP(0x0310), 1); PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 
0x400 : 0x300); /* NOTE: these do not affect the state used by the 3D object */ - BEGIN_NVC0(push, NVE4_COMPUTE(TIC_ADDRESS_HIGH), 3); + BEGIN_NVC0(push, NVE4_CP(TIC_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->txc->offset); PUSH_DATA (push, screen->txc->offset); PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1); - BEGIN_NVC0(push, NVE4_COMPUTE(TSC_ADDRESS_HIGH), 3); + BEGIN_NVC0(push, NVE4_CP(TSC_ADDRESS_HIGH), 3); PUSH_DATAh(push, screen->txc->offset + 65536); PUSH_DATA (push, screen->txc->offset + 65536); PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1); if (obj_class >= NVF0_COMPUTE_CLASS) { - BEGIN_NVC0(push, SUBC_COMPUTE(0x0248), 1); - PUSH_DATA (push, 0x100); - BEGIN_NIC0(push, SUBC_COMPUTE(0x0248), 63); - for (i = 63; i >= 1; --i) + /* The blob calls GK110_COMPUTE.FIRMWARE[0x6], along with the args (0x1) + * passed with GK110_COMPUTE.GRAPH.SCRATCH[0x2]. This is currently + * disabled because our firmware doesn't support these commands and the + * GPU hangs if they are used. */ + BEGIN_NIC0(push, SUBC_CP(0x0248), 64); + for (i = 63; i >= 0; i--) PUSH_DATA(push, 0x38000 | i); - IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0); - IMMED_NVC0(push, SUBC_COMPUTE(0x518), 0); + IMMED_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 0); } - BEGIN_NVC0(push, NVE4_COMPUTE(TEX_CB_INDEX), 1); + BEGIN_NVC0(push, NVE4_CP(TEX_CB_INDEX), 1); PUSH_DATA (push, 0); /* does not interefere with 3D */ - if (obj_class >= NVF0_COMPUTE_CLASS) - IMMED_NVC0(push, SUBC_COMPUTE(0x02c4), 1); + if (obj_class == NVF0_COMPUTE_CLASS) + IMMED_NVC0(push, SUBC_CP(0x02c4), 1); /* MS sample coordinate offsets: these do not work with _ALT modes ! 
*/ - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS); PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS); - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); PUSH_DATA (push, 64); PUSH_DATA (push, 1); - BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 17); PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); PUSH_DATA (push, 0); /* 0 */ PUSH_DATA (push, 0); @@ -157,13 +161,13 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, PUSH_DATA (push, 1); #ifdef DEBUG - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR); PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR); - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); PUSH_DATA (push, 28); PUSH_DATA (push, 1); - BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 8); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 8); PUSH_DATA (push, 1); PUSH_DATA (push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO); PUSH_DATAh(push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO); @@ -174,7 +178,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, PUSH_DATA (push, 0); /* warp cfstack size */ #endif - BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1); + BEGIN_NVC0(push, NVE4_CP(FLUSH), 1); PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); return 0; @@ -201,13 +205,13 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0) * NVE4's surface load/store instructions receive all the information * directly instead of via binding points, so we have to supply them. 
*/ - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_SUF(i)); PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_SUF(i)); - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); PUSH_DATA (push, 64); PUSH_DATA (push, 1); - BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 17); PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); nve4_set_surface_info(push, nvc0->surfaces[t][i], screen); @@ -223,7 +227,7 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0) } } if (nvc0->surfaces_dirty[t]) { - BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1); + BEGIN_NVC0(push, NVE4_CP(FLUSH), 1); PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); } @@ -252,7 +256,7 @@ nve4_compute_validate_samplers(struct nvc0_context *nvc0) { bool need_flush = nve4_validate_tsc(nvc0, 5); if (need_flush) { - BEGIN_NVC0(nvc0->base.pushbuf, NVE4_COMPUTE(TSC_FLUSH), 1); + BEGIN_NVC0(nvc0->base.pushbuf, NVE4_CP(TSC_FLUSH), 1); PUSH_DATA (nvc0->base.pushbuf, 0); } } @@ -281,17 +285,17 @@ nve4_compute_set_tex_handles(struct nvc0_context *nvc0) address = nvc0->screen->parm->offset + NVE4_CP_INPUT_TEX(i); - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, address); PUSH_DATA (push, address); - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); PUSH_DATA (push, n * 4); PUSH_DATA (push, 0x1); - BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + n); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + n); PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); PUSH_DATAp(push, &nvc0->tex_handles[s][i], n); - BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1); + BEGIN_NVC0(push, NVE4_CP(FLUSH), 1); PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); 
nvc0->textures_dirty[s] = 0; @@ -338,29 +342,29 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input, struct nvc0_program *cp = nvc0->compprog; if (cp->parm_size) { - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->parm->offset); PUSH_DATA (push, screen->parm->offset); - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); PUSH_DATA (push, cp->parm_size); PUSH_DATA (push, 0x1); - BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (cp->parm_size / 4)); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (cp->parm_size / 4)); PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); PUSH_DATAp(push, input, cp->parm_size / 4); } - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0)); PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0)); - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); PUSH_DATA (push, 7 * 4); PUSH_DATA (push, 0x1); - BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + 7); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7); PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); PUSH_DATAp(push, block_layout, 3); PUSH_DATAp(push, grid_layout, 3); PUSH_DATA (push, 0); - BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1); + BEGIN_NVC0(push, NVE4_CP(FLUSH), 1); PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); } @@ -429,10 +433,7 @@ nve4_compute_alloc_launch_desc(struct nouveau_context *nv, } void -nve4_launch_grid(struct pipe_context *pipe, - const uint *block_layout, const uint *grid_layout, - uint32_t label, - const void *input) +nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) { struct nvc0_context *nvc0 = nvc0_context(pipe); struct nouveau_pushbuf *push = 
nvc0->base.pushbuf; @@ -453,33 +454,34 @@ nve4_launch_grid(struct pipe_context *pipe, if (ret) goto out; - nve4_compute_setup_launch_desc(nvc0, desc, label, block_layout, grid_layout); + nve4_compute_setup_launch_desc(nvc0, desc, info->pc, + info->block, info->grid); #ifdef DEBUG if (debug_get_num_option("NV50_PROG_DEBUG", 0)) nve4_compute_dump_launch_desc(desc); #endif - nve4_compute_upload_input(nvc0, input, block_layout, grid_layout); + nve4_compute_upload_input(nvc0, info->input, info->block, info->grid); /* upload descriptor and flush */ #if 0 - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, desc_gpuaddr); PUSH_DATA (push, desc_gpuaddr); - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); PUSH_DATA (push, 256); PUSH_DATA (push, 1); - BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (256 / 4)); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + (256 / 4)); PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1)); PUSH_DATAp(push, (const uint32_t *)desc, 256 / 4); - BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1); + BEGIN_NVC0(push, NVE4_CP(FLUSH), 1); PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB | NVE4_COMPUTE_FLUSH_CODE); #endif - BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH_DESC_ADDRESS), 1); + BEGIN_NVC0(push, NVE4_CP(LAUNCH_DESC_ADDRESS), 1); PUSH_DATA (push, desc_gpuaddr >> 8); - BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH), 1); + BEGIN_NVC0(push, NVE4_CP(LAUNCH), 1); PUSH_DATA (push, 0x3); - BEGIN_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1); + BEGIN_NVC0(push, SUBC_CP(NV50_GRAPH_SERIALIZE), 1); PUSH_DATA (push, 0); out: @@ -517,13 +519,13 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0) tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic); PUSH_SPACE(push, 16); - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2); PUSH_DATAh(push, txc->offset + (tic->id * 
32)); PUSH_DATA (push, txc->offset + (tic->id * 32)); - BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2); + BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2); PUSH_DATA (push, 32); PUSH_DATA (push, 1); - BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 9); + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 9); PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1)); PUSH_DATAp(push, &tic->tic[0], 8); @@ -546,11 +548,11 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0) nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID; if (n[0]) { - BEGIN_NIC0(push, NVE4_COMPUTE(TIC_FLUSH), n[0]); + BEGIN_NIC0(push, NVE4_CP(TIC_FLUSH), n[0]); PUSH_DATAp(push, commands[0], n[0]); } if (n[1]) { - BEGIN_NIC0(push, NVE4_COMPUTE(TEX_CACHE_CTL), n[1]); + BEGIN_NIC0(push, NVE4_CP(TEX_CACHE_CTL), n[1]); PUSH_DATAp(push, commands[1], n[1]); } diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h index 7364a68a579..84f8593b9b6 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h @@ -2,7 +2,6 @@ #ifndef NVE4_COMPUTE_H #define NVE4_COMPUTE_H -#include "nv50/nv50_defs.xml.h" #include "nvc0/nve4_compute.xml.h" /* Input space is implemented as c0[], to which we bind the screen->parm bo. 
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index a2b7f87855d..15a94d90721 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -319,11 +319,14 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; } break; case PIPE_SHADER_VERTEX: @@ -378,11 +381,14 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; } break; } diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 56c7fb93f73..997e5f0e383 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -553,25 +553,24 @@ void evergreen_emit_cs_shader( } static void evergreen_launch_grid( - struct pipe_context *ctx_, - const uint *block_layout, const uint *grid_layout, - uint32_t pc, const void *input) + struct pipe_context *ctx_, const struct pipe_grid_info *info) { struct r600_context *ctx = (struct r600_context *)ctx_; #ifdef HAVE_OPENCL struct r600_pipe_compute *shader = ctx->cs_shader_state.shader; boolean use_kill; - ctx->cs_shader_state.pc = pc; + ctx->cs_shader_state.pc = info->pc; /* Get the config information for this kernel. 
*/ - r600_shader_binary_read_config(&shader->binary, &shader->bc, pc, &use_kill); + r600_shader_binary_read_config(&shader->binary, &shader->bc, + info->pc, &use_kill); #endif - COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc); + COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", info->pc); - evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input); - compute_emit_cs(ctx, block_layout, grid_layout); + evergreen_compute_upload_input(ctx_, info->block, info->grid, info->input); + compute_emit_cs(ctx, info->block, info->grid); } static void evergreen_set_compute_resources(struct pipe_context * ctx_, diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index 0fe7c74418d..7eab29c6eb4 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -784,12 +784,12 @@ LLVMModuleRef r600_tgsi_llvm( { struct tgsi_shader_info shader_info; struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base; - radeon_llvm_context_init(ctx); + radeon_llvm_context_init(ctx, "r600--"); LLVMTypeRef Arguments[32]; unsigned ArgumentsCount = 0; for (unsigned i = 0; i < ctx->inputs_count; i++) Arguments[ArgumentsCount++] = LLVMVectorType(bld_base->base.elem_type, 4); - radeon_llvm_create_func(ctx, Arguments, ArgumentsCount); + radeon_llvm_create_func(ctx, NULL, 0, Arguments, ArgumentsCount); for (unsigned i = 0; i < ctx->inputs_count; i++) { LLVMValueRef P = LLVMGetParam(ctx->main_fn, i); LLVMAddAttribute(P, LLVMInRegAttribute); diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index c8580d807d7..7fb4108a188 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -532,6 +532,8 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e } else { return PIPE_SHADER_IR_TGSI; } + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; case PIPE_SHADER_CAP_DOUBLES: if (rscreen->b.family == 
CHIP_CYPRESS || rscreen->b.family == CHIP_CAYMAN || rscreen->b.family == CHIP_ARUBA) @@ -541,6 +543,7 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: /* due to a bug in the shader compiler, some loops hang diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 324d2719f44..ea028272ccd 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -411,6 +411,7 @@ static const struct debug_named_value common_debug_options[] = { { "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." }, { "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." }, { "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." 
}, + { "mono", DBG_MONOLITHIC_SHADERS, "Use old-style monolithic shaders compiled on demand" }, DEBUG_NAMED_VALUE_END /* must be last */ }; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index e92df876c22..7df617737a7 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -89,6 +89,7 @@ #define DBG_NO_DCC_CLEAR (1llu << 44) #define DBG_NO_RB_PLUS (1llu << 45) #define DBG_SI_SCHED (1llu << 46) +#define DBG_MONOLITHIC_SHADERS (1llu << 47) #define R600_MAP_BUFFER_ALIGNMENT 64 @@ -96,7 +97,7 @@ struct r600_common_context; struct r600_perfcounters; struct radeon_shader_reloc { - char *name; + char name[32]; uint64_t offset; }; diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index af206e43860..1df0c300e85 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -1293,6 +1293,7 @@ unsigned r600_translate_colorswap(enum pipe_format format) break; case 4: /* check the middle channels, the 1st and 4th channel can be NONE */ +#ifdef PIPE_ARCH_LITTLE_ENDIAN if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z)) return V_0280A0_SWAP_STD; /* XYZW */ else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y)) @@ -1301,6 +1302,16 @@ unsigned r600_translate_colorswap(enum pipe_format format) return V_0280A0_SWAP_ALT; /* ZYXW */ else if (HAS_SWIZZLE(1,X) && HAS_SWIZZLE(2,Y)) return V_0280A0_SWAP_ALT_REV; /* WXYZ */ +#else + if (HAS_SWIZZLE(1,W) && HAS_SWIZZLE(2,X)) + return V_0280A0_SWAP_STD; /* ZWXY */ + else if (HAS_SWIZZLE(1,X) && HAS_SWIZZLE(2,W)) + return V_0280A0_SWAP_STD_REV; /* YXWZ */ + else if (HAS_SWIZZLE(1,W) && HAS_SWIZZLE(2,Z)) + return V_0280A0_SWAP_ALT; /* XWZY */ + else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W)) + return V_0280A0_SWAP_ALT_REV; /* YZWX */ +#endif break; } return ~0U; diff --git a/src/gallium/drivers/radeon/radeon_elf_util.c 
b/src/gallium/drivers/radeon/radeon_elf_util.c index 2e45d439e7a..8aaa85d02f6 100644 --- a/src/gallium/drivers/radeon/radeon_elf_util.c +++ b/src/gallium/drivers/radeon/radeon_elf_util.c @@ -98,7 +98,8 @@ static void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols, symbol_name = elf_strptr(elf, symbol_sh_link, symbol.st_name); reloc->offset = rel.r_offset; - reloc->name = strdup(symbol_name); + strncpy(reloc->name, symbol_name, sizeof(reloc->name)-1); + reloc->name[sizeof(reloc->name)-1] = 0; } } @@ -194,26 +195,3 @@ const unsigned char *radeon_shader_binary_config_start( } return binary->config; } - -void radeon_shader_binary_free_relocs(struct radeon_shader_reloc *relocs, - unsigned reloc_count) -{ - unsigned i; - for (i = 0; i < reloc_count; i++) { - FREE(relocs[i].name); - } - FREE(relocs); -} - -void radeon_shader_binary_free_members(struct radeon_shader_binary *binary, - unsigned free_relocs) -{ - FREE(binary->code); - FREE(binary->config); - FREE(binary->rodata); - - if (free_relocs) { - radeon_shader_binary_free_relocs(binary->relocs, - binary->reloc_count); - } -} diff --git a/src/gallium/drivers/radeon/radeon_elf_util.h b/src/gallium/drivers/radeon/radeon_elf_util.h index ea4ab2f14b2..c2af9e0dfe0 100644 --- a/src/gallium/drivers/radeon/radeon_elf_util.h +++ b/src/gallium/drivers/radeon/radeon_elf_util.h @@ -47,18 +47,4 @@ const unsigned char *radeon_shader_binary_config_start( const struct radeon_shader_binary *binary, uint64_t symbol_offset); -/** - * Free all memory allocated for members of \p binary. This function does - * not free \p binary. - * - * @param free_relocs If false, reolc information will not be freed. - */ -void radeon_shader_binary_free_members(struct radeon_shader_binary *binary, - unsigned free_relocs); - -/** - * Free \p relocs and all member data. 
- */ -void radeon_shader_binary_free_relocs(struct radeon_shader_reloc *relocs, - unsigned reloc_count); #endif /* RADEON_ELF_UTIL_H */ diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h index e967ad2214e..bdee2f8020a 100644 --- a/src/gallium/drivers/radeon/radeon_llvm.h +++ b/src/gallium/drivers/radeon/radeon_llvm.h @@ -113,6 +113,7 @@ struct radeon_llvm_context { struct tgsi_declaration_range *arrays; LLVMValueRef main_fn; + LLVMTypeRef return_type; struct gallivm_state gallivm; }; @@ -158,10 +159,12 @@ void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context * bld_bas LLVMValueRef *coords_arg, LLVMValueRef *derivs_arg); -void radeon_llvm_context_init(struct radeon_llvm_context * ctx); +void radeon_llvm_context_init(struct radeon_llvm_context * ctx, + const char *triple); void radeon_llvm_create_func(struct radeon_llvm_context * ctx, - LLVMTypeRef *ParamTypes, unsigned ParamCount); + LLVMTypeRef *return_types, unsigned num_return_elems, + LLVMTypeRef *ParamTypes, unsigned ParamCount); void radeon_llvm_dispose(struct radeon_llvm_context * ctx); diff --git a/src/gallium/drivers/radeon/radeon_llvm_util.c b/src/gallium/drivers/radeon/radeon_llvm_util.c index 0dfd9ad4867..da19533b862 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_util.c +++ b/src/gallium/drivers/radeon/radeon_llvm_util.c @@ -55,8 +55,10 @@ unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx, static void radeon_llvm_optimize(LLVMModuleRef mod) { +#if HAVE_LLVM < 0x0309 const char *data_layout = LLVMGetDataLayout(mod); LLVMTargetDataRef TD = LLVMCreateTargetData(data_layout); +#endif LLVMPassManagerBuilderRef builder = LLVMPassManagerBuilderCreate(); LLVMPassManagerRef pass_manager = LLVMCreatePassManager(); @@ -77,14 +79,18 @@ static void radeon_llvm_optimize(LLVMModuleRef mod) } } +#if HAVE_LLVM < 0x0309 LLVMAddTargetData(TD, pass_manager); +#endif LLVMAddAlwaysInlinerPass(pass_manager); 
LLVMPassManagerBuilderPopulateModulePassManager(builder, pass_manager); LLVMRunPassManager(pass_manager, mod); LLVMPassManagerBuilderDispose(builder); LLVMDisposePassManager(pass_manager); +#if HAVE_LLVM < 0x0309 LLVMDisposeTargetData(TD); +#endif } LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index, diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index f5e3f6af1a0..c74397fb5c9 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -30,6 +30,7 @@ #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_intr.h" +#include "gallivm/lp_bld_misc.h" #include "gallivm/lp_bld_swizzle.h" #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" @@ -1520,7 +1521,7 @@ static void emit_up2h(const struct lp_build_tgsi_action *action, } } -void radeon_llvm_context_init(struct radeon_llvm_context * ctx) +void radeon_llvm_context_init(struct radeon_llvm_context * ctx, const char *triple) { struct lp_type type; @@ -1534,6 +1535,13 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) ctx->gallivm.context = LLVMContextCreate(); ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi", ctx->gallivm.context); + LLVMSetTarget(ctx->gallivm.module, + +#if HAVE_LLVM < 0x0306 + "r600--"); +#else + triple); +#endif ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context); struct lp_build_tgsi_context * bld_base = &ctx->soa.bld_base; @@ -1693,14 +1701,22 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) } void radeon_llvm_create_func(struct radeon_llvm_context * ctx, + LLVMTypeRef *return_types, unsigned num_return_elems, LLVMTypeRef *ParamTypes, unsigned ParamCount) { - LLVMTypeRef main_fn_type; + LLVMTypeRef main_fn_type, ret_type; LLVMBasicBlockRef main_fn_body; + if (num_return_elems) + ret_type = 
LLVMStructTypeInContext(ctx->gallivm.context, + return_types, + num_return_elems, true); + else + ret_type = LLVMVoidTypeInContext(ctx->gallivm.context); + /* Setup the function */ - main_fn_type = LLVMFunctionType(LLVMVoidTypeInContext(ctx->gallivm.context), - ParamTypes, ParamCount, 0); + ctx->return_type = ret_type; + main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0); ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, "main", main_fn_type); main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context, ctx->main_fn, "main_body"); @@ -1710,11 +1726,16 @@ void radeon_llvm_create_func(struct radeon_llvm_context * ctx, void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx) { struct gallivm_state * gallivm = ctx->soa.bld_base.base.gallivm; + const char *triple = LLVMGetTarget(gallivm->module); + LLVMTargetLibraryInfoRef target_library_info; /* Create the pass manager */ gallivm->passmgr = LLVMCreateFunctionPassManagerForModule( gallivm->module); + target_library_info = gallivm_create_target_library_info(triple); + LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr); + /* This pass should eliminate all the load and store instructions */ LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr); @@ -1730,7 +1751,7 @@ void radeon_llvm_finalize_module(struct radeon_llvm_context * ctx) LLVMDisposeBuilder(gallivm->builder); LLVMDisposePassManager(gallivm->passmgr); - + gallivm_dispose_target_library_info(target_library_info); } void radeon_llvm_dispose(struct radeon_llvm_context * ctx) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 7370a113d3d..9f5f4c682bc 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -196,9 +196,7 @@ static unsigned compute_num_waves_for_scratch( } static void si_launch_grid( - struct pipe_context *ctx, - const uint *block_layout, const uint *grid_layout, - uint32_t pc, const void *input) + struct 
pipe_context *ctx, const struct pipe_grid_info *info) { struct si_context *sctx = (struct si_context*)ctx; struct radeon_winsys_cs *cs = sctx->b.gfx.cs; @@ -232,7 +230,7 @@ static void si_launch_grid( pm4->compute_pkt = true; /* Read the config information */ - si_shader_binary_read_config(&shader->binary, &shader->config, pc); + si_shader_binary_read_config(&shader->binary, &shader->config, info->pc); /* Upload the kernel arguments */ @@ -242,15 +240,16 @@ static void si_launch_grid( kernel_args = sctx->b.ws->buffer_map(input_buffer->buf, sctx->b.gfx.cs, PIPE_TRANSFER_WRITE); for (i = 0; i < 3; i++) { - kernel_args[i] = grid_layout[i]; - kernel_args[i + 3] = grid_layout[i] * block_layout[i]; - kernel_args[i + 6] = block_layout[i]; + kernel_args[i] = info->grid[i]; + kernel_args[i + 3] = info->grid[i] * info->block[i]; + kernel_args[i + 6] = info->block[i]; } num_waves_for_scratch = compute_num_waves_for_scratch( - &sctx->screen->b.info, block_layout, grid_layout); + &sctx->screen->b.info, info->block, info->grid); - memcpy(kernel_args + (num_work_size_bytes / 4), input, program->input_size); + memcpy(kernel_args + (num_work_size_bytes / 4), info->input, + program->input_size); if (shader->config.scratch_bytes_per_wave > 0) { @@ -291,11 +290,11 @@ static void si_launch_grid( si_pm4_set_reg(pm4, R_00B818_COMPUTE_START_Z, 0); si_pm4_set_reg(pm4, R_00B81C_COMPUTE_NUM_THREAD_X, - S_00B81C_NUM_THREAD_FULL(block_layout[0])); + S_00B81C_NUM_THREAD_FULL(info->block[0])); si_pm4_set_reg(pm4, R_00B820_COMPUTE_NUM_THREAD_Y, - S_00B820_NUM_THREAD_FULL(block_layout[1])); + S_00B820_NUM_THREAD_FULL(info->block[1])); si_pm4_set_reg(pm4, R_00B824_COMPUTE_NUM_THREAD_Z, - S_00B824_NUM_THREAD_FULL(block_layout[2])); + S_00B824_NUM_THREAD_FULL(info->block[2])); /* Global buffers */ for (i = 0; i < MAX_GLOBAL_BUFFERS; i++) { @@ -323,7 +322,7 @@ static void si_launch_grid( } shader_va = shader->bo->gpu_address; - shader_va += pc; + shader_va += info->pc; 
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); @@ -375,9 +374,9 @@ static void si_launch_grid( ; si_pm4_cmd_begin(pm4, PKT3_DISPATCH_DIRECT); - si_pm4_cmd_add(pm4, grid_layout[0]); /* Thread groups DIM_X */ - si_pm4_cmd_add(pm4, grid_layout[1]); /* Thread groups DIM_Y */ - si_pm4_cmd_add(pm4, grid_layout[2]); /* Thread gropus DIM_Z */ + si_pm4_cmd_add(pm4, info->grid[0]); /* Thread groups DIM_X */ + si_pm4_cmd_add(pm4, info->grid[1]); /* Thread groups DIM_Y */ + si_pm4_cmd_add(pm4, info->grid[2]); /* Thread gropus DIM_Z */ si_pm4_cmd_add(pm4, 1); /* DISPATCH_INITIATOR */ si_pm4_cmd_end(pm4, false); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index e9d69d2db38..37fd4a25d59 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -22,6 +22,7 @@ */ #include "si_pipe.h" +#include "si_shader.h" #include "si_public.h" #include "sid.h" @@ -448,6 +449,10 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu switch (param) { case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_NATIVE; + + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; + case PIPE_SHADER_CAP_DOUBLES: return HAVE_LLVM >= 0x0307; @@ -511,6 +516,8 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu return 16; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; case PIPE_SHADER_CAP_DOUBLES: return HAVE_LLVM >= 0x0307; case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: @@ -522,6 +529,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; } return 0; @@ -530,6 +538,14 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu static void 
si_destroy_screen(struct pipe_screen* pscreen) { struct si_screen *sscreen = (struct si_screen *)pscreen; + struct si_shader_part *parts[] = { + sscreen->vs_prologs, + sscreen->vs_epilogs, + sscreen->tcs_epilogs, + sscreen->ps_prologs, + sscreen->ps_epilogs + }; + unsigned i; if (!sscreen) return; @@ -537,6 +553,18 @@ static void si_destroy_screen(struct pipe_screen* pscreen) if (!sscreen->b.ws->unref(sscreen->b.ws)) return; + /* Free shader parts. */ + for (i = 0; i < ARRAY_SIZE(parts); i++) { + while (parts[i]) { + struct si_shader_part *part = parts[i]; + + parts[i] = part->next; + radeon_shader_binary_clean(&part->binary); + FREE(part); + } + } + pipe_mutex_destroy(sscreen->shader_parts_mutex); + si_destroy_shader_cache(sscreen); r600_destroy_common_screen(&sscreen->b); } @@ -584,7 +612,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) sscreen->b.b.resource_create = r600_resource_create_common; if (!r600_common_screen_init(&sscreen->b, ws) || - !si_init_gs_info(sscreen)) { + !si_init_gs_info(sscreen) || + !si_init_shader_cache(sscreen)) { FREE(sscreen); return NULL; } @@ -594,6 +623,10 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) sscreen->b.has_cp_dma = true; sscreen->b.has_streamout = true; + pipe_mutex_init(sscreen->shader_parts_mutex); + sscreen->use_monolithic_shaders = + HAVE_LLVM < 0x0308 || + (sscreen->b.debug_flags & DBG_MONOLITHIC_SHADERS) != 0; if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE)) sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index b5790d6b564..ef860a58b83 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -80,10 +80,36 @@ #define SI_MAX_BORDER_COLORS 4096 struct si_compute; +struct hash_table; struct si_screen { struct r600_common_screen b; unsigned gs_table_depth; + + /* Whether shaders are monolithic (1-part) or 
separate (3-part). */ + bool use_monolithic_shaders; + + pipe_mutex shader_parts_mutex; + struct si_shader_part *vs_prologs; + struct si_shader_part *vs_epilogs; + struct si_shader_part *tcs_epilogs; + struct si_shader_part *ps_prologs; + struct si_shader_part *ps_epilogs; + + /* Shader cache in memory. + * + * Design & limitations: + * - The shader cache is per screen (= per process), never saved to + * disk, and skips redundant shader compilations from TGSI to bytecode. + * - It can only be used with one-variant-per-shader support, in which + * case only the main (typically middle) part of shaders is cached. + * - Only VS, TCS, TES, PS are cached, out of which only the hw VS + * variants of VS and TES are cached, so LS and ES aren't. + * - GS and CS aren't cached, but it's certainly possible to cache + * those as well. + */ + pipe_mutex shader_cache_mutex; + struct hash_table *shader_cache; }; struct si_blend_color { diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index baa1090e2fb..57458ae1381 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -70,6 +70,12 @@ struct si_shader_context unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. 
*/ bool is_gs_copy_shader; + + /* Whether to generate the optimized shader variant compiled as a whole + * (without a prolog and epilog) + */ + bool is_monolithic; + int param_streamout_config; int param_streamout_write_index; int param_streamout_offset[4]; @@ -77,6 +83,7 @@ struct si_shader_context int param_rel_auto_id; int param_vs_prim_id; int param_instance_id; + int param_vertex_index0; int param_tes_u; int param_tes_v; int param_tes_rel_patch_id; @@ -96,14 +103,17 @@ struct si_shader_context LLVMValueRef esgs_ring; LLVMValueRef gsvs_ring[4]; LLVMValueRef gs_next_vertex[4]; + LLVMValueRef return_value; LLVMTypeRef voidt; LLVMTypeRef i1; LLVMTypeRef i8; LLVMTypeRef i32; + LLVMTypeRef i64; LLVMTypeRef i128; LLVMTypeRef f32; LLVMTypeRef v16i8; + LLVMTypeRef v2i32; LLVMTypeRef v4i32; LLVMTypeRef v4f32; LLVMTypeRef v8i32; @@ -118,9 +128,17 @@ static struct si_shader_context *si_shader_context( static void si_init_shader_ctx(struct si_shader_context *ctx, struct si_screen *sscreen, struct si_shader *shader, - LLVMTargetMachineRef tm, - struct tgsi_shader_info *info); + LLVMTargetMachineRef tm); +/* Ideally pass the sample mask input to the PS epilog as v13, which + * is its usual location, so that the shader doesn't have to add v_mov. + */ +#define PS_EPILOG_SAMPLEMASK_MIN_LOC 13 + +/* The VS location of the PrimitiveID input is the same in the epilog, + * so that the main shader part doesn't have to move it. 
+ */ +#define VS_EPILOG_PRIMID_LOC 2 #define PERSPECTIVE_BASE 0 #define LINEAR_BASE 9 @@ -196,6 +214,10 @@ static LLVMValueRef unpack_param(struct si_shader_context *ctx, LLVMValueRef value = LLVMGetParam(ctx->radeon_bld.main_fn, param); + if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMFloatTypeKind) + value = bitcast(&ctx->radeon_bld.soa.bld_base, + TGSI_TYPE_UNSIGNED, value); + if (rshift) value = LLVMBuildLShr(gallivm->builder, value, lp_build_const_int32(gallivm, rshift), ""); @@ -375,7 +397,7 @@ static LLVMValueRef build_indexed_load_const( static LLVMValueRef get_instance_index_for_fetch( struct radeon_llvm_context *radeon_bld, - unsigned divisor) + unsigned param_start_instance, unsigned divisor) { struct si_shader_context *ctx = si_shader_context(&radeon_bld->soa.bld_base); @@ -389,8 +411,8 @@ static LLVMValueRef get_instance_index_for_fetch( result = LLVMBuildUDiv(gallivm->builder, result, lp_build_const_int32(gallivm, divisor), ""); - return LLVMBuildAdd(gallivm->builder, result, LLVMGetParam( - radeon_bld->main_fn, SI_PARAM_START_INSTANCE), ""); + return LLVMBuildAdd(gallivm->builder, result, + LLVMGetParam(radeon_bld->main_fn, param_start_instance), ""); } static void declare_input_vs( @@ -402,7 +424,8 @@ static void declare_input_vs( struct gallivm_state *gallivm = base->gallivm; struct si_shader_context *ctx = si_shader_context(&radeon_bld->soa.bld_base); - unsigned divisor = ctx->shader->key.vs.instance_divisors[input_index]; + unsigned divisor = + ctx->shader->key.vs.prolog.instance_divisors[input_index]; unsigned chan; @@ -424,10 +447,16 @@ static void declare_input_vs( /* Build the attribute offset */ attribute_offset = lp_build_const_int32(gallivm, 0); - if (divisor) { + if (!ctx->is_monolithic) { + buffer_index = LLVMGetParam(radeon_bld->main_fn, + ctx->param_vertex_index0 + + input_index); + } else if (divisor) { /* Build index from instance ID, start instance and divisor */ - ctx->shader->uses_instanceid = true; - buffer_index = 
get_instance_index_for_fetch(&ctx->radeon_bld, divisor); + ctx->shader->info.uses_instanceid = true; + buffer_index = get_instance_index_for_fetch(&ctx->radeon_bld, + SI_PARAM_START_INSTANCE, + divisor); } else { /* Load the buffer index for vertices. */ LLVMValueRef vertex_id = LLVMGetParam(ctx->radeon_bld.main_fn, @@ -853,7 +882,8 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location) static unsigned select_interp_param(struct si_shader_context *ctx, unsigned param) { - if (!ctx->shader->key.ps.force_persample_interp) + if (!ctx->shader->key.ps.prolog.force_persample_interp || + !ctx->is_monolithic) return param; /* If the shader doesn't use center/centroid, just return the parameter. @@ -923,7 +953,7 @@ static void interp_fs_input(struct si_shader_context *ctx, intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant"; if (semantic_name == TGSI_SEMANTIC_COLOR && - ctx->shader->key.ps.color_two_side) { + ctx->shader->key.ps.prolog.color_two_side) { LLVMValueRef args[4]; LLVMValueRef is_face_positive; LLVMValueRef back_attr_number; @@ -997,6 +1027,7 @@ static void declare_input_fs( unsigned input_index, const struct tgsi_full_declaration *decl) { + struct lp_build_context *base = &radeon_bld->soa.bld_base.base; struct si_shader_context *ctx = si_shader_context(&radeon_bld->soa.bld_base); struct si_shader *shader = ctx->shader; @@ -1004,6 +1035,26 @@ static void declare_input_fs( LLVMValueRef interp_param = NULL; int interp_param_idx; + /* Get colors from input VGPRs (set by the prolog). */ + if (!ctx->is_monolithic && + decl->Semantic.Name == TGSI_SEMANTIC_COLOR) { + unsigned i = decl->Semantic.Index; + unsigned colors_read = shader->selector->info.colors_read; + unsigned mask = colors_read >> (i * 4); + unsigned offset = SI_PARAM_POS_FIXED_PT + 1 + + (i ? util_bitcount(colors_read & 0xf) : 0); + + radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] = + mask & 0x1 ? 
LLVMGetParam(main_fn, offset++) : base->undef; + radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] = + mask & 0x2 ? LLVMGetParam(main_fn, offset++) : base->undef; + radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 2)] = + mask & 0x4 ? LLVMGetParam(main_fn, offset++) : base->undef; + radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 3)] = + mask & 0x8 ? LLVMGetParam(main_fn, offset++) : base->undef; + return; + } + interp_param_idx = lookup_interp_param_index(decl->Interp.Interpolate, decl->Interp.Location); if (interp_param_idx == -1) @@ -1330,12 +1381,12 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { const union si_shader_key *key = &ctx->shader->key; - unsigned col_formats = key->ps.spi_shader_col_format; + unsigned col_formats = key->ps.epilog.spi_shader_col_format; int cbuf = target - V_008DFC_SQ_EXP_MRT; assert(cbuf >= 0 && cbuf < 8); spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf; - is_int8 = (key->ps.color_is_int8 >> cbuf) & 0x1; + is_int8 = (key->ps.epilog.color_is_int8 >> cbuf) & 0x1; } args[4] = uint->zero; /* COMPR flag */ @@ -1488,13 +1539,13 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base, struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; - if (ctx->shader->key.ps.alpha_func != PIPE_FUNC_NEVER) { + if (ctx->shader->key.ps.epilog.alpha_func != PIPE_FUNC_NEVER) { LLVMValueRef alpha_ref = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_ALPHA_REF); LLVMValueRef alpha_pass = lp_build_cmp(&bld_base->base, - ctx->shader->key.ps.alpha_func, + ctx->shader->key.ps.epilog.alpha_func, alpha, alpha_ref); LLVMValueRef arg = lp_build_select(&bld_base->base, @@ -1511,7 +1562,8 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base, } static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base, - LLVMValueRef alpha) + 
LLVMValueRef alpha, + unsigned samplemask_param) { struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; @@ -1519,7 +1571,7 @@ static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context * /* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */ coverage = LLVMGetParam(ctx->radeon_bld.main_fn, - SI_PARAM_SAMPLE_COVERAGE); + samplemask_param); coverage = bitcast(bld_base, TGSI_TYPE_SIGNED, coverage); coverage = lp_build_intrinsic(gallivm->builder, "llvm.ctpop.i32", @@ -1841,7 +1893,8 @@ handle_semantic: case TGSI_SEMANTIC_COLOR: case TGSI_SEMANTIC_BCOLOR: target = V_008DFC_SQ_EXP_PARAM + param_count; - shader->vs_output_param_offset[i] = param_count; + assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset)); + shader->info.vs_output_param_offset[i] = param_count; param_count++; break; case TGSI_SEMANTIC_CLIPDIST: @@ -1855,7 +1908,8 @@ handle_semantic: case TGSI_SEMANTIC_TEXCOORD: case TGSI_SEMANTIC_GENERIC: target = V_008DFC_SQ_EXP_PARAM + param_count; - shader->vs_output_param_offset[i] = param_count; + assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset)); + shader->info.vs_output_param_offset[i] = param_count; param_count++; break; default: @@ -1883,7 +1937,7 @@ handle_semantic: } } - shader->nr_param_exports = param_count; + shader->info.nr_param_exports = param_count; /* We need to add the position output manually if it's missing. 
*/ if (!pos_args[0][0]) { @@ -1945,7 +1999,7 @@ handle_semantic: for (i = 0; i < 4; i++) if (pos_args[i][0]) - shader->nr_pos_exports++; + shader->info.nr_pos_exports++; pos_idx = 0; for (i = 0; i < 4; i++) { @@ -1955,7 +2009,7 @@ handle_semantic: /* Specify the target we are exporting */ pos_args[i][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + pos_idx++); - if (pos_idx == shader->nr_pos_exports) + if (pos_idx == shader->info.nr_pos_exports) /* Specify that this is the last export */ pos_args[i][2] = uint->one; @@ -1989,7 +2043,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, invocation_id, bld_base->uint_bld.zero, "")); /* Determine the layout of one tess factor element in the buffer. */ - switch (shader->key.tcs.prim_mode) { + switch (shader->key.tcs.epilog.prim_mode) { case PIPE_PRIM_LINES: stride = 2; /* 2 dwords, 1 vec2 store */ outer_comps = 2; @@ -2061,14 +2115,51 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base) { struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef invocation_id; + LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset; + rel_patch_id = get_rel_patch_id(ctx); invocation_id = unpack_param(ctx, SI_PARAM_REL_IDS, 8, 5); + tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx); - si_write_tess_factors(bld_base, - get_rel_patch_id(ctx), - invocation_id, - get_tcs_out_current_patch_data_offset(ctx)); + if (!ctx->is_monolithic) { + /* Return epilog parameters from this function. 
*/ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMValueRef ret = ctx->return_value; + LLVMValueRef rw_buffers, rw0, rw1, tf_soffset; + unsigned vgpr; + + /* RW_BUFFERS pointer */ + rw_buffers = LLVMGetParam(ctx->radeon_bld.main_fn, + SI_PARAM_RW_BUFFERS); + rw_buffers = LLVMBuildPtrToInt(builder, rw_buffers, ctx->i64, ""); + rw_buffers = LLVMBuildBitCast(builder, rw_buffers, ctx->v2i32, ""); + rw0 = LLVMBuildExtractElement(builder, rw_buffers, + bld_base->uint_bld.zero, ""); + rw1 = LLVMBuildExtractElement(builder, rw_buffers, + bld_base->uint_bld.one, ""); + ret = LLVMBuildInsertValue(builder, ret, rw0, 0, ""); + ret = LLVMBuildInsertValue(builder, ret, rw1, 1, ""); + + /* Tess factor buffer soffset is after user SGPRs. */ + tf_soffset = LLVMGetParam(ctx->radeon_bld.main_fn, + SI_PARAM_TESS_FACTOR_OFFSET); + ret = LLVMBuildInsertValue(builder, ret, tf_soffset, + SI_TCS_NUM_USER_SGPR, ""); + + /* VGPRs */ + rel_patch_id = bitcast(bld_base, TGSI_TYPE_FLOAT, rel_patch_id); + invocation_id = bitcast(bld_base, TGSI_TYPE_FLOAT, invocation_id); + tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, tf_lds_offset); + + vgpr = SI_TCS_NUM_USER_SGPR + 1; + ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, ""); + ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, ""); + ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, ""); + ctx->return_value = ret; + return; + } + + si_write_tess_factors(bld_base, rel_patch_id, invocation_id, tf_lds_offset); } static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base) @@ -2214,16 +2305,26 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base) ""); } - /* Export PrimitiveID when PS needs it. 
*/ - if (si_vs_exports_prim_id(ctx->shader)) { - outputs[i].name = TGSI_SEMANTIC_PRIMID; - outputs[i].sid = 0; - outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT, - get_primitive_id(bld_base, 0)); - outputs[i].values[1] = bld_base->base.undef; - outputs[i].values[2] = bld_base->base.undef; - outputs[i].values[3] = bld_base->base.undef; - i++; + if (ctx->is_monolithic) { + /* Export PrimitiveID when PS needs it. */ + if (si_vs_exports_prim_id(ctx->shader)) { + outputs[i].name = TGSI_SEMANTIC_PRIMID; + outputs[i].sid = 0; + outputs[i].values[0] = bitcast(bld_base, TGSI_TYPE_FLOAT, + get_primitive_id(bld_base, 0)); + outputs[i].values[1] = bld_base->base.undef; + outputs[i].values[2] = bld_base->base.undef; + outputs[i].values[3] = bld_base->base.undef; + i++; + } + } else { + /* Return the primitive ID from the LLVM function. */ + ctx->return_value = + LLVMBuildInsertValue(gallivm->builder, + ctx->return_value, + bitcast(bld_base, TGSI_TYPE_FLOAT, + get_primitive_id(bld_base, 0)), + VS_EPILOG_PRIMID_LOC, ""); } si_llvm_export_vs(bld_base, outputs, i); @@ -2284,6 +2385,7 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base, static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, LLVMValueRef *color, unsigned index, + unsigned samplemask_param, bool is_last) { struct si_shader_context *ctx = si_shader_context(bld_base); @@ -2291,30 +2393,31 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, int i; /* Clamp color */ - if (ctx->shader->key.ps.clamp_color) + if (ctx->shader->key.ps.epilog.clamp_color) for (i = 0; i < 4; i++) color[i] = radeon_llvm_saturate(bld_base, color[i]); /* Alpha to one */ - if (ctx->shader->key.ps.alpha_to_one) + if (ctx->shader->key.ps.epilog.alpha_to_one) color[3] = base->one; /* Alpha test */ if (index == 0 && - ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS) + ctx->shader->key.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS) si_alpha_test(bld_base, color[3]); /* Line & polygon 
smoothing */ - if (ctx->shader->key.ps.poly_line_smoothing) - color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3]); + if (ctx->shader->key.ps.epilog.poly_line_smoothing) + color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3], + samplemask_param); /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */ - if (ctx->shader->key.ps.last_cbuf > 0) { + if (ctx->shader->key.ps.epilog.last_cbuf > 0) { LLVMValueRef args[8][9]; int c, last = -1; /* Get the export arguments, also find out what the last one is. */ - for (c = 0; c <= ctx->shader->key.ps.last_cbuf; c++) { + for (c = 0; c <= ctx->shader->key.ps.epilog.last_cbuf; c++) { si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + c, args[c]); if (args[c][0] != bld_base->uint_bld.zero) @@ -2322,7 +2425,7 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, } /* Emit all exports. */ - for (c = 0; c <= ctx->shader->key.ps.last_cbuf; c++) { + for (c = 0; c <= ctx->shader->key.ps.epilog.last_cbuf; c++) { if (is_last && last == c) { args[c][1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */ args[c][2] = bld_base->uint_bld.one; /* DONE bit */ @@ -2385,11 +2488,11 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base) * Otherwise, find the last color export. */ if (!info->writes_z && !info->writes_stencil && !info->writes_samplemask) { - unsigned spi_format = shader->key.ps.spi_shader_col_format; + unsigned spi_format = shader->key.ps.epilog.spi_shader_col_format; /* Don't export NULL and return if alpha-test is enabled. 
*/ - if (shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS && - shader->key.ps.alpha_func != PIPE_FUNC_NEVER && + if (shader->key.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS && + shader->key.ps.epilog.alpha_func != PIPE_FUNC_NEVER && (spi_format & 0xf) == 0) spi_format |= V_028714_SPI_SHADER_32_AR; @@ -2400,10 +2503,10 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base) continue; /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */ - if (shader->key.ps.last_cbuf > 0) { + if (shader->key.ps.epilog.last_cbuf > 0) { /* Just set this if any of the colorbuffers are enabled. */ if (spi_format & - ((1llu << (4 * (shader->key.ps.last_cbuf + 1))) - 1)) + ((1llu << (4 * (shader->key.ps.epilog.last_cbuf + 1))) - 1)) last_color_export = i; continue; } @@ -2445,6 +2548,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base) ctx->radeon_bld.soa.outputs[i][j], ""); si_export_mrt_color(bld_base, color, semantic_index, + SI_PARAM_SAMPLE_COVERAGE, last_color_export == i); break; default: @@ -2458,6 +2562,100 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base) si_export_mrt_z(bld_base, depth, stencil, samplemask); } +/** + * Return PS outputs in this order: + * + * v[0:3] = color0.xyzw + * v[4:7] = color1.xyzw + * ... + * vN+0 = Depth + * vN+1 = Stencil + * vN+2 = SampleMask + * vN+3 = SampleMaskIn (used for OpenGL smoothing) + * + * The alpha-ref SGPR is returned via its original location. 
+ */ +static void si_llvm_return_fs_outputs(struct lp_build_tgsi_context *bld_base) +{ + struct si_shader_context *ctx = si_shader_context(bld_base); + struct si_shader *shader = ctx->shader; + struct lp_build_context *base = &bld_base->base; + struct tgsi_shader_info *info = &shader->selector->info; + LLVMBuilderRef builder = base->gallivm->builder; + unsigned i, j, first_vgpr, vgpr; + + LLVMValueRef color[8][4] = {}; + LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL; + LLVMValueRef ret; + + /* Read the output values. */ + for (i = 0; i < info->num_outputs; i++) { + unsigned semantic_name = info->output_semantic_name[i]; + unsigned semantic_index = info->output_semantic_index[i]; + + switch (semantic_name) { + case TGSI_SEMANTIC_COLOR: + assert(semantic_index < 8); + for (j = 0; j < 4; j++) { + LLVMValueRef ptr = ctx->radeon_bld.soa.outputs[i][j]; + LLVMValueRef result = LLVMBuildLoad(builder, ptr, ""); + color[semantic_index][j] = result; + } + break; + case TGSI_SEMANTIC_POSITION: + depth = LLVMBuildLoad(builder, + ctx->radeon_bld.soa.outputs[i][2], ""); + break; + case TGSI_SEMANTIC_STENCIL: + stencil = LLVMBuildLoad(builder, + ctx->radeon_bld.soa.outputs[i][1], ""); + break; + case TGSI_SEMANTIC_SAMPLEMASK: + samplemask = LLVMBuildLoad(builder, + ctx->radeon_bld.soa.outputs[i][0], ""); + break; + default: + fprintf(stderr, "Warning: SI unhandled fs output type:%d\n", + semantic_name); + } + } + + /* Fill the return structure. */ + ret = ctx->return_value; + + /* Set SGPRs. 
*/ + ret = LLVMBuildInsertValue(builder, ret, + bitcast(bld_base, TGSI_TYPE_SIGNED, + LLVMGetParam(ctx->radeon_bld.main_fn, + SI_PARAM_ALPHA_REF)), + SI_SGPR_ALPHA_REF, ""); + + /* Set VGPRs */ + first_vgpr = vgpr = SI_SGPR_ALPHA_REF + 1; + for (i = 0; i < ARRAY_SIZE(color); i++) { + if (!color[i][0]) + continue; + + for (j = 0; j < 4; j++) + ret = LLVMBuildInsertValue(builder, ret, color[i][j], vgpr++, ""); + } + if (depth) + ret = LLVMBuildInsertValue(builder, ret, depth, vgpr++, ""); + if (stencil) + ret = LLVMBuildInsertValue(builder, ret, stencil, vgpr++, ""); + if (samplemask) + ret = LLVMBuildInsertValue(builder, ret, samplemask, vgpr++, ""); + + /* Add the input sample mask for smoothing at the end. */ + if (vgpr < first_vgpr + PS_EPILOG_SAMPLEMASK_MIN_LOC) + vgpr = first_vgpr + PS_EPILOG_SAMPLEMASK_MIN_LOC; + ret = LLVMBuildInsertValue(builder, ret, + LLVMGetParam(ctx->radeon_bld.main_fn, + SI_PARAM_SAMPLE_COVERAGE), vgpr++, ""); + + ctx->return_value = ret; +} + static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data); @@ -2536,13 +2734,12 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements) /** * Load an image view, fmask view. or sampler state descriptor. */ -static LLVMValueRef get_sampler_desc(struct si_shader_context *ctx, - LLVMValueRef index, enum desc_type type) +static LLVMValueRef get_sampler_desc_custom(struct si_shader_context *ctx, + LLVMValueRef list, LLVMValueRef index, + enum desc_type type) { struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm; LLVMBuilderRef builder = gallivm->builder; - LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, - SI_PARAM_SAMPLERS); switch (type) { case DESC_IMAGE: @@ -2558,12 +2755,21 @@ static LLVMValueRef get_sampler_desc(struct si_shader_context *ctx, /* The sampler state is at [12:15]. 
*/ index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), ""); index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 3, 0), ""); - ptr = LLVMBuildPointerCast(builder, ptr, - const_array(ctx->v4i32, 0), ""); + list = LLVMBuildPointerCast(builder, list, + const_array(ctx->v4i32, 0), ""); break; } - return build_indexed_load_const(ctx, ptr, index); + return build_indexed_load_const(ctx, list, index); +} + +static LLVMValueRef get_sampler_desc(struct si_shader_context *ctx, + LLVMValueRef index, enum desc_type type) +{ + LLVMValueRef list = LLVMGetParam(ctx->radeon_bld.main_fn, + SI_PARAM_SAMPLERS); + + return get_sampler_desc_custom(ctx, list, index, type); } static void tex_fetch_ptrs( @@ -3546,6 +3752,30 @@ static const struct lp_build_tgsi_action interp_action = { .emit = build_interp_intrinsic, }; +static void si_create_function(struct si_shader_context *ctx, + LLVMTypeRef *returns, unsigned num_returns, + LLVMTypeRef *params, unsigned num_params, + int last_array_pointer, int last_sgpr) +{ + int i; + + radeon_llvm_create_func(&ctx->radeon_bld, returns, num_returns, + params, num_params); + radeon_llvm_shader_type(ctx->radeon_bld.main_fn, ctx->type); + ctx->return_value = LLVMGetUndef(ctx->radeon_bld.return_type); + + for (i = 0; i <= last_sgpr; ++i) { + LLVMValueRef P = LLVMGetParam(ctx->radeon_bld.main_fn, i); + + /* We tell llvm that array inputs are passed by value to allow Sinking pass + * to move load. Inputs are constant so this is fine. 
*/ + if (i <= last_array_pointer) + LLVMAddAttribute(P, LLVMByValAttribute); + else + LLVMAddAttribute(P, LLVMInRegAttribute); + } +} + static void create_meta_data(struct si_shader_context *ctx) { struct gallivm_state *gallivm = ctx->radeon_bld.soa.bld_base.base.gallivm; @@ -3579,15 +3809,57 @@ static void declare_streamout_params(struct si_shader_context *ctx, } } +static unsigned llvm_get_type_size(LLVMTypeRef type) +{ + LLVMTypeKind kind = LLVMGetTypeKind(type); + + switch (kind) { + case LLVMIntegerTypeKind: + return LLVMGetIntTypeWidth(type) / 8; + case LLVMFloatTypeKind: + return 4; + case LLVMPointerTypeKind: + return 8; + case LLVMVectorTypeKind: + return LLVMGetVectorSize(type) * + llvm_get_type_size(LLVMGetElementType(type)); + default: + assert(0); + return 0; + } +} + +static void declare_tess_lds(struct si_shader_context *ctx) +{ + struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm; + LLVMTypeRef i32 = ctx->radeon_bld.soa.bld_base.uint_bld.elem_type; + + /* This is the upper bound, maximum is 32 inputs times 32 vertices */ + unsigned vertex_data_dw_size = 32*32*4; + unsigned patch_data_dw_size = 32*4; + /* The formula is: TCS inputs + TCS outputs + TCS patch outputs. */ + unsigned patch_dw_size = vertex_data_dw_size*2 + patch_data_dw_size; + unsigned lds_dwords = patch_dw_size; + + /* The actual size is computed outside of the shader to reduce + * the number of shader variants. 
*/ + ctx->lds = + LLVMAddGlobalInAddressSpace(gallivm->module, + LLVMArrayType(i32, lds_dwords), + "tess_lds", + LOCAL_ADDR_SPACE); +} + static void create_function(struct si_shader_context *ctx) { struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base; struct gallivm_state *gallivm = bld_base->base.gallivm; struct si_shader *shader = ctx->shader; - LLVMTypeRef params[SI_NUM_PARAMS], v2i32, v3i32; - unsigned i, last_array_pointer, last_sgpr, num_params; + LLVMTypeRef params[SI_NUM_PARAMS + SI_NUM_VERTEX_BUFFERS], v3i32; + LLVMTypeRef returns[16+32*4]; + unsigned i, last_array_pointer, last_sgpr, num_params, num_return_sgprs; + unsigned num_returns = 0; - v2i32 = LLVMVectorType(ctx->i32, 2); v3i32 = LLVMVectorType(ctx->i32, 3); params[SI_PARAM_RW_BUFFERS] = const_array(ctx->v16i8, SI_NUM_RW_BUFFERS); @@ -3630,6 +3902,20 @@ static void create_function(struct si_shader_context *ctx) params[ctx->param_rel_auto_id = num_params++] = ctx->i32; params[ctx->param_vs_prim_id = num_params++] = ctx->i32; params[ctx->param_instance_id = num_params++] = ctx->i32; + + if (!ctx->is_monolithic && + !ctx->is_gs_copy_shader) { + /* Vertex load indices. */ + ctx->param_vertex_index0 = num_params; + + for (i = 0; i < shader->selector->info.num_inputs; i++) + params[num_params++] = ctx->i32; + + /* PrimitiveID output. */ + if (!shader->key.vs.as_es && !shader->key.vs.as_ls) + for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++) + returns[num_returns++] = ctx->f32; + } break; case TGSI_PROCESSOR_TESS_CTRL: @@ -3643,6 +3929,15 @@ static void create_function(struct si_shader_context *ctx) params[SI_PARAM_PATCH_ID] = ctx->i32; params[SI_PARAM_REL_IDS] = ctx->i32; num_params = SI_PARAM_REL_IDS+1; + + if (!ctx->is_monolithic) { + /* PARAM_TESS_FACTOR_OFFSET is after user SGPRs. 
*/ + for (i = 0; i <= SI_TCS_NUM_USER_SGPR; i++) + returns[num_returns++] = ctx->i32; /* SGPRs */ + + for (i = 0; i < 3; i++) + returns[num_returns++] = ctx->f32; /* VGPRs */ + } break; case TGSI_PROCESSOR_TESS_EVAL: @@ -3663,6 +3958,11 @@ static void create_function(struct si_shader_context *ctx) params[ctx->param_tes_v = num_params++] = ctx->f32; params[ctx->param_tes_rel_patch_id = num_params++] = ctx->i32; params[ctx->param_tes_patch_id = num_params++] = ctx->i32; + + /* PrimitiveID output. */ + if (!ctx->is_monolithic && !shader->key.tes.as_es) + for (i = 0; i <= VS_EPILOG_PRIMID_LOC; i++) + returns[num_returns++] = ctx->f32; break; case TGSI_PROCESSOR_GEOMETRY: @@ -3686,13 +3986,13 @@ static void create_function(struct si_shader_context *ctx) params[SI_PARAM_ALPHA_REF] = ctx->f32; params[SI_PARAM_PRIM_MASK] = ctx->i32; last_sgpr = SI_PARAM_PRIM_MASK; - params[SI_PARAM_PERSP_SAMPLE] = v2i32; - params[SI_PARAM_PERSP_CENTER] = v2i32; - params[SI_PARAM_PERSP_CENTROID] = v2i32; + params[SI_PARAM_PERSP_SAMPLE] = ctx->v2i32; + params[SI_PARAM_PERSP_CENTER] = ctx->v2i32; + params[SI_PARAM_PERSP_CENTROID] = ctx->v2i32; params[SI_PARAM_PERSP_PULL_MODEL] = v3i32; - params[SI_PARAM_LINEAR_SAMPLE] = v2i32; - params[SI_PARAM_LINEAR_CENTER] = v2i32; - params[SI_PARAM_LINEAR_CENTROID] = v2i32; + params[SI_PARAM_LINEAR_SAMPLE] = ctx->v2i32; + params[SI_PARAM_LINEAR_CENTER] = ctx->v2i32; + params[SI_PARAM_LINEAR_CENTROID] = ctx->v2i32; params[SI_PARAM_LINE_STIPPLE_TEX] = ctx->f32; params[SI_PARAM_POS_X_FLOAT] = ctx->f32; params[SI_PARAM_POS_Y_FLOAT] = ctx->f32; @@ -3701,8 +4001,39 @@ static void create_function(struct si_shader_context *ctx) params[SI_PARAM_FRONT_FACE] = ctx->i32; params[SI_PARAM_ANCILLARY] = ctx->i32; params[SI_PARAM_SAMPLE_COVERAGE] = ctx->f32; - params[SI_PARAM_POS_FIXED_PT] = ctx->f32; + params[SI_PARAM_POS_FIXED_PT] = ctx->i32; num_params = SI_PARAM_POS_FIXED_PT+1; + + if (!ctx->is_monolithic) { + /* Color inputs from the prolog. 
*/ + if (shader->selector->info.colors_read) { + unsigned num_color_elements = + util_bitcount(shader->selector->info.colors_read); + + assert(num_params + num_color_elements <= ARRAY_SIZE(params)); + for (i = 0; i < num_color_elements; i++) + params[num_params++] = ctx->f32; + } + + /* Outputs for the epilog. */ + num_return_sgprs = SI_SGPR_ALPHA_REF + 1; + num_returns = + num_return_sgprs + + util_bitcount(shader->selector->info.colors_written) * 4 + + shader->selector->info.writes_z + + shader->selector->info.writes_stencil + + shader->selector->info.writes_samplemask + + 1 /* SampleMaskIn */; + + num_returns = MAX2(num_returns, + num_return_sgprs + + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1); + + for (i = 0; i < num_return_sgprs; i++) + returns[i] = ctx->i32; + for (; i < num_returns; i++) + returns[i] = ctx->f32; + } break; default: @@ -3711,20 +4042,38 @@ static void create_function(struct si_shader_context *ctx) } assert(num_params <= Elements(params)); - radeon_llvm_create_func(&ctx->radeon_bld, params, num_params); - radeon_llvm_shader_type(ctx->radeon_bld.main_fn, ctx->type); - - for (i = 0; i <= last_sgpr; ++i) { - LLVMValueRef P = LLVMGetParam(ctx->radeon_bld.main_fn, i); - /* We tell llvm that array inputs are passed by value to allow Sinking pass - * to move load. Inputs are constant so this is fine. */ - if (i <= last_array_pointer) - LLVMAddAttribute(P, LLVMByValAttribute); - else - LLVMAddAttribute(P, LLVMInRegAttribute); + si_create_function(ctx, returns, num_returns, params, + num_params, last_array_pointer, last_sgpr); + + /* Reserve register locations for VGPR inputs the PS prolog may need. 
*/ + if (ctx->type == TGSI_PROCESSOR_FRAGMENT && + !ctx->is_monolithic) { + radeon_llvm_add_attribute(ctx->radeon_bld.main_fn, + "InitialPSInputAddr", + S_0286D0_PERSP_SAMPLE_ENA(1) | + S_0286D0_PERSP_CENTER_ENA(1) | + S_0286D0_PERSP_CENTROID_ENA(1) | + S_0286D0_LINEAR_SAMPLE_ENA(1) | + S_0286D0_LINEAR_CENTER_ENA(1) | + S_0286D0_LINEAR_CENTROID_ENA(1) | + S_0286D0_FRONT_FACE_ENA(1) | + S_0286D0_POS_FIXED_PT_ENA(1)); } + shader->info.num_input_sgprs = 0; + shader->info.num_input_vgprs = 0; + + for (i = 0; i <= last_sgpr; ++i) + shader->info.num_input_sgprs += llvm_get_type_size(params[i]) / 4; + + /* Unused fragment shader inputs are eliminated by the compiler, + * so we don't know yet how many there will be. + */ + if (ctx->type != TGSI_PROCESSOR_FRAGMENT) + for (; i < num_params; ++i) + shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 4; + if (bld_base->info && (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 || bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 || @@ -3740,22 +4089,8 @@ static void create_function(struct si_shader_context *ctx) if ((ctx->type == TGSI_PROCESSOR_VERTEX && shader->key.vs.as_ls) || ctx->type == TGSI_PROCESSOR_TESS_CTRL || - ctx->type == TGSI_PROCESSOR_TESS_EVAL) { - /* This is the upper bound, maximum is 32 inputs times 32 vertices */ - unsigned vertex_data_dw_size = 32*32*4; - unsigned patch_data_dw_size = 32*4; - /* The formula is: TCS inputs + TCS outputs + TCS patch outputs. */ - unsigned patch_dw_size = vertex_data_dw_size*2 + patch_data_dw_size; - unsigned lds_dwords = patch_dw_size; - - /* The actual size is computed outside of the shader to reduce - * the number of shader variants. 
*/ - ctx->lds = - LLVMAddGlobalInAddressSpace(gallivm->module, - LLVMArrayType(ctx->i32, lds_dwords), - "tess_lds", - LOCAL_ADDR_SPACE); - } + ctx->type == TGSI_PROCESSOR_TESS_EVAL) + declare_tess_lds(ctx); } static void preload_constants(struct si_shader_context *ctx) @@ -3887,6 +4222,49 @@ static void preload_ring_buffers(struct si_shader_context *ctx) } } +static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx, + LLVMValueRef param_sampler_views, + unsigned param_pos_fixed_pt) +{ + struct lp_build_tgsi_context *bld_base = + &ctx->radeon_bld.soa.bld_base; + struct gallivm_state *gallivm = bld_base->base.gallivm; + struct lp_build_emit_data result = {}; + struct tgsi_full_instruction inst = {}; + LLVMValueRef desc, sampler_index, address[2], pix; + + /* Use the fixed-point gl_FragCoord input. + * Since the stipple pattern is 32x32 and it repeats, just get 5 bits + * per coordinate to get the repeating effect. + */ + address[0] = unpack_param(ctx, param_pos_fixed_pt, 0, 5); + address[1] = unpack_param(ctx, param_pos_fixed_pt, 16, 5); + + /* Load the sampler view descriptor. */ + sampler_index = lp_build_const_int32(gallivm, SI_POLY_STIPPLE_SAMPLER); + desc = get_sampler_desc_custom(ctx, param_sampler_views, + sampler_index, DESC_IMAGE); + + /* Load the texel. */ + inst.Instruction.Opcode = TGSI_OPCODE_TXF; + inst.Texture.Texture = TGSI_TEXTURE_2D_MSAA; /* = use load, not load_mip */ + result.inst = &inst; + set_tex_fetch_args(ctx, &result, TGSI_OPCODE_TXF, + inst.Texture.Texture, + desc, NULL, address, ARRAY_SIZE(address), 0xf); + build_tex_intrinsic(&tex_action, bld_base, &result); + + /* Kill the thread accordingly. 
*/ + pix = LLVMBuildExtractElement(gallivm->builder, result.output[0], + lp_build_const_int32(gallivm, 3), ""); + pix = bitcast(bld_base, TGSI_TYPE_FLOAT, pix); + pix = LLVMBuildFNeg(gallivm->builder, pix, ""); + + lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill", + LLVMVoidTypeInContext(gallivm->context), + &pix, 1, 0); +} + void si_shader_binary_read_config(struct radeon_shader_binary *binary, struct si_shader_config *conf, unsigned symbol_offset) @@ -3972,41 +4350,70 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx, } } +static unsigned si_get_shader_binary_size(struct si_shader *shader) +{ + unsigned size = shader->binary.code_size; + + if (shader->prolog) + size += shader->prolog->binary.code_size; + if (shader->epilog) + size += shader->epilog->binary.code_size; + return size; +} + int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader) { - const struct radeon_shader_binary *binary = &shader->binary; - unsigned code_size = binary->code_size + binary->rodata_size; + const struct radeon_shader_binary *prolog = + shader->prolog ? &shader->prolog->binary : NULL; + const struct radeon_shader_binary *epilog = + shader->epilog ? &shader->epilog->binary : NULL; + const struct radeon_shader_binary *mainb = &shader->binary; + unsigned bo_size = si_get_shader_binary_size(shader) + + (!epilog ? mainb->rodata_size : 0); unsigned char *ptr; + assert(!prolog || !prolog->rodata_size); + assert((!prolog && !epilog) || !mainb->rodata_size); + assert(!epilog || !epilog->rodata_size); + r600_resource_reference(&shader->bo, NULL); shader->bo = si_resource_create_custom(&sscreen->b.b, PIPE_USAGE_IMMUTABLE, - code_size); + bo_size); if (!shader->bo) return -ENOMEM; + /* Upload. 
*/ ptr = sscreen->b.ws->buffer_map(shader->bo->buf, NULL, PIPE_TRANSFER_READ_WRITE); - util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size); - if (binary->rodata_size > 0) { - ptr += binary->code_size; - util_memcpy_cpu_to_le32(ptr, binary->rodata, - binary->rodata_size); + + if (prolog) { + util_memcpy_cpu_to_le32(ptr, prolog->code, prolog->code_size); + ptr += prolog->code_size; } + util_memcpy_cpu_to_le32(ptr, mainb->code, mainb->code_size); + ptr += mainb->code_size; + + if (epilog) + util_memcpy_cpu_to_le32(ptr, epilog->code, epilog->code_size); + else if (mainb->rodata_size > 0) + util_memcpy_cpu_to_le32(ptr, mainb->rodata, mainb->rodata_size); + sscreen->b.ws->buffer_unmap(shader->bo->buf); return 0; } static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary, - struct pipe_debug_callback *debug) + struct pipe_debug_callback *debug, + const char *name) { char *line, *p; unsigned i, count; if (binary->disasm_string) { - fprintf(stderr, "\nShader Disassembly:\n\n"); - fprintf(stderr, "%s\n", binary->disasm_string); + fprintf(stderr, "Shader %s disassembly:\n", name); + fprintf(stderr, "%s", binary->disasm_string); if (debug && debug->debug_message) { /* Very long debug messages are cut off, so send the @@ -4036,7 +4443,7 @@ static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary "Shader Disassembly End"); } } else { - fprintf(stderr, "SI CODE:\n"); + fprintf(stderr, "Shader %s binary:\n", name); for (i = 0; i < binary->code_size; i += 4) { fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3], binary->code[i + 2], @@ -4115,16 +4522,60 @@ static void si_shader_dump_stats(struct si_screen *sscreen, max_simd_waves); } +static const char *si_get_shader_name(struct si_shader *shader, + unsigned processor) +{ + switch (processor) { + case TGSI_PROCESSOR_VERTEX: + if (shader->key.vs.as_es) + return "Vertex Shader as ES"; + else if (shader->key.vs.as_ls) + return "Vertex Shader as LS"; + else + 
return "Vertex Shader as VS"; + case TGSI_PROCESSOR_TESS_CTRL: + return "Tessellation Control Shader"; + case TGSI_PROCESSOR_TESS_EVAL: + if (shader->key.tes.as_es) + return "Tessellation Evaluation Shader as ES"; + else + return "Tessellation Evaluation Shader as VS"; + case TGSI_PROCESSOR_GEOMETRY: + if (shader->gs_copy_shader == NULL) + return "GS Copy Shader as VS"; + else + return "Geometry Shader"; + case TGSI_PROCESSOR_FRAGMENT: + return "Pixel Shader"; + case TGSI_PROCESSOR_COMPUTE: + return "Compute Shader"; + default: + return "Unknown Shader"; + } +} + void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, struct pipe_debug_callback *debug, unsigned processor) { - if (r600_can_dump_shader(&sscreen->b, processor)) - if (!(sscreen->b.debug_flags & DBG_NO_ASM)) - si_shader_dump_disassembly(&shader->binary, debug); + if (r600_can_dump_shader(&sscreen->b, processor) && + !(sscreen->b.debug_flags & DBG_NO_ASM)) { + fprintf(stderr, "\n%s:\n", si_get_shader_name(shader, processor)); + + if (shader->prolog) + si_shader_dump_disassembly(&shader->prolog->binary, + debug, "prolog"); + + si_shader_dump_disassembly(&shader->binary, debug, "main"); + + if (shader->epilog) + si_shader_dump_disassembly(&shader->epilog->binary, + debug, "epilog"); + fprintf(stderr, "\n"); + } si_shader_dump_stats(sscreen, &shader->config, shader->selector ? shader->selector->info.num_inputs : 0, - shader->binary.code_size, debug, processor); + si_get_shader_binary_size(shader), debug, processor); } int si_compile_llvm(struct si_screen *sscreen, @@ -4177,6 +4628,19 @@ int si_compile_llvm(struct si_screen *sscreen, FREE(binary->global_symbol_offsets); binary->config = NULL; binary->global_symbol_offsets = NULL; + + /* Some shaders can't have rodata because their binaries can be + * concatenated. 
+ */ + if (binary->rodata_size && + (processor == TGSI_PROCESSOR_VERTEX || + processor == TGSI_PROCESSOR_TESS_CTRL || + processor == TGSI_PROCESSOR_TESS_EVAL || + processor == TGSI_PROCESSOR_FRAGMENT)) { + fprintf(stderr, "radeonsi: The shader can't have rodata."); + return -EINVAL; + } + return r; } @@ -4196,7 +4660,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen, outputs = MALLOC(gsinfo->num_outputs * sizeof(outputs[0])); - si_init_shader_ctx(ctx, sscreen, ctx->shader, ctx->tm, gsinfo); + si_init_shader_ctx(ctx, sscreen, ctx->shader, ctx->tm); ctx->type = TGSI_PROCESSOR_VERTEX; ctx->is_gs_copy_shader = true; @@ -4241,7 +4705,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen, si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs); - LLVMBuildRetVoid(bld_base->base.gallivm->builder); + LLVMBuildRet(gallivm->builder, ctx->return_value); /* Dump LLVM IR before any optimization passes */ if (sscreen->b.debug_flags & DBG_PREOPT_IR && @@ -4278,35 +4742,38 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f) switch (shader) { case PIPE_SHADER_VERTEX: fprintf(f, " instance_divisors = {"); - for (i = 0; i < Elements(key->vs.instance_divisors); i++) + for (i = 0; i < Elements(key->vs.prolog.instance_divisors); i++) fprintf(f, !i ? 
"%u" : ", %u", - key->vs.instance_divisors[i]); + key->vs.prolog.instance_divisors[i]); fprintf(f, "}\n"); fprintf(f, " as_es = %u\n", key->vs.as_es); fprintf(f, " as_ls = %u\n", key->vs.as_ls); - fprintf(f, " export_prim_id = %u\n", key->vs.export_prim_id); + fprintf(f, " export_prim_id = %u\n", key->vs.epilog.export_prim_id); break; case PIPE_SHADER_TESS_CTRL: - fprintf(f, " prim_mode = %u\n", key->tcs.prim_mode); + fprintf(f, " prim_mode = %u\n", key->tcs.epilog.prim_mode); break; case PIPE_SHADER_TESS_EVAL: fprintf(f, " as_es = %u\n", key->tes.as_es); - fprintf(f, " export_prim_id = %u\n", key->tes.export_prim_id); + fprintf(f, " export_prim_id = %u\n", key->tes.epilog.export_prim_id); break; case PIPE_SHADER_GEOMETRY: break; case PIPE_SHADER_FRAGMENT: - fprintf(f, " spi_shader_col_format = 0x%x\n", key->ps.spi_shader_col_format); - fprintf(f, " last_cbuf = %u\n", key->ps.last_cbuf); - fprintf(f, " color_two_side = %u\n", key->ps.color_two_side); - fprintf(f, " alpha_func = %u\n", key->ps.alpha_func); - fprintf(f, " alpha_to_one = %u\n", key->ps.alpha_to_one); - fprintf(f, " poly_stipple = %u\n", key->ps.poly_stipple); - fprintf(f, " clamp_color = %u\n", key->ps.clamp_color); + fprintf(f, " prolog.color_two_side = %u\n", key->ps.prolog.color_two_side); + fprintf(f, " prolog.poly_stipple = %u\n", key->ps.prolog.poly_stipple); + fprintf(f, " prolog.force_persample_interp = %u\n", key->ps.prolog.force_persample_interp); + fprintf(f, " epilog.spi_shader_col_format = 0x%x\n", key->ps.epilog.spi_shader_col_format); + fprintf(f, " epilog.color_is_int8 = 0x%X\n", key->ps.epilog.color_is_int8); + fprintf(f, " epilog.last_cbuf = %u\n", key->ps.epilog.last_cbuf); + fprintf(f, " epilog.alpha_func = %u\n", key->ps.epilog.alpha_func); + fprintf(f, " epilog.alpha_to_one = %u\n", key->ps.epilog.alpha_to_one); + fprintf(f, " epilog.poly_line_smoothing = %u\n", key->ps.epilog.poly_line_smoothing); + fprintf(f, " epilog.clamp_color = %u\n", key->ps.epilog.clamp_color); break; 
default: @@ -4317,13 +4784,12 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f) static void si_init_shader_ctx(struct si_shader_context *ctx, struct si_screen *sscreen, struct si_shader *shader, - LLVMTargetMachineRef tm, - struct tgsi_shader_info *info) + LLVMTargetMachineRef tm) { struct lp_build_tgsi_context *bld_base; memset(ctx, 0, sizeof(*ctx)); - radeon_llvm_context_init(&ctx->radeon_bld); + radeon_llvm_context_init(&ctx->radeon_bld, "amdgcn--"); ctx->tm = tm; ctx->screen = sscreen; if (shader && shader->selector) @@ -4336,15 +4802,18 @@ static void si_init_shader_ctx(struct si_shader_context *ctx, ctx->i1 = LLVMInt1TypeInContext(ctx->radeon_bld.gallivm.context); ctx->i8 = LLVMInt8TypeInContext(ctx->radeon_bld.gallivm.context); ctx->i32 = LLVMInt32TypeInContext(ctx->radeon_bld.gallivm.context); - ctx->i128 = LLVMInt128TypeInContext(ctx->radeon_bld.gallivm.context); + ctx->i64 = LLVMInt64TypeInContext(ctx->radeon_bld.gallivm.context); + ctx->i128 = LLVMIntTypeInContext(ctx->radeon_bld.gallivm.context, 128); ctx->f32 = LLVMFloatTypeInContext(ctx->radeon_bld.gallivm.context); ctx->v16i8 = LLVMVectorType(ctx->i8, 16); + ctx->v2i32 = LLVMVectorType(ctx->i32, 2); ctx->v4i32 = LLVMVectorType(ctx->i32, 4); ctx->v4f32 = LLVMVectorType(ctx->f32, 4); ctx->v8i32 = LLVMVectorType(ctx->i32, 8); bld_base = &ctx->radeon_bld.soa.bld_base; - bld_base->info = info; + if (shader && shader->selector) + bld_base->info = &shader->selector->info; bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant; bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action; @@ -4380,40 +4849,31 @@ static void si_init_shader_ctx(struct si_shader_context *ctx, bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32"; } -int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, - struct si_shader *shader, - struct pipe_debug_callback *debug) +int si_compile_tgsi_shader(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + 
struct si_shader *shader, + bool is_monolithic, + struct pipe_debug_callback *debug) { struct si_shader_selector *sel = shader->selector; - struct tgsi_token *tokens = sel->tokens; struct si_shader_context ctx; struct lp_build_tgsi_context *bld_base; - struct tgsi_shader_info stipple_shader_info; LLVMModuleRef mod; int r = 0; - bool poly_stipple = sel->type == PIPE_SHADER_FRAGMENT && - shader->key.ps.poly_stipple; - - if (poly_stipple) { - tokens = util_pstipple_create_fragment_shader(tokens, NULL, - SI_POLY_STIPPLE_SAMPLER, - TGSI_FILE_SYSTEM_VALUE); - tgsi_scan_shader(tokens, &stipple_shader_info); - } /* Dump TGSI code before doing TGSI->LLVM conversion in case the * conversion fails. */ if (r600_can_dump_shader(&sscreen->b, sel->info.processor) && !(sscreen->b.debug_flags & DBG_NO_TGSI)) { si_dump_shader_key(sel->type, &shader->key, stderr); - tgsi_dump(tokens, 0); + tgsi_dump(sel->tokens, 0); si_dump_streamout(&sel->so); } - si_init_shader_ctx(&ctx, sscreen, shader, tm, - poly_stipple ? 
&stipple_shader_info : &sel->info); + si_init_shader_ctx(&ctx, sscreen, shader, tm); + ctx.is_monolithic = is_monolithic; - shader->uses_instanceid = sel->info.uses_instanceid; + shader->info.uses_instanceid = sel->info.uses_instanceid; bld_base = &ctx.radeon_bld.soa.bld_base; ctx.radeon_bld.load_system_value = declare_system_value; @@ -4447,7 +4907,10 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, break; case TGSI_PROCESSOR_FRAGMENT: ctx.radeon_bld.load_input = declare_input_fs; - bld_base->emit_epilogue = si_llvm_emit_fs_epilogue; + if (is_monolithic) + bld_base->emit_epilogue = si_llvm_emit_fs_epilogue; + else + bld_base->emit_epilogue = si_llvm_return_fs_outputs; break; default: assert(!"Unsupported shader type"); @@ -4461,6 +4924,14 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, preload_streamout_buffers(&ctx); preload_ring_buffers(&ctx); + if (ctx.is_monolithic && sel->type == PIPE_SHADER_FRAGMENT && + shader->key.ps.prolog.poly_stipple) { + LLVMValueRef views = LLVMGetParam(ctx.radeon_bld.main_fn, + SI_PARAM_SAMPLERS); + si_llvm_emit_polygon_stipple(&ctx, views, + SI_PARAM_POS_FIXED_PT); + } + if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { int i; for (i = 0; i < 4; i++) { @@ -4470,12 +4941,12 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, } } - if (!lp_build_tgsi_llvm(bld_base, tokens)) { + if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) { fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n"); goto out; } - LLVMBuildRetVoid(bld_base->base.gallivm->builder); + LLVMBuildRet(bld_base->base.gallivm->builder, ctx.return_value); mod = bld_base->base.gallivm->module; /* Dump LLVM IR before any optimization passes */ @@ -4492,16 +4963,49 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, goto out; } - si_shader_dump(sscreen, shader, debug, ctx.type); + radeon_llvm_dispose(&ctx.radeon_bld); - r = si_shader_binary_upload(sscreen, shader); - if (r) { 
- fprintf(stderr, "LLVM failed to upload shader\n"); - goto out; + /* Calculate the number of fragment input VGPRs. */ + if (ctx.type == TGSI_PROCESSOR_FRAGMENT) { + shader->info.num_input_vgprs = 0; + shader->info.face_vgpr_index = -1; + + if (G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 2; + if (G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 2; + if (G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 2; + if (G_0286CC_PERSP_PULL_MODEL_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 3; + if (G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 2; + if (G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 2; + if (G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 2; + if (G_0286CC_LINE_STIPPLE_TEX_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 1; + if (G_0286CC_POS_X_FLOAT_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 1; + if (G_0286CC_POS_Y_FLOAT_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 1; + if (G_0286CC_POS_Z_FLOAT_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 1; + if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 1; + if (G_0286CC_FRONT_FACE_ENA(shader->config.spi_ps_input_addr)) { + shader->info.face_vgpr_index = shader->info.num_input_vgprs; + shader->info.num_input_vgprs += 1; + } + if (G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 1; + if (G_0286CC_SAMPLE_COVERAGE_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 1; + if (G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr)) + shader->info.num_input_vgprs += 1; } - 
radeon_llvm_dispose(&ctx.radeon_bld); - if (ctx.type == TGSI_PROCESSOR_GEOMETRY) { shader->gs_copy_shader = CALLOC_STRUCT(si_shader); shader->gs_copy_shader->selector = shader->selector; @@ -4517,11 +5021,968 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, out: for (int i = 0; i < SI_NUM_CONST_BUFFERS; i++) FREE(ctx.constants[i]); - if (poly_stipple) - tgsi_free_tokens(tokens); return r; } +/** + * Create, compile and return a shader part (prolog or epilog). + * + * \param sscreen screen + * \param list list of shader parts of the same category + * \param key shader part key + * \param tm LLVM target machine + * \param debug debug callback + * \param compile the callback responsible for compilation + * \return non-NULL on success + */ +static struct si_shader_part * +si_get_shader_part(struct si_screen *sscreen, + struct si_shader_part **list, + union si_shader_part_key *key, + LLVMTargetMachineRef tm, + struct pipe_debug_callback *debug, + bool (*compile)(struct si_screen *, + LLVMTargetMachineRef, + struct pipe_debug_callback *, + struct si_shader_part *)) +{ + struct si_shader_part *result; + + pipe_mutex_lock(sscreen->shader_parts_mutex); + + /* Find existing. */ + for (result = *list; result; result = result->next) { + if (memcmp(&result->key, key, sizeof(*key)) == 0) { + pipe_mutex_unlock(sscreen->shader_parts_mutex); + return result; + } + } + + /* Compile a new one. */ + result = CALLOC_STRUCT(si_shader_part); + result->key = *key; + if (!compile(sscreen, tm, debug, result)) { + FREE(result); + pipe_mutex_unlock(sscreen->shader_parts_mutex); + return NULL; + } + + result->next = *list; + *list = result; + pipe_mutex_unlock(sscreen->shader_parts_mutex); + return result; +} + +/** + * Create a vertex shader prolog. + * + * The inputs are the same as VS (a lot of SGPRs and 4 VGPR system values). + * All inputs are returned unmodified. 
The vertex load indices are + * stored after them, which will used by the API VS for fetching inputs. + * + * For example, the expected outputs for instance_divisors[] = {0, 1, 2} are: + * input_v0, + * input_v1, + * input_v2, + * input_v3, + * (VertexID + BaseVertex), + * (InstanceID + StartInstance), + * (InstanceID / 2 + StartInstance) + */ +static bool si_compile_vs_prolog(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct pipe_debug_callback *debug, + struct si_shader_part *out) +{ + union si_shader_part_key *key = &out->key; + struct si_shader shader = {}; + struct si_shader_context ctx; + struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm; + LLVMTypeRef *params, *returns; + LLVMValueRef ret, func; + int last_sgpr, num_params, num_returns, i; + bool status = true; + + si_init_shader_ctx(&ctx, sscreen, &shader, tm); + ctx.type = TGSI_PROCESSOR_VERTEX; + ctx.param_vertex_id = key->vs_prolog.num_input_sgprs; + ctx.param_instance_id = key->vs_prolog.num_input_sgprs + 3; + + /* 4 preloaded VGPRs + vertex load indices as prolog outputs */ + params = alloca((key->vs_prolog.num_input_sgprs + 4) * + sizeof(LLVMTypeRef)); + returns = alloca((key->vs_prolog.num_input_sgprs + 4 + + key->vs_prolog.last_input + 1) * + sizeof(LLVMTypeRef)); + num_params = 0; + num_returns = 0; + + /* Declare input and output SGPRs. */ + num_params = 0; + for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) { + params[num_params++] = ctx.i32; + returns[num_returns++] = ctx.i32; + } + last_sgpr = num_params - 1; + + /* 4 preloaded VGPRs (outputs must be floats) */ + for (i = 0; i < 4; i++) { + params[num_params++] = ctx.i32; + returns[num_returns++] = ctx.f32; + } + + /* Vertex load indices. */ + for (i = 0; i <= key->vs_prolog.last_input; i++) + returns[num_returns++] = ctx.f32; + + /* Create the function. */ + si_create_function(&ctx, returns, num_returns, params, + num_params, -1, last_sgpr); + func = ctx.radeon_bld.main_fn; + + /* Copy inputs to outputs. 
This should be no-op, as the registers match, + * but it will prevent the compiler from overwriting them unintentionally. + */ + ret = ctx.return_value; + for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) { + LLVMValueRef p = LLVMGetParam(func, i); + ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, ""); + } + for (i = num_params - 4; i < num_params; i++) { + LLVMValueRef p = LLVMGetParam(func, i); + p = LLVMBuildBitCast(gallivm->builder, p, ctx.f32, ""); + ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, ""); + } + + /* Compute vertex load indices from instance divisors. */ + for (i = 0; i <= key->vs_prolog.last_input; i++) { + unsigned divisor = key->vs_prolog.states.instance_divisors[i]; + LLVMValueRef index; + + if (divisor) { + /* InstanceID / Divisor + StartInstance */ + index = get_instance_index_for_fetch(&ctx.radeon_bld, + SI_SGPR_START_INSTANCE, + divisor); + } else { + /* VertexID + BaseVertex */ + index = LLVMBuildAdd(gallivm->builder, + LLVMGetParam(func, ctx.param_vertex_id), + LLVMGetParam(func, SI_SGPR_BASE_VERTEX), ""); + } + + index = LLVMBuildBitCast(gallivm->builder, index, ctx.f32, ""); + ret = LLVMBuildInsertValue(gallivm->builder, ret, index, + num_params++, ""); + } + + /* Compile. */ + LLVMBuildRet(gallivm->builder, ret); + radeon_llvm_finalize_module(&ctx.radeon_bld); + + if (si_compile_llvm(sscreen, &out->binary, &out->config, tm, + gallivm->module, debug, ctx.type, + "Vertex Shader Prolog")) + status = false; + + radeon_llvm_dispose(&ctx.radeon_bld); + return status; +} + +/** + * Compile the vertex shader epilog. This is also used by the tessellation + * evaluation shader compiled as VS. + * + * The input is PrimitiveID. + * + * If PrimitiveID is required by the pixel shader, export it. + * Otherwise, do nothing. 
+ */ +static bool si_compile_vs_epilog(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct pipe_debug_callback *debug, + struct si_shader_part *out) +{ + union si_shader_part_key *key = &out->key; + struct si_shader_context ctx; + struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm; + struct lp_build_tgsi_context *bld_base = &ctx.radeon_bld.soa.bld_base; + LLVMTypeRef params[5]; + int num_params, i; + bool status = true; + + si_init_shader_ctx(&ctx, sscreen, NULL, tm); + ctx.type = TGSI_PROCESSOR_VERTEX; + + /* Declare input VGPRs. */ + num_params = key->vs_epilog.states.export_prim_id ? + (VS_EPILOG_PRIMID_LOC + 1) : 0; + assert(num_params <= ARRAY_SIZE(params)); + + for (i = 0; i < num_params; i++) + params[i] = ctx.f32; + + /* Create the function. */ + si_create_function(&ctx, NULL, 0, params, num_params, + -1, -1); + + /* Emit exports. */ + if (key->vs_epilog.states.export_prim_id) { + struct lp_build_context *base = &bld_base->base; + struct lp_build_context *uint = &bld_base->uint_bld; + LLVMValueRef args[9]; + + args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */ + args[1] = uint->zero; /* whether the EXEC mask is valid */ + args[2] = uint->zero; /* DONE bit */ + args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_PARAM + + key->vs_epilog.prim_id_param_offset); + args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */ + args[5] = LLVMGetParam(ctx.radeon_bld.main_fn, + VS_EPILOG_PRIMID_LOC); /* X */ + args[6] = uint->undef; /* Y */ + args[7] = uint->undef; /* Z */ + args[8] = uint->undef; /* W */ + + lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + args, 9, 0); + } + + /* Compile. 
*/ + LLVMBuildRet(gallivm->builder, ctx.return_value); + radeon_llvm_finalize_module(&ctx.radeon_bld); + + if (si_compile_llvm(sscreen, &out->binary, &out->config, tm, + gallivm->module, debug, ctx.type, + "Vertex Shader Epilog")) + status = false; + + radeon_llvm_dispose(&ctx.radeon_bld); + return status; +} + +/** + * Create & compile a vertex shader epilog. This a helper used by VS and TES. + */ +static bool si_get_vs_epilog(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct si_shader *shader, + struct pipe_debug_callback *debug, + struct si_vs_epilog_bits *states) +{ + union si_shader_part_key epilog_key; + + memset(&epilog_key, 0, sizeof(epilog_key)); + epilog_key.vs_epilog.states = *states; + + /* Set up the PrimitiveID output. */ + if (shader->key.vs.epilog.export_prim_id) { + unsigned index = shader->selector->info.num_outputs; + unsigned offset = shader->info.nr_param_exports++; + + epilog_key.vs_epilog.prim_id_param_offset = offset; + assert(index < ARRAY_SIZE(shader->info.vs_output_param_offset)); + shader->info.vs_output_param_offset[index] = offset; + } + + shader->epilog = si_get_shader_part(sscreen, &sscreen->vs_epilogs, + &epilog_key, tm, debug, + si_compile_vs_epilog); + return shader->epilog != NULL; +} + +/** + * Select and compile (or reuse) vertex shader parts (prolog & epilog). + */ +static bool si_shader_select_vs_parts(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct si_shader *shader, + struct pipe_debug_callback *debug) +{ + struct tgsi_shader_info *info = &shader->selector->info; + union si_shader_part_key prolog_key; + unsigned i; + + /* Get the prolog. */ + memset(&prolog_key, 0, sizeof(prolog_key)); + prolog_key.vs_prolog.states = shader->key.vs.prolog; + prolog_key.vs_prolog.num_input_sgprs = shader->info.num_input_sgprs; + prolog_key.vs_prolog.last_input = MAX2(1, info->num_inputs) - 1; + + /* The prolog is a no-op if there are no inputs. 
*/ + if (info->num_inputs) { + shader->prolog = + si_get_shader_part(sscreen, &sscreen->vs_prologs, + &prolog_key, tm, debug, + si_compile_vs_prolog); + if (!shader->prolog) + return false; + } + + /* Get the epilog. */ + if (!shader->key.vs.as_es && !shader->key.vs.as_ls && + !si_get_vs_epilog(sscreen, tm, shader, debug, + &shader->key.vs.epilog)) + return false; + + /* Set the instanceID flag. */ + for (i = 0; i < info->num_inputs; i++) + if (prolog_key.vs_prolog.states.instance_divisors[i]) + shader->info.uses_instanceid = true; + + return true; +} + +/** + * Select and compile (or reuse) TES parts (epilog). + */ +static bool si_shader_select_tes_parts(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct si_shader *shader, + struct pipe_debug_callback *debug) +{ + if (shader->key.tes.as_es) + return true; + + /* TES compiled as VS. */ + return si_get_vs_epilog(sscreen, tm, shader, debug, + &shader->key.tes.epilog); +} + +/** + * Compile the TCS epilog. This writes tesselation factors to memory based on + * the output primitive type of the tesselator (determined by TES). + */ +static bool si_compile_tcs_epilog(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct pipe_debug_callback *debug, + struct si_shader_part *out) +{ + union si_shader_part_key *key = &out->key; + struct si_shader shader = {}; + struct si_shader_context ctx; + struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm; + struct lp_build_tgsi_context *bld_base = &ctx.radeon_bld.soa.bld_base; + LLVMTypeRef params[16]; + LLVMValueRef func; + int last_array_pointer, last_sgpr, num_params; + bool status = true; + + si_init_shader_ctx(&ctx, sscreen, &shader, tm); + ctx.type = TGSI_PROCESSOR_TESS_CTRL; + shader.key.tcs.epilog = key->tcs_epilog.states; + + /* Declare inputs. Only RW_BUFFERS and TESS_FACTOR_OFFSET are used. 
*/ + params[SI_PARAM_RW_BUFFERS] = const_array(ctx.v16i8, SI_NUM_RW_BUFFERS); + last_array_pointer = SI_PARAM_RW_BUFFERS; + params[SI_PARAM_CONST_BUFFERS] = ctx.i64; + params[SI_PARAM_SAMPLERS] = ctx.i64; + params[SI_PARAM_UNUSED] = ctx.i64; + params[SI_PARAM_TCS_OUT_OFFSETS] = ctx.i32; + params[SI_PARAM_TCS_OUT_LAYOUT] = ctx.i32; + params[SI_PARAM_TCS_IN_LAYOUT] = ctx.i32; + params[SI_PARAM_TESS_FACTOR_OFFSET] = ctx.i32; + last_sgpr = SI_PARAM_TESS_FACTOR_OFFSET; + num_params = last_sgpr + 1; + + params[num_params++] = ctx.i32; /* patch index within the wave (REL_PATCH_ID) */ + params[num_params++] = ctx.i32; /* invocation ID within the patch */ + params[num_params++] = ctx.i32; /* LDS offset where tess factors should be loaded from */ + + /* Create the function. */ + si_create_function(&ctx, NULL, 0, params, num_params, + last_array_pointer, last_sgpr); + declare_tess_lds(&ctx); + func = ctx.radeon_bld.main_fn; + + si_write_tess_factors(bld_base, + LLVMGetParam(func, last_sgpr + 1), + LLVMGetParam(func, last_sgpr + 2), + LLVMGetParam(func, last_sgpr + 3)); + + /* Compile. */ + LLVMBuildRet(gallivm->builder, ctx.return_value); + radeon_llvm_finalize_module(&ctx.radeon_bld); + + if (si_compile_llvm(sscreen, &out->binary, &out->config, tm, + gallivm->module, debug, ctx.type, + "Tessellation Control Shader Epilog")) + status = false; + + radeon_llvm_dispose(&ctx.radeon_bld); + return status; +} + +/** + * Select and compile (or reuse) TCS parts (epilog). + */ +static bool si_shader_select_tcs_parts(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct si_shader *shader, + struct pipe_debug_callback *debug) +{ + union si_shader_part_key epilog_key; + + /* Get the epilog. 
*/ + memset(&epilog_key, 0, sizeof(epilog_key)); + epilog_key.tcs_epilog.states = shader->key.tcs.epilog; + + shader->epilog = si_get_shader_part(sscreen, &sscreen->tcs_epilogs, + &epilog_key, tm, debug, + si_compile_tcs_epilog); + return shader->epilog != NULL; +} + +/** + * Compile the pixel shader prolog. This handles: + * - two-side color selection and interpolation + * - overriding interpolation parameters for the API PS + * - polygon stippling + * + * All preloaded SGPRs and VGPRs are passed through unmodified unless they are + * overriden by other states. (e.g. per-sample interpolation) + * Interpolated colors are stored after the preloaded VGPRs. + */ +static bool si_compile_ps_prolog(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct pipe_debug_callback *debug, + struct si_shader_part *out) +{ + union si_shader_part_key *key = &out->key; + struct si_shader shader = {}; + struct si_shader_context ctx; + struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm; + LLVMTypeRef *params; + LLVMValueRef ret, func; + int last_sgpr, num_params, num_returns, i, num_color_channels; + bool status = true; + + si_init_shader_ctx(&ctx, sscreen, &shader, tm); + ctx.type = TGSI_PROCESSOR_FRAGMENT; + shader.key.ps.prolog = key->ps_prolog.states; + + /* Number of inputs + 8 color elements. */ + params = alloca((key->ps_prolog.num_input_sgprs + + key->ps_prolog.num_input_vgprs + 8) * + sizeof(LLVMTypeRef)); + + /* Declare inputs. */ + num_params = 0; + for (i = 0; i < key->ps_prolog.num_input_sgprs; i++) + params[num_params++] = ctx.i32; + last_sgpr = num_params - 1; + + for (i = 0; i < key->ps_prolog.num_input_vgprs; i++) + params[num_params++] = ctx.f32; + + /* Declare outputs (same as inputs + add colors if needed) */ + num_returns = num_params; + num_color_channels = util_bitcount(key->ps_prolog.colors_read); + for (i = 0; i < num_color_channels; i++) + params[num_returns++] = ctx.f32; + + /* Create the function. 
*/ + si_create_function(&ctx, params, num_returns, params, + num_params, -1, last_sgpr); + func = ctx.radeon_bld.main_fn; + + /* Copy inputs to outputs. This should be no-op, as the registers match, + * but it will prevent the compiler from overwriting them unintentionally. + */ + ret = ctx.return_value; + for (i = 0; i < num_params; i++) { + LLVMValueRef p = LLVMGetParam(func, i); + ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, ""); + } + + /* Polygon stippling. */ + if (key->ps_prolog.states.poly_stipple) { + /* POS_FIXED_PT is always last. */ + unsigned pos = key->ps_prolog.num_input_sgprs + + key->ps_prolog.num_input_vgprs - 1; + LLVMValueRef ptr[2], views; + + /* Get the pointer to sampler views. */ + ptr[0] = LLVMGetParam(func, SI_SGPR_SAMPLERS); + ptr[1] = LLVMGetParam(func, SI_SGPR_SAMPLERS+1); + views = lp_build_gather_values(gallivm, ptr, 2); + views = LLVMBuildBitCast(gallivm->builder, views, ctx.i64, ""); + views = LLVMBuildIntToPtr(gallivm->builder, views, + const_array(ctx.v8i32, SI_NUM_SAMPLERS), ""); + + si_llvm_emit_polygon_stipple(&ctx, views, pos); + } + + /* Interpolate colors. */ + for (i = 0; i < 2; i++) { + unsigned writemask = (key->ps_prolog.colors_read >> (i * 4)) & 0xf; + unsigned face_vgpr = key->ps_prolog.num_input_sgprs + + key->ps_prolog.face_vgpr_index; + LLVMValueRef interp[2], color[4]; + LLVMValueRef interp_ij = NULL, prim_mask = NULL, face = NULL; + + if (!writemask) + continue; + + /* If the interpolation qualifier is not CONSTANT (-1). */ + if (key->ps_prolog.color_interp_vgpr_index[i] != -1) { + unsigned interp_vgpr = key->ps_prolog.num_input_sgprs + + key->ps_prolog.color_interp_vgpr_index[i]; + + interp[0] = LLVMGetParam(func, interp_vgpr); + interp[1] = LLVMGetParam(func, interp_vgpr + 1); + interp_ij = lp_build_gather_values(gallivm, interp, 2); + interp_ij = LLVMBuildBitCast(gallivm->builder, interp_ij, + ctx.v2i32, ""); + } + + /* Use the absolute location of the input. 
*/ + prim_mask = LLVMGetParam(func, SI_PS_NUM_USER_SGPR); + + if (key->ps_prolog.states.color_two_side) { + face = LLVMGetParam(func, face_vgpr); + face = LLVMBuildBitCast(gallivm->builder, face, ctx.i32, ""); + } + + interp_fs_input(&ctx, + key->ps_prolog.color_attr_index[i], + TGSI_SEMANTIC_COLOR, i, + key->ps_prolog.num_interp_inputs, + key->ps_prolog.colors_read, interp_ij, + prim_mask, face, color); + + while (writemask) { + unsigned chan = u_bit_scan(&writemask); + ret = LLVMBuildInsertValue(gallivm->builder, ret, color[chan], + num_params++, ""); + } + } + + /* Force per-sample interpolation. */ + if (key->ps_prolog.states.force_persample_interp) { + unsigned i, base = key->ps_prolog.num_input_sgprs; + LLVMValueRef persp_sample[2], linear_sample[2]; + + /* Read PERSP_SAMPLE. */ + for (i = 0; i < 2; i++) + persp_sample[i] = LLVMGetParam(func, base + i); + /* Overwrite PERSP_CENTER. */ + for (i = 0; i < 2; i++) + ret = LLVMBuildInsertValue(gallivm->builder, ret, + persp_sample[i], base + 2 + i, ""); + /* Overwrite PERSP_CENTROID. */ + for (i = 0; i < 2; i++) + ret = LLVMBuildInsertValue(gallivm->builder, ret, + persp_sample[i], base + 4 + i, ""); + /* Read LINEAR_SAMPLE. */ + for (i = 0; i < 2; i++) + linear_sample[i] = LLVMGetParam(func, base + 6 + i); + /* Overwrite LINEAR_CENTER. */ + for (i = 0; i < 2; i++) + ret = LLVMBuildInsertValue(gallivm->builder, ret, + linear_sample[i], base + 8 + i, ""); + /* Overwrite LINEAR_CENTROID. */ + for (i = 0; i < 2; i++) + ret = LLVMBuildInsertValue(gallivm->builder, ret, + linear_sample[i], base + 10 + i, ""); + } + + /* Compile. */ + LLVMBuildRet(gallivm->builder, ret); + radeon_llvm_finalize_module(&ctx.radeon_bld); + + if (si_compile_llvm(sscreen, &out->binary, &out->config, tm, + gallivm->module, debug, ctx.type, + "Fragment Shader Prolog")) + status = false; + + radeon_llvm_dispose(&ctx.radeon_bld); + return status; +} + +/** + * Compile the pixel shader epilog. 
This handles everything that must be + * emulated for pixel shader exports. (alpha-test, format conversions, etc) + */ +static bool si_compile_ps_epilog(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct pipe_debug_callback *debug, + struct si_shader_part *out) +{ + union si_shader_part_key *key = &out->key; + struct si_shader shader = {}; + struct si_shader_context ctx; + struct gallivm_state *gallivm = &ctx.radeon_bld.gallivm; + struct lp_build_tgsi_context *bld_base = &ctx.radeon_bld.soa.bld_base; + LLVMTypeRef params[16+8*4+3]; + LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL; + int last_array_pointer, last_sgpr, num_params, i; + bool status = true; + + si_init_shader_ctx(&ctx, sscreen, &shader, tm); + ctx.type = TGSI_PROCESSOR_FRAGMENT; + shader.key.ps.epilog = key->ps_epilog.states; + + /* Declare input SGPRs. */ + params[SI_PARAM_RW_BUFFERS] = ctx.i64; + params[SI_PARAM_CONST_BUFFERS] = ctx.i64; + params[SI_PARAM_SAMPLERS] = ctx.i64; + params[SI_PARAM_UNUSED] = ctx.i64; + params[SI_PARAM_ALPHA_REF] = ctx.f32; + last_array_pointer = -1; + last_sgpr = SI_PARAM_ALPHA_REF; + + /* Declare input VGPRs. */ + num_params = (last_sgpr + 1) + + util_bitcount(key->ps_epilog.colors_written) * 4 + + key->ps_epilog.writes_z + + key->ps_epilog.writes_stencil + + key->ps_epilog.writes_samplemask; + + num_params = MAX2(num_params, + last_sgpr + 1 + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1); + + assert(num_params <= ARRAY_SIZE(params)); + + for (i = last_sgpr + 1; i < num_params; i++) + params[i] = ctx.f32; + + /* Create the function. */ + si_create_function(&ctx, NULL, 0, params, num_params, + last_array_pointer, last_sgpr); + /* Disable elimination of unused inputs. */ + radeon_llvm_add_attribute(ctx.radeon_bld.main_fn, + "InitialPSInputAddr", 0xffffff); + + /* Process colors. */ + unsigned vgpr = last_sgpr + 1; + unsigned colors_written = key->ps_epilog.colors_written; + int last_color_export = -1; + + /* Find the last color export. 
*/ + if (!key->ps_epilog.writes_z && + !key->ps_epilog.writes_stencil && + !key->ps_epilog.writes_samplemask) { + unsigned spi_format = key->ps_epilog.states.spi_shader_col_format; + + /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */ + if (colors_written == 0x1 && key->ps_epilog.states.last_cbuf > 0) { + /* Just set this if any of the colorbuffers are enabled. */ + if (spi_format & + ((1llu << (4 * (key->ps_epilog.states.last_cbuf + 1))) - 1)) + last_color_export = 0; + } else { + for (i = 0; i < 8; i++) + if (colors_written & (1 << i) && + (spi_format >> (i * 4)) & 0xf) + last_color_export = i; + } + } + + while (colors_written) { + LLVMValueRef color[4]; + int mrt = u_bit_scan(&colors_written); + + for (i = 0; i < 4; i++) + color[i] = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++); + + si_export_mrt_color(bld_base, color, mrt, + num_params - 1, + mrt == last_color_export); + } + + /* Process depth, stencil, samplemask. */ + if (key->ps_epilog.writes_z) + depth = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++); + if (key->ps_epilog.writes_stencil) + stencil = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++); + if (key->ps_epilog.writes_samplemask) + samplemask = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++); + + if (depth || stencil || samplemask) + si_export_mrt_z(bld_base, depth, stencil, samplemask); + else if (last_color_export == -1) + si_export_null(bld_base); + + /* Compile. */ + LLVMBuildRetVoid(gallivm->builder); + radeon_llvm_finalize_module(&ctx.radeon_bld); + + if (si_compile_llvm(sscreen, &out->binary, &out->config, tm, + gallivm->module, debug, ctx.type, + "Fragment Shader Epilog")) + status = false; + + radeon_llvm_dispose(&ctx.radeon_bld); + return status; +} + +/** + * Select and compile (or reuse) pixel shader parts (prolog & epilog). 
+ */ +static bool si_shader_select_ps_parts(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct si_shader *shader, + struct pipe_debug_callback *debug) +{ + struct tgsi_shader_info *info = &shader->selector->info; + union si_shader_part_key prolog_key; + union si_shader_part_key epilog_key; + unsigned i; + + /* Get the prolog. */ + memset(&prolog_key, 0, sizeof(prolog_key)); + prolog_key.ps_prolog.states = shader->key.ps.prolog; + prolog_key.ps_prolog.colors_read = info->colors_read; + prolog_key.ps_prolog.num_input_sgprs = shader->info.num_input_sgprs; + prolog_key.ps_prolog.num_input_vgprs = shader->info.num_input_vgprs; + + if (info->colors_read) { + unsigned *color = shader->selector->color_attr_index; + + if (shader->key.ps.prolog.color_two_side) { + /* BCOLORs are stored after the last input. */ + prolog_key.ps_prolog.num_interp_inputs = info->num_inputs; + prolog_key.ps_prolog.face_vgpr_index = shader->info.face_vgpr_index; + shader->config.spi_ps_input_ena |= S_0286CC_FRONT_FACE_ENA(1); + } + + for (i = 0; i < 2; i++) { + unsigned location = info->input_interpolate_loc[color[i]]; + + if (!(info->colors_read & (0xf << i*4))) + continue; + + prolog_key.ps_prolog.color_attr_index[i] = color[i]; + + /* Force per-sample interpolation for the colors here. 
*/ + if (shader->key.ps.prolog.force_persample_interp) + location = TGSI_INTERPOLATE_LOC_SAMPLE; + + switch (info->input_interpolate[color[i]]) { + case TGSI_INTERPOLATE_CONSTANT: + prolog_key.ps_prolog.color_interp_vgpr_index[i] = -1; + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + case TGSI_INTERPOLATE_COLOR: + switch (location) { + case TGSI_INTERPOLATE_LOC_SAMPLE: + prolog_key.ps_prolog.color_interp_vgpr_index[i] = 0; + shader->config.spi_ps_input_ena |= + S_0286CC_PERSP_SAMPLE_ENA(1); + break; + case TGSI_INTERPOLATE_LOC_CENTER: + prolog_key.ps_prolog.color_interp_vgpr_index[i] = 2; + shader->config.spi_ps_input_ena |= + S_0286CC_PERSP_CENTER_ENA(1); + break; + case TGSI_INTERPOLATE_LOC_CENTROID: + prolog_key.ps_prolog.color_interp_vgpr_index[i] = 4; + shader->config.spi_ps_input_ena |= + S_0286CC_PERSP_CENTROID_ENA(1); + break; + default: + assert(0); + } + break; + case TGSI_INTERPOLATE_LINEAR: + switch (location) { + case TGSI_INTERPOLATE_LOC_SAMPLE: + prolog_key.ps_prolog.color_interp_vgpr_index[i] = 6; + shader->config.spi_ps_input_ena |= + S_0286CC_LINEAR_SAMPLE_ENA(1); + break; + case TGSI_INTERPOLATE_LOC_CENTER: + prolog_key.ps_prolog.color_interp_vgpr_index[i] = 8; + shader->config.spi_ps_input_ena |= + S_0286CC_LINEAR_CENTER_ENA(1); + break; + case TGSI_INTERPOLATE_LOC_CENTROID: + prolog_key.ps_prolog.color_interp_vgpr_index[i] = 10; + shader->config.spi_ps_input_ena |= + S_0286CC_LINEAR_CENTROID_ENA(1); + break; + default: + assert(0); + } + break; + default: + assert(0); + } + } + } + + /* The prolog is a no-op if these aren't set. */ + if (prolog_key.ps_prolog.colors_read || + prolog_key.ps_prolog.states.force_persample_interp || + prolog_key.ps_prolog.states.poly_stipple) { + shader->prolog = + si_get_shader_part(sscreen, &sscreen->ps_prologs, + &prolog_key, tm, debug, + si_compile_ps_prolog); + if (!shader->prolog) + return false; + } + + /* Get the epilog. 
*/ + memset(&epilog_key, 0, sizeof(epilog_key)); + epilog_key.ps_epilog.colors_written = info->colors_written; + epilog_key.ps_epilog.writes_z = info->writes_z; + epilog_key.ps_epilog.writes_stencil = info->writes_stencil; + epilog_key.ps_epilog.writes_samplemask = info->writes_samplemask; + epilog_key.ps_epilog.states = shader->key.ps.epilog; + + shader->epilog = + si_get_shader_part(sscreen, &sscreen->ps_epilogs, + &epilog_key, tm, debug, + si_compile_ps_epilog); + if (!shader->epilog) + return false; + + /* Enable POS_FIXED_PT if polygon stippling is enabled. */ + if (shader->key.ps.prolog.poly_stipple) { + shader->config.spi_ps_input_ena |= S_0286CC_POS_FIXED_PT_ENA(1); + assert(G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr)); + } + + /* Set up the enable bits for per-sample shading if needed. */ + if (shader->key.ps.prolog.force_persample_interp) { + if (G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_ena) || + G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena)) { + shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTER_ENA; + shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA; + shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1); + } + if (G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_ena) || + G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena)) { + shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTER_ENA; + shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA; + shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1); + } + } + + /* POW_W_FLOAT requires that one of the perspective weights is enabled. */ + if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_ena) && + !(shader->config.spi_ps_input_ena & 0xf)) { + shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1); + assert(G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_addr)); + } + + /* At least one pair of interpolation weights must be enabled. 
*/ + if (!(shader->config.spi_ps_input_ena & 0x7f)) { + shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1); + assert(G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr)); + } + + /* The sample mask input is always enabled, because the API shader always + * passes it through to the epilog. Disable it here if it's unused. + */ + if (!shader->key.ps.epilog.poly_line_smoothing && + !shader->selector->info.reads_samplemask) + shader->config.spi_ps_input_ena &= C_0286CC_SAMPLE_COVERAGE_ENA; + + return true; +} + +int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, + struct si_shader *shader, + struct pipe_debug_callback *debug) +{ + struct si_shader *mainp = shader->selector->main_shader_part; + int r; + + /* LS and ES are always compiled on demand. */ + if (!mainp || + (shader->selector->type == PIPE_SHADER_VERTEX && + (shader->key.vs.as_es || shader->key.vs.as_ls)) || + (shader->selector->type == PIPE_SHADER_TESS_EVAL && + shader->key.tes.as_es)) { + /* Monolithic shader (compiled as a whole, has many variants, + * may take a long time to compile). + */ + r = si_compile_tgsi_shader(sscreen, tm, shader, true, debug); + if (r) + return r; + } else { + /* The shader consists of 2-3 parts: + * + * - the middle part is the user shader, it has 1 variant only + * and it was compiled during the creation of the shader + * selector + * - the prolog part is inserted at the beginning + * - the epilog part is inserted at the end + * + * The prolog and epilog have many (but simple) variants. + */ + + /* Copy the compiled TGSI shader data over. 
*/ + shader->is_binary_shared = true; + shader->binary = mainp->binary; + shader->config = mainp->config; + shader->info.num_input_sgprs = mainp->info.num_input_sgprs; + shader->info.num_input_vgprs = mainp->info.num_input_vgprs; + shader->info.face_vgpr_index = mainp->info.face_vgpr_index; + memcpy(shader->info.vs_output_param_offset, + mainp->info.vs_output_param_offset, + sizeof(mainp->info.vs_output_param_offset)); + shader->info.uses_instanceid = mainp->info.uses_instanceid; + shader->info.nr_pos_exports = mainp->info.nr_pos_exports; + shader->info.nr_param_exports = mainp->info.nr_param_exports; + + /* Select prologs and/or epilogs. */ + switch (shader->selector->type) { + case PIPE_SHADER_VERTEX: + if (!si_shader_select_vs_parts(sscreen, tm, shader, debug)) + return -1; + break; + case PIPE_SHADER_TESS_CTRL: + if (!si_shader_select_tcs_parts(sscreen, tm, shader, debug)) + return -1; + break; + case PIPE_SHADER_TESS_EVAL: + if (!si_shader_select_tes_parts(sscreen, tm, shader, debug)) + return -1; + break; + case PIPE_SHADER_FRAGMENT: + if (!si_shader_select_ps_parts(sscreen, tm, shader, debug)) + return -1; + + /* Make sure we have at least as many VGPRs as there + * are allocated inputs. + */ + shader->config.num_vgprs = MAX2(shader->config.num_vgprs, + shader->info.num_input_vgprs); + break; + } + + /* Update SGPR and VGPR counts. */ + if (shader->prolog) { + shader->config.num_sgprs = MAX2(shader->config.num_sgprs, + shader->prolog->config.num_sgprs); + shader->config.num_vgprs = MAX2(shader->config.num_vgprs, + shader->prolog->config.num_vgprs); + } + if (shader->epilog) { + shader->config.num_sgprs = MAX2(shader->config.num_sgprs, + shader->epilog->config.num_sgprs); + shader->config.num_vgprs = MAX2(shader->config.num_vgprs, + shader->epilog->config.num_vgprs); + } + } + + si_shader_dump(sscreen, shader, debug, shader->selector->info.processor); + + /* Upload. 
*/ + r = si_shader_binary_upload(sscreen, shader); + if (r) { + fprintf(stderr, "LLVM failed to upload shader\n"); + return r; + } + + return 0; +} + void si_shader_destroy(struct si_shader *shader) { if (shader->gs_copy_shader) { @@ -4534,5 +5995,6 @@ void si_shader_destroy(struct si_shader *shader) r600_resource_reference(&shader->bo, NULL); - radeon_shader_binary_clean(&shader->binary); + if (!shader->is_binary_shared) + radeon_shader_binary_clean(&shader->binary); } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index dc75e0330e4..ff5c24d8918 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -75,6 +75,8 @@ struct radeon_shader_binary; struct radeon_shader_reloc; +#define SI_MAX_VS_OUTPUTS 40 + #define SI_SGPR_RW_BUFFERS 0 /* rings (& stream-out, VS only) */ #define SI_SGPR_CONST_BUFFERS 2 #define SI_SGPR_SAMPLERS 4 /* images & sampler states interleaved */ @@ -169,7 +171,7 @@ struct radeon_shader_reloc; #define SI_PARAM_SAMPLE_COVERAGE 20 #define SI_PARAM_POS_FIXED_PT 21 -#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1) +#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 9) /* +8 for COLOR[0..1] */ struct si_shader; @@ -181,6 +183,11 @@ struct si_shader_selector { struct si_shader *first_variant; /* immutable after the first variant */ struct si_shader *last_variant; /* mutable */ + /* The compiled TGSI shader expecting a prolog and/or epilog (not + * uploaded to a buffer). + */ + struct si_shader *main_shader_part; + struct tgsi_token *tokens; struct pipe_stream_output_info so; struct tgsi_shader_info info; @@ -199,6 +206,7 @@ struct si_shader_selector { unsigned max_gsvs_emit_size; /* PS parameters. */ + unsigned color_attr_index[2]; unsigned db_shader_control; /* Set 0xf or 0x0 (4 bits) per each written output. * ANDed with spi_shader_col_format. 
@@ -221,37 +229,103 @@ struct si_shader_selector { * With both: LS | HS | ES | GS | VS | PS */ +/* Common VS bits between the shader key and the prolog key. */ +struct si_vs_prolog_bits { + unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS]; +}; + +/* Common VS bits between the shader key and the epilog key. */ +struct si_vs_epilog_bits { + unsigned export_prim_id:1; /* when PS needs it and GS is disabled */ + /* TODO: + * - skip clipdist, culldist (including clipvertex code) exports based + * on which clip_plane_enable bits are set + * - skip layer, viewport, clipdist, and culldist parameter exports + * if PS doesn't read them + */ +}; + +/* Common TCS bits between the shader key and the epilog key. */ +struct si_tcs_epilog_bits { + unsigned prim_mode:3; +}; + +/* Common PS bits between the shader key and the prolog key. */ +struct si_ps_prolog_bits { + unsigned color_two_side:1; + /* TODO: add a flatshade bit that skips interpolation for colors */ + unsigned poly_stipple:1; + unsigned force_persample_interp:1; + /* TODO: + * - add force_center_interp if MSAA is disabled and centroid or + * sample are present + * - add force_center_interp_bc_optimize to force center interpolation + * based on the bc_optimize SGPR bit if MSAA is enabled, centroid is + * present and sample isn't present. + */ +}; + +/* Common PS bits between the shader key and the epilog key. 
*/ +struct si_ps_epilog_bits { + unsigned spi_shader_col_format; + unsigned color_is_int8:8; + unsigned last_cbuf:3; + unsigned alpha_func:3; + unsigned alpha_to_one:1; + unsigned poly_line_smoothing:1; + unsigned clamp_color:1; +}; + +union si_shader_part_key { + struct { + struct si_vs_prolog_bits states; + unsigned num_input_sgprs:5; + unsigned last_input:4; + } vs_prolog; + struct { + struct si_vs_epilog_bits states; + unsigned prim_id_param_offset:5; + } vs_epilog; + struct { + struct si_tcs_epilog_bits states; + } tcs_epilog; + struct { + struct si_ps_prolog_bits states; + unsigned num_input_sgprs:5; + unsigned num_input_vgprs:5; + /* Color interpolation and two-side color selection. */ + unsigned colors_read:8; /* color input components read */ + unsigned num_interp_inputs:5; /* BCOLOR is at this location */ + unsigned face_vgpr_index:5; + char color_attr_index[2]; + char color_interp_vgpr_index[2]; /* -1 == constant */ + } ps_prolog; + struct { + struct si_ps_epilog_bits states; + unsigned colors_written:8; + unsigned writes_z:1; + unsigned writes_stencil:1; + unsigned writes_samplemask:1; + } ps_epilog; +}; + union si_shader_key { struct { - unsigned spi_shader_col_format; - unsigned color_is_int8:8; - unsigned last_cbuf:3; - unsigned color_two_side:1; - unsigned alpha_func:3; - unsigned alpha_to_one:1; - unsigned poly_stipple:1; - unsigned poly_line_smoothing:1; - unsigned clamp_color:1; - unsigned force_persample_interp:1; + struct si_ps_prolog_bits prolog; + struct si_ps_epilog_bits epilog; } ps; struct { - unsigned instance_divisors[SI_NUM_VERTEX_BUFFERS]; - /* Mask of "get_unique_index" bits - which outputs are read - * by the next stage (needed by ES). - * This describes how outputs are laid out in memory. 
*/ + struct si_vs_prolog_bits prolog; + struct si_vs_epilog_bits epilog; unsigned as_es:1; /* export shader */ unsigned as_ls:1; /* local shader */ - unsigned export_prim_id:1; /* when PS needs it and GS is disabled */ } vs; struct { - unsigned prim_mode:3; + struct si_tcs_epilog_bits epilog; } tcs; /* tessellation control shader */ struct { - /* Mask of "get_unique_index" bits - which outputs are read - * by the next stage (needed by ES). - * This describes how outputs are laid out in memory. */ + struct si_vs_epilog_bits epilog; /* same as VS */ unsigned as_es:1; /* export shader */ - unsigned export_prim_id:1; /* when PS needs it and GS is disabled */ } tes; /* tessellation evaluation shader */ }; @@ -267,22 +341,42 @@ struct si_shader_config { unsigned rsrc2; }; +/* GCN-specific shader info. */ +struct si_shader_info { + ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS]; + ubyte num_input_sgprs; + ubyte num_input_vgprs; + char face_vgpr_index; + bool uses_instanceid; + ubyte nr_pos_exports; + ubyte nr_param_exports; +}; + struct si_shader { struct si_shader_selector *selector; struct si_shader *next_variant; + struct si_shader_part *prolog; + struct si_shader_part *epilog; + struct si_shader *gs_copy_shader; struct si_pm4_state *pm4; struct r600_resource *bo; struct r600_resource *scratch_bo; union si_shader_key key; + bool is_binary_shared; + + /* The following data is all that's needed for binary shaders. 
*/ struct radeon_shader_binary binary; struct si_shader_config config; + struct si_shader_info info; +}; - unsigned vs_output_param_offset[PIPE_MAX_SHADER_OUTPUTS]; - bool uses_instanceid; - unsigned nr_pos_exports; - unsigned nr_param_exports; +struct si_shader_part { + struct si_shader_part *next; + union si_shader_part_key key; + struct radeon_shader_binary binary; + struct si_shader_config config; }; static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx) @@ -310,14 +404,19 @@ static inline struct si_shader* si_get_vs_state(struct si_context *sctx) static inline bool si_vs_exports_prim_id(struct si_shader *shader) { if (shader->selector->type == PIPE_SHADER_VERTEX) - return shader->key.vs.export_prim_id; + return shader->key.vs.epilog.export_prim_id; else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) - return shader->key.tes.export_prim_id; + return shader->key.tes.epilog.export_prim_id; else return false; } -/* radeonsi_shader.c */ +/* si_shader.c */ +int si_compile_tgsi_shader(struct si_screen *sscreen, + LLVMTargetMachineRef tm, + struct si_shader *shader, + bool is_monolithic, + struct pipe_debug_callback *debug); int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, struct si_shader *shader, struct pipe_debug_callback *debug); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index bf780777b50..2dfdbeb8d8f 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -277,7 +277,7 @@ static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *a if (sctx->b.family == CHIP_STONEY) { unsigned spi_shader_col_format = sctx->ps_shader.cso ? 
- sctx->ps_shader.current->key.ps.spi_shader_col_format : 0; + sctx->ps_shader.current->key.ps.epilog.spi_shader_col_format : 0; unsigned sx_ps_downconvert = 0; unsigned sx_blend_opt_epsilon = 0; unsigned sx_blend_opt_control = 0; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index f64c4d45f1b..40792cbc1d5 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -280,6 +280,8 @@ si_create_sampler_view_custom(struct pipe_context *ctx, /* si_state_shader.c */ bool si_update_shaders(struct si_context *sctx); void si_init_shader_functions(struct si_context *sctx); +bool si_init_shader_cache(struct si_screen *sscreen); +void si_destroy_shader_cache(struct si_screen *sscreen); /* si_state_draw.c */ void si_emit_cache_flush(struct si_context *sctx, struct r600_atom *atom); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 77a4e47c809..a6753a7a528 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -32,10 +32,221 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_ureg.h" +#include "util/hash_table.h" +#include "util/u_hash.h" #include "util/u_memory.h" #include "util/u_prim.h" #include "util/u_simple_shaders.h" +/* SHADER_CACHE */ + +/** + * Return the TGSI binary in a buffer. The first 4 bytes contain its size as + * integer. + */ +static void *si_get_tgsi_binary(struct si_shader_selector *sel) +{ + unsigned tgsi_size = tgsi_num_tokens(sel->tokens) * + sizeof(struct tgsi_token); + unsigned size = 4 + tgsi_size + sizeof(sel->so); + char *result = (char*)MALLOC(size); + + if (!result) + return NULL; + + *((uint32_t*)result) = size; + memcpy(result + 4, sel->tokens, tgsi_size); + memcpy(result + 4 + tgsi_size, &sel->so, sizeof(sel->so)); + return result; +} + +/** Copy "data" to "ptr" and return the next dword following copied data. 
*/
+static uint32_t *write_data(uint32_t *ptr, const void *data, unsigned size)
+{
+	/* A zero-sized chunk may come with a NULL "data" pointer; memcpy must
+	 * not be handed a NULL pointer even when asked to copy zero bytes.
+	 */
+	if (size)
+		memcpy(ptr, data, size);
+	ptr += DIV_ROUND_UP(size, 4);
+	return ptr;
+}
+
+/** Read data from "ptr". Return the next dword following the data. */
+static uint32_t *read_data(uint32_t *ptr, void *data, unsigned size)
+{
+	/* Same rule as write_data: nothing to copy for an empty chunk. */
+	if (size)
+		memcpy(data, ptr, size);
+	ptr += DIV_ROUND_UP(size, 4);
+	return ptr;
+}
+
+/**
+ * Write the size as uint followed by the data. Return the next dword
+ * following the copied data.
+ */
+static uint32_t *write_chunk(uint32_t *ptr, const void *data, unsigned size)
+{
+	*ptr++ = size;
+	return write_data(ptr, data, size);
+}
+
+/**
+ * Read the size as uint followed by the data. Return both via parameters.
+ * Return the next dword following the data.
+ *
+ * "*data" must be NULL on entry. An empty chunk leaves "*data" NULL.
+ * If the allocation fails, "*data" stays NULL, "*size" is reset to 0 and
+ * NULL is returned instead of the next dword.
+ */
+static uint32_t *read_chunk(uint32_t *ptr, void **data, unsigned *size)
+{
+	*size = *ptr++;
+	assert(*data == NULL);
+
+	/* Don't allocate for empty chunks: malloc(0) may return a non-NULL
+	 * pointer to zero bytes, which is useless as code or as a string.
+	 */
+	if (!*size)
+		return ptr;
+
+	*data = malloc(*size);
+	if (!*data) {
+		*size = 0;
+		return NULL;
+	}
+	return read_data(ptr, *data, *size);
+}
+
+/**
+ * Return the shader binary in a buffer. The first 4 bytes contain its size
+ * as integer.
+ *
+ * Layout: total size, CRC32 of everything after it, config, info, then the
+ * code, rodata, relocs and disassembly string, each as a size-prefixed
+ * chunk. Returns NULL if the buffer can't be allocated.
+ */
+static void *si_get_shader_binary(struct si_shader *shader)
+{
+	/* There is always a size of data followed by the data itself. */
+	unsigned relocs_size = shader->binary.reloc_count *
+			       sizeof(shader->binary.relocs[0]);
+	/* The disassembly string may be absent; store an empty chunk then. */
+	unsigned disasm_size = shader->binary.disasm_string ?
+			       strlen(shader->binary.disasm_string) + 1 : 0;
+	unsigned size =
+		4 + /* total size */
+		4 + /* CRC32 of the data below */
+		align(sizeof(shader->config), 4) +
+		align(sizeof(shader->info), 4) +
+		4 + align(shader->binary.code_size, 4) +
+		4 + align(shader->binary.rodata_size, 4) +
+		4 + align(relocs_size, 4) +
+		4 + align(disasm_size, 4);
+	void *buffer = CALLOC(1, size);
+	uint32_t *ptr = (uint32_t*)buffer;
+
+	if (!buffer)
+		return NULL;
+
+	*ptr++ = size;
+	ptr++; /* CRC32 is calculated at the end. */
+
+	ptr = write_data(ptr, &shader->config, sizeof(shader->config));
+	ptr = write_data(ptr, &shader->info, sizeof(shader->info));
+	ptr = write_chunk(ptr, shader->binary.code, shader->binary.code_size);
+	ptr = write_chunk(ptr, shader->binary.rodata, shader->binary.rodata_size);
+	ptr = write_chunk(ptr, shader->binary.relocs, relocs_size);
+	ptr = write_chunk(ptr, shader->binary.disasm_string, disasm_size);
+	assert((char *)ptr - (char *)buffer == size);
+
+	/* Compute CRC32. */
+	ptr = (uint32_t*)buffer;
+	ptr++;
+	*ptr = util_hash_crc32(ptr + 1, size - 8);
+
+	return buffer;
+}
+
+/**
+ * Fill "shader" from a buffer produced by si_get_shader_binary.
+ *
+ * Returns false if the CRC32 doesn't match or if a chunk can't be
+ * allocated. On an allocation failure all chunks read so far are released
+ * and the binary pointers/sizes are reset, so the caller can fall back to
+ * compiling the shader; config and info may already have been overwritten.
+ */
+static bool si_load_shader_binary(struct si_shader *shader, void *binary)
+{
+	uint32_t *ptr = (uint32_t*)binary;
+	uint32_t size = *ptr++;
+	uint32_t crc32 = *ptr++;
+	unsigned chunk_size;
+
+	if (util_hash_crc32(ptr, size - 8) != crc32) {
+		fprintf(stderr, "radeonsi: binary shader has invalid CRC32\n");
+		return false;
+	}
+
+	ptr = read_data(ptr, &shader->config, sizeof(shader->config));
+	ptr = read_data(ptr, &shader->info, sizeof(shader->info));
+	ptr = read_chunk(ptr, (void**)&shader->binary.code,
+			 &shader->binary.code_size);
+	if (!ptr)
+		goto fail;
+	ptr = read_chunk(ptr, (void**)&shader->binary.rodata,
+			 &shader->binary.rodata_size);
+	if (!ptr)
+		goto fail;
+	ptr = read_chunk(ptr, (void**)&shader->binary.relocs, &chunk_size);
+	if (!ptr)
+		goto fail;
+	shader->binary.reloc_count = chunk_size / sizeof(shader->binary.relocs[0]);
+	ptr = read_chunk(ptr, (void**)&shader->binary.disasm_string, &chunk_size);
+	if (!ptr)
+		goto fail;
+
+	return true;
+
+fail:
+	/* The chunks were obtained with malloc() in read_chunk. */
+	free(shader->binary.code);
+	free(shader->binary.rodata);
+	free(shader->binary.relocs);
+	free(shader->binary.disasm_string);
+	shader->binary.code = NULL;
+	shader->binary.code_size = 0;
+	shader->binary.rodata = NULL;
+	shader->binary.rodata_size = 0;
+	shader->binary.relocs = NULL;
+	shader->binary.reloc_count = 0;
+	shader->binary.disasm_string = NULL;
+	return false;
+}
+
+/**
+ * Insert a shader into the cache. It's assumed the shader is not in the cache.
+ * Use si_shader_cache_load_shader before calling this.
+ *
+ * Returns false on failure, in which case the tgsi_binary should be freed.
+ */
+static bool si_shader_cache_insert_shader(struct si_screen *sscreen,
+					  void *tgsi_binary,
+					  struct si_shader *shader)
+{
+	void *hw_binary = si_get_shader_binary(shader);
+
+	if (!hw_binary)
+		return false;
+
+	/* On success the table keeps tgsi_binary as the key and hw_binary as
+	 * the data; both are released by si_destroy_shader_cache_entry.
+	 */
+	if (_mesa_hash_table_insert(sscreen->shader_cache, tgsi_binary,
+				    hw_binary) == NULL) {
+		FREE(hw_binary);
+		return false;
+	}
+
+	return true;
+}
+
+/**
+ * Look up tgsi_binary in the cache and, if an entry exists, load the stored
+ * shader binary into "shader".
+ *
+ * Returns false if there is no entry or if si_load_shader_binary fails.
+ */
+static bool si_shader_cache_load_shader(struct si_screen *sscreen,
+					void *tgsi_binary,
+					struct si_shader *shader)
+{
+	struct hash_entry *entry =
+		_mesa_hash_table_search(sscreen->shader_cache, tgsi_binary);
+	if (!entry)
+		return false;
+
+	return si_load_shader_binary(shader, entry->data);
+}
+
+/** Hash callback of the cache: CRC32 over the whole key. */
+static uint32_t si_shader_cache_key_hash(const void *key)
+{
+	/* The first dword is the key size. */
+	return util_hash_crc32(key, *(uint32_t*)key);
+}
+
+/** Equality callback of the cache: same size and same bytes. */
+static bool si_shader_cache_key_equals(const void *a, const void *b)
+{
+	uint32_t *keya = (uint32_t*)a;
+	uint32_t *keyb = (uint32_t*)b;
+
+	/* The first dword is the key size. */
+	if (*keya != *keyb)
+		return false;
+
+	return memcmp(keya, keyb, *keya) == 0;
+}
+
+/** Free the key and the data of one cache entry. */
+static void si_destroy_shader_cache_entry(struct hash_entry *entry)
+{
+	FREE((void*)entry->key);
+	FREE(entry->data);
+}
+
+/**
+ * Initialize the cache mutex and create the hash table.
+ *
+ * Returns false if the hash table can't be created.
+ */
+bool si_init_shader_cache(struct si_screen *sscreen)
+{
+	pipe_mutex_init(sscreen->shader_cache_mutex);
+	sscreen->shader_cache =
+		_mesa_hash_table_create(NULL,
+					si_shader_cache_key_hash,
+					si_shader_cache_key_equals);
+	return sscreen->shader_cache != NULL;
+}
+
+/**
+ * Destroy the hash table (if it was created) together with all entries,
+ * then destroy the cache mutex.
+ */
+void si_destroy_shader_cache(struct si_screen *sscreen)
+{
+	if (sscreen->shader_cache)
+		_mesa_hash_table_destroy(sscreen->shader_cache,
+					 si_destroy_shader_cache_entry);
+	pipe_mutex_destroy(sscreen->shader_cache_mutex);
+}
+
+/* SHADER STATES */
+
 static void si_set_tesseval_regs(struct si_shader *shader,
 				 struct si_pm4_state *pm4)
 {
@@ -108,7 +319,7 @@ static void si_shader_ls(struct si_shader *shader)
 	/* We need at least 2 components for LS.
 	 * VGPR0-3: (VertexID, RelAutoindex, ???, InstanceID).
*/ - vgpr_comp_cnt = shader->uses_instanceid ? 3 : 1; + vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 1; num_user_sgprs = SI_LS_NUM_USER_SGPR; num_sgprs = shader->config.num_sgprs; @@ -181,7 +392,7 @@ static void si_shader_es(struct si_shader *shader) si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_USER_SHADER); if (shader->selector->type == PIPE_SHADER_VERTEX) { - vgpr_comp_cnt = shader->uses_instanceid ? 3 : 0; + vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : 0; num_user_sgprs = SI_ES_NUM_USER_SGPR; } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) { vgpr_comp_cnt = 3; /* all components are needed for TES */ @@ -347,7 +558,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs) vgpr_comp_cnt = 0; /* only VertexID is needed for GS-COPY. */ num_user_sgprs = SI_GSCOPY_NUM_USER_SGPR; } else if (shader->selector->type == PIPE_SHADER_VERTEX) { - vgpr_comp_cnt = shader->uses_instanceid ? 3 : (enable_prim_id ? 2 : 0); + vgpr_comp_cnt = shader->info.uses_instanceid ? 3 : (enable_prim_id ? 2 : 0); num_user_sgprs = SI_VS_NUM_USER_SGPR; } else if (shader->selector->type == PIPE_SHADER_TESS_EVAL) { vgpr_comp_cnt = 3; /* all components are needed for TES */ @@ -363,19 +574,19 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs) assert(num_sgprs <= 104); /* VS is required to export at least one param. */ - nparams = MAX2(shader->nr_param_exports, 1); + nparams = MAX2(shader->info.nr_param_exports, 1); si_pm4_set_reg(pm4, R_0286C4_SPI_VS_OUT_CONFIG, S_0286C4_VS_EXPORT_COUNT(nparams - 1)); si_pm4_set_reg(pm4, R_02870C_SPI_SHADER_POS_FORMAT, S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | - S_02870C_POS1_EXPORT_FORMAT(shader->nr_pos_exports > 1 ? + S_02870C_POS1_EXPORT_FORMAT(shader->info.nr_pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) | - S_02870C_POS2_EXPORT_FORMAT(shader->nr_pos_exports > 2 ? + S_02870C_POS2_EXPORT_FORMAT(shader->info.nr_pos_exports > 2 ? 
V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE) | - S_02870C_POS3_EXPORT_FORMAT(shader->nr_pos_exports > 3 ? + S_02870C_POS3_EXPORT_FORMAT(shader->info.nr_pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP : V_02870C_SPI_SHADER_NONE)); @@ -415,7 +626,7 @@ static unsigned si_get_ps_num_interp(struct si_shader *ps) unsigned num_colors = !!(info->colors_read & 0x0f) + !!(info->colors_read & 0xf0); unsigned num_interp = ps->selector->info.num_inputs + - (ps->key.ps.color_two_side ? num_colors : 0); + (ps->key.ps.prolog.color_two_side ? num_colors : 0); assert(num_interp <= 32); return MIN2(num_interp, 32); @@ -423,7 +634,7 @@ static unsigned si_get_ps_num_interp(struct si_shader *ps) static unsigned si_get_spi_shader_col_format(struct si_shader *shader) { - unsigned value = shader->key.ps.spi_shader_col_format; + unsigned value = shader->key.ps.epilog.spi_shader_col_format; unsigned i, num_targets = (util_last_bit(value) + 3) / 4; /* If the i-th target format is set, all previous target formats must @@ -528,7 +739,7 @@ static void si_shader_ps(struct si_shader *shader) if (!spi_shader_col_format && !info->writes_z && !info->writes_stencil && !info->writes_samplemask && (shader->selector->info.uses_kill || - shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)) + shader->key.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS)) spi_shader_col_format = V_028714_SPI_SHADER_32_R; si_pm4_set_reg(pm4, R_0286CC_SPI_PS_INPUT_ENA, input_ena); @@ -638,11 +849,13 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, switch (sel->type) { case PIPE_SHADER_VERTEX: - if (sctx->vertex_elements) - for (i = 0; i < sctx->vertex_elements->count; ++i) - key->vs.instance_divisors[i] = + if (sctx->vertex_elements) { + unsigned count = MIN2(sel->info.num_inputs, + sctx->vertex_elements->count); + for (i = 0; i < count; ++i) + key->vs.prolog.instance_divisors[i] = sctx->vertex_elements->elements[i].instance_divisor; - + } if (sctx->tes_shader.cso) key->vs.as_ls = 1; else if 
(sctx->gs_shader.cso) @@ -650,17 +863,17 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, if (!sctx->gs_shader.cso && sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid) - key->vs.export_prim_id = 1; + key->vs.epilog.export_prim_id = 1; break; case PIPE_SHADER_TESS_CTRL: - key->tcs.prim_mode = + key->tcs.epilog.prim_mode = sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]; break; case PIPE_SHADER_TESS_EVAL: if (sctx->gs_shader.cso) key->tes.as_es = 1; else if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid) - key->tes.export_prim_id = 1; + key->tes.epilog.export_prim_id = 1; break; case PIPE_SHADER_GEOMETRY: break; @@ -670,13 +883,13 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] && sel->info.colors_written == 0x1) - key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1; + key->ps.epilog.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1; if (blend) { /* Select the shader color format based on whether * blending or alpha are needed. */ - key->ps.spi_shader_col_format = + key->ps.epilog.spi_shader_col_format = (blend->blend_enable_4bit & blend->need_src_alpha_4bit & sctx->framebuffer.spi_shader_col_format_blend_alpha) | (blend->blend_enable_4bit & ~blend->need_src_alpha_4bit & @@ -686,26 +899,26 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, (~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit & sctx->framebuffer.spi_shader_col_format); } else - key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format; + key->ps.epilog.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format; /* If alpha-to-coverage is enabled, we have to export alpha * even if there is no color buffer. 
*/ - if (!(key->ps.spi_shader_col_format & 0xf) && + if (!(key->ps.epilog.spi_shader_col_format & 0xf) && blend && blend->alpha_to_coverage) - key->ps.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR; + key->ps.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR; /* On SI and CIK except Hawaii, the CB doesn't clamp outputs * to the range supported by the type if a channel has less * than 16 bits and the export format is 16_ABGR. */ if (sctx->b.chip_class <= CIK && sctx->b.family != CHIP_HAWAII) - key->ps.color_is_int8 = sctx->framebuffer.color_is_int8; + key->ps.epilog.color_is_int8 = sctx->framebuffer.color_is_int8; /* Disable unwritten outputs (if WRITE_ALL_CBUFS isn't enabled). */ - if (!key->ps.last_cbuf) { - key->ps.spi_shader_col_format &= sel->colors_written_4bit; - key->ps.color_is_int8 &= sel->info.colors_written; + if (!key->ps.epilog.last_cbuf) { + key->ps.epilog.spi_shader_col_format &= sel->colors_written_4bit; + key->ps.epilog.color_is_int8 &= sel->info.colors_written; } if (rs) { @@ -714,31 +927,32 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES_ADJACENCY; bool is_line = !is_poly && sctx->current_rast_prim != PIPE_PRIM_POINTS; - key->ps.color_two_side = rs->two_side && sel->info.colors_read; + key->ps.prolog.color_two_side = rs->two_side && sel->info.colors_read; if (sctx->queued.named.blend) { - key->ps.alpha_to_one = sctx->queued.named.blend->alpha_to_one && - rs->multisample_enable && - !sctx->framebuffer.cb0_is_integer; + key->ps.epilog.alpha_to_one = sctx->queued.named.blend->alpha_to_one && + rs->multisample_enable && + !sctx->framebuffer.cb0_is_integer; } - key->ps.poly_stipple = rs->poly_stipple_enable && is_poly; - key->ps.poly_line_smoothing = ((is_poly && rs->poly_smooth) || - (is_line && rs->line_smooth)) && - sctx->framebuffer.nr_samples <= 1; - key->ps.clamp_color = rs->clamp_fragment_color; - - key->ps.force_persample_interp = rs->force_persample_interp && - 
rs->multisample_enable && - sctx->framebuffer.nr_samples > 1 && - sctx->ps_iter_samples > 1 && - (sel->info.uses_persp_center || - sel->info.uses_persp_centroid || - sel->info.uses_linear_center || - sel->info.uses_linear_centroid); + key->ps.prolog.poly_stipple = rs->poly_stipple_enable && is_poly; + key->ps.epilog.poly_line_smoothing = ((is_poly && rs->poly_smooth) || + (is_line && rs->line_smooth)) && + sctx->framebuffer.nr_samples <= 1; + key->ps.epilog.clamp_color = rs->clamp_fragment_color; + + key->ps.prolog.force_persample_interp = + rs->force_persample_interp && + rs->multisample_enable && + sctx->framebuffer.nr_samples > 1 && + sctx->ps_iter_samples > 1 && + (sel->info.uses_persp_center || + sel->info.uses_persp_centroid || + sel->info.uses_linear_center || + sel->info.uses_linear_centroid); } - key->ps.alpha_func = si_get_alpha_test_func(sctx); + key->ps.epilog.alpha_func = si_get_alpha_test_func(sctx); break; } default: @@ -821,6 +1035,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx, const struct pipe_shader_state *state) { struct si_screen *sscreen = (struct si_screen *)ctx->screen; + struct si_context *sctx = (struct si_context*)ctx; struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector); int i; @@ -900,6 +1115,13 @@ static void *si_create_shader_selector(struct pipe_context *ctx, for (i = 0; i < 8; i++) if (sel->info.colors_written & (1 << i)) sel->colors_written_4bit |= 0xf << (4 * i); + + for (i = 0; i < sel->info.num_inputs; i++) { + if (sel->info.input_semantic_name[i] == TGSI_SEMANTIC_COLOR) { + int index = sel->info.input_semantic_index[i]; + sel->color_attr_index[index] = i; + } + } break; } @@ -921,6 +1143,44 @@ static void *si_create_shader_selector(struct pipe_context *ctx, break; } + /* Compile the main shader part for use with a prolog and/or epilog. 
*/ + if (sel->type != PIPE_SHADER_GEOMETRY && + !sscreen->use_monolithic_shaders) { + struct si_shader *shader = CALLOC_STRUCT(si_shader); + void *tgsi_binary; + + if (!shader) + goto error; + + shader->selector = sel; + + tgsi_binary = si_get_tgsi_binary(sel); + + /* Try to load the shader from the shader cache. */ + pipe_mutex_lock(sscreen->shader_cache_mutex); + + if (tgsi_binary && + si_shader_cache_load_shader(sscreen, tgsi_binary, shader)) { + FREE(tgsi_binary); + } else { + /* Compile the shader if it hasn't been loaded from the cache. */ + if (si_compile_tgsi_shader(sscreen, sctx->tm, shader, false, + &sctx->b.debug) != 0) { + FREE(shader); + FREE(tgsi_binary); + pipe_mutex_unlock(sscreen->shader_cache_mutex); + goto error; + } + + if (tgsi_binary && + !si_shader_cache_insert_shader(sscreen, tgsi_binary, shader)) + FREE(tgsi_binary); + } + pipe_mutex_unlock(sscreen->shader_cache_mutex); + + sel->main_shader_part = shader; + } + /* Pre-compilation. */ if (sel->type == PIPE_SHADER_GEOMETRY || sscreen->b.debug_flags & DBG_PRECOMPILE) { @@ -934,27 +1194,29 @@ static void *si_create_shader_selector(struct pipe_context *ctx, */ switch (sel->type) { case PIPE_SHADER_TESS_CTRL: - key.tcs.prim_mode = PIPE_PRIM_TRIANGLES; + key.tcs.epilog.prim_mode = PIPE_PRIM_TRIANGLES; break; case PIPE_SHADER_FRAGMENT: - key.ps.alpha_func = PIPE_FUNC_ALWAYS; + key.ps.epilog.alpha_func = PIPE_FUNC_ALWAYS; for (i = 0; i < 8; i++) if (sel->info.colors_written & (1 << i)) - key.ps.spi_shader_col_format |= + key.ps.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_FP16_ABGR << (i * 4); break; } - if (si_shader_select_with_key(ctx, &state, &key)) { - fprintf(stderr, "radeonsi: can't create a shader\n"); - tgsi_free_tokens(sel->tokens); - FREE(sel); - return NULL; - } + if (si_shader_select_with_key(ctx, &state, &key)) + goto error; } pipe_mutex_init(sel->mutex); return sel; + +error: + fprintf(stderr, "radeonsi: can't create a shader\n"); + tgsi_free_tokens(sel->tokens); + FREE(sel); + 
return NULL; } /** @@ -1119,6 +1381,9 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state) p = c; } + if (sel->main_shader_part) + si_delete_shader(sctx, sel->main_shader_part); + pipe_mutex_destroy(sel->mutex); free(sel->tokens); free(sel); @@ -1144,14 +1409,14 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx, for (j = 0; j < vsinfo->num_outputs; j++) { if (name == vsinfo->output_semantic_name[j] && index == vsinfo->output_semantic_index[j]) { - ps_input_cntl |= S_028644_OFFSET(vs->vs_output_param_offset[j]); + ps_input_cntl |= S_028644_OFFSET(vs->info.vs_output_param_offset[j]); break; } } if (name == TGSI_SEMANTIC_PRIMID) /* PrimID is written after the last output. */ - ps_input_cntl |= S_028644_OFFSET(vs->vs_output_param_offset[vsinfo->num_outputs]); + ps_input_cntl |= S_028644_OFFSET(vs->info.vs_output_param_offset[vsinfo->num_outputs]); else if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(ps_input_cntl)) { /* No corresponding output found, load defaults into input. * Don't set any other bits. 
@@ -1191,7 +1456,7 @@ static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom) } } - if (ps->key.ps.color_two_side) { + if (ps->key.ps.prolog.color_two_side) { unsigned bcol = TGSI_SEMANTIC_BCOLOR; for (i = 0; i < 2; i++) { @@ -1745,8 +2010,8 @@ bool si_update_shaders(struct si_context *sctx) si_mark_atom_dirty(sctx, &sctx->db_render_state); } - if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.poly_line_smoothing) { - sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.poly_line_smoothing; + if (sctx->smoothing_enabled != sctx->ps_shader.current->key.ps.epilog.poly_line_smoothing) { + sctx->smoothing_enabled = sctx->ps_shader.current->key.ps.epilog.poly_line_smoothing; si_mark_atom_dirty(sctx, &sctx->msaa_config); if (sctx->b.chip_class == SI) diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c index 80526ed4d15..fe6cf71a6e5 100644 --- a/src/gallium/drivers/svga/svga_draw.c +++ b/src/gallium/drivers/svga/svga_draw.c @@ -590,6 +590,16 @@ draw_vgpu10(struct svga_hwtnl *hwtnl, } else { /* non-indexed drawing */ + if (svga->state.hw_draw.ib_format != SVGA3D_FORMAT_INVALID) { + /* Unbind previously bound index buffer */ + ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, NULL, + SVGA3D_FORMAT_INVALID, 0); + if (ret != PIPE_OK) + return ret; + svga->state.hw_draw.ib_format = SVGA3D_FORMAT_INVALID; + svga->state.hw_draw.ib = NULL; + } + if (instance_count > 1) { ret = SVGA3D_vgpu10_DrawInstanced(svga->swc, vcount, diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index d5405f8eacf..c9abd49ec1e 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -468,12 +468,15 @@ vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader, return 16; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; case PIPE_SHADER_CAP_DOUBLES: case 
PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; @@ -527,12 +530,15 @@ vgpu9_get_shader_param(struct pipe_screen *screen, unsigned shader, return 0; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; @@ -619,12 +625,15 @@ vgpu10_get_shader_param(struct pipe_screen *screen, unsigned shader, return SVGA3D_DX_MAX_SAMPLERS; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; case PIPE_SHADER_CAP_DOUBLES: case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED: case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE: case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; diff --git a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c index 1223e446055..0c5afeb4cf9 100644 --- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c +++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c @@ -1782,7 +1782,7 @@ alloc_immediate_int4(struct svga_shader_emitter_v10 *emit, static unsigned alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index) { - const unsigned n = emit->info.num_inputs + index; + const 
unsigned n = emit->info.file_max[TGSI_FILE_INPUT] + 1 + index; assert(index < Elements(emit->system_value_indexes)); emit->system_value_indexes[index] = n; return n; @@ -2446,7 +2446,7 @@ emit_input_declarations(struct svga_shader_emitter_v10 *emit) else { assert(emit->unit == PIPE_SHADER_VERTEX); - for (i = 0; i < emit->info.num_inputs; i++) { + for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) { unsigned usage_mask = emit->info.input_usage_mask[i]; unsigned index = i; diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 2ce2b3aef75..57f851833e5 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -50,7 +50,8 @@ struct trace_query static inline struct trace_query * -trace_query(struct pipe_query *query) { +trace_query(struct pipe_query *query) +{ return (struct trace_query *)query; } @@ -93,7 +94,7 @@ trace_surface_unwrap(struct trace_context *tr_ctx, return NULL; assert(surface->texture); - if(!surface->texture) + if (!surface->texture) return surface; tr_surf = trace_surface(surface); @@ -105,7 +106,7 @@ trace_surface_unwrap(struct trace_context *tr_ctx, } -static inline void +static void trace_context_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info) { @@ -135,7 +136,7 @@ trace_context_draw_vbo(struct pipe_context *_pipe, } -static inline struct pipe_query * +static struct pipe_query * trace_context_create_query(struct pipe_context *_pipe, unsigned query_type, unsigned index) @@ -173,7 +174,7 @@ trace_context_create_query(struct pipe_context *_pipe, } -static inline void +static void trace_context_destroy_query(struct pipe_context *_pipe, struct pipe_query *_query) { @@ -195,7 +196,7 @@ trace_context_destroy_query(struct pipe_context *_pipe, } -static inline boolean +static boolean trace_context_begin_query(struct pipe_context *_pipe, struct pipe_query *query) { @@ -217,7 +218,7 @@ trace_context_begin_query(struct pipe_context *_pipe, 
} -static inline void +static void trace_context_end_query(struct pipe_context *_pipe, struct pipe_query *query) { @@ -237,7 +238,7 @@ trace_context_end_query(struct pipe_context *_pipe, } -static inline boolean +static boolean trace_context_get_query_result(struct pipe_context *_pipe, struct pipe_query *_query, boolean wait, @@ -272,7 +273,7 @@ trace_context_get_query_result(struct pipe_context *_pipe, } -static inline void * +static void * trace_context_create_blend_state(struct pipe_context *_pipe, const struct pipe_blend_state *state) { @@ -295,7 +296,7 @@ trace_context_create_blend_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_bind_blend_state(struct pipe_context *_pipe, void *state) { @@ -313,7 +314,7 @@ trace_context_bind_blend_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_delete_blend_state(struct pipe_context *_pipe, void *state) { @@ -331,7 +332,7 @@ trace_context_delete_blend_state(struct pipe_context *_pipe, } -static inline void * +static void * trace_context_create_sampler_state(struct pipe_context *_pipe, const struct pipe_sampler_state *state) { @@ -354,7 +355,7 @@ trace_context_create_sampler_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_bind_sampler_states(struct pipe_context *_pipe, unsigned shader, unsigned start, @@ -381,7 +382,7 @@ trace_context_bind_sampler_states(struct pipe_context *_pipe, } -static inline void +static void trace_context_delete_sampler_state(struct pipe_context *_pipe, void *state) { @@ -399,7 +400,7 @@ trace_context_delete_sampler_state(struct pipe_context *_pipe, } -static inline void * +static void * trace_context_create_rasterizer_state(struct pipe_context *_pipe, const struct pipe_rasterizer_state *state) { @@ -422,7 +423,7 @@ trace_context_create_rasterizer_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_bind_rasterizer_state(struct pipe_context *_pipe, void *state) { @@ 
-440,7 +441,7 @@ trace_context_bind_rasterizer_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_delete_rasterizer_state(struct pipe_context *_pipe, void *state) { @@ -458,7 +459,7 @@ trace_context_delete_rasterizer_state(struct pipe_context *_pipe, } -static inline void * +static void * trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe, const struct pipe_depth_stencil_alpha_state *state) { @@ -481,7 +482,7 @@ trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe, void *state) { @@ -499,7 +500,7 @@ trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe, void *state) { @@ -518,7 +519,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe, #define TRACE_SHADER_STATE(shader_type) \ - static inline void * \ + static void * \ trace_context_create_##shader_type##_state(struct pipe_context *_pipe, \ const struct pipe_shader_state *state) \ { \ @@ -534,7 +535,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe, return result; \ } \ \ - static inline void \ + static void \ trace_context_bind_##shader_type##_state(struct pipe_context *_pipe, \ void *state) \ { \ @@ -547,7 +548,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe, trace_dump_call_end(); \ } \ \ - static inline void \ + static void \ trace_context_delete_##shader_type##_state(struct pipe_context *_pipe, \ void *state) \ { \ @@ -570,6 +571,51 @@ TRACE_SHADER_STATE(tes) static inline void * +trace_context_create_compute_state(struct pipe_context *_pipe, + const struct pipe_compute_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + void * result; + + 
trace_dump_call_begin("pipe_context", "create_compute_state"); + trace_dump_arg(ptr, pipe); + trace_dump_arg(compute_state, state); + result = pipe->create_compute_state(pipe, state); + trace_dump_ret(ptr, result); + trace_dump_call_end(); + return result; +} + +static inline void +trace_context_bind_compute_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "bind_compute_state"); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + pipe->bind_compute_state(pipe, state); + trace_dump_call_end(); +} + +static inline void +trace_context_delete_compute_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "delete_compute_state"); + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + pipe->delete_compute_state(pipe, state); + trace_dump_call_end(); +} + +static void * trace_context_create_vertex_elements_state(struct pipe_context *_pipe, unsigned num_elements, const struct pipe_vertex_element *elements) @@ -597,7 +643,7 @@ trace_context_create_vertex_elements_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_bind_vertex_elements_state(struct pipe_context *_pipe, void *state) { @@ -615,7 +661,7 @@ trace_context_bind_vertex_elements_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_delete_vertex_elements_state(struct pipe_context *_pipe, void *state) { @@ -633,7 +679,7 @@ trace_context_delete_vertex_elements_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_set_blend_color(struct pipe_context *_pipe, const struct pipe_blend_color *state) { @@ -651,7 +697,7 @@ trace_context_set_blend_color(struct pipe_context *_pipe, } -static inline void +static void 
trace_context_set_stencil_ref(struct pipe_context *_pipe, const struct pipe_stencil_ref *state) { @@ -669,7 +715,7 @@ trace_context_set_stencil_ref(struct pipe_context *_pipe, } -static inline void +static void trace_context_set_clip_state(struct pipe_context *_pipe, const struct pipe_clip_state *state) { @@ -686,7 +732,7 @@ trace_context_set_clip_state(struct pipe_context *_pipe, trace_dump_call_end(); } -static inline void +static void trace_context_set_sample_mask(struct pipe_context *_pipe, unsigned sample_mask) { @@ -703,7 +749,7 @@ trace_context_set_sample_mask(struct pipe_context *_pipe, trace_dump_call_end(); } -static inline void +static void trace_context_set_constant_buffer(struct pipe_context *_pipe, uint shader, uint index, struct pipe_constant_buffer *constant_buffer) @@ -731,7 +777,7 @@ trace_context_set_constant_buffer(struct pipe_context *_pipe, } -static inline void +static void trace_context_set_framebuffer_state(struct pipe_context *_pipe, const struct pipe_framebuffer_state *state) { @@ -743,9 +789,9 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe, /* Unwrap the input state */ memcpy(&unwrapped_state, state, sizeof(unwrapped_state)); - for(i = 0; i < state->nr_cbufs; ++i) + for (i = 0; i < state->nr_cbufs; ++i) unwrapped_state.cbufs[i] = trace_surface_unwrap(tr_ctx, state->cbufs[i]); - for(i = state->nr_cbufs; i < PIPE_MAX_COLOR_BUFS; ++i) + for (i = state->nr_cbufs; i < PIPE_MAX_COLOR_BUFS; ++i) unwrapped_state.cbufs[i] = NULL; unwrapped_state.zsbuf = trace_surface_unwrap(tr_ctx, state->zsbuf); state = &unwrapped_state; @@ -761,7 +807,7 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe, } -static inline void +static void trace_context_set_polygon_stipple(struct pipe_context *_pipe, const struct pipe_poly_stipple *state) { @@ -779,7 +825,7 @@ trace_context_set_polygon_stipple(struct pipe_context *_pipe, } -static inline void +static void trace_context_set_scissor_states(struct pipe_context *_pipe, unsigned 
start_slot, unsigned num_scissors, @@ -801,7 +847,7 @@ trace_context_set_scissor_states(struct pipe_context *_pipe, } -static inline void +static void trace_context_set_viewport_states(struct pipe_context *_pipe, unsigned start_slot, unsigned num_viewports, @@ -825,8 +871,8 @@ trace_context_set_viewport_states(struct pipe_context *_pipe, static struct pipe_sampler_view * trace_context_create_sampler_view(struct pipe_context *_pipe, - struct pipe_resource *_resource, - const struct pipe_sampler_view *templ) + struct pipe_resource *_resource, + const struct pipe_sampler_view *templ) { struct trace_context *tr_ctx = trace_context(_pipe); struct trace_resource *tr_res = trace_resource(_resource); @@ -868,7 +914,7 @@ trace_context_create_sampler_view(struct pipe_context *_pipe, static void trace_context_sampler_view_destroy(struct pipe_context *_pipe, - struct pipe_sampler_view *_view) + struct pipe_sampler_view *_view) { struct trace_context *tr_ctx = trace_context(_pipe); struct trace_sampler_view *tr_view = trace_sampler_view(_view); @@ -910,7 +956,7 @@ trace_context_create_surface(struct pipe_context *_pipe, trace_dump_arg(ptr, pipe); trace_dump_arg(ptr, resource); - + trace_dump_arg_begin("surf_tmpl"); trace_dump_surface_template(surf_tmpl, resource->target); trace_dump_arg_end(); @@ -948,7 +994,7 @@ trace_context_surface_destroy(struct pipe_context *_pipe, } -static inline void +static void trace_context_set_sampler_views(struct pipe_context *_pipe, unsigned shader, unsigned start, @@ -964,7 +1010,7 @@ trace_context_set_sampler_views(struct pipe_context *_pipe, /* remove this when we have pipe->set_sampler_views(..., start, ...) */ assert(start == 0); - for(i = 0; i < num; ++i) { + for (i = 0; i < num; ++i) { tr_view = trace_sampler_view(views[i]); unwrapped_views[i] = tr_view ? 
tr_view->sampler_view : NULL; } @@ -984,7 +1030,7 @@ trace_context_set_sampler_views(struct pipe_context *_pipe, } -static inline void +static void trace_context_set_vertex_buffers(struct pipe_context *_pipe, unsigned start_slot, unsigned num_buffers, const struct pipe_vertex_buffer *buffers) @@ -1018,7 +1064,7 @@ trace_context_set_vertex_buffers(struct pipe_context *_pipe, } -static inline void +static void trace_context_set_index_buffer(struct pipe_context *_pipe, const struct pipe_index_buffer *ib) { @@ -1043,7 +1089,7 @@ trace_context_set_index_buffer(struct pipe_context *_pipe, } -static inline struct pipe_stream_output_target * +static struct pipe_stream_output_target * trace_context_create_stream_output_target(struct pipe_context *_pipe, struct pipe_resource *res, unsigned buffer_offset, @@ -1073,7 +1119,7 @@ trace_context_create_stream_output_target(struct pipe_context *_pipe, } -static inline void +static void trace_context_stream_output_target_destroy( struct pipe_context *_pipe, struct pipe_stream_output_target *target) @@ -1092,7 +1138,7 @@ trace_context_stream_output_target_destroy( } -static inline void +static void trace_context_set_stream_output_targets(struct pipe_context *_pipe, unsigned num_targets, struct pipe_stream_output_target **tgs, @@ -1114,7 +1160,7 @@ trace_context_set_stream_output_targets(struct pipe_context *_pipe, } -static inline void +static void trace_context_resource_copy_region(struct pipe_context *_pipe, struct pipe_resource *dst, unsigned dst_level, @@ -1149,7 +1195,7 @@ trace_context_resource_copy_region(struct pipe_context *_pipe, } -static inline void +static void trace_context_blit(struct pipe_context *_pipe, const struct pipe_blit_info *_info) { @@ -1191,7 +1237,7 @@ trace_context_flush_resource(struct pipe_context *_pipe, } -static inline void +static void trace_context_clear(struct pipe_context *_pipe, unsigned buffers, const union pipe_color_union *color, @@ -1220,7 +1266,7 @@ trace_context_clear(struct pipe_context 
*_pipe, } -static inline void +static void trace_context_clear_render_target(struct pipe_context *_pipe, struct pipe_surface *dst, const union pipe_color_union *color, @@ -1247,7 +1293,7 @@ trace_context_clear_render_target(struct pipe_context *_pipe, trace_dump_call_end(); } -static inline void +static void trace_context_clear_depth_stencil(struct pipe_context *_pipe, struct pipe_surface *dst, unsigned clear_flags, @@ -1306,7 +1352,7 @@ trace_context_clear_texture(struct pipe_context *_pipe, trace_dump_call_end(); } -static inline void +static void trace_context_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence, unsigned flags) @@ -1364,7 +1410,7 @@ trace_context_generate_mipmap(struct pipe_context *_pipe, } -static inline void +static void trace_context_destroy(struct pipe_context *_pipe) { struct trace_context *tr_ctx = trace_context(_pipe); @@ -1414,7 +1460,7 @@ trace_context_transfer_map(struct pipe_context *_context, *transfer = trace_transfer_create(tr_context, tr_res, result); if (map) { - if(usage & PIPE_TRANSFER_WRITE) { + if (usage & PIPE_TRANSFER_WRITE) { trace_transfer(*transfer)->map = map; } } @@ -1432,9 +1478,7 @@ trace_context_transfer_flush_region( struct pipe_context *_context, struct pipe_context *context = tr_context->pipe; struct pipe_transfer *transfer = tr_transfer->transfer; - context->transfer_flush_region(context, - transfer, - box); + context->transfer_flush_region(context, transfer, box); } static void @@ -1446,7 +1490,7 @@ trace_context_transfer_unmap(struct pipe_context *_context, struct pipe_context *context = tr_ctx->pipe; struct pipe_transfer *transfer = tr_trans->transfer; - if(tr_trans->map) { + if (tr_trans->map) { /* * Fake a transfer_inline_write */ @@ -1525,15 +1569,16 @@ trace_context_transfer_inline_write(struct pipe_context *_context, trace_dump_call_end(); - context->transfer_inline_write(context, resource, - level, usage, box, data, stride, layer_stride); + context->transfer_inline_write(context, 
resource, level, usage, box, + data, stride, layer_stride); } -static void trace_context_render_condition(struct pipe_context *_context, - struct pipe_query *query, - boolean condition, - uint mode) +static void +trace_context_render_condition(struct pipe_context *_context, + struct pipe_query *query, + boolean condition, + uint mode) { struct trace_context *tr_context = trace_context(_context); struct pipe_context *context = tr_context->pipe; @@ -1553,7 +1598,8 @@ static void trace_context_render_condition(struct pipe_context *_context, } -static void trace_context_texture_barrier(struct pipe_context *_context) +static void +trace_context_texture_barrier(struct pipe_context *_context) { struct trace_context *tr_context = trace_context(_context); struct pipe_context *context = tr_context->pipe; @@ -1568,8 +1614,9 @@ static void trace_context_texture_barrier(struct pipe_context *_context) } -static void trace_context_memory_barrier(struct pipe_context *_context, - unsigned flags) +static void +trace_context_memory_barrier(struct pipe_context *_context, + unsigned flags) { struct trace_context *tr_context = trace_context(_context); struct pipe_context *context = tr_context->pipe; @@ -1583,9 +1630,10 @@ static void trace_context_memory_barrier(struct pipe_context *_context, } -static void trace_context_set_tess_state(struct pipe_context *_context, - const float default_outer_level[4], - const float default_inner_level[2]) +static void +trace_context_set_tess_state(struct pipe_context *_context, + const float default_outer_level[4], + const float default_inner_level[2]) { struct trace_context *tr_context = trace_context(_context); struct pipe_context *context = tr_context->pipe; @@ -1638,12 +1686,31 @@ static void trace_context_set_shader_buffers(struct pipe_context *_context, FREE(_buffers); } +static void trace_context_launch_grid(struct pipe_context *_pipe, + const struct pipe_grid_info *info) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct 
pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "launch_grid"); -static const struct debug_named_value rbug_blocker_flags[] = { - {"before", 1, NULL}, - {"after", 2, NULL}, - DEBUG_NAMED_VALUE_END -}; + trace_dump_arg(ptr, pipe); + trace_dump_arg(grid_info, info); + + trace_dump_trace_flush(); + + if (info->indirect) { + struct pipe_grid_info _info; + + memcpy(&_info, info, sizeof(_info)); + _info.indirect = trace_resource_unwrap(tr_ctx, _info.indirect); + pipe->launch_grid(pipe, &_info); + } else { + pipe->launch_grid(pipe, info); + } + + trace_dump_call_end(); +} struct pipe_context * trace_context_create(struct trace_screen *tr_scr, @@ -1654,7 +1721,7 @@ trace_context_create(struct trace_screen *tr_scr, if (!pipe) goto error1; - if(!trace_enabled()) + if (!trace_enabled()) goto error1; tr_ctx = CALLOC_STRUCT(trace_context); @@ -1703,6 +1770,9 @@ trace_context_create(struct trace_screen *tr_scr, TR_CTX_INIT(create_tes_state); TR_CTX_INIT(bind_tes_state); TR_CTX_INIT(delete_tes_state); + TR_CTX_INIT(create_compute_state); + TR_CTX_INIT(bind_compute_state); + TR_CTX_INIT(delete_compute_state); TR_CTX_INIT(create_vertex_elements_state); TR_CTX_INIT(bind_vertex_elements_state); TR_CTX_INIT(delete_vertex_elements_state); @@ -1738,6 +1808,7 @@ trace_context_create(struct trace_screen *tr_scr, TR_CTX_INIT(memory_barrier); TR_CTX_INIT(set_tess_state); TR_CTX_INIT(set_shader_buffers); + TR_CTX_INIT(launch_grid); TR_CTX_INIT(transfer_map); TR_CTX_INIT(transfer_unmap); @@ -1756,7 +1827,7 @@ error1: /** - * Sanity checker: check that the given context really is a + * Sanity checker: check that the given context really is a * trace context (and not the wrapped driver's context). 
*/ void @@ -1765,4 +1836,3 @@ trace_context_check(const struct pipe_context *pipe) struct trace_context *tr_ctx = (struct trace_context *) pipe; assert(tr_ctx->base.destroy == trace_context_destroy); } - diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index cfbf53cf767..0627e5ab5d7 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -305,6 +305,36 @@ void trace_dump_shader_state(const struct pipe_shader_state *state) } +void trace_dump_compute_state(const struct pipe_compute_state *state) +{ + if (!trace_dumping_enabled_locked()) + return; + + if (!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_compute_state"); + + trace_dump_member_begin("prog"); + if (state->prog) { + static char str[64 * 1024]; + tgsi_dump_str(state->prog, 0, str, sizeof(str)); + trace_dump_string(str); + } else { + trace_dump_null(); + } + trace_dump_member_end(); + + trace_dump_member(uint, state, req_local_mem); + trace_dump_member(uint, state, req_private_mem); + trace_dump_member(uint, state, req_input_mem); + + trace_dump_struct_end(); +} + + void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state) { unsigned i; @@ -864,3 +894,33 @@ trace_dump_query_result(unsigned query_type, break; } } + +void trace_dump_grid_info(const struct pipe_grid_info *state) +{ + if (!trace_dumping_enabled_locked()) + return; + + if (!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_grid_info"); + + trace_dump_member(uint, state, pc); + trace_dump_member(ptr, state, input); + + trace_dump_member_begin("block"); + trace_dump_array(uint, state->block, Elements(state->block)); + trace_dump_member_end(); + + trace_dump_member_begin("grid"); + trace_dump_array(uint, state->grid, Elements(state->grid)); + trace_dump_member_end(); + + trace_dump_member(ptr, state, indirect); + trace_dump_member(uint, state, 
indirect_offset); + + trace_dump_struct_end(); +} + diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h index 4f4ade155bc..ee0720d8ac8 100644 --- a/src/gallium/drivers/trace/tr_dump_state.h +++ b/src/gallium/drivers/trace/tr_dump_state.h @@ -50,6 +50,8 @@ void trace_dump_token(const struct tgsi_token *token); void trace_dump_shader_state(const struct pipe_shader_state *state); +void trace_dump_compute_state(const struct pipe_compute_state *state); + void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state); void trace_dump_blend_state(const struct pipe_blend_state *state); @@ -87,4 +89,6 @@ void trace_dump_blit_info(const struct pipe_blit_info *); void trace_dump_query_result(unsigned query_type, const union pipe_query_result *result); +void trace_dump_grid_info(const struct pipe_grid_info *state); + #endif /* TR_STATE_H */ diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index ff057e2a4a4..0612109c800 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -173,6 +173,30 @@ trace_screen_get_paramf(struct pipe_screen *_screen, } +static int +trace_screen_get_compute_param(struct pipe_screen *_screen, + enum pipe_compute_cap param, void *data) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + int result; + + trace_dump_call_begin("pipe_screen", "get_compute_param"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(int, param); + trace_dump_arg(ptr, data); + + result = screen->get_compute_param(screen, param, data); + + trace_dump_ret(int, result); + + trace_dump_call_end(); + + return result; +} + + static boolean trace_screen_is_format_supported(struct pipe_screen *_screen, enum pipe_format format, @@ -472,6 +496,7 @@ trace_screen_create(struct pipe_screen *screen) tr_scr->base.get_param = trace_screen_get_param; tr_scr->base.get_shader_param = 
trace_screen_get_shader_param; tr_scr->base.get_paramf = trace_screen_get_paramf; + tr_scr->base.get_compute_param = trace_screen_get_compute_param; tr_scr->base.is_format_supported = trace_screen_is_format_supported; assert(screen->context_create); tr_scr->base.context_create = trace_screen_context_create; diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index efbb69b71a7..f9eb0e151c5 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -193,6 +193,7 @@ qir_is_raw_mov(struct qinst *inst) return ((inst->op == QOP_MOV || inst->op == QOP_FMOV || inst->op == QOP_MMOV) && + inst->cond == QPU_COND_ALWAYS && !inst->dst.pack && !inst->src[0].pack); } diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c index 2f280c54523..ee1e9aafbb9 100644 --- a/src/gallium/drivers/vc4/vc4_qir_schedule.c +++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c @@ -575,7 +575,7 @@ void qir_schedule_instructions(struct vc4_compile *c) { void *mem_ctx = ralloc_context(NULL); - struct schedule_state state = { 0 }; + struct schedule_state state = { { 0 } }; if (debug) { fprintf(stderr, "Pre-schedule instructions\n"); diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index b06702afea2..450b97fc014 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -312,7 +312,7 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) break; } - bool handled_qinst_cond = true; + bool handled_qinst_cond = false; switch (qinst->op) { case QOP_RCP: diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index b19d31af6ac..a4b3efcfda3 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -357,9 +357,12 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return VC4_MAX_TEXTURE_SAMPLERS; case 
PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: + return 0; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: return 32; case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS: + case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return 0; default: fprintf(stderr, "unknown shader param %d\n", param); |