author | Jason Ekstrand <[email protected]> | 2016-04-07 16:56:34 -0700 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2016-04-07 16:56:34 -0700 |
commit | e26a978773ba8fbff04cd2ab3342fcb02e90c06e (patch) | |
tree | 303f7d5e47f2b11ad2edf7ed0f82e90620646df9 /src/gallium | |
parent | 15895bf777bd5f68a197506fdeaced28aa440622 (diff) | |
parent | 1cd19ebc4a892ada69f9085892441c00674b2764 (diff) | |
Merge remote-tracking branch 'public/master' into vulkan
Diffstat (limited to 'src/gallium')
54 files changed, 1100 insertions, 744 deletions
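Most of the freedreno/ir3 churn in the diffs below comes from folding the instruction category into the opcode enum itself via the new `_OPC()` packing in instr-a3xx.h, which lets `struct ir3_instruction` drop its separate `category` field. A minimal standalone sketch of that encoding, using only the macros and values visible in the hunks below (the `main()` harness is illustrative and not part of the patch):

```c
#include <assert.h>
#include <stdio.h>

/* From instr-a3xx.h in this merge: the category is packed into the high
 * bits of the opcode, above NOPC_BITS worth of per-category opc value. */
#define NOPC_BITS 6
#define _OPC(cat, opc)  (((cat) << NOPC_BITS) | (opc))
#define opc_cat(opc)    ((int)((opc) >> NOPC_BITS))
#define opc_op(opc)     ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))

int main(void)
{
   unsigned mov = _OPC(1, 0);   /* OPC_MOV, the new explicit cat1 opcode */
   unsigned sam = _OPC(5, 3);   /* OPC_SAM */

   /* Category and per-category opcode can be recovered independently,
    * which is what the rewritten is_flow()/is_tex()/emit[] lookups and
    * the disassembler's opc_op()-indexed tables rely on. */
   assert(opc_cat(mov) == 1 && opc_op(mov) == 0);
   assert(opc_cat(sam) == 5 && opc_op(sam) == 3);

   printf("OPC_MOV packs to %u, OPC_SAM packs to %u\n", mov, sam);
   return 0;
}
```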
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index cd9ee5434d3..a5f07236e83 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -163,7 +163,7 @@ aa_transform_decl(struct tgsi_transform_context *ctx, uint i; for (i = decl->Range.First; i <= decl->Range.Last; i++) { - aactx->samplersUsed |= 1 << i; + aactx->samplersUsed |= 1u << i; } } else if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { @@ -208,9 +208,11 @@ aa_transform_prolog(struct tgsi_transform_context *ctx) struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; uint i; + STATIC_ASSERT(sizeof(aactx->samplersUsed) * 8 >= PIPE_MAX_SAMPLERS); + /* find free sampler */ aactx->freeSampler = free_bit(aactx->samplersUsed); - if (aactx->freeSampler >= PIPE_MAX_SAMPLERS) + if (aactx->freeSampler < 0 || aactx->freeSampler >= PIPE_MAX_SAMPLERS) aactx->freeSampler = PIPE_MAX_SAMPLERS - 1; /* find two free temp regs */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index d90fb1d68df..c5ef16810a2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -464,7 +464,8 @@ scan_declaration(struct tgsi_shader_info *info, } } } else if (file == TGSI_FILE_SAMPLER) { - info->samplers_declared |= 1 << reg; + STATIC_ASSERT(sizeof(info->samplers_declared) * 8 >= PIPE_MAX_SAMPLERS); + info->samplers_declared |= 1u << reg; } else if (file == TGSI_FILE_SAMPLER_VIEW) { unsigned target = fulldecl->SamplerView.Resource; assert(target < TGSI_TEXTURE_UNKNOWN); diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c index a73a1de2f0b..b1f3982fb4e 100644 --- a/src/gallium/auxiliary/util/u_dump_state.c +++ b/src/gallium/auxiliary/util/u_dump_state.c @@ -645,6 +645,8 @@ util_dump_framebuffer_state(FILE *stream, const struct pipe_framebuffer_state *s util_dump_member(stream, uint, state, width); util_dump_member(stream, uint, state, height); + util_dump_member(stream, uint, state, samples); + util_dump_member(stream, uint, state, layers); util_dump_member(stream, uint, state, nr_cbufs); util_dump_member_array(stream, ptr, state, cbufs); util_dump_member(stream, ptr, state, zsbuf); diff --git a/src/gallium/auxiliary/util/u_framebuffer.c b/src/gallium/auxiliary/util/u_framebuffer.c index 49b391d8162..f9b804673dc 100644 --- a/src/gallium/auxiliary/util/u_framebuffer.c +++ b/src/gallium/auxiliary/util/u_framebuffer.c @@ -55,6 +55,10 @@ util_framebuffer_state_equal(const struct pipe_framebuffer_state *dst, dst->height != src->height) return FALSE; + if (dst->samples != src->samples || + dst->layers != src->layers) + return FALSE; + if (dst->nr_cbufs != src->nr_cbufs) { return FALSE; } @@ -85,6 +89,9 @@ util_copy_framebuffer_state(struct pipe_framebuffer_state *dst, dst->width = src->width; dst->height = src->height; + dst->samples = src->samples; + dst->layers = src->layers; + for (i = 0; i < src->nr_cbufs; i++) pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]); @@ -109,6 +116,7 @@ util_unreference_framebuffer_state(struct pipe_framebuffer_state *fb) pipe_surface_reference(&fb->zsbuf, NULL); + fb->samples = fb->layers = 0; fb->width = fb->height = 0; fb->nr_cbufs = 0; } @@ -160,6 +168,14 @@ util_framebuffer_get_num_layers(const struct pipe_framebuffer_state *fb) { unsigned i, num_layers = 0; + /** + * In the case of ARB_framebuffer_no_attachment + * we obtain the number of layers directly from + * 
the framebuffer state. + */ + if (!(fb->nr_cbufs || fb->zsbuf)) + return fb->layers; + for (i = 0; i < fb->nr_cbufs; i++) { if (fb->cbufs[i]) { unsigned num = fb->cbufs[i]->u.tex.last_layer - @@ -184,6 +200,20 @@ util_framebuffer_get_num_samples(const struct pipe_framebuffer_state *fb) { unsigned i; + /** + * In the case of ARB_framebuffer_no_attachment + * we obtain the number of samples directly from + * the framebuffer state. + * + * NOTE: fb->samples may wind up as zero due to memset()'s on internal + * driver structures on their initialization and so we take the + * MAX here to ensure we have a valid number of samples. However, + * if samples is legitimately not getting set somewhere + * multi-sampling will evidently break. + */ + if (!(fb->nr_cbufs || fb->zsbuf)) + return MAX2(fb->samples, 1); + for (i = 0; i < fb->nr_cbufs; i++) { if (fb->cbufs[i]) { return MAX2(1, fb->cbufs[i]->texture->nr_samples); diff --git a/src/gallium/auxiliary/util/u_pstipple.c b/src/gallium/auxiliary/util/u_pstipple.c index bcbe2a25b25..3ae8923f953 100644 --- a/src/gallium/auxiliary/util/u_pstipple.c +++ b/src/gallium/auxiliary/util/u_pstipple.c @@ -204,7 +204,7 @@ pstip_transform_decl(struct tgsi_transform_context *ctx, if (decl->Declaration.File == TGSI_FILE_SAMPLER) { uint i; for (i = decl->Range.First; i <= decl->Range.Last; i++) { - pctx->samplersUsed |= 1 << i; + pctx->samplersUsed |= 1u << i; } } else if (decl->Declaration.File == pctx->wincoordFile) { @@ -266,9 +266,11 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx) int texTemp; int sampIdx; + STATIC_ASSERT(sizeof(pctx->samplersUsed) * 8 >= PIPE_MAX_SAMPLERS); + /* find free texture sampler */ pctx->freeSampler = free_bit(pctx->samplersUsed); - if (pctx->freeSampler >= PIPE_MAX_SAMPLERS) + if (pctx->freeSampler < 0 || pctx->freeSampler >= PIPE_MAX_SAMPLERS) pctx->freeSampler = PIPE_MAX_SAMPLERS - 1; if (pctx->wincoordInput < 0) diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 47a19de6ea9..824f580ed44 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -323,6 +323,14 @@ The integer capabilities: * ``PIPE_CAP_PCI_BUS``: Return the PCI bus number. * ``PIPE_CAP_PCI_DEVICE``: Return the PCI device number. * ``PIPE_CAP_PCI_FUNCTION``: Return the PCI function number. +* ``PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT``: + If non-zero, rendering to framebuffers with no surface attachments + is supported. The context->is_format_supported function will be expected + to be implemented with PIPE_FORMAT_NONE yeilding the MSAA modes the hardware + supports. N.B., The maximum number of layers supported for rasterizing a + primitive on a layer is obtained from ``PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS`` + even though it can be larger than the number of layers supported by either + rendering or textures. .. 
_pipe_capf: diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index d47cb07f10b..707be17513b 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -255,6 +255,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_INVALIDATE_BUFFER: case PIPE_CAP_GENERATE_MIPMAP: case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; case PIPE_CAP_MAX_VIEWPORTS: diff --git a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c index 599872470fc..e29d1568256 100644 --- a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c +++ b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c @@ -243,7 +243,7 @@ static void print_instr_cat2(instr_t *instr) "?6?", }; - switch (cat2->opc) { + switch (_OPC(2, cat2->opc)) { case OPC_CMPS_F: case OPC_CMPS_U: case OPC_CMPS_S: @@ -274,7 +274,7 @@ static void print_instr_cat2(instr_t *instr) cat2->src1_abs, false); } - switch (cat2->opc) { + switch (_OPC(2, cat2->opc)) { case OPC_ABSNEG_F: case OPC_ABSNEG_S: case OPC_CLZ_B: @@ -382,34 +382,34 @@ static void print_instr_cat5(instr_t *instr) static const struct { bool src1, src2, samp, tex; } info[0x1f] = { - [OPC_ISAM] = { true, false, true, true, }, - [OPC_ISAML] = { true, true, true, true, }, - [OPC_ISAMM] = { true, false, true, true, }, - [OPC_SAM] = { true, false, true, true, }, - [OPC_SAMB] = { true, true, true, true, }, - [OPC_SAML] = { true, true, true, true, }, - [OPC_SAMGQ] = { true, false, true, true, }, - [OPC_GETLOD] = { true, false, true, true, }, - [OPC_CONV] = { true, true, true, true, }, - [OPC_CONVM] = { true, true, true, true, }, - [OPC_GETSIZE] = { true, false, false, true, }, - [OPC_GETBUF] = { false, false, false, true, }, - [OPC_GETPOS] = { true, false, false, true, }, - [OPC_GETINFO] = { false, false, false, true, }, - [OPC_DSX] = { true, false, false, false, }, - [OPC_DSY] = { true, false, false, false, }, - [OPC_GATHER4R] = { true, false, true, true, }, - [OPC_GATHER4G] = { true, false, true, true, }, - [OPC_GATHER4B] = { true, false, true, true, }, - [OPC_GATHER4A] = { true, false, true, true, }, - [OPC_SAMGP0] = { true, false, true, true, }, - [OPC_SAMGP1] = { true, false, true, true, }, - [OPC_SAMGP2] = { true, false, true, true, }, - [OPC_SAMGP3] = { true, false, true, true, }, - [OPC_DSXPP_1] = { true, false, false, false, }, - [OPC_DSYPP_1] = { true, false, false, false, }, - [OPC_RGETPOS] = { false, false, false, false, }, - [OPC_RGETINFO] = { false, false, false, false, }, + [opc_op(OPC_ISAM)] = { true, false, true, true, }, + [opc_op(OPC_ISAML)] = { true, true, true, true, }, + [opc_op(OPC_ISAMM)] = { true, false, true, true, }, + [opc_op(OPC_SAM)] = { true, false, true, true, }, + [opc_op(OPC_SAMB)] = { true, true, true, true, }, + [opc_op(OPC_SAML)] = { true, true, true, true, }, + [opc_op(OPC_SAMGQ)] = { true, false, true, true, }, + [opc_op(OPC_GETLOD)] = { true, false, true, true, }, + [opc_op(OPC_CONV)] = { true, true, true, true, }, + [opc_op(OPC_CONVM)] = { true, true, true, true, }, + [opc_op(OPC_GETSIZE)] = { true, false, false, true, }, + [opc_op(OPC_GETBUF)] = { false, false, false, true, }, + [opc_op(OPC_GETPOS)] = { true, false, false, true, }, + [opc_op(OPC_GETINFO)] = { false, false, false, true, }, + [opc_op(OPC_DSX)] = { true, false, false, false, }, + [opc_op(OPC_DSY)] = { true, false, false, false, }, + [opc_op(OPC_GATHER4R)] = { true, 
false, true, true, }, + [opc_op(OPC_GATHER4G)] = { true, false, true, true, }, + [opc_op(OPC_GATHER4B)] = { true, false, true, true, }, + [opc_op(OPC_GATHER4A)] = { true, false, true, true, }, + [opc_op(OPC_SAMGP0)] = { true, false, true, true, }, + [opc_op(OPC_SAMGP1)] = { true, false, true, true, }, + [opc_op(OPC_SAMGP2)] = { true, false, true, true, }, + [opc_op(OPC_SAMGP3)] = { true, false, true, true, }, + [opc_op(OPC_DSXPP_1)] = { true, false, false, false, }, + [opc_op(OPC_DSYPP_1)] = { true, false, false, false, }, + [opc_op(OPC_RGETPOS)] = { false, false, false, false, }, + [opc_op(OPC_RGETINFO)] = { false, false, false, false, }, }; instr_cat5_t *cat5 = &instr->cat5; int i; @@ -423,7 +423,7 @@ static void print_instr_cat5(instr_t *instr) printf(" "); - switch (cat5->opc) { + switch (_OPC(5, cat5->opc)) { case OPC_DSXPP_1: case OPC_DSYPP_1: break; @@ -488,7 +488,7 @@ static void print_instr_cat6(instr_t *instr) memset(&src1, 0, sizeof(src1)); memset(&src2, 0, sizeof(src2)); - switch (cat6->opc) { + switch (_OPC(6, cat6->opc)) { case OPC_RESINFO: case OPC_RESFMT: dst.full = type_size(cat6->type) == 32; @@ -519,7 +519,7 @@ static void print_instr_cat6(instr_t *instr) break; } - switch (cat6->opc) { + switch (_OPC(6, cat6->opc)) { case OPC_PREFETCH: case OPC_RESINFO: break; @@ -545,7 +545,7 @@ static void print_instr_cat6(instr_t *instr) } printf(" "); - switch (cat6->opc) { + switch (_OPC(6, cat6->opc)) { case OPC_STG: sd = 'g'; break; @@ -636,7 +636,7 @@ static void print_instr_cat6(instr_t *instr) if (ss) printf("]"); - switch (cat6->opc) { + switch (_OPC(6, cat6->opc)) { case OPC_RESINFO: case OPC_RESFMT: break; @@ -656,7 +656,7 @@ static const struct opc_info { const char *name; void (*print)(instr_t *instr); } opcs[1 << (3+NOPC_BITS)] = { -#define OPC(cat, opc, name) [((cat) << NOPC_BITS) | (opc)] = { (cat), (opc), #name, print_instr_cat##cat } +#define OPC(cat, opc, name) [(opc)] = { (cat), (opc), #name, print_instr_cat##cat } /* category 0: */ OPC(0, OPC_NOP, nop), OPC(0, OPC_BR, br), @@ -672,7 +672,7 @@ static const struct opc_info { OPC(0, OPC_FLOW_REV, flow_rev), /* category 1: */ - OPC(1, 0, ), + OPC(1, OPC_MOV, ), /* category 2: */ OPC(2, OPC_ADD_F, add.f), @@ -822,8 +822,8 @@ static const struct opc_info { #include "ir3.h" const char *ir3_instr_name(struct ir3_instruction *instr) { - if (instr->category == -1) return "??meta??"; - return opcs[(instr->category << NOPC_BITS) | instr->opc].name; + if (opc_cat(instr->opc) == -1) return "??meta??"; + return opcs[instr->opc].name; } static void print_instr(uint32_t *dwords, int level, int n) diff --git a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h index 1b1f1f0a797..87083fd1e81 100644 --- a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h +++ b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h @@ -29,181 +29,189 @@ #include <stdint.h> #include <assert.h> +/* size of largest OPC field of all the instruction categories: */ +#define NOPC_BITS 6 + +#define _OPC(cat, opc) (((cat) << NOPC_BITS) | opc) + typedef enum { /* category 0: */ - OPC_NOP = 0, - OPC_BR = 1, - OPC_JUMP = 2, - OPC_CALL = 3, - OPC_RET = 4, - OPC_KILL = 5, - OPC_END = 6, - OPC_EMIT = 7, - OPC_CUT = 8, - OPC_CHMASK = 9, - OPC_CHSH = 10, - OPC_FLOW_REV = 11, + OPC_NOP = _OPC(0, 0), + OPC_BR = _OPC(0, 1), + OPC_JUMP = _OPC(0, 2), + OPC_CALL = _OPC(0, 3), + OPC_RET = _OPC(0, 4), + OPC_KILL = _OPC(0, 5), + OPC_END = _OPC(0, 6), + OPC_EMIT = _OPC(0, 7), + OPC_CUT = _OPC(0, 8), + OPC_CHMASK = _OPC(0, 9), + OPC_CHSH = 
_OPC(0, 10), + OPC_FLOW_REV = _OPC(0, 11), /* category 1: */ - /* no opc.. all category 1 are variants of mov */ + OPC_MOV = _OPC(1, 0), /* category 2: */ - OPC_ADD_F = 0, - OPC_MIN_F = 1, - OPC_MAX_F = 2, - OPC_MUL_F = 3, - OPC_SIGN_F = 4, - OPC_CMPS_F = 5, - OPC_ABSNEG_F = 6, - OPC_CMPV_F = 7, + OPC_ADD_F = _OPC(2, 0), + OPC_MIN_F = _OPC(2, 1), + OPC_MAX_F = _OPC(2, 2), + OPC_MUL_F = _OPC(2, 3), + OPC_SIGN_F = _OPC(2, 4), + OPC_CMPS_F = _OPC(2, 5), + OPC_ABSNEG_F = _OPC(2, 6), + OPC_CMPV_F = _OPC(2, 7), /* 8 - invalid */ - OPC_FLOOR_F = 9, - OPC_CEIL_F = 10, - OPC_RNDNE_F = 11, - OPC_RNDAZ_F = 12, - OPC_TRUNC_F = 13, + OPC_FLOOR_F = _OPC(2, 9), + OPC_CEIL_F = _OPC(2, 10), + OPC_RNDNE_F = _OPC(2, 11), + OPC_RNDAZ_F = _OPC(2, 12), + OPC_TRUNC_F = _OPC(2, 13), /* 14-15 - invalid */ - OPC_ADD_U = 16, - OPC_ADD_S = 17, - OPC_SUB_U = 18, - OPC_SUB_S = 19, - OPC_CMPS_U = 20, - OPC_CMPS_S = 21, - OPC_MIN_U = 22, - OPC_MIN_S = 23, - OPC_MAX_U = 24, - OPC_MAX_S = 25, - OPC_ABSNEG_S = 26, + OPC_ADD_U = _OPC(2, 16), + OPC_ADD_S = _OPC(2, 17), + OPC_SUB_U = _OPC(2, 18), + OPC_SUB_S = _OPC(2, 19), + OPC_CMPS_U = _OPC(2, 20), + OPC_CMPS_S = _OPC(2, 21), + OPC_MIN_U = _OPC(2, 22), + OPC_MIN_S = _OPC(2, 23), + OPC_MAX_U = _OPC(2, 24), + OPC_MAX_S = _OPC(2, 25), + OPC_ABSNEG_S = _OPC(2, 26), /* 27 - invalid */ - OPC_AND_B = 28, - OPC_OR_B = 29, - OPC_NOT_B = 30, - OPC_XOR_B = 31, + OPC_AND_B = _OPC(2, 28), + OPC_OR_B = _OPC(2, 29), + OPC_NOT_B = _OPC(2, 30), + OPC_XOR_B = _OPC(2, 31), /* 32 - invalid */ - OPC_CMPV_U = 33, - OPC_CMPV_S = 34, + OPC_CMPV_U = _OPC(2, 33), + OPC_CMPV_S = _OPC(2, 34), /* 35-47 - invalid */ - OPC_MUL_U = 48, - OPC_MUL_S = 49, - OPC_MULL_U = 50, - OPC_BFREV_B = 51, - OPC_CLZ_S = 52, - OPC_CLZ_B = 53, - OPC_SHL_B = 54, - OPC_SHR_B = 55, - OPC_ASHR_B = 56, - OPC_BARY_F = 57, - OPC_MGEN_B = 58, - OPC_GETBIT_B = 59, - OPC_SETRM = 60, - OPC_CBITS_B = 61, - OPC_SHB = 62, - OPC_MSAD = 63, + OPC_MUL_U = _OPC(2, 48), + OPC_MUL_S = _OPC(2, 49), + OPC_MULL_U = _OPC(2, 50), + OPC_BFREV_B = _OPC(2, 51), + OPC_CLZ_S = _OPC(2, 52), + OPC_CLZ_B = _OPC(2, 53), + OPC_SHL_B = _OPC(2, 54), + OPC_SHR_B = _OPC(2, 55), + OPC_ASHR_B = _OPC(2, 56), + OPC_BARY_F = _OPC(2, 57), + OPC_MGEN_B = _OPC(2, 58), + OPC_GETBIT_B = _OPC(2, 59), + OPC_SETRM = _OPC(2, 60), + OPC_CBITS_B = _OPC(2, 61), + OPC_SHB = _OPC(2, 62), + OPC_MSAD = _OPC(2, 63), /* category 3: */ - OPC_MAD_U16 = 0, - OPC_MADSH_U16 = 1, - OPC_MAD_S16 = 2, - OPC_MADSH_M16 = 3, /* should this be .s16? */ - OPC_MAD_U24 = 4, - OPC_MAD_S24 = 5, - OPC_MAD_F16 = 6, - OPC_MAD_F32 = 7, - OPC_SEL_B16 = 8, - OPC_SEL_B32 = 9, - OPC_SEL_S16 = 10, - OPC_SEL_S32 = 11, - OPC_SEL_F16 = 12, - OPC_SEL_F32 = 13, - OPC_SAD_S16 = 14, - OPC_SAD_S32 = 15, + OPC_MAD_U16 = _OPC(3, 0), + OPC_MADSH_U16 = _OPC(3, 1), + OPC_MAD_S16 = _OPC(3, 2), + OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? 
*/ + OPC_MAD_U24 = _OPC(3, 4), + OPC_MAD_S24 = _OPC(3, 5), + OPC_MAD_F16 = _OPC(3, 6), + OPC_MAD_F32 = _OPC(3, 7), + OPC_SEL_B16 = _OPC(3, 8), + OPC_SEL_B32 = _OPC(3, 9), + OPC_SEL_S16 = _OPC(3, 10), + OPC_SEL_S32 = _OPC(3, 11), + OPC_SEL_F16 = _OPC(3, 12), + OPC_SEL_F32 = _OPC(3, 13), + OPC_SAD_S16 = _OPC(3, 14), + OPC_SAD_S32 = _OPC(3, 15), /* category 4: */ - OPC_RCP = 0, - OPC_RSQ = 1, - OPC_LOG2 = 2, - OPC_EXP2 = 3, - OPC_SIN = 4, - OPC_COS = 5, - OPC_SQRT = 6, + OPC_RCP = _OPC(4, 0), + OPC_RSQ = _OPC(4, 1), + OPC_LOG2 = _OPC(4, 2), + OPC_EXP2 = _OPC(4, 3), + OPC_SIN = _OPC(4, 4), + OPC_COS = _OPC(4, 5), + OPC_SQRT = _OPC(4, 6), // 7-63 - invalid /* category 5: */ - OPC_ISAM = 0, - OPC_ISAML = 1, - OPC_ISAMM = 2, - OPC_SAM = 3, - OPC_SAMB = 4, - OPC_SAML = 5, - OPC_SAMGQ = 6, - OPC_GETLOD = 7, - OPC_CONV = 8, - OPC_CONVM = 9, - OPC_GETSIZE = 10, - OPC_GETBUF = 11, - OPC_GETPOS = 12, - OPC_GETINFO = 13, - OPC_DSX = 14, - OPC_DSY = 15, - OPC_GATHER4R = 16, - OPC_GATHER4G = 17, - OPC_GATHER4B = 18, - OPC_GATHER4A = 19, - OPC_SAMGP0 = 20, - OPC_SAMGP1 = 21, - OPC_SAMGP2 = 22, - OPC_SAMGP3 = 23, - OPC_DSXPP_1 = 24, - OPC_DSYPP_1 = 25, - OPC_RGETPOS = 26, - OPC_RGETINFO = 27, + OPC_ISAM = _OPC(5, 0), + OPC_ISAML = _OPC(5, 1), + OPC_ISAMM = _OPC(5, 2), + OPC_SAM = _OPC(5, 3), + OPC_SAMB = _OPC(5, 4), + OPC_SAML = _OPC(5, 5), + OPC_SAMGQ = _OPC(5, 6), + OPC_GETLOD = _OPC(5, 7), + OPC_CONV = _OPC(5, 8), + OPC_CONVM = _OPC(5, 9), + OPC_GETSIZE = _OPC(5, 10), + OPC_GETBUF = _OPC(5, 11), + OPC_GETPOS = _OPC(5, 12), + OPC_GETINFO = _OPC(5, 13), + OPC_DSX = _OPC(5, 14), + OPC_DSY = _OPC(5, 15), + OPC_GATHER4R = _OPC(5, 16), + OPC_GATHER4G = _OPC(5, 17), + OPC_GATHER4B = _OPC(5, 18), + OPC_GATHER4A = _OPC(5, 19), + OPC_SAMGP0 = _OPC(5, 20), + OPC_SAMGP1 = _OPC(5, 21), + OPC_SAMGP2 = _OPC(5, 22), + OPC_SAMGP3 = _OPC(5, 23), + OPC_DSXPP_1 = _OPC(5, 24), + OPC_DSYPP_1 = _OPC(5, 25), + OPC_RGETPOS = _OPC(5, 26), + OPC_RGETINFO = _OPC(5, 27), /* category 6: */ - OPC_LDG = 0, /* load-global */ - OPC_LDL = 1, - OPC_LDP = 2, - OPC_STG = 3, /* store-global */ - OPC_STL = 4, - OPC_STP = 5, - OPC_STI = 6, - OPC_G2L = 7, - OPC_L2G = 8, - OPC_PREFETCH = 9, - OPC_LDLW = 10, - OPC_STLW = 11, - OPC_RESFMT = 14, - OPC_RESINFO = 15, - OPC_ATOMIC_ADD = 16, - OPC_ATOMIC_SUB = 17, - OPC_ATOMIC_XCHG = 18, - OPC_ATOMIC_INC = 19, - OPC_ATOMIC_DEC = 20, - OPC_ATOMIC_CMPXCHG = 21, - OPC_ATOMIC_MIN = 22, - OPC_ATOMIC_MAX = 23, - OPC_ATOMIC_AND = 24, - OPC_ATOMIC_OR = 25, - OPC_ATOMIC_XOR = 26, - OPC_LDGB_TYPED_4D = 27, - OPC_STGB_4D_4 = 28, - OPC_STIB = 29, - OPC_LDC_4 = 30, - OPC_LDLV = 31, + OPC_LDG = _OPC(6, 0), /* load-global */ + OPC_LDL = _OPC(6, 1), + OPC_LDP = _OPC(6, 2), + OPC_STG = _OPC(6, 3), /* store-global */ + OPC_STL = _OPC(6, 4), + OPC_STP = _OPC(6, 5), + OPC_STI = _OPC(6, 6), + OPC_G2L = _OPC(6, 7), + OPC_L2G = _OPC(6, 8), + OPC_PREFETCH = _OPC(6, 9), + OPC_LDLW = _OPC(6, 10), + OPC_STLW = _OPC(6, 11), + OPC_RESFMT = _OPC(6, 14), + OPC_RESINFO = _OPC(6, 15), + OPC_ATOMIC_ADD = _OPC(6, 16), + OPC_ATOMIC_SUB = _OPC(6, 17), + OPC_ATOMIC_XCHG = _OPC(6, 18), + OPC_ATOMIC_INC = _OPC(6, 19), + OPC_ATOMIC_DEC = _OPC(6, 20), + OPC_ATOMIC_CMPXCHG = _OPC(6, 21), + OPC_ATOMIC_MIN = _OPC(6, 22), + OPC_ATOMIC_MAX = _OPC(6, 23), + OPC_ATOMIC_AND = _OPC(6, 24), + OPC_ATOMIC_OR = _OPC(6, 25), + OPC_ATOMIC_XOR = _OPC(6, 26), + OPC_LDGB_TYPED_4D = _OPC(6, 27), + OPC_STGB_4D_4 = _OPC(6, 28), + OPC_STIB = _OPC(6, 29), + OPC_LDC_4 = _OPC(6, 30), + OPC_LDLV = _OPC(6, 31), /* meta instructions (category -1): */ /* placeholder instr 
to mark shader inputs: */ - OPC_META_INPUT = 0, - OPC_META_PHI = 1, + OPC_META_INPUT = _OPC(-1, 0), + OPC_META_PHI = _OPC(-1, 1), /* The "fan-in" and "fan-out" instructions are used for keeping * track of instructions that write to multiple dst registers * (fan-out) like texture sample instructions, or read multiple * consecutive scalar registers (fan-in) (bary.f, texture samp) */ - OPC_META_FO = 2, - OPC_META_FI = 3, + OPC_META_FO = _OPC(-1, 2), + OPC_META_FI = _OPC(-1, 3), } opc_t; +#define opc_cat(opc) ((int)((opc) >> NOPC_BITS)) +#define opc_op(opc) ((unsigned)((opc) & ((1 << NOPC_BITS) - 1))) + typedef enum { TYPE_F16 = 0, TYPE_F32 = 1, @@ -472,7 +480,7 @@ typedef struct PACKED { static inline bool instr_cat3_full(instr_cat3_t *cat3) { - switch (cat3->opc) { + switch (_OPC(3, cat3->opc)) { case OPC_MAD_F16: case OPC_MAD_U16: case OPC_MAD_S16: diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index 7d89142d7a1..3de8fdc11b3 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -612,7 +612,7 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, list_for_each_entry (struct ir3_block, block, &shader->block_list, node) { list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - int ret = emit[instr->category](instr, dwords, info); + int ret = emit[opc_cat(instr->opc)](instr, dwords, info); if (ret) goto fail; info->instrs_count += 1 + instr->repeat; @@ -683,23 +683,21 @@ static struct ir3_instruction *instr_create(struct ir3_block *block, int nreg) } struct ir3_instruction * ir3_instr_create2(struct ir3_block *block, - int category, opc_t opc, int nreg) + opc_t opc, int nreg) { struct ir3_instruction *instr = instr_create(block, nreg); instr->block = block; - instr->category = category; instr->opc = opc; insert_instr(block, instr); return instr; } -struct ir3_instruction * ir3_instr_create(struct ir3_block *block, - int category, opc_t opc) +struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc) { /* NOTE: we could be slightly more clever, at least for non-meta, * and choose # of regs based on category. 
*/ - return ir3_instr_create2(block, category, opc, 4); + return ir3_instr_create2(block, opc, 4); } struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr) diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 1a109d880e6..3859f6a39f3 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -130,7 +130,6 @@ struct ir3_register { struct ir3_instruction { struct ir3_block *block; - int category; opc_t opc; enum { /* (sy) flag is set on first instruction, and after sample @@ -435,6 +434,16 @@ struct ir3_block { #endif }; +static inline uint32_t +block_id(struct ir3_block *block) +{ +#ifdef DEBUG + return block->serialno; +#else + return (uint32_t)(unsigned long)block; +#endif +} + struct ir3 * ir3_create(struct ir3_compiler *compiler, unsigned nin, unsigned nout); void ir3_destroy(struct ir3 *shader); @@ -444,10 +453,9 @@ void * ir3_alloc(struct ir3 *shader, int sz); struct ir3_block * ir3_block_create(struct ir3 *shader); -struct ir3_instruction * ir3_instr_create(struct ir3_block *block, - int category, opc_t opc); +struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc); struct ir3_instruction * ir3_instr_create2(struct ir3_block *block, - int category, opc_t opc, int nreg); + opc_t opc, int nreg); struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr); const char *ir3_instr_name(struct ir3_instruction *instr); @@ -508,17 +516,17 @@ static inline uint32_t reg_comp(struct ir3_register *reg) static inline bool is_flow(struct ir3_instruction *instr) { - return (instr->category == 0); + return (opc_cat(instr->opc) == 0); } static inline bool is_kill(struct ir3_instruction *instr) { - return is_flow(instr) && (instr->opc == OPC_KILL); + return instr->opc == OPC_KILL; } static inline bool is_nop(struct ir3_instruction *instr) { - return is_flow(instr) && (instr->opc == OPC_NOP); + return instr->opc == OPC_NOP; } /* Is it a non-transformative (ie. not type changing) mov? This can @@ -538,75 +546,71 @@ static inline bool is_same_type_mov(struct ir3_instruction *instr) if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY)) return false; - if ((instr->category == 1) && - (instr->cat1.src_type == instr->cat1.dst_type)) - return true; - if ((instr->category == 2) && ((instr->opc == OPC_ABSNEG_F) || - (instr->opc == OPC_ABSNEG_S))) + switch (instr->opc) { + case OPC_MOV: + return instr->cat1.src_type == instr->cat1.dst_type; + case OPC_ABSNEG_F: + case OPC_ABSNEG_S: return true; - return false; + default: + return false; + } } static inline bool is_alu(struct ir3_instruction *instr) { - return (1 <= instr->category) && (instr->category <= 3); + return (1 <= opc_cat(instr->opc)) && (opc_cat(instr->opc) <= 3); } static inline bool is_sfu(struct ir3_instruction *instr) { - return (instr->category == 4); + return (opc_cat(instr->opc) == 4); } static inline bool is_tex(struct ir3_instruction *instr) { - return (instr->category == 5); + return (opc_cat(instr->opc) == 5); } static inline bool is_mem(struct ir3_instruction *instr) { - return (instr->category == 6); + return (opc_cat(instr->opc) == 6); } static inline bool is_store(struct ir3_instruction *instr) { - if (is_mem(instr)) { - /* these instructions, the "destination" register is - * actually a source, the address to store to. 
- */ - switch (instr->opc) { - case OPC_STG: - case OPC_STP: - case OPC_STL: - case OPC_STLW: - case OPC_L2G: - case OPC_G2L: - return true; - default: - break; - } + /* these instructions, the "destination" register is + * actually a source, the address to store to. + */ + switch (instr->opc) { + case OPC_STG: + case OPC_STP: + case OPC_STL: + case OPC_STLW: + case OPC_L2G: + case OPC_G2L: + return true; + default: + return false; } - return false; } static inline bool is_load(struct ir3_instruction *instr) { - if (is_mem(instr)) { - switch (instr->opc) { - case OPC_LDG: - case OPC_LDL: - case OPC_LDP: - case OPC_L2G: - case OPC_LDLW: - case OPC_LDC_4: - case OPC_LDLV: + switch (instr->opc) { + case OPC_LDG: + case OPC_LDL: + case OPC_LDP: + case OPC_L2G: + case OPC_LDLW: + case OPC_LDC_4: + case OPC_LDLV: /* probably some others too.. */ - return true; - default: - break; - } + return true; + default: + return false; } - return false; } static inline bool is_input(struct ir3_instruction *instr) @@ -615,9 +619,25 @@ static inline bool is_input(struct ir3_instruction *instr) * interpolation.. fortunately inloc is the first src * register in either case */ - if (is_mem(instr) && (instr->opc == OPC_LDLV)) + switch (instr->opc) { + case OPC_LDLV: + case OPC_BARY_F: + return true; + default: + return false; + } +} + +static inline bool is_bool(struct ir3_instruction *instr) +{ + switch (instr->opc) { + case OPC_CMPS_F: + case OPC_CMPS_S: + case OPC_CMPS_U: return true; - return (instr->category == 2) && (instr->opc == OPC_BARY_F); + default: + return false; + } } static inline bool is_meta(struct ir3_instruction *instr) @@ -626,7 +646,7 @@ static inline bool is_meta(struct ir3_instruction *instr) * might actually contribute some instructions to the final * result? 
*/ - return (instr->category == -1); + return (opc_cat(instr->opc) == -1); } static inline bool writes_addr(struct ir3_instruction *instr) @@ -901,8 +921,7 @@ void ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary); static inline struct ir3_instruction * ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type) { - struct ir3_instruction *instr = - ir3_instr_create(block, 1, 0); + struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV); ir3_reg_create(instr, 0, 0); /* dst */ if (src->regs[0]->flags & IR3_REG_ARRAY) { struct ir3_register *src_reg = @@ -922,8 +941,7 @@ static inline struct ir3_instruction * ir3_COV(struct ir3_block *block, struct ir3_instruction *src, type_t src_type, type_t dst_type) { - struct ir3_instruction *instr = - ir3_instr_create(block, 1, 0); + struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV); ir3_reg_create(instr, 0, 0); /* dst */ ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; instr->cat1.src_type = src_type; @@ -935,45 +953,45 @@ ir3_COV(struct ir3_block *block, struct ir3_instruction *src, static inline struct ir3_instruction * ir3_NOP(struct ir3_block *block) { - return ir3_instr_create(block, 0, OPC_NOP); + return ir3_instr_create(block, OPC_NOP); } -#define INSTR0(CAT, name) \ +#define INSTR0(name) \ static inline struct ir3_instruction * \ ir3_##name(struct ir3_block *block) \ { \ struct ir3_instruction *instr = \ - ir3_instr_create(block, CAT, OPC_##name); \ + ir3_instr_create(block, OPC_##name); \ return instr; \ } -#define INSTR1(CAT, name) \ +#define INSTR1(name) \ static inline struct ir3_instruction * \ ir3_##name(struct ir3_block *block, \ struct ir3_instruction *a, unsigned aflags) \ { \ struct ir3_instruction *instr = \ - ir3_instr_create(block, CAT, OPC_##name); \ + ir3_instr_create(block, OPC_##name); \ ir3_reg_create(instr, 0, 0); /* dst */ \ ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \ return instr; \ } -#define INSTR2(CAT, name) \ +#define INSTR2(name) \ static inline struct ir3_instruction * \ ir3_##name(struct ir3_block *block, \ struct ir3_instruction *a, unsigned aflags, \ struct ir3_instruction *b, unsigned bflags) \ { \ struct ir3_instruction *instr = \ - ir3_instr_create(block, CAT, OPC_##name); \ + ir3_instr_create(block, OPC_##name); \ ir3_reg_create(instr, 0, 0); /* dst */ \ ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \ ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \ return instr; \ } -#define INSTR3(CAT, name) \ +#define INSTR3(name) \ static inline struct ir3_instruction * \ ir3_##name(struct ir3_block *block, \ struct ir3_instruction *a, unsigned aflags, \ @@ -981,7 +999,7 @@ ir3_##name(struct ir3_block *block, \ struct ir3_instruction *c, unsigned cflags) \ { \ struct ir3_instruction *instr = \ - ir3_instr_create(block, CAT, OPC_##name); \ + ir3_instr_create(block, OPC_##name); \ ir3_reg_create(instr, 0, 0); /* dst */ \ ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \ ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \ @@ -990,89 +1008,89 @@ ir3_##name(struct ir3_block *block, \ } /* cat0 instructions: */ -INSTR0(0, BR); -INSTR0(0, JUMP); -INSTR1(0, KILL); -INSTR0(0, END); +INSTR0(BR); +INSTR0(JUMP); +INSTR1(KILL); +INSTR0(END); /* cat2 instructions, most 2 src but some 1 src: */ -INSTR2(2, ADD_F) -INSTR2(2, MIN_F) -INSTR2(2, MAX_F) -INSTR2(2, MUL_F) -INSTR1(2, SIGN_F) -INSTR2(2, CMPS_F) -INSTR1(2, ABSNEG_F) -INSTR2(2, CMPV_F) -INSTR1(2, FLOOR_F) -INSTR1(2, CEIL_F) -INSTR1(2, RNDNE_F) -INSTR1(2, RNDAZ_F) 
-INSTR1(2, TRUNC_F) -INSTR2(2, ADD_U) -INSTR2(2, ADD_S) -INSTR2(2, SUB_U) -INSTR2(2, SUB_S) -INSTR2(2, CMPS_U) -INSTR2(2, CMPS_S) -INSTR2(2, MIN_U) -INSTR2(2, MIN_S) -INSTR2(2, MAX_U) -INSTR2(2, MAX_S) -INSTR1(2, ABSNEG_S) -INSTR2(2, AND_B) -INSTR2(2, OR_B) -INSTR1(2, NOT_B) -INSTR2(2, XOR_B) -INSTR2(2, CMPV_U) -INSTR2(2, CMPV_S) -INSTR2(2, MUL_U) -INSTR2(2, MUL_S) -INSTR2(2, MULL_U) -INSTR1(2, BFREV_B) -INSTR1(2, CLZ_S) -INSTR1(2, CLZ_B) -INSTR2(2, SHL_B) -INSTR2(2, SHR_B) -INSTR2(2, ASHR_B) -INSTR2(2, BARY_F) -INSTR2(2, MGEN_B) -INSTR2(2, GETBIT_B) -INSTR1(2, SETRM) -INSTR1(2, CBITS_B) -INSTR2(2, SHB) -INSTR2(2, MSAD) +INSTR2(ADD_F) +INSTR2(MIN_F) +INSTR2(MAX_F) +INSTR2(MUL_F) +INSTR1(SIGN_F) +INSTR2(CMPS_F) +INSTR1(ABSNEG_F) +INSTR2(CMPV_F) +INSTR1(FLOOR_F) +INSTR1(CEIL_F) +INSTR1(RNDNE_F) +INSTR1(RNDAZ_F) +INSTR1(TRUNC_F) +INSTR2(ADD_U) +INSTR2(ADD_S) +INSTR2(SUB_U) +INSTR2(SUB_S) +INSTR2(CMPS_U) +INSTR2(CMPS_S) +INSTR2(MIN_U) +INSTR2(MIN_S) +INSTR2(MAX_U) +INSTR2(MAX_S) +INSTR1(ABSNEG_S) +INSTR2(AND_B) +INSTR2(OR_B) +INSTR1(NOT_B) +INSTR2(XOR_B) +INSTR2(CMPV_U) +INSTR2(CMPV_S) +INSTR2(MUL_U) +INSTR2(MUL_S) +INSTR2(MULL_U) +INSTR1(BFREV_B) +INSTR1(CLZ_S) +INSTR1(CLZ_B) +INSTR2(SHL_B) +INSTR2(SHR_B) +INSTR2(ASHR_B) +INSTR2(BARY_F) +INSTR2(MGEN_B) +INSTR2(GETBIT_B) +INSTR1(SETRM) +INSTR1(CBITS_B) +INSTR2(SHB) +INSTR2(MSAD) /* cat3 instructions: */ -INSTR3(3, MAD_U16) -INSTR3(3, MADSH_U16) -INSTR3(3, MAD_S16) -INSTR3(3, MADSH_M16) -INSTR3(3, MAD_U24) -INSTR3(3, MAD_S24) -INSTR3(3, MAD_F16) -INSTR3(3, MAD_F32) -INSTR3(3, SEL_B16) -INSTR3(3, SEL_B32) -INSTR3(3, SEL_S16) -INSTR3(3, SEL_S32) -INSTR3(3, SEL_F16) -INSTR3(3, SEL_F32) -INSTR3(3, SAD_S16) -INSTR3(3, SAD_S32) +INSTR3(MAD_U16) +INSTR3(MADSH_U16) +INSTR3(MAD_S16) +INSTR3(MADSH_M16) +INSTR3(MAD_U24) +INSTR3(MAD_S24) +INSTR3(MAD_F16) +INSTR3(MAD_F32) +INSTR3(SEL_B16) +INSTR3(SEL_B32) +INSTR3(SEL_S16) +INSTR3(SEL_S32) +INSTR3(SEL_F16) +INSTR3(SEL_F32) +INSTR3(SAD_S16) +INSTR3(SAD_S32) /* cat4 instructions: */ -INSTR1(4, RCP) -INSTR1(4, RSQ) -INSTR1(4, LOG2) -INSTR1(4, EXP2) -INSTR1(4, SIN) -INSTR1(4, COS) -INSTR1(4, SQRT) +INSTR1(RCP) +INSTR1(RSQ) +INSTR1(LOG2) +INSTR1(EXP2) +INSTR1(SIN) +INSTR1(COS) +INSTR1(SQRT) /* cat5 instructions: */ -INSTR1(5, DSX) -INSTR1(5, DSY) +INSTR1(DSX) +INSTR1(DSY) static inline struct ir3_instruction * ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, @@ -1082,7 +1100,7 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, struct ir3_instruction *sam; struct ir3_register *reg; - sam = ir3_instr_create(block, 5, opc); + sam = ir3_instr_create(block, opc); sam->flags |= flags; ir3_reg_create(sam, 0, 0)->wrmask = wrmask; if (src0) { @@ -1103,9 +1121,9 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, } /* cat6 instructions: */ -INSTR2(6, LDLV) -INSTR2(6, LDG) -INSTR3(6, STG) +INSTR2(LDLV) +INSTR2(LDG) +INSTR3(STG) /* ************************************************************************* */ /* split this out or find some helper to use.. like main/bitset.h.. 
*/ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index 3d656d4a34d..245b61f31e5 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -286,7 +286,7 @@ create_immed(struct ir3_block *block, uint32_t val) { struct ir3_instruction *mov; - mov = ir3_instr_create(block, 1, 0); + mov = ir3_instr_create(block, OPC_MOV); mov->cat1.src_type = TYPE_U32; mov->cat1.dst_type = TYPE_U32; ir3_reg_create(mov, 0, 0); @@ -366,7 +366,7 @@ create_uniform(struct ir3_compile *ctx, unsigned n) { struct ir3_instruction *mov; - mov = ir3_instr_create(ctx->block, 1, 0); + mov = ir3_instr_create(ctx->block, OPC_MOV); /* TODO get types right? */ mov->cat1.src_type = TYPE_F32; mov->cat1.dst_type = TYPE_F32; @@ -382,7 +382,7 @@ create_uniform_indirect(struct ir3_compile *ctx, int n, { struct ir3_instruction *mov; - mov = ir3_instr_create(ctx->block, 1, 0); + mov = ir3_instr_create(ctx->block, OPC_MOV); mov->cat1.src_type = TYPE_U32; mov->cat1.dst_type = TYPE_U32; ir3_reg_create(mov, 0, 0); @@ -402,7 +402,7 @@ create_collect(struct ir3_block *block, struct ir3_instruction **arr, if (arrsz == 0) return NULL; - collect = ir3_instr_create2(block, -1, OPC_META_FI, 1 + arrsz); + collect = ir3_instr_create2(block, OPC_META_FI, 1 + arrsz); ir3_reg_create(collect, 0, 0); /* dst */ for (unsigned i = 0; i < arrsz; i++) ir3_reg_create(collect, 0, IR3_REG_SSA)->instr = arr[i]; @@ -418,7 +418,7 @@ create_indirect_load(struct ir3_compile *ctx, unsigned arrsz, int n, struct ir3_instruction *mov; struct ir3_register *src; - mov = ir3_instr_create(block, 1, 0); + mov = ir3_instr_create(block, OPC_MOV); mov->cat1.src_type = TYPE_U32; mov->cat1.dst_type = TYPE_U32; ir3_reg_create(mov, 0, 0); @@ -441,7 +441,7 @@ create_var_load(struct ir3_compile *ctx, struct ir3_array *arr, int n, struct ir3_instruction *mov; struct ir3_register *src; - mov = ir3_instr_create(block, 1, 0); + mov = ir3_instr_create(block, OPC_MOV); mov->cat1.src_type = TYPE_U32; mov->cat1.dst_type = TYPE_U32; ir3_reg_create(mov, 0, 0); @@ -469,7 +469,7 @@ create_var_store(struct ir3_compile *ctx, struct ir3_array *arr, int n, struct ir3_instruction *mov; struct ir3_register *dst; - mov = ir3_instr_create(block, 1, 0); + mov = ir3_instr_create(block, OPC_MOV); mov->cat1.src_type = TYPE_U32; mov->cat1.dst_type = TYPE_U32; dst = ir3_reg_create(mov, 0, IR3_REG_ARRAY | @@ -492,7 +492,7 @@ create_input(struct ir3_block *block, unsigned n) { struct ir3_instruction *in; - in = ir3_instr_create(block, -1, OPC_META_INPUT); + in = ir3_instr_create(block, OPC_META_INPUT); in->inout.block = block; ir3_reg_create(in, n, 0); @@ -617,8 +617,7 @@ split_dest(struct ir3_block *block, struct ir3_instruction **dst, { struct ir3_instruction *prev = NULL; for (int i = 0, j = 0; i < n; i++) { - struct ir3_instruction *split = - ir3_instr_create(block, -1, OPC_META_FO); + struct ir3_instruction *split = ir3_instr_create(block, OPC_META_FO); ir3_reg_create(split, 0, IR3_REG_SSA); ir3_reg_create(split, 0, IR3_REG_SSA)->instr = src; split->fo.off = i; @@ -1631,7 +1630,7 @@ emit_phi(struct ir3_compile *ctx, nir_phi_instr *nphi) dst = get_dst(ctx, &nphi->dest, 1); - phi = ir3_instr_create2(ctx->block, -1, OPC_META_PHI, + phi = ir3_instr_create2(ctx->block, OPC_META_PHI, 1 + exec_list_length(&nphi->srcs)); ir3_reg_create(phi, 0, 0); /* dst */ phi->phi.nphi = nphi; @@ -1651,7 +1650,7 @@ resolve_phis(struct ir3_compile *ctx, struct ir3_block *block) 
nir_phi_instr *nphi; /* phi's only come at start of block: */ - if (!(is_meta(instr) && (instr->opc == OPC_META_PHI))) + if (instr->opc != OPC_META_PHI) break; if (!instr->phi.nphi) @@ -1662,6 +1661,16 @@ resolve_phis(struct ir3_compile *ctx, struct ir3_block *block) foreach_list_typed(nir_phi_src, nsrc, node, &nphi->srcs) { struct ir3_instruction *src = get_src(ctx, &nsrc->src)[0]; + + /* NOTE: src might not be in the same block as it comes from + * according to the phi.. but in the end the backend assumes + * it will be able to assign the same register to each (which + * only works if it is assigned in the src block), so insert + * an extra mov to make sure the phi src is assigned in the + * block it comes from: + */ + src = ir3_MOV(get_block(ctx, nsrc->pred), src, TYPE_U32); + ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; } } @@ -2144,7 +2153,7 @@ emit_instructions(struct ir3_compile *ctx) if (ctx->so->type == SHADER_FRAGMENT) { // TODO maybe a helper for fi since we need it a few places.. struct ir3_instruction *instr; - instr = ir3_instr_create(ctx->block, -1, OPC_META_FI); + instr = ir3_instr_create(ctx->block, OPC_META_FI); ir3_reg_create(instr, 0, 0); ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.x */ ir3_reg_create(instr, 0, IR3_REG_SSA); /* r0.y */ @@ -2323,12 +2332,12 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler, * in which case we need to propagate the half-reg flag * up to the definer so that RA sees it: */ - if (is_meta(out) && (out->opc == OPC_META_FO)) { + if (out->opc == OPC_META_FO) { out = out->regs[1]->instr; out->regs[0]->flags |= IR3_REG_HALF; } - if (out->category == 1) { + if (out->opc == OPC_MOV) { out->cat1.dst_type = half_type(out->cat1.dst_type); } } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index 1cc211a7663..6037becf22f 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -58,14 +58,14 @@ static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags) return false; /* TODO: remove this hack: */ - if (is_meta(src_instr) && (src_instr->opc == OPC_META_FO)) + if (src_instr->opc == OPC_META_FO) return false; /* TODO: we currently don't handle left/right neighbors * very well when inserting parallel-copies into phi.. * to avoid problems don't eliminate a mov coming out * of phi.. */ - if (is_meta(src_instr) && (src_instr->opc == OPC_META_PHI)) + if (src_instr->opc == OPC_META_PHI) return false; return true; } @@ -96,7 +96,7 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n, return false; /* clear flags that are 'ok' */ - switch (instr->category) { + switch (opc_cat(instr->opc)) { case 1: valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV; if (flags & ~valid_flags) @@ -111,6 +111,19 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n, valid_flags = IR3_REG_IMMED; if (flags & ~valid_flags) return false; + + if (flags & IR3_REG_IMMED) { + /* doesn't seem like we can have immediate src for store + * instructions: + * + * TODO this restriction could also apply to load instructions, + * but for load instructions this arg is the address (and not + * really sure any good way to test a hard-coded immed addr src) + */ + if (is_store(instr) && (n == 1)) + return false; + } + break; case 2: valid_flags = ir3_cat2_absneg(instr->opc) | @@ -176,8 +189,10 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n, /* propagate register flags from src to dst.. 
negates need special * handling to cancel each other out. */ -static void combine_flags(unsigned *dstflags, unsigned srcflags) +static void combine_flags(unsigned *dstflags, struct ir3_instruction *src) { + unsigned srcflags = src->regs[1]->flags; + /* if what we are combining into already has (abs) flags, * we can drop (neg) from src: */ @@ -203,15 +218,15 @@ static void combine_flags(unsigned *dstflags, unsigned srcflags) *dstflags |= srcflags & IR3_REG_IMMED; *dstflags |= srcflags & IR3_REG_RELATIV; *dstflags |= srcflags & IR3_REG_ARRAY; -} -/* the "plain" MAD's (ie. the ones that don't shift first src prior to - * multiply) can swap their first two srcs if src[0] is !CONST and - * src[1] is CONST: - */ -static bool is_valid_mad(struct ir3_instruction *instr) -{ - return (instr->category == 3) && is_mad(instr->opc); + /* if src of the src is boolean we can drop the (abs) since we know + * the source value is already a postitive integer. This cleans + * up the absnegs that get inserted when converting between nir and + * native boolean (see ir3_b2n/n2b) + */ + struct ir3_instruction *srcsrc = ssa(src->regs[1]); + if (srcsrc && is_bool(srcsrc)) + *dstflags &= ~IR3_REG_SABS; } /** @@ -226,12 +241,18 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) { struct ir3_instruction *src = ssa(reg); + /* don't propagate copies into a PHI, since we don't know if the + * src block executed: + */ + if (instr->opc == OPC_META_PHI) + return; + if (is_eligible_mov(src, true)) { /* simple case, no immed/const/relativ, only mov's w/ ssa src: */ struct ir3_register *src_reg = src->regs[1]; unsigned new_flags = reg->flags; - combine_flags(&new_flags, src_reg->flags); + combine_flags(&new_flags, src); if (valid_flags(instr, n, new_flags)) { if (new_flags & IR3_REG_ARRAY) { @@ -252,13 +273,17 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) struct ir3_register *src_reg = src->regs[1]; unsigned new_flags = reg->flags; - combine_flags(&new_flags, src_reg->flags); + combine_flags(&new_flags, src); if (!valid_flags(instr, n, new_flags)) { /* special case for "normal" mad instructions, we can * try swapping the first two args if that fits better. + * + * the "plain" MAD's (ie. the ones that don't shift first + * src prior to multiply) can swap their first two srcs if + * src[0] is !CONST and src[1] is CONST: */ - if ((n == 1) && is_valid_mad(instr) && + if ((n == 1) && is_mad(instr->opc) && !(instr->regs[0 + 1]->flags & (IR3_REG_CONST | IR3_REG_RELATIV)) && valid_flags(instr, 0, new_flags)) { /* swap src[0] and src[1]: */ @@ -292,7 +317,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) * just somehow don't work out. This restriction may only * apply if the first src is also CONST. 
*/ - if ((instr->category == 3) && (n == 2) && + if ((opc_cat(instr->opc) == 3) && (n == 2) && (src_reg->flags & IR3_REG_RELATIV) && (src_reg->array.offset == 0)) return; @@ -328,10 +353,9 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) if (src_reg->flags & IR3_REG_IMMED) { int32_t iim_val = src_reg->iim_val; - debug_assert((instr->category == 1) || - (instr->category == 6) || - ((instr->category == 2) && - ir3_cat2_int(instr->opc))); + debug_assert((opc_cat(instr->opc) == 1) || + (opc_cat(instr->opc) == 6) || + ir3_cat2_int(instr->opc)); if (new_flags & IR3_REG_SABS) iim_val = abs(iim_val); @@ -343,7 +367,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) iim_val = ~iim_val; /* other than category 1 (mov) we can only encode up to 10 bits: */ - if ((instr->category == 1) || !(iim_val & ~0x3ff)) { + if ((instr->opc == OPC_MOV) || !(iim_val & ~0x3ff)) { new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT); src_reg = ir3_reg_clone(instr->block->shader, src_reg); src_reg->flags = new_flags; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c index 6d294f1a48c..c3f6de965ce 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c @@ -74,8 +74,7 @@ int ir3_delayslots(struct ir3_instruction *assigner, if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) || is_mem(consumer)) { return 6; - } else if ((consumer->category == 3) && - (is_mad(consumer->opc) || is_madsh(consumer->opc)) && + } else if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) && (n == 3)) { /* special case, 3rd src to cat3 not required on first cycle */ return 1; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_group.c b/src/gallium/drivers/freedreno/ir3/ir3_group.c index ca28aefd502..cd59080b0f1 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_group.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_group.c @@ -63,14 +63,13 @@ static void arr_insert_mov_in(void *arr, int idx, struct ir3_instruction *instr) debug_assert(instr->regs_count == 1); - in = ir3_instr_create(instr->block, -1, OPC_META_INPUT); + in = ir3_instr_create(instr->block, OPC_META_INPUT); in->inout.block = instr->block; ir3_reg_create(in, instr->regs[0]->num, 0); /* create src reg for meta:in and fixup to now be a mov: */ ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = in; - instr->category = 1; - instr->opc = 0; + instr->opc = OPC_MOV; instr->cat1.src_type = TYPE_F32; instr->cat1.dst_type = TYPE_F32; @@ -117,7 +116,7 @@ restart: conflicts(instr->cp.right, right); /* RA can't yet deal very well w/ group'd phi's: */ - if (is_meta(instr) && (instr->opc == OPC_META_PHI)) + if (instr->opc == OPC_META_PHI) conflict = true; /* we also can't have an instr twice in the group: */ @@ -168,7 +167,7 @@ instr_find_neighbors(struct ir3_instruction *instr) if (ir3_instr_check_mark(instr)) return; - if (is_meta(instr) && (instr->opc == OPC_META_FI)) + if (instr->opc == OPC_META_FI) group_n(&instr_ops, instr, instr->regs_count - 1); foreach_ssa_src(src, instr) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c index e94293f6d6b..77cd0e622f0 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c @@ -146,7 +146,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) * clever if we were aware of this during scheduling, but * this should be a pretty rare case: 
*/ - if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) { + if ((n->flags & IR3_INSTR_SS) && (opc_cat(n->opc) >= 5)) { struct ir3_instruction *nop; nop = ir3_NOP(block); nop->flags |= IR3_INSTR_SS; @@ -154,7 +154,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) } /* need to be able to set (ss) on first instruction: */ - if (list_empty(&block->instr_list) && (n->category >= 5)) + if (list_empty(&block->instr_list) && (opc_cat(n->opc) >= 5)) ir3_NOP(block); if (is_nop(n) && !list_empty(&block->instr_list)) { @@ -209,7 +209,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) struct ir3_instruction *baryf; /* (ss)bary.f (ei)r63.x, 0, r0.x */ - baryf = ir3_instr_create(block, 2, OPC_BARY_F); + baryf = ir3_instr_create(block, OPC_BARY_F); baryf->flags |= IR3_INSTR_SS; ir3_reg_create(baryf, regid(63, 0), 0); ir3_reg_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_print.c b/src/gallium/drivers/freedreno/ir3/ir3_print.c index ba0c4a57aa3..8aebf21a1be 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_print.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_print.c @@ -35,9 +35,12 @@ static void print_instr_name(struct ir3_instruction *instr) { + if (!instr) + return; #ifdef DEBUG printf("%04u:", instr->serialno); #endif + printf("%04u:", instr->name); printf("%03u: ", instr->depth); if (instr->flags & IR3_INSTR_SY) @@ -61,7 +64,7 @@ static void print_instr_name(struct ir3_instruction *instr) } break; } - } else if (instr->category == 1) { + } else if (instr->opc == OPC_MOV) { static const char *type[] = { [TYPE_F16] = "f16", [TYPE_F32] = "f32", @@ -146,16 +149,6 @@ tab(int lvl) printf("\t"); } -static uint32_t -block_id(struct ir3_block *block) -{ -#ifdef DEBUG - return block->serialno; -#else - return (uint32_t)(unsigned long)block; -#endif -} - static void print_instr(struct ir3_instruction *instr, int lvl) { @@ -191,10 +184,8 @@ print_instr(struct ir3_instruction *instr, int lvl) printf("]"); } - if (is_meta(instr)) { - if (instr->opc == OPC_META_FO) { - printf(", off=%d", instr->fo.off); - } + if (instr->opc == OPC_META_FO) { + printf(", off=%d", instr->fo.off); } if (is_flow(instr) && instr->cat0.target) { diff --git a/src/gallium/drivers/freedreno/ir3/ir3_ra.c b/src/gallium/drivers/freedreno/ir3/ir3_ra.c index bcad96e8a30..ed3030d722a 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_ra.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_ra.c @@ -31,6 +31,8 @@ #include "util/ralloc.h" #include "util/bitset.h" +#include "freedreno_util.h" + #include "ir3.h" #include "ir3_compiler.h" @@ -342,7 +344,7 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, return id->defn; } - if (is_meta(instr) && (instr->opc == OPC_META_FI)) { + if (instr->opc == OPC_META_FI) { /* What about the case where collect is subset of array, we * need to find the distance between where actual array starts * and fanin.. that probably doesn't happen currently. 
@@ -436,7 +438,7 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, } } - if (is_meta(d) && (d->opc == OPC_META_PHI)) { + if (d->opc == OPC_META_PHI) { /* we have already inserted parallel-copies into * the phi, so we don't need to chase definers */ @@ -456,7 +458,7 @@ get_definer(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, d = dd; } - if (is_meta(d) && (d->opc == OPC_META_FO)) { + if (d->opc == OPC_META_FO) { struct ir3_instruction *dd; int dsz, doff; @@ -810,6 +812,22 @@ ra_compute_livein_liveout(struct ir3_ra_ctx *ctx) } static void +print_bitset(const char *name, BITSET_WORD *bs, unsigned cnt) +{ + bool first = true; + debug_printf(" %s:", name); + for (unsigned i = 0; i < cnt; i++) { + if (BITSET_TEST(bs, i)) { + if (!first) + debug_printf(","); + debug_printf(" %04u", i); + first = false; + } + } + debug_printf("\n"); +} + +static void ra_add_interference(struct ir3_ra_ctx *ctx) { struct ir3 *ir = ctx->ir; @@ -831,12 +849,24 @@ ra_add_interference(struct ir3_ra_ctx *ctx) /* update per-block livein/liveout: */ while (ra_compute_livein_liveout(ctx)) {} + if (fd_mesa_debug & FD_DBG_OPTMSGS) { + debug_printf("AFTER LIVEIN/OUT:\n"); + ir3_print(ir); + list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { + struct ir3_ra_block_data *bd = block->data; + debug_printf("block%u:\n", block_id(block)); + print_bitset("def", bd->def, ctx->alloc_count); + print_bitset("use", bd->use, ctx->alloc_count); + print_bitset("l/i", bd->livein, ctx->alloc_count); + print_bitset("l/o", bd->liveout, ctx->alloc_count); + } + } + /* extend start/end ranges based on livein/liveout info from cfg: */ - unsigned bitset_words = BITSET_WORDS(ctx->alloc_count); list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { struct ir3_ra_block_data *bd = block->data; - for (unsigned i = 0; i < bitset_words; i++) { + for (unsigned i = 0; i < ctx->alloc_count; i++) { if (BITSET_TEST(bd->livein, i)) { ctx->def[i] = MIN2(ctx->def[i], block->start_ip); ctx->use[i] = MAX2(ctx->use[i], block->start_ip); @@ -869,7 +899,7 @@ ra_add_interference(struct ir3_ra_ctx *ctx) /* some instructions need fix-up if dst register is half precision: */ static void fixup_half_instr_dst(struct ir3_instruction *instr) { - switch (instr->category) { + switch (opc_cat(instr->opc)) { case 1: /* move instructions */ instr->cat1.dst_type = half_type(instr->cat1.dst_type); break; @@ -910,10 +940,12 @@ static void fixup_half_instr_dst(struct ir3_instruction *instr) /* some instructions need fix-up if src register is half precision: */ static void fixup_half_instr_src(struct ir3_instruction *instr) { - switch (instr->category) { - case 1: /* move instructions */ + switch (instr->opc) { + case OPC_MOV: instr->cat1.src_type = half_type(instr->cat1.src_type); break; + default: + break; } } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_sched.c b/src/gallium/drivers/freedreno/ir3/ir3_sched.c index 8f640febc5d..b56da304f92 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_sched.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_sched.c @@ -511,8 +511,7 @@ sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block) * occupied), and move remaining to depth sorted list: */ list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) { - if (is_meta(instr) && ((instr->opc == OPC_META_INPUT) || - (instr->opc == OPC_META_PHI))) { + if ((instr->opc == OPC_META_INPUT) || (instr->opc == OPC_META_PHI)) { schedule(ctx, instr); } else { ir3_insert_by_depth(instr, &ctx->depth_list); @@ 
-627,14 +626,29 @@ static void sched_insert_parallel_copies(struct ir3_block *block) { list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { - if (is_meta(instr) && (instr->opc == OPC_META_PHI)) { - struct ir3_register *reg; + if (instr->opc == OPC_META_PHI) { + struct ir3_register *reg, *reg2; foreach_src(reg, instr) { struct ir3_instruction *src = reg->instr; - struct ir3_instruction *mov = - ir3_MOV(src->block, src, TYPE_U32); - mov->regs[0]->flags |= IR3_REG_PHI_SRC; - mov->regs[0]->instr = instr; + struct ir3_instruction *mov = NULL; + + /* after CP we could end up w/ duplicate phi srcs: */ + foreach_src(reg2, instr) { + if (reg == reg2) + break; + /* reg2 is before reg1 so already an inserted mov: */ + else if (reg2->instr->regs[1]->instr == src) { + mov = reg2->instr; + break; + } + } + + if (!mov) { + mov = ir3_MOV(src->block, src, TYPE_U32); + mov->regs[0]->flags |= IR3_REG_PHI_SRC; + mov->regs[0]->instr = instr; + } + reg->instr = mov; } } diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index f4aa310ecdc..68e32e51c34 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -269,6 +269,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS: diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c index 7812c826250..142d6f1fa21 100644 --- a/src/gallium/drivers/ilo/ilo_screen.c +++ b/src/gallium/drivers/ilo/ilo_screen.c @@ -498,6 +498,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 2529b546564..6a5f906adc6 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -319,6 +319,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; } /* should only get here on unhandled cases */ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 66e7b2e8243..fea388685fa 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -2824,7 +2824,7 @@ FlatteningPass::visit(BasicBlock *bb) !isSurfaceOp(insn->op) && // not confirmed insn->op != OP_LINTERP && // probably just nve4 insn->op != OP_PINTERP && // probably just nve4 - ((insn->op != OP_LOAD && insn->op != OP_STORE) || + ((insn->op != OP_LOAD && insn->op != OP_STORE && insn->op != OP_ATOM) || (typeSizeof(insn->dType) <= 4 && !insn->src(0).isIndirect(0))) && !insn->isNop()) { insn->join = 1; diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c index b105c6aeb80..db7c2d15fb1 100644 --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c @@ -192,6 +192,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: case 
PIPE_CAP_PCI_FUNCTION: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index ba5e5003b69..20fb61b51f4 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -245,6 +245,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index ec2340ee0c3..c41912a6037 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -251,6 +251,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; case PIPE_CAP_VENDOR_ID: diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 1c3bb64f0e4..b3a7f049e10 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -214,6 +214,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: case PIPE_CAP_QUERY_BUFFER_OBJECT: case PIPE_CAP_QUERY_MEMORY_INFO: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; /* SWTCL-only features. */ diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index f4b669000dc..6f171487f92 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -83,29 +83,26 @@ writable images will consume TEX slots, VTX slots too because of linear indexing */ -struct r600_resource* r600_compute_buffer_alloc_vram( - struct r600_screen *screen, - unsigned size) +struct r600_resource *r600_compute_buffer_alloc_vram(struct r600_screen *screen, + unsigned size) { - struct pipe_resource * buffer = NULL; + struct pipe_resource *buffer = NULL; assert(size); - buffer = pipe_buffer_create( - (struct pipe_screen*) screen, - PIPE_BIND_CUSTOM, - PIPE_USAGE_IMMUTABLE, - size); + buffer = pipe_buffer_create((struct pipe_screen*) screen, + PIPE_BIND_CUSTOM, + PIPE_USAGE_IMMUTABLE, + size); return (struct r600_resource *)buffer; } -static void evergreen_set_rat( - struct r600_pipe_compute *pipe, - unsigned id, - struct r600_resource* bo, - int start, - int size) +static void evergreen_set_rat(struct r600_pipe_compute *pipe, + unsigned id, + struct r600_resource *bo, + int start, + int size) { struct pipe_surface rat_templ; struct r600_surface *surf = NULL; @@ -145,11 +142,10 @@ static void evergreen_set_rat( evergreen_init_color_surface_rat(rctx, surf); } -static void evergreen_cs_set_vertex_buffer( - struct r600_context * rctx, - unsigned vb_index, - unsigned offset, - struct pipe_resource * buffer) +static void evergreen_cs_set_vertex_buffer(struct r600_context *rctx, + unsigned vb_index, + unsigned offset, + struct pipe_resource *buffer) { struct r600_vertexbuf_state *state = &rctx->cs_vertex_buffer_state; struct pipe_vertex_buffer *vb = &state->vb[vb_index]; @@ -166,12 +162,11 @@ static void evergreen_cs_set_vertex_buffer( r600_mark_atom_dirty(rctx, &state->atom); } -static void evergreen_cs_set_constant_buffer( - struct r600_context * 
rctx, - unsigned cb_index, - unsigned offset, - unsigned size, - struct pipe_resource * buffer) +static void evergreen_cs_set_constant_buffer(struct r600_context *rctx, + unsigned cb_index, + unsigned offset, + unsigned size, + struct pipe_resource *buffer) { struct pipe_constant_buffer cb; cb.buffer_size = size; @@ -182,16 +177,6 @@ static void evergreen_cs_set_constant_buffer( rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_COMPUTE, cb_index, &cb); } -static const struct u_resource_vtbl r600_global_buffer_vtbl = -{ - u_default_resource_get_handle, /* get_handle */ - r600_compute_global_buffer_destroy, /* resource_destroy */ - r600_compute_global_transfer_map, /* transfer_map */ - r600_compute_global_transfer_flush_region,/* transfer_flush_region */ - r600_compute_global_transfer_unmap, /* transfer_unmap */ - r600_compute_global_transfer_inline_write /* transfer_inline_write */ -}; - /* We need to define these R600 registers here, because we can't include * evergreend.h and r600d.h. */ @@ -256,33 +241,32 @@ static void r600_destroy_shader(struct r600_bytecode *bc) FREE(bc->bytecode); } -void *evergreen_create_compute_state( - struct pipe_context *ctx_, - const const struct pipe_compute_state *cso) +static void *evergreen_create_compute_state(struct pipe_context *ctx, + const const struct pipe_compute_state *cso) { - struct r600_context *ctx = (struct r600_context *)ctx_; + struct r600_context *rctx = (struct r600_context *)ctx; struct r600_pipe_compute *shader = CALLOC_STRUCT(r600_pipe_compute); #ifdef HAVE_OPENCL - const struct pipe_llvm_program_header * header; + const struct pipe_llvm_program_header *header; const char *code; void *p; boolean use_kill; - COMPUTE_DBG(ctx->screen, "*** evergreen_create_compute_state\n"); + COMPUTE_DBG(rctx->screen, "*** evergreen_create_compute_state\n"); header = cso->prog; code = cso->prog + sizeof(struct pipe_llvm_program_header); radeon_shader_binary_init(&shader->binary); radeon_elf_read(code, header->num_bytes, &shader->binary); r600_create_shader(&shader->bc, &shader->binary, &use_kill); - shader->code_bo = r600_compute_buffer_alloc_vram(ctx->screen, + shader->code_bo = r600_compute_buffer_alloc_vram(rctx->screen, shader->bc.ndw * 4); - p = r600_buffer_map_sync_with_rings(&ctx->b, shader->code_bo, PIPE_TRANSFER_WRITE); + p = r600_buffer_map_sync_with_rings(&rctx->b, shader->code_bo, PIPE_TRANSFER_WRITE); memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4); - ctx->b.ws->buffer_unmap(shader->code_bo->buf); + rctx->b.ws->buffer_unmap(shader->code_bo->buf); #endif - shader->ctx = ctx; + shader->ctx = rctx; shader->local_size = cso->req_local_mem; shader->private_size = cso->req_private_mem; shader->input_size = cso->req_input_mem; @@ -290,12 +274,13 @@ void *evergreen_create_compute_state( return shader; } -void evergreen_delete_compute_state(struct pipe_context *ctx_, void* state) +static void evergreen_delete_compute_state(struct pipe_context *ctx, void *state) { - struct r600_context *ctx = (struct r600_context *)ctx_; - COMPUTE_DBG(ctx->screen, "*** evergreen_delete_compute_state\n"); + struct r600_context *rctx = (struct r600_context *)ctx; struct r600_pipe_compute *shader = state; + COMPUTE_DBG(rctx->screen, "*** evergreen_delete_compute_state\n"); + if (!shader) return; @@ -307,13 +292,13 @@ void evergreen_delete_compute_state(struct pipe_context *ctx_, void* state) FREE(shader); } -static void evergreen_bind_compute_state(struct pipe_context *ctx_, void *state) +static void evergreen_bind_compute_state(struct pipe_context *ctx, void 
*state) { - struct r600_context *ctx = (struct r600_context *)ctx_; + struct r600_context *rctx = (struct r600_context *)ctx; - COMPUTE_DBG(ctx->screen, "*** evergreen_bind_compute_state\n"); + COMPUTE_DBG(rctx->screen, "*** evergreen_bind_compute_state\n"); - ctx->cs_shader_state.shader = (struct r600_pipe_compute *)state; + rctx->cs_shader_state.shader = (struct r600_pipe_compute *)state; } /* The kernel parameters are stored a vtx buffer (ID=0), besides the explicit @@ -327,23 +312,20 @@ static void evergreen_bind_compute_state(struct pipe_context *ctx_, void *state) * (x,y,z) * DWORDS 9+ : Kernel parameters */ -void evergreen_compute_upload_input( - struct pipe_context *ctx_, - const uint *block_layout, - const uint *grid_layout, - const void *input) +static void evergreen_compute_upload_input(struct pipe_context *ctx, + const struct pipe_grid_info *info) { - struct r600_context *ctx = (struct r600_context *)ctx_; - struct r600_pipe_compute *shader = ctx->cs_shader_state.shader; + struct r600_context *rctx = (struct r600_context *)ctx; + struct r600_pipe_compute *shader = rctx->cs_shader_state.shader; unsigned i; /* We need to reserve 9 dwords (36 bytes) for implicit kernel * parameters. */ unsigned input_size = shader->input_size + 36; - uint32_t * num_work_groups_start; - uint32_t * global_size_start; - uint32_t * local_size_start; - uint32_t * kernel_parameters_start; + uint32_t *num_work_groups_start; + uint32_t *global_size_start; + uint32_t *local_size_start; + uint32_t *kernel_parameters_start; struct pipe_box box; struct pipe_transfer *transfer = NULL; @@ -354,12 +336,12 @@ void evergreen_compute_upload_input( if (!shader->kernel_param) { /* Add space for the grid dimensions */ shader->kernel_param = (struct r600_resource *) - pipe_buffer_create(ctx_->screen, PIPE_BIND_CUSTOM, + pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, input_size); } u_box_1d(0, input_size, &box); - num_work_groups_start = ctx_->transfer_map(ctx_, + num_work_groups_start = ctx->transfer_map(ctx, (struct pipe_resource*)shader->kernel_param, 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE, &box, &transfer); @@ -368,34 +350,33 @@ void evergreen_compute_upload_input( kernel_parameters_start = local_size_start + (3 * (sizeof(uint)) / 4); /* Copy the work group size */ - memcpy(num_work_groups_start, grid_layout, 3 * sizeof(uint)); + memcpy(num_work_groups_start, info->grid, 3 * sizeof(uint)); /* Copy the global size */ for (i = 0; i < 3; i++) { - global_size_start[i] = grid_layout[i] * block_layout[i]; + global_size_start[i] = info->grid[i] * info->block[i]; } /* Copy the local dimensions */ - memcpy(local_size_start, block_layout, 3 * sizeof(uint)); + memcpy(local_size_start, info->block, 3 * sizeof(uint)); /* Copy the kernel inputs */ - memcpy(kernel_parameters_start, input, shader->input_size); + memcpy(kernel_parameters_start, info->input, shader->input_size); for (i = 0; i < (input_size / 4); i++) { - COMPUTE_DBG(ctx->screen, "input %i : %u\n", i, + COMPUTE_DBG(rctx->screen, "input %i : %u\n", i, ((unsigned*)num_work_groups_start)[i]); } - ctx_->transfer_unmap(ctx_, transfer); + ctx->transfer_unmap(ctx, transfer); /* ID=0 is reserved for the parameters */ - evergreen_cs_set_constant_buffer(ctx, 0, 0, input_size, + evergreen_cs_set_constant_buffer(rctx, 0, 0, input_size, (struct pipe_resource*)shader->kernel_param); } -static void evergreen_emit_direct_dispatch( - struct r600_context *rctx, - const uint *block_layout, const uint *grid_layout) +static void 
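evergreen_compute_upload_input() above reserves 9 implicit dwords (36 bytes) ahead of the user kernel arguments: the work-group count, the derived global size, and the local size, in that order. A hedged sketch of the same layout with illustrative names (not the driver's actual structures):

#include <stdint.h>

struct compute_input_header {
   uint32_t num_work_groups[3];  /* DWORDS 0-2: info->grid          */
   uint32_t global_size[3];      /* DWORDS 3-5: grid[i] * block[i]  */
   uint32_t local_size[3];       /* DWORDS 6-8: info->block         */
   /* DWORDS 9+ : user kernel parameters follow this header. */
};

static void
fill_input_header(struct compute_input_header *h,
                  const uint32_t grid[3], const uint32_t block[3])
{
   for (int i = 0; i < 3; i++) {
      h->num_work_groups[i] = grid[i];
      h->global_size[i]     = grid[i] * block[i];
      h->local_size[i]      = block[i];
   }
}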
evergreen_emit_dispatch(struct r600_context *rctx, + const struct pipe_grid_info *info) { int i; struct radeon_winsys_cs *cs = rctx->b.gfx.cs; @@ -411,15 +392,15 @@ static void evergreen_emit_direct_dispatch( /* Calculate group_size/grid_size */ for (i = 0; i < 3; i++) { - group_size *= block_layout[i]; + group_size *= info->block[i]; } for (i = 0; i < 3; i++) { - grid_size *= grid_layout[i]; + grid_size *= info->grid[i]; } /* num_waves = ceil((tg_size.x * tg_size.y, tg_size.z) / (16 * num_pipes)) */ - num_waves = (block_layout[0] * block_layout[1] * block_layout[2] + + num_waves = (info->block[0] * info->block[1] * info->block[2] + wave_divisor - 1) / wave_divisor; COMPUTE_DBG(rctx->screen, "Using %u pipes, " @@ -438,9 +419,9 @@ static void evergreen_emit_direct_dispatch( group_size); radeon_compute_set_context_reg_seq(cs, R_0286EC_SPI_COMPUTE_NUM_THREAD_X, 3); - radeon_emit(cs, block_layout[0]); /* R_0286EC_SPI_COMPUTE_NUM_THREAD_X */ - radeon_emit(cs, block_layout[1]); /* R_0286F0_SPI_COMPUTE_NUM_THREAD_Y */ - radeon_emit(cs, block_layout[2]); /* R_0286F4_SPI_COMPUTE_NUM_THREAD_Z */ + radeon_emit(cs, info->block[0]); /* R_0286EC_SPI_COMPUTE_NUM_THREAD_X */ + radeon_emit(cs, info->block[1]); /* R_0286F0_SPI_COMPUTE_NUM_THREAD_Y */ + radeon_emit(cs, info->block[2]); /* R_0286F4_SPI_COMPUTE_NUM_THREAD_Z */ if (rctx->b.chip_class < CAYMAN) { assert(lds_size <= 8192); @@ -455,22 +436,22 @@ static void evergreen_emit_direct_dispatch( /* Dispatch packet */ radeon_emit(cs, PKT3C(PKT3_DISPATCH_DIRECT, 3, 0)); - radeon_emit(cs, grid_layout[0]); - radeon_emit(cs, grid_layout[1]); - radeon_emit(cs, grid_layout[2]); + radeon_emit(cs, info->grid[0]); + radeon_emit(cs, info->grid[1]); + radeon_emit(cs, info->grid[2]); /* VGT_DISPATCH_INITIATOR = COMPUTE_SHADER_EN */ radeon_emit(cs, 1); } -static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, - const uint *grid_layout) +static void compute_emit_cs(struct r600_context *rctx, + const struct pipe_grid_info *info) { - struct radeon_winsys_cs *cs = ctx->b.gfx.cs; + struct radeon_winsys_cs *cs = rctx->b.gfx.cs; unsigned i; /* make sure that the gfx ring is only one active */ - if (ctx->b.dma.cs && ctx->b.dma.cs->cdw) { - ctx->b.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL); + if (rctx->b.dma.cs && rctx->b.dma.cs->cdw) { + rctx->b.dma.flush(rctx, RADEON_FLUSH_ASYNC, NULL); } /* Initialize all the compute-related registers. @@ -478,20 +459,20 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, * See evergreen_init_atom_start_compute_cs() in this file for the list * of registers initialized by the start_compute_cs_cmd atom. */ - r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd); + r600_emit_command_buffer(cs, &rctx->start_compute_cs_cmd); /* emit config state */ - if (ctx->b.chip_class == EVERGREEN) - r600_emit_atom(ctx, &ctx->config_state.atom); + if (rctx->b.chip_class == EVERGREEN) + r600_emit_atom(rctx, &rctx->config_state.atom); - ctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV; - r600_flush_emit(ctx); + rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV; + r600_flush_emit(rctx); /* Emit colorbuffers. 
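The dispatch emission above derives the wave count by ceil-dividing the threadgroup size by 16 times the number of pipes, as the in-code comment describes. A small sketch of that arithmetic; num_pipes is assumed to come from the device configuration:

static unsigned
compute_num_waves(const unsigned block[3], unsigned num_pipes)
{
   unsigned tg_size = block[0] * block[1] * block[2];
   unsigned wave_divisor = 16 * num_pipes;

   /* Round up so a partially filled wave still gets launched. */
   return (tg_size + wave_divisor - 1) / wave_divisor;
}

For example, a 8x8x1 block on a 2-pipe part gives (64 + 31) / 32 = 2 waves.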
*/ /* XXX support more than 8 colorbuffers (the offsets are not a multiple of 0x3C for CB8-11) */ - for (i = 0; i < 8 && i < ctx->framebuffer.state.nr_cbufs; i++) { - struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i]; - unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.gfx, + for (i = 0; i < 8 && i < rctx->framebuffer.state.nr_cbufs; i++) { + struct r600_surface *cb = (struct r600_surface*)rctx->framebuffer.state.cbufs[i]; + unsigned reloc = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, (struct r600_resource*)cb->base.texture, RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER); @@ -520,51 +501,51 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, /* Set CB_TARGET_MASK XXX: Use cb_misc_state */ radeon_compute_set_context_reg(cs, R_028238_CB_TARGET_MASK, - ctx->compute_cb_target_mask); + rctx->compute_cb_target_mask); /* Emit vertex buffer state */ - ctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask); - r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom); + rctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(rctx->cs_vertex_buffer_state.dirty_mask); + r600_emit_atom(rctx, &rctx->cs_vertex_buffer_state.atom); /* Emit constant buffer state */ - r600_emit_atom(ctx, &ctx->constbuf_state[PIPE_SHADER_COMPUTE].atom); + r600_emit_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_COMPUTE].atom); /* Emit sampler state */ - r600_emit_atom(ctx, &ctx->samplers[PIPE_SHADER_COMPUTE].states.atom); + r600_emit_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].states.atom); /* Emit sampler view (texture resource) state */ - r600_emit_atom(ctx, &ctx->samplers[PIPE_SHADER_COMPUTE].views.atom); + r600_emit_atom(rctx, &rctx->samplers[PIPE_SHADER_COMPUTE].views.atom); /* Emit compute shader state */ - r600_emit_atom(ctx, &ctx->cs_shader_state.atom); + r600_emit_atom(rctx, &rctx->cs_shader_state.atom); /* Emit dispatch state and dispatch packet */ - evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout); + evergreen_emit_dispatch(rctx, info); /* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff */ - ctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE | + rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE | R600_CONTEXT_INV_VERTEX_CACHE | R600_CONTEXT_INV_TEX_CACHE; - r600_flush_emit(ctx); - ctx->b.flags = 0; + r600_flush_emit(rctx); + rctx->b.flags = 0; - if (ctx->b.chip_class >= CAYMAN) { - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4); + if (rctx->b.chip_class >= CAYMAN) { + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4)); /* DEALLOC_STATE prevents the GPU from hanging when a * SURFACE_SYNC packet is emitted some time after a DISPATCH_DIRECT * with any of the CB*_DEST_BASE_ENA or DB_DEST_BASE_ENA bits set. 
*/ - cs->buf[cs->cdw++] = PKT3C(PKT3_DEALLOC_STATE, 0, 0); - cs->buf[cs->cdw++] = 0; + radeon_emit(cs, PKT3C(PKT3_DEALLOC_STATE, 0, 0)); + radeon_emit(cs, 0); } #if 0 - COMPUTE_DBG(ctx->screen, "cdw: %i\n", cs->cdw); + COMPUTE_DBG(rctx->screen, "cdw: %i\n", cs->cdw); for (i = 0; i < cs->cdw; i++) { - COMPUTE_DBG(ctx->screen, "%4i : 0x%08X\n", i, cs->buf[i]); + COMPUTE_DBG(rctx->screen, "%4i : 0x%08X\n", i, cs->buf[i]); } #endif @@ -574,9 +555,8 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, /** * Emit function for r600_cs_shader_state atom */ -void evergreen_emit_cs_shader( - struct r600_context *rctx, - struct r600_atom *atom) +void evergreen_emit_cs_shader(struct r600_context *rctx, + struct r600_atom *atom) { struct r600_cs_shader_state *state = (struct r600_cs_shader_state*)atom; @@ -604,35 +584,35 @@ void evergreen_emit_cs_shader( RADEON_PRIO_USER_SHADER)); } -static void evergreen_launch_grid( - struct pipe_context *ctx_, const struct pipe_grid_info *info) +static void evergreen_launch_grid(struct pipe_context *ctx, + const struct pipe_grid_info *info) { - struct r600_context *ctx = (struct r600_context *)ctx_; + struct r600_context *rctx = (struct r600_context *)ctx; #ifdef HAVE_OPENCL - struct r600_pipe_compute *shader = ctx->cs_shader_state.shader; + struct r600_pipe_compute *shader = rctx->cs_shader_state.shader; boolean use_kill; - ctx->cs_shader_state.pc = info->pc; + rctx->cs_shader_state.pc = info->pc; /* Get the config information for this kernel. */ r600_shader_binary_read_config(&shader->binary, &shader->bc, info->pc, &use_kill); #endif - COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", info->pc); + COMPUTE_DBG(rctx->screen, "*** evergreen_launch_grid: pc = %u\n", info->pc); - evergreen_compute_upload_input(ctx_, info->block, info->grid, info->input); - compute_emit_cs(ctx, info->block, info->grid); + evergreen_compute_upload_input(ctx, info); + compute_emit_cs(rctx, info); } -static void evergreen_set_compute_resources(struct pipe_context * ctx_, - unsigned start, unsigned count, - struct pipe_surface ** surfaces) +static void evergreen_set_compute_resources(struct pipe_context *ctx, + unsigned start, unsigned count, + struct pipe_surface **surfaces) { - struct r600_context *ctx = (struct r600_context *)ctx_; + struct r600_context *rctx = (struct r600_context *)ctx; struct r600_surface **resources = (struct r600_surface **)surfaces; - COMPUTE_DBG(ctx->screen, "*** evergreen_set_compute_resources: start = %u count = %u\n", + COMPUTE_DBG(rctx->screen, "*** evergreen_set_compute_resources: start = %u count = %u\n", start, count); for (unsigned i = 0; i < count; i++) { @@ -646,31 +626,31 @@ static void evergreen_set_compute_resources(struct pipe_context * ctx_, if (resources[i]->base.writable) { assert(i+1 < 12); - evergreen_set_rat(ctx->cs_shader_state.shader, i+1, + evergreen_set_rat(rctx->cs_shader_state.shader, i+1, (struct r600_resource *)resources[i]->base.texture, buffer->chunk->start_in_dw*4, resources[i]->base.texture->width0); } - evergreen_cs_set_vertex_buffer(ctx, vtx_id, + evergreen_cs_set_vertex_buffer(rctx, vtx_id, buffer->chunk->start_in_dw * 4, resources[i]->base.texture); } } } -static void evergreen_set_global_binding( - struct pipe_context *ctx_, unsigned first, unsigned n, - struct pipe_resource **resources, - uint32_t **handles) +static void evergreen_set_global_binding(struct pipe_context *ctx, + unsigned first, unsigned n, + struct pipe_resource **resources, + uint32_t **handles) { - struct 
r600_context *ctx = (struct r600_context *)ctx_; - struct compute_memory_pool *pool = ctx->screen->global_pool; + struct r600_context *rctx = (struct r600_context *)ctx; + struct compute_memory_pool *pool = rctx->screen->global_pool; struct r600_resource_global **buffers = (struct r600_resource_global **)resources; unsigned i; - COMPUTE_DBG(ctx->screen, "*** evergreen_set_global_binding first = %u n = %u\n", + COMPUTE_DBG(rctx->screen, "*** evergreen_set_global_binding first = %u n = %u\n", first, n); if (!resources) { @@ -687,7 +667,7 @@ static void evergreen_set_global_binding( buffers[i]->chunk->status |= ITEM_FOR_PROMOTING; } - if (compute_memory_finalize_pending(pool, ctx_) == -1) { + if (compute_memory_finalize_pending(pool, ctx) == -1) { /* XXX: Unset */ return; } @@ -705,8 +685,8 @@ static void evergreen_set_global_binding( *(handles[i]) = util_cpu_to_le32(handle); } - evergreen_set_rat(ctx->cs_shader_state.shader, 0, pool->bo, 0, pool->size_in_dw * 4); - evergreen_cs_set_vertex_buffer(ctx, 1, 0, + evergreen_set_rat(rctx->cs_shader_state.shader, 0, pool->bo, 0, pool->size_in_dw * 4); + evergreen_cs_set_vertex_buffer(rctx, 1, 0, (struct pipe_resource*)pool->bo); } @@ -721,9 +701,9 @@ static void evergreen_set_global_binding( * functions evergreen_init_atom_start_cs or cayman_init_atom_start_cs depending * on the GPU family. */ -void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) +void evergreen_init_atom_start_compute_cs(struct r600_context *rctx) { - struct r600_command_buffer *cb = &ctx->start_compute_cs_cmd; + struct r600_command_buffer *cb = &rctx->start_compute_cs_cmd; int num_threads; int num_stack_entries; @@ -742,7 +722,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) r600_store_value(cb, PKT3(PKT3_EVENT_WRITE, 0, 0)); r600_store_value(cb, EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4)); - switch (ctx->b.family) { + switch (rctx->b.family) { case CHIP_CEDAR: default: num_threads = 128; @@ -788,18 +768,18 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) } /* Config Registers */ - if (ctx->b.chip_class < CAYMAN) - evergreen_init_common_regs(ctx, cb, ctx->b.chip_class, ctx->b.family, - ctx->screen->b.info.drm_minor); + if (rctx->b.chip_class < CAYMAN) + evergreen_init_common_regs(rctx, cb, rctx->b.chip_class, rctx->b.family, + rctx->screen->b.info.drm_minor); else - cayman_init_common_regs(cb, ctx->b.chip_class, ctx->b.family, - ctx->screen->b.info.drm_minor); + cayman_init_common_regs(cb, rctx->b.chip_class, rctx->b.family, + rctx->screen->b.info.drm_minor); /* The primitive type always needs to be POINTLIST for compute. */ r600_store_config_reg(cb, R_008958_VGT_PRIMITIVE_TYPE, V_008958_DI_PT_POINTLIST); - if (ctx->b.chip_class < CAYMAN) { + if (rctx->b.chip_class < CAYMAN) { /* These registers control which simds can be used by each stage. * The default for these registers is 0xffffffff, which means @@ -849,7 +829,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) * allocate the appropriate amount of LDS dwords using the * CM_R_0288E8_SQ_LDS_ALLOC register. 
*/ - if (ctx->b.chip_class < CAYMAN) { + if (rctx->b.chip_class < CAYMAN) { r600_store_config_reg(cb, R_008E2C_SQ_LDS_RESOURCE_MGMT, S_008E2C_NUM_PS_LDS(0x0000) | S_008E2C_NUM_LS_LDS(8192)); } else { @@ -860,7 +840,7 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) /* Context Registers */ - if (ctx->b.chip_class < CAYMAN) { + if (rctx->b.chip_class < CAYMAN) { /* workaround for hw issues with dyn gpr - must set all limits * to 240 instead of 0, 0x1e == 240 / 8 */ @@ -902,86 +882,26 @@ void evergreen_init_atom_start_compute_cs(struct r600_context *ctx) eg_store_loop_const(cb, R_03A200_SQ_LOOP_CONST_0 + (160 * 4), 0x1000FFF); } -void evergreen_init_compute_state_functions(struct r600_context *ctx) -{ - ctx->b.b.create_compute_state = evergreen_create_compute_state; - ctx->b.b.delete_compute_state = evergreen_delete_compute_state; - ctx->b.b.bind_compute_state = evergreen_bind_compute_state; -// ctx->context.create_sampler_view = evergreen_compute_create_sampler_view; - ctx->b.b.set_compute_resources = evergreen_set_compute_resources; - ctx->b.b.set_global_binding = evergreen_set_global_binding; - ctx->b.b.launch_grid = evergreen_launch_grid; - -} - -struct pipe_resource *r600_compute_global_buffer_create( - struct pipe_screen *screen, - const struct pipe_resource *templ) +void evergreen_init_compute_state_functions(struct r600_context *rctx) { - struct r600_resource_global* result = NULL; - struct r600_screen* rscreen = NULL; - int size_in_dw = 0; - - assert(templ->target == PIPE_BUFFER); - assert(templ->bind & PIPE_BIND_GLOBAL); - assert(templ->array_size == 1 || templ->array_size == 0); - assert(templ->depth0 == 1 || templ->depth0 == 0); - assert(templ->height0 == 1 || templ->height0 == 0); - - result = (struct r600_resource_global*) - CALLOC(sizeof(struct r600_resource_global), 1); - rscreen = (struct r600_screen*)screen; - - COMPUTE_DBG(rscreen, "*** r600_compute_global_buffer_create\n"); - COMPUTE_DBG(rscreen, "width = %u array_size = %u\n", templ->width0, - templ->array_size); - - result->base.b.vtbl = &r600_global_buffer_vtbl; - result->base.b.b = *templ; - result->base.b.b.screen = screen; - pipe_reference_init(&result->base.b.b.reference, 1); - - size_in_dw = (templ->width0+3) / 4; - - result->chunk = compute_memory_alloc(rscreen->global_pool, size_in_dw); - - if (result->chunk == NULL) - { - free(result); - return NULL; - } - - return &result->base.b.b; -} - -void r600_compute_global_buffer_destroy( - struct pipe_screen *screen, - struct pipe_resource *res) -{ - struct r600_resource_global* buffer = NULL; - struct r600_screen* rscreen = NULL; - - assert(res->target == PIPE_BUFFER); - assert(res->bind & PIPE_BIND_GLOBAL); - - buffer = (struct r600_resource_global*)res; - rscreen = (struct r600_screen*)screen; + rctx->b.b.create_compute_state = evergreen_create_compute_state; + rctx->b.b.delete_compute_state = evergreen_delete_compute_state; + rctx->b.b.bind_compute_state = evergreen_bind_compute_state; +// rctx->context.create_sampler_view = evergreen_compute_create_sampler_view; + rctx->b.b.set_compute_resources = evergreen_set_compute_resources; + rctx->b.b.set_global_binding = evergreen_set_global_binding; + rctx->b.b.launch_grid = evergreen_launch_grid; - compute_memory_free(rscreen->global_pool, buffer->chunk->id); - - buffer->chunk = NULL; - free(res); } -void *r600_compute_global_transfer_map( - struct pipe_context *ctx_, - struct pipe_resource *resource, - unsigned level, - unsigned usage, - const struct pipe_box *box, - struct pipe_transfer 
**ptransfer) +static void *r600_compute_global_transfer_map(struct pipe_context *ctx, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **ptransfer) { - struct r600_context *rctx = (struct r600_context*)ctx_; + struct r600_context *rctx = (struct r600_context*)ctx; struct compute_memory_pool *pool = rctx->screen->global_pool; struct r600_resource_global* buffer = (struct r600_resource_global*)resource; @@ -991,7 +911,7 @@ void *r600_compute_global_transfer_map( unsigned offset = box->x; if (is_item_in_pool(item)) { - compute_memory_demote_item(pool, item, ctx_); + compute_memory_demote_item(pool, item, ctx); } else { if (item->real_buffer == NULL) { @@ -1021,13 +941,12 @@ void *r600_compute_global_transfer_map( assert(box->z == 0); ///TODO: do it better, mapping is not possible if the pool is too big - return pipe_buffer_map_range(ctx_, dst, + return pipe_buffer_map_range(ctx, dst, offset, box->width, usage, ptransfer); } -void r600_compute_global_transfer_unmap( - struct pipe_context *ctx_, - struct pipe_transfer* transfer) +static void r600_compute_global_transfer_unmap(struct pipe_context *ctx, + struct pipe_transfer *transfer) { /* struct r600_resource_global are not real resources, they just map * to an offset within the compute memory pool. The function @@ -1042,23 +961,88 @@ void r600_compute_global_transfer_unmap( assert (!"This function should not be called"); } -void r600_compute_global_transfer_flush_region( - struct pipe_context *ctx_, - struct pipe_transfer *transfer, - const struct pipe_box *box) +static void r600_compute_global_transfer_flush_region(struct pipe_context *ctx, + struct pipe_transfer *transfer, + const struct pipe_box *box) { assert(0 && "TODO"); } -void r600_compute_global_transfer_inline_write( - struct pipe_context *pipe, - struct pipe_resource *resource, - unsigned level, - unsigned usage, - const struct pipe_box *box, - const void *data, - unsigned stride, - unsigned layer_stride) +static void r600_compute_global_transfer_inline_write(struct pipe_context *pipe, + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + const void *data, + unsigned stride, + unsigned layer_stride) { assert(0 && "TODO"); } + +static void r600_compute_global_buffer_destroy(struct pipe_screen *screen, + struct pipe_resource *res) +{ + struct r600_resource_global* buffer = NULL; + struct r600_screen* rscreen = NULL; + + assert(res->target == PIPE_BUFFER); + assert(res->bind & PIPE_BIND_GLOBAL); + + buffer = (struct r600_resource_global*)res; + rscreen = (struct r600_screen*)screen; + + compute_memory_free(rscreen->global_pool, buffer->chunk->id); + + buffer->chunk = NULL; + free(res); +} + +static const struct u_resource_vtbl r600_global_buffer_vtbl = +{ + u_default_resource_get_handle, /* get_handle */ + r600_compute_global_buffer_destroy, /* resource_destroy */ + r600_compute_global_transfer_map, /* transfer_map */ + r600_compute_global_transfer_flush_region,/* transfer_flush_region */ + r600_compute_global_transfer_unmap, /* transfer_unmap */ + r600_compute_global_transfer_inline_write /* transfer_inline_write */ +}; + +struct pipe_resource *r600_compute_global_buffer_create(struct pipe_screen *screen, + const struct pipe_resource *templ) +{ + struct r600_resource_global* result = NULL; + struct r600_screen* rscreen = NULL; + int size_in_dw = 0; + + assert(templ->target == PIPE_BUFFER); + assert(templ->bind & PIPE_BIND_GLOBAL); + assert(templ->array_size == 1 
|| templ->array_size == 0); + assert(templ->depth0 == 1 || templ->depth0 == 0); + assert(templ->height0 == 1 || templ->height0 == 0); + + result = (struct r600_resource_global*) + CALLOC(sizeof(struct r600_resource_global), 1); + rscreen = (struct r600_screen*)screen; + + COMPUTE_DBG(rscreen, "*** r600_compute_global_buffer_create\n"); + COMPUTE_DBG(rscreen, "width = %u array_size = %u\n", templ->width0, + templ->array_size); + + result->base.b.vtbl = &r600_global_buffer_vtbl; + result->base.b.b = *templ; + result->base.b.b.screen = screen; + pipe_reference_init(&result->base.b.b.reference, 1); + + size_in_dw = (templ->width0+3) / 4; + + result->chunk = compute_memory_alloc(rscreen->global_pool, size_in_dw); + + if (result->chunk == NULL) + { + free(result); + return NULL; + } + + return &result->base.b.b; +} diff --git a/src/gallium/drivers/r600/evergreen_compute.h b/src/gallium/drivers/r600/evergreen_compute.h index e4d3a38e415..3c178870d91 100644 --- a/src/gallium/drivers/r600/evergreen_compute.h +++ b/src/gallium/drivers/r600/evergreen_compute.h @@ -38,26 +38,11 @@ struct r600_resource_global { struct compute_memory_item *chunk; }; -void *evergreen_create_compute_state(struct pipe_context *ctx, const struct pipe_compute_state *cso); -void evergreen_delete_compute_state(struct pipe_context *ctx, void *state); -void evergreen_compute_upload_input(struct pipe_context *context, const uint *block_layout, const uint *grid_layout, const void *input); void evergreen_init_atom_start_compute_cs(struct r600_context *rctx); void evergreen_init_compute_state_functions(struct r600_context *rctx); void evergreen_emit_cs_shader(struct r600_context *rctx, struct r600_atom * atom); struct r600_resource* r600_compute_buffer_alloc_vram(struct r600_screen *screen, unsigned size); struct pipe_resource *r600_compute_global_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ); -void r600_compute_global_buffer_destroy(struct pipe_screen *screen, struct pipe_resource *res); -void *r600_compute_global_transfer_map( - struct pipe_context *ctx_, - struct pipe_resource *resource, - unsigned level, - unsigned usage, - const struct pipe_box *box, - struct pipe_transfer **ptransfer); -void r600_compute_global_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer* transfer); -void r600_compute_global_transfer_flush_region( struct pipe_context *, struct pipe_transfer *, const struct pipe_box *); -void r600_compute_global_transfer_inline_write( struct pipe_context *, struct pipe_resource *, unsigned level, - unsigned usage, const struct pipe_box *, const void *data, unsigned stride, unsigned layer_stride); #endif diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index c97e34121e3..36b808fbbca 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -281,6 +281,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_INVALIDATE_BUFFER: case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: case PIPE_CAP_QUERY_MEMORY_INFO: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 1; case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 381ad21a4e3..062c3193947 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -425,8 +425,9 @@ struct r600_common_context { unsigned flags; /* flush flags */ /* Queries. */ - /* The list of active queries. 
Only one query of each type can be active. */ + /* The list of active queries. */ int num_occlusion_queries; + int num_perfect_occlusion_queries; /* Keep track of non-timer queries, because they should be suspended * during context flushing. * The timer queries (TIME_ELAPSED) shouldn't be suspended for blits, diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c index f9a5721fb97..7a2d2ee7f31 100644 --- a/src/gallium/drivers/radeon/r600_query.c +++ b/src/gallium/drivers/radeon/r600_query.c @@ -414,14 +414,22 @@ static void r600_update_occlusion_query_state(struct r600_common_context *rctx, if (type == PIPE_QUERY_OCCLUSION_COUNTER || type == PIPE_QUERY_OCCLUSION_PREDICATE) { bool old_enable = rctx->num_occlusion_queries != 0; - bool enable; + bool old_perfect_enable = + rctx->num_perfect_occlusion_queries != 0; + bool enable, perfect_enable; rctx->num_occlusion_queries += diff; assert(rctx->num_occlusion_queries >= 0); + if (type == PIPE_QUERY_OCCLUSION_COUNTER) { + rctx->num_perfect_occlusion_queries += diff; + assert(rctx->num_perfect_occlusion_queries >= 0); + } + enable = rctx->num_occlusion_queries != 0; + perfect_enable = rctx->num_perfect_occlusion_queries != 0; - if (enable != old_enable) { + if (enable != old_enable || perfect_enable != old_perfect_enable) { rctx->set_occlusion_query_state(&rctx->b, enable); } } diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index 83fc0021227..4850b73f291 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -329,6 +329,7 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, struct r600_resource *res = (struct r600_resource*)resource; struct r600_texture *rtex = (struct r600_texture*)resource; struct radeon_bo_metadata metadata; + bool update_metadata = false; /* This is not supported now, but it might be required for OpenCL * interop in the future. @@ -337,29 +338,30 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, (resource->nr_samples > 1 || rtex->is_depth)) return false; - if (!res->is_shared) { - res->is_shared = true; - res->external_usage = usage; - - if (resource->target != PIPE_BUFFER) { - /* Since shader image stores don't support DCC on VI, - * disable it for external clients that want write - * access. - */ - if (usage & PIPE_HANDLE_USAGE_WRITE) - r600_texture_disable_dcc(rscreen, rtex); + if (resource->target != PIPE_BUFFER) { + /* Since shader image stores don't support DCC on VI, + * disable it for external clients that want write + * access. + */ + if (usage & PIPE_HANDLE_USAGE_WRITE && rtex->dcc_offset) { + r600_texture_disable_dcc(rscreen, rtex); + update_metadata = true; + } - if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) { - /* Eliminate fast clear (both CMASK and DCC) */ - r600_eliminate_fast_color_clear(rscreen, rtex); + if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) && + rtex->cmask.size) { + /* Eliminate fast clear (both CMASK and DCC) */ + r600_eliminate_fast_color_clear(rscreen, rtex); - /* Disable CMASK if flush_resource isn't going - * to be called. - */ - r600_texture_disable_cmask(rscreen, rtex); - } + /* Disable CMASK if flush_resource isn't going + * to be called. + */ + r600_texture_disable_cmask(rscreen, rtex); + update_metadata = true; + } - /* Set metadata. */ + /* Set metadata. 
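The r600_query.c hunk above tracks occlusion-counter queries separately from occlusion predicates, so the driver can later decide whether precise ZPASS counts are actually required. A condensed, illustrative sketch of that bookkeeping (not the driver's code):

#include <assert.h>
#include <stdbool.h>

struct occlusion_state {
   int num_occlusion_queries;          /* counters and predicates */
   int num_perfect_occlusion_queries;  /* counters only           */
};

/* diff is +1 when a query becomes active, -1 when it stops. */
static void
update_occlusion_counts(struct occlusion_state *s, bool is_counter, int diff)
{
   s->num_occlusion_queries += diff;
   assert(s->num_occlusion_queries >= 0);

   if (is_counter) {
      s->num_perfect_occlusion_queries += diff;
      assert(s->num_perfect_occlusion_queries >= 0);
   }
}

static bool occlusion_enabled(const struct occlusion_state *s)
{ return s->num_occlusion_queries > 0; }

static bool perfect_counts_needed(const struct occlusion_state *s)
{ return s->num_perfect_occlusion_queries > 0; }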
*/ + if (!res->is_shared || update_metadata) { r600_texture_init_metadata(rtex, &metadata); if (rscreen->query_opaque_metadata) rscreen->query_opaque_metadata(rscreen, rtex, @@ -367,8 +369,18 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen, rscreen->ws->buffer_set_metadata(res->buf, &metadata); } + } + + if (res->is_shared) { + /* USAGE_EXPLICIT_FLUSH must be cleared if at least one user + * doesn't set it. + */ + res->external_usage |= usage & ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH; + if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) + res->external_usage &= ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH; } else { - assert(res->external_usage == usage); + res->is_shared = true; + res->external_usage = usage; } return rscreen->ws->buffer_get_handle(res->buf, diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c index 474154e52ff..71741325af0 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c @@ -55,6 +55,13 @@ enum radeon_llvm_shader_type { RADEON_LLVM_SHADER_CS = 3, }; +enum radeon_llvm_calling_convention { + RADEON_LLVM_AMDGPU_VS = 87, + RADEON_LLVM_AMDGPU_GS = 88, + RADEON_LLVM_AMDGPU_PS = 89, + RADEON_LLVM_AMDGPU_CS = 90, +}; + void radeon_llvm_add_attribute(LLVMValueRef F, const char *name, int value) { char str[16]; @@ -71,27 +78,35 @@ void radeon_llvm_add_attribute(LLVMValueRef F, const char *name, int value) void radeon_llvm_shader_type(LLVMValueRef F, unsigned type) { enum radeon_llvm_shader_type llvm_type; + enum radeon_llvm_calling_convention calling_conv; switch (type) { case TGSI_PROCESSOR_VERTEX: case TGSI_PROCESSOR_TESS_CTRL: case TGSI_PROCESSOR_TESS_EVAL: llvm_type = RADEON_LLVM_SHADER_VS; + calling_conv = RADEON_LLVM_AMDGPU_VS; break; case TGSI_PROCESSOR_GEOMETRY: llvm_type = RADEON_LLVM_SHADER_GS; + calling_conv = RADEON_LLVM_AMDGPU_GS; break; case TGSI_PROCESSOR_FRAGMENT: llvm_type = RADEON_LLVM_SHADER_PS; + calling_conv = RADEON_LLVM_AMDGPU_PS; break; case TGSI_PROCESSOR_COMPUTE: llvm_type = RADEON_LLVM_SHADER_CS; + calling_conv = RADEON_LLVM_AMDGPU_CS; break; default: assert(0); } - radeon_llvm_add_attribute(F, "ShaderType", llvm_type); + if (HAVE_LLVM >= 0x309) + LLVMSetFunctionCallConv(F, calling_conv); + else + radeon_llvm_add_attribute(F, "ShaderType", llvm_type); } static void init_r600_target() diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index e0dbec5fb79..c5ea8b17119 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -246,14 +246,14 @@ si_flush_depth_textures(struct si_context *sctx, struct si_textures_info *textures) { unsigned i; - unsigned mask = textures->depth_texture_mask; + uint64_t mask = textures->depth_texture_mask; while (mask) { struct pipe_sampler_view *view; struct si_sampler_view *sview; struct r600_texture *tex; - i = u_bit_scan(&mask); + i = u_bit_scan64(&mask); view = textures->views.views[i]; assert(view); @@ -329,13 +329,13 @@ si_decompress_sampler_color_textures(struct si_context *sctx, struct si_textures_info *textures) { unsigned i; - unsigned mask = textures->compressed_colortex_mask; + uint64_t mask = textures->compressed_colortex_mask; while (mask) { struct pipe_sampler_view *view; struct r600_texture *tex; - i = u_bit_scan(&mask); + i = u_bit_scan64(&mask); view = textures->views.views[i]; assert(view); @@ -355,13 +355,13 @@ si_decompress_image_color_textures(struct si_context *sctx, struct si_images_info *images) { unsigned 
i; - unsigned mask = images->compressed_colortex_mask; + uint64_t mask = images->compressed_colortex_mask; while (mask) { const struct pipe_image_view *view; struct r600_texture *tex; - i = u_bit_scan(&mask); + i = u_bit_scan64(&mask); view = &images->views[i]; assert(view->resource->target != PIPE_BUFFER); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 815b87bbd7e..6dd2e4fd89d 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -264,8 +264,8 @@ static void si_set_sampler_views(struct pipe_context *ctx, unsigned slot = start + i; if (!views || !views[i]) { - samplers->depth_texture_mask &= ~(1 << slot); - samplers->compressed_colortex_mask &= ~(1 << slot); + samplers->depth_texture_mask &= ~(1llu << slot); + samplers->compressed_colortex_mask &= ~(1llu << slot); si_set_sampler_view(sctx, &samplers->views, slot, NULL); continue; } @@ -277,18 +277,18 @@ static void si_set_sampler_views(struct pipe_context *ctx, (struct r600_texture*)views[i]->texture; if (rtex->is_depth && !rtex->is_flushing_texture) { - samplers->depth_texture_mask |= 1 << slot; + samplers->depth_texture_mask |= 1llu << slot; } else { - samplers->depth_texture_mask &= ~(1 << slot); + samplers->depth_texture_mask &= ~(1llu << slot); } if (is_compressed_colortex(rtex)) { - samplers->compressed_colortex_mask |= 1 << slot; + samplers->compressed_colortex_mask |= 1llu << slot; } else { - samplers->compressed_colortex_mask &= ~(1 << slot); + samplers->compressed_colortex_mask &= ~(1llu << slot); } } else { - samplers->depth_texture_mask &= ~(1 << slot); - samplers->compressed_colortex_mask &= ~(1 << slot); + samplers->depth_texture_mask &= ~(1llu << slot); + samplers->compressed_colortex_mask &= ~(1llu << slot); } } } @@ -306,9 +306,9 @@ si_samplers_update_compressed_colortex_mask(struct si_textures_info *samplers) struct r600_texture *rtex = (struct r600_texture *)res; if (is_compressed_colortex(rtex)) { - samplers->compressed_colortex_mask |= 1 << i; + samplers->compressed_colortex_mask |= 1llu << i; } else { - samplers->compressed_colortex_mask &= ~(1 << i); + samplers->compressed_colortex_mask &= ~(1llu << i); } } } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 407b9e19cc4..41bb84d68df 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -307,6 +307,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: case PIPE_CAP_QUERY_MEMORY_INFO: case PIPE_CAP_TGSI_PACK_HALF_FLOAT: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 1; case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: @@ -522,7 +523,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu return 0; case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS: - return 16; + return SI_NUM_USER_SAMPLERS; case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_SUPPORTED_IRS: diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 6d0d687fe4c..4158fc5461e 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -137,8 +137,8 @@ struct si_cs_shader_state { struct si_textures_info { struct si_sampler_views views; - uint32_t depth_texture_mask; /* which textures are depth */ - uint32_t compressed_colortex_mask; + uint64_t 
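With PIPE_MAX_SAMPLERS raised past 32 (see the p_state.h hunk further down), the radeonsi per-shader masks above must become 64-bit, which is why the shifts use 1llu and the scans use u_bit_scan64. A small standalone sketch of the same walk, using a compiler builtin in place of the gallium helper (GCC/Clang assumed):

#include <stdint.h>

static inline unsigned
bit_scan64(uint64_t *mask)
{
   unsigned i = __builtin_ctzll(*mask);  /* index of lowest set bit */
   *mask &= *mask - 1;                   /* clear that bit          */
   return i;
}

static void
walk_depth_texture_slots(uint64_t depth_texture_mask)
{
   uint64_t mask = depth_texture_mask;

   while (mask) {
      unsigned slot = bit_scan64(&mask);
      /* ... flush/decompress the texture bound at 'slot' ... */
      (void)slot;
   }
}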
depth_texture_mask; /* which textures are depth */ + uint64_t compressed_colortex_mask; }; struct si_images_info { diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 56c575948ab..08da3e37550 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1328,8 +1328,9 @@ static LLVMValueRef fetch_constant( if (reg->Register.Dimension && reg->Dimension.Indirect) { LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS); LLVMValueRef index; - index = get_indirect_index(ctx, ®->DimIndirect, - reg->Dimension.Index); + index = get_bounded_indirect_index(ctx, ®->DimIndirect, + reg->Dimension.Index, + SI_NUM_USER_CONST_BUFFERS); bufp = build_indexed_load_const(ctx, ptr, index); } else bufp = ctx->const_buffers[buf]; @@ -3356,7 +3357,10 @@ static void tex_fetch_ptrs( const struct tgsi_full_src_register *reg = &emit_data->inst->Src[sampler_src]; LLVMValueRef ind_index; - ind_index = get_indirect_index(ctx, ®->Indirect, reg->Register.Index); + ind_index = get_bounded_indirect_index(ctx, + ®->Indirect, + reg->Register.Index, + SI_NUM_USER_SAMPLERS); *res_ptr = get_sampler_desc(ctx, ind_index, DESC_IMAGE); @@ -4278,6 +4282,14 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, struct si_shader_context *ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; + /* The real barrier instruction isn’t needed, because an entire patch + * always fits into a single wave. + */ + if (ctx->type == TGSI_PROCESSOR_TESS_CTRL) { + emit_optimization_barrier(ctx); + return; + } + lp_build_intrinsic(gallivm->builder, HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.s.barrier" : "llvm.AMDGPU.barrier.local", diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 10d691a92f1..8087d2331ff 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -830,25 +830,93 @@ static void si_set_scissor_states(struct pipe_context *ctx, for (i = 0; i < num_scissors; i++) sctx->scissors.states[start_slot + i] = state[i]; + if (!sctx->queued.named.rasterizer || + !sctx->queued.named.rasterizer->scissor_enable) + return; + sctx->scissors.dirty_mask |= ((1 << num_scissors) - 1) << start_slot; si_mark_atom_dirty(sctx, &sctx->scissors.atom); } +static void si_get_scissor_from_viewport(struct pipe_viewport_state *vp, + struct pipe_scissor_state *scissor) +{ + /* These must be signed, unlike pipe_scissor_state. */ + int minx, miny, maxx, maxy, tmp; + + /* Convert (-1, -1) and (1, 1) from clip space into window space. */ + minx = -vp->scale[0] + vp->translate[0]; + miny = -vp->scale[1] + vp->translate[1]; + maxx = vp->scale[0] + vp->translate[0]; + maxy = vp->scale[1] + vp->translate[1]; + + /* r600_draw_rectangle sets this. Disable the scissor. */ + if (minx == -1 && miny == -1 && maxx == 1 && maxy == 1) { + minx = miny = 0; + maxx = maxy = 16384; + } + + /* Handle inverted viewports. 
*/ + if (minx > maxx) { + tmp = minx; + minx = maxx; + maxx = tmp; + } + if (miny > maxy) { + tmp = miny; + miny = maxy; + maxy = tmp; + } + + scissor->minx = CLAMP(minx, 0, 16384); + scissor->miny = CLAMP(miny, 0, 16384); + scissor->maxx = CLAMP(maxx, 0, 16384); + scissor->maxy = CLAMP(maxy, 0, 16384); +} + +static void si_clip_scissor(struct pipe_scissor_state *out, + struct pipe_scissor_state *clip) +{ + out->minx = MAX2(out->minx, clip->minx); + out->miny = MAX2(out->miny, clip->miny); + out->maxx = MIN2(out->maxx, clip->maxx); + out->maxy = MIN2(out->maxy, clip->maxy); +} + +static void si_emit_one_scissor(struct radeon_winsys_cs *cs, + struct pipe_viewport_state *vp, + struct pipe_scissor_state *scissor) +{ + struct pipe_scissor_state final; + + /* Since the guard band disables clipping, we have to clip per-pixel + * using a scissor. + */ + si_get_scissor_from_viewport(vp, &final); + + if (scissor) + si_clip_scissor(&final, scissor); + + radeon_emit(cs, S_028250_TL_X(final.minx) | + S_028250_TL_Y(final.miny) | + S_028250_WINDOW_OFFSET_DISABLE(1)); + radeon_emit(cs, S_028254_BR_X(final.maxx) | + S_028254_BR_Y(final.maxy)); +} + static void si_emit_scissors(struct si_context *sctx, struct r600_atom *atom) { struct radeon_winsys_cs *cs = sctx->b.gfx.cs; struct pipe_scissor_state *states = sctx->scissors.states; unsigned mask = sctx->scissors.dirty_mask; + bool scissor_enable = sctx->queued.named.rasterizer->scissor_enable; /* The simple case: Only 1 viewport is active. */ if (mask & 1 && !si_get_vs_info(sctx)->writes_viewport_index) { radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2); - radeon_emit(cs, S_028250_TL_X(states[0].minx) | - S_028250_TL_Y(states[0].miny) | - S_028250_WINDOW_OFFSET_DISABLE(1)); - radeon_emit(cs, S_028254_BR_X(states[0].maxx) | - S_028254_BR_Y(states[0].maxy)); + si_emit_one_scissor(cs, &sctx->viewports.states[0], + scissor_enable ? &states[0] : NULL); sctx->scissors.dirty_mask &= ~1; /* clear one bit */ return; } @@ -861,11 +929,8 @@ static void si_emit_scissors(struct si_context *sctx, struct r600_atom *atom) radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + start * 4 * 2, count * 2); for (i = start; i < start+count; i++) { - radeon_emit(cs, S_028250_TL_X(states[i].minx) | - S_028250_TL_Y(states[i].miny) | - S_028250_WINDOW_OFFSET_DISABLE(1)); - radeon_emit(cs, S_028254_BR_X(states[i].maxx) | - S_028254_BR_Y(states[i].maxy)); + si_emit_one_scissor(cs, &sctx->viewports.states[i], + scissor_enable ? 
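To make the viewport-to-scissor conversion above concrete, here is a small worked example with an assumed 1920x1080 viewport whose Y axis is flipped; the numbers are illustrative, not taken from the patch:

#include <stdio.h>

int main(void)
{
   float scale[2]     = {  960.0f, -540.0f };  /* 1920x1080, Y flipped */
   float translate[2] = {  960.0f,  540.0f };

   int minx = (int)(-scale[0] + translate[0]);  /*    0 */
   int miny = (int)(-scale[1] + translate[1]);  /* 1080 */
   int maxx = (int)( scale[0] + translate[0]);  /* 1920 */
   int maxy = (int)( scale[1] + translate[1]);  /*    0 */

   /* Inverted viewport: swap so min <= max before clamping. */
   if (miny > maxy) { int t = miny; miny = maxy; maxy = t; }

   printf("scissor: (%d,%d)-(%d,%d)\n", minx, miny, maxx, maxy);
   return 0;
}

This prints "scissor: (0,0)-(1920,1080)", which is then clamped to the [0, 16384] hardware range as in the patch.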
&states[i] : NULL); } } sctx->scissors.dirty_mask = 0; @@ -883,7 +948,9 @@ static void si_set_viewport_states(struct pipe_context *ctx, sctx->viewports.states[start_slot + i] = state[i]; sctx->viewports.dirty_mask |= ((1 << num_viewports) - 1) << start_slot; + sctx->scissors.dirty_mask |= ((1 << num_viewports) - 1) << start_slot; si_mark_atom_dirty(sctx, &sctx->viewports.atom); + si_mark_atom_dirty(sctx, &sctx->scissors.atom); } static void si_emit_viewports(struct si_context *sctx, struct r600_atom *atom) @@ -980,6 +1047,7 @@ static void *si_create_rs_state(struct pipe_context *ctx, return NULL; } + rs->scissor_enable = state->scissor; rs->two_side = state->light_twoside; rs->multisample_enable = state->multisample; rs->force_persample_interp = state->force_persample_interp; @@ -1038,7 +1106,7 @@ static void *si_create_rs_state(struct pipe_context *ctx, S_028A48_MSAA_ENABLE(state->multisample || state->poly_smooth || state->line_smooth) | - S_028A48_VPORT_SCISSOR_ENABLE(state->scissor)); + S_028A48_VPORT_SCISSOR_ENABLE(1)); si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL, S_028BE4_PIX_CENTER(state->half_pixel_center) | @@ -1105,6 +1173,11 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state) (!old_rs || old_rs->multisample_enable != rs->multisample_enable)) si_mark_atom_dirty(sctx, &sctx->db_render_state); + if (!old_rs || old_rs->scissor_enable != rs->scissor_enable) { + sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1; + si_mark_atom_dirty(sctx, &sctx->scissors.atom); + } + si_pm4_bind_state(sctx, rasterizer, rs); si_update_poly_offset_state(sctx); @@ -1310,16 +1383,18 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s /* DB_COUNT_CONTROL (occlusion queries) */ if (sctx->b.num_occlusion_queries > 0) { + bool perfect = sctx->b.num_perfect_occlusion_queries > 0; + if (sctx->b.chip_class >= CIK) { radeon_emit(cs, - S_028004_PERFECT_ZPASS_COUNTS(1) | + S_028004_PERFECT_ZPASS_COUNTS(perfect) | S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) | S_028004_ZPASS_ENABLE(1) | S_028004_SLICE_EVEN_ENABLE(1) | S_028004_SLICE_ODD_ENABLE(1)); } else { radeon_emit(cs, - S_028004_PERFECT_ZPASS_COUNTS(1) | + S_028004_PERFECT_ZPASS_COUNTS(perfect) | S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples)); } } else { @@ -2000,6 +2075,11 @@ boolean si_is_format_supported(struct pipe_screen *screen, case 4: case 8: break; + case 16: + if (format == PIPE_FORMAT_NONE) + return TRUE; + else + return FALSE; default: return FALSE; } @@ -2623,6 +2703,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, constbuf.user_buffer = sctx->b.sample_locations_16x; break; default: + R600_ERR("Requested an invalid number of samples %i.\n", + sctx->framebuffer.nr_samples); assert(0); } constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index c4d6b9d9eee..f55f19e2918 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -68,6 +68,7 @@ struct si_state_rasterizer { bool uses_poly_offset; bool clamp_fragment_color; bool rasterizer_discard; + bool scissor_enable; }; struct si_dsa_stencil_ref_part { @@ -144,10 +145,10 @@ struct si_shader_data { uint32_t sh_base[SI_NUM_SHADERS]; }; -/* User sampler views: 0..15 - * Polygon stipple tex: 16 +/* User sampler views: 0..31 + * Polygon stipple tex: 32 */ -#define SI_NUM_USER_SAMPLERS 16 /* AKA OpenGL textures units per shader */ +#define SI_NUM_USER_SAMPLERS 32 /* AKA 
OpenGL textures units per shader */ #define SI_POLY_STIPPLE_SAMPLER SI_NUM_USER_SAMPLERS #define SI_NUM_SAMPLERS (SI_POLY_STIPPLE_SAMPLER + 1) diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index bfd3598fc57..90f29d6e52a 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -270,6 +270,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_PCI_BUS: case PIPE_CAP_PCI_DEVICE: case PIPE_CAP_PCI_FUNCTION: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; } /* should only get here on unhandled cases */ diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index db4b2735d58..1a4bf384b2a 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -94,6 +94,8 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, sp->framebuffer.width = fb->width; sp->framebuffer.height = fb->height; + sp->framebuffer.samples = fb->samples; + sp->framebuffer.layers = fb->layers; sp->dirty |= SP_NEW_FRAMEBUFFER; } diff --git a/src/gallium/drivers/svga/svga_pipe_blend.c b/src/gallium/drivers/svga/svga_pipe_blend.c index 0af80cd4296..0ba9313fd5e 100644 --- a/src/gallium/drivers/svga/svga_pipe_blend.c +++ b/src/gallium/drivers/svga/svga_pipe_blend.c @@ -142,6 +142,9 @@ svga_create_blend_state(struct pipe_context *pipe, struct svga_blend_state *blend = CALLOC_STRUCT( svga_blend_state ); unsigned i; + if (!blend) + return NULL; + /* Fill in the per-rendertarget blend state. We currently only * support independent blend enable and colormask per render target. */ diff --git a/src/gallium/drivers/svga/svga_pipe_depthstencil.c b/src/gallium/drivers/svga/svga_pipe_depthstencil.c index d84ed1df48e..83fcdc3d80b 100644 --- a/src/gallium/drivers/svga/svga_pipe_depthstencil.c +++ b/src/gallium/drivers/svga/svga_pipe_depthstencil.c @@ -134,6 +134,9 @@ svga_create_depth_stencil_state(struct pipe_context *pipe, struct svga_context *svga = svga_context(pipe); struct svga_depth_stencil_state *ds = CALLOC_STRUCT( svga_depth_stencil_state ); + if (!ds) + return NULL; + /* Don't try to figure out CW/CCW correspondence with * stencil[0]/[1] at this point. Presumably this can change as * back/front face are modified. diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c index 8e0db539574..d397c95da98 100644 --- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c +++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c @@ -161,6 +161,9 @@ svga_create_rasterizer_state(struct pipe_context *pipe, struct svga_rasterizer_state *rast = CALLOC_STRUCT( svga_rasterizer_state ); struct svga_screen *screen = svga_screen(pipe->screen); + if (!rast) + return NULL; + /* need this for draw module. 
*/ rast->templ = *templ; diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index c0873c0c65a..536fb6f786f 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -404,6 +404,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL: case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: case PIPE_CAP_QUERY_BUFFER_OBJECT: + case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: return 0; } @@ -999,8 +1000,10 @@ svga_screen_create(struct svga_winsys_screen *sws) svgascreen->max_color_buffers = SVGA3D_DX_MAX_RENDER_TARGETS; /* Multisample samples per pixel */ - svgascreen->ms_samples = - get_uint_cap(sws, SVGA3D_DEVCAP_MULTISAMPLE_MASKABLESAMPLES, 0); + if (debug_get_bool_option("SVGA_MSAA", TRUE)) { + svgascreen->ms_samples = + get_uint_cap(sws, SVGA3D_DEVCAP_MULTISAMPLE_MASKABLESAMPLES, 0); + } /* Maximum number of constant buffers */ svgascreen->max_const_buffers = diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 57f851833e5..08b1d32afb0 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -1686,6 +1686,44 @@ static void trace_context_set_shader_buffers(struct pipe_context *_context, FREE(_buffers); } +static void trace_context_set_shader_images(struct pipe_context *_context, + unsigned shader, + unsigned start, unsigned nr, + struct pipe_image_view *images) +{ + struct trace_context *tr_context = trace_context(_context); + struct pipe_context *context = tr_context->pipe; + struct pipe_image_view *_images = NULL; + + trace_dump_call_begin("pipe_context", "set_shader_images"); + trace_dump_arg(ptr, context); + trace_dump_arg(uint, shader); + trace_dump_arg(uint, start); + trace_dump_arg_begin("images"); + trace_dump_struct_array(image_view, images, nr); + trace_dump_arg_end(); + trace_dump_call_end(); + + if (images) { + int i; + + _images = MALLOC(nr * sizeof(struct pipe_image_view)); + if (!_images) + return; + + for (i = 0; i < nr; i++) { + _images[i] = images[i]; + _images[i].resource = trace_resource_unwrap(tr_context, + _images[i].resource); + } + } + + context->set_shader_images(context, shader, start, nr, _images); + + if (_images) + FREE(_images); +} + static void trace_context_launch_grid(struct pipe_context *_pipe, const struct pipe_grid_info *info) { @@ -1809,6 +1847,7 @@ trace_context_create(struct trace_screen *tr_scr, TR_CTX_INIT(set_tess_state); TR_CTX_INIT(set_shader_buffers); TR_CTX_INIT(launch_grid); + TR_CTX_INIT(set_shader_images); TR_CTX_INIT(transfer_map); TR_CTX_INIT(transfer_unmap); diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c index b53d7dbec2f..591ca79a2fa 100644 --- a/src/gallium/drivers/trace/tr_dump_state.c +++ b/src/gallium/drivers/trace/tr_dump_state.c @@ -481,6 +481,8 @@ void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state) trace_dump_member(uint, state, width); trace_dump_member(uint, state, height); + trace_dump_member(uint, state, samples); + trace_dump_member(uint, state, layers); trace_dump_member(uint, state, nr_cbufs); trace_dump_member_array(ptr, state, cbufs); trace_dump_member(ptr, state, zsbuf); @@ -738,6 +740,46 @@ void trace_dump_shader_buffer(const struct pipe_shader_buffer *state) } +void trace_dump_image_view(const struct pipe_image_view *state) +{ + if (!trace_dumping_enabled_locked()) + return; + + if(!state) { + trace_dump_null(); + return; + } + + 
diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c
index b53d7dbec2f..591ca79a2fa 100644
--- a/src/gallium/drivers/trace/tr_dump_state.c
+++ b/src/gallium/drivers/trace/tr_dump_state.c
@@ -481,6 +481,8 @@ void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state)
   trace_dump_member(uint, state, width);
   trace_dump_member(uint, state, height);
+  trace_dump_member(uint, state, samples);
+  trace_dump_member(uint, state, layers);
   trace_dump_member(uint, state, nr_cbufs);
   trace_dump_member_array(ptr, state, cbufs);
   trace_dump_member(ptr, state, zsbuf);
@@ -738,6 +740,46 @@ void trace_dump_shader_buffer(const struct pipe_shader_buffer *state)
}
+void trace_dump_image_view(const struct pipe_image_view *state)
+{
+   if (!trace_dumping_enabled_locked())
+      return;
+
+   if(!state) {
+      trace_dump_null();
+      return;
+   }
+
+   trace_dump_struct_begin("pipe_image_view");
+   trace_dump_member(resource_ptr, state, resource);
+   trace_dump_member(uint, state, format);
+   trace_dump_member(uint, state, access);
+
+   trace_dump_member_begin("u");
+   trace_dump_struct_begin(""); /* anonymous */
+   if (state->resource->target == PIPE_BUFFER) {
+      trace_dump_member_begin("buf");
+      trace_dump_struct_begin(""); /* anonymous */
+      trace_dump_member(uint, &state->u.buf, first_element);
+      trace_dump_member(uint, &state->u.buf, last_element);
+      trace_dump_struct_end(); /* anonymous */
+      trace_dump_member_end(); /* buf */
+   } else {
+      trace_dump_member_begin("tex");
+      trace_dump_struct_begin(""); /* anonymous */
+      trace_dump_member(uint, &state->u.tex, first_layer);
+      trace_dump_member(uint, &state->u.tex, last_layer);
+      trace_dump_member(uint, &state->u.tex, level);
+      trace_dump_struct_end(); /* anonymous */
+      trace_dump_member_end(); /* tex */
+   }
+   trace_dump_struct_end(); /* anonymous */
+   trace_dump_member_end(); /* u */
+
+   trace_dump_struct_end();
+}
+
+
void trace_dump_draw_info(const struct pipe_draw_info *state)
{
   if (!trace_dumping_enabled_locked())
diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h
index ee0720d8ac8..fd2bc503052 100644
--- a/src/gallium/drivers/trace/tr_dump_state.h
+++ b/src/gallium/drivers/trace/tr_dump_state.h
@@ -91,4 +91,6 @@ void trace_dump_query_result(unsigned query_type,
void trace_dump_grid_info(const struct pipe_grid_info *state);
+void trace_dump_image_view(const struct pipe_image_view *view);
+
#endif /* TR_STATE_H */
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index 92d910ba6a5..167a2f5bd8e 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -207,6 +207,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
        case PIPE_CAP_PCI_BUS:
        case PIPE_CAP_PCI_DEVICE:
        case PIPE_CAP_PCI_FUNCTION:
+       case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
                return 0;
        /* Stream output. */
diff --git a/src/gallium/drivers/virgl/virgl_screen.c b/src/gallium/drivers/virgl/virgl_screen.c
index 8126bdec40c..5a5afc1712f 100644
--- a/src/gallium/drivers/virgl/virgl_screen.c
+++ b/src/gallium/drivers/virgl/virgl_screen.c
@@ -239,6 +239,7 @@ virgl_get_param(struct pipe_screen *screen, enum pipe_cap param)
   case PIPE_CAP_PCI_BUS:
   case PIPE_CAP_PCI_DEVICE:
   case PIPE_CAP_PCI_FUNCTION:
+  case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
      return 0;
   case PIPE_CAP_VENDOR_ID:
      return 0x1af4;
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 6f30f9ed7d3..5e204a3e5ea 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -690,6 +690,7 @@ enum pipe_cap
   PIPE_CAP_PCI_BUS,
   PIPE_CAP_PCI_DEVICE,
   PIPE_CAP_PCI_FUNCTION,
+  PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT,
};
#define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index 5ab53728e82..9e466cefd8c 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -57,7 +57,7 @@ extern "C" {
#define PIPE_MAX_CLIP_PLANES      8
#define PIPE_MAX_COLOR_BUFS       8
#define PIPE_MAX_CONSTANT_BUFFERS 32
-#define PIPE_MAX_SAMPLERS         18 /* 16 public + 2 driver internal */
+#define PIPE_MAX_SAMPLERS         32
#define PIPE_MAX_SHADER_INPUTS    80 /* 32 GENERIC + 32 PATCH + 16 others */
#define PIPE_MAX_SHADER_OUTPUTS   80 /* 32 GENERIC + 32 PATCH + 16 others */
#define PIPE_MAX_SHADER_SAMPLER_VIEWS 32
@@ -298,9 +298,17 @@ struct pipe_stencil_ref
};
+/**
+ * Note that pipe_surfaces are "texture views for rendering"
+ * and so in the case of ARB_framebuffer_no_attachment there
+ * is no pipe_surface state available such that we may
+ * extract the number of samples and layers.
+ */
struct pipe_framebuffer_state
{
   unsigned width, height;
+  unsigned samples; /**< Number of samples in a no-attachment framebuffer */
+  unsigned layers;  /**< Number of layers in a no-attachment framebuffer */
   /** multiple color buffers for multiple render targets */
   unsigned nr_cbufs;
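As the new comment in p_state.h explains, a no-attachment framebuffer has no pipe_surface to derive its geometry from, so the caller has to fill samples and layers directly. A rough caller-side sketch is below; the helper name and numeric values are placeholders (roughly corresponding to the GL_FRAMEBUFFER_DEFAULT_* parameters of ARB_framebuffer_no_attachment), and the surrounding state-tracker plumbing is omitted. It assumes pipe/p_context.h, pipe/p_state.h and string.h are included.

   /* No-attachment framebuffer: geometry comes from the GL default parameters. */
   static void
   set_no_attachment_fb(struct pipe_context *pipe)
   {
      struct pipe_framebuffer_state fb;

      memset(&fb, 0, sizeof(fb));
      fb.width   = 800;   /* GL_FRAMEBUFFER_DEFAULT_WIDTH */
      fb.height  = 600;   /* GL_FRAMEBUFFER_DEFAULT_HEIGHT */
      fb.samples = 4;     /* GL_FRAMEBUFFER_DEFAULT_SAMPLES */
      fb.layers  = 1;     /* GL_FRAMEBUFFER_DEFAULT_LAYERS */
      fb.nr_cbufs = 0;    /* no color attachments */
      fb.zsbuf = NULL;    /* no depth/stencil attachment */

      pipe->set_framebuffer_state(pipe, &fb);
   }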