diff options
Diffstat (limited to 'src/gallium/drivers/nv50')
-rw-r--r-- | src/gallium/drivers/nv50/nv50_context.c | 8 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_context.h | 17 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_miptree.c | 17 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_program.c | 1060 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_program.h | 20 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_query.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_screen.c | 42 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_screen.h | 4 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_state.c | 50 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_state_validate.c | 26 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_tex.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_vbo.c | 8 |
12 files changed, 968 insertions, 288 deletions
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index 5997456e4c9..22a8498c69b 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -67,8 +67,12 @@ nv50_destroy(struct pipe_context *pipe) so_ref(NULL, &nv50->state.vertprog); if (nv50->state.fragprog) so_ref(NULL, &nv50->state.fragprog); - if (nv50->state.programs) - so_ref(NULL, &nv50->state.programs); + if (nv50->state.geomprog) + so_ref(NULL, &nv50->state.geomprog); + if (nv50->state.fp_linkage) + so_ref(NULL, &nv50->state.fp_linkage); + if (nv50->state.gp_linkage) + so_ref(NULL, &nv50->state.gp_linkage); if (nv50->state.vtxfmt) so_ref(NULL, &nv50->state.vtxfmt); if (nv50->state.vtxbuf) diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index cbd4c3ff86d..d024be6ea95 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -45,9 +45,11 @@ #define NV50_NEW_VERTPROG_CB (1 << 9) #define NV50_NEW_FRAGPROG (1 << 10) #define NV50_NEW_FRAGPROG_CB (1 << 11) -#define NV50_NEW_ARRAYS (1 << 12) -#define NV50_NEW_SAMPLER (1 << 13) -#define NV50_NEW_TEXTURE (1 << 14) +#define NV50_NEW_GEOMPROG (1 << 12) +#define NV50_NEW_GEOMPROG_CB (1 << 13) +#define NV50_NEW_ARRAYS (1 << 14) +#define NV50_NEW_SAMPLER (1 << 15) +#define NV50_NEW_TEXTURE (1 << 16) struct nv50_blend_stateobj { struct pipe_blend_state pipe; @@ -129,7 +131,9 @@ struct nv50_state { unsigned miptree_nr[PIPE_SHADER_TYPES]; struct nouveau_stateobj *vertprog; struct nouveau_stateobj *fragprog; - struct nouveau_stateobj *programs; + struct nouveau_stateobj *geomprog; + struct nouveau_stateobj *fp_linkage; + struct nouveau_stateobj *gp_linkage; struct nouveau_stateobj *vtxfmt; struct nouveau_stateobj *vtxbuf; struct nouveau_stateobj *vtxattr; @@ -157,6 +161,7 @@ struct nv50_context { struct pipe_framebuffer_state framebuffer; struct nv50_program *vertprog; struct nv50_program *fragprog; + struct nv50_program *geomprog; struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; unsigned vtxbuf_nr; @@ -207,7 +212,9 @@ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers, /* nv50_program.c */ extern void nv50_vertprog_validate(struct nv50_context *nv50); extern void nv50_fragprog_validate(struct nv50_context *nv50); -extern void nv50_linkage_validate(struct nv50_context *nv50); +extern void nv50_geomprog_validate(struct nv50_context *nv50); +extern void nv50_fp_linkage_validate(struct nv50_context *nv50); +extern void nv50_gp_linkage_validate(struct nv50_context *nv50); extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p); diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 3f1edf0a139..dc8364ced7e 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -92,12 +92,23 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) case PIPE_FORMAT_Z24S8_UNORM: tile_flags = 0x1800; break; + case PIPE_FORMAT_Z16_UNORM: + tile_flags = 0x6c00; + break; case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8Z24_UNORM: tile_flags = 0x2800; break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R32G32B32_FLOAT: + tile_flags = 0x7400; + break; default: - tile_flags = 0x7000; + if ((pt->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY) && + util_format_get_blocksizebits(pt->format) == 32) + tile_flags = 0x7a00; + else + tile_flags = 0x7000; break; } @@ -145,7 +156,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) mt->level[0].tile_mode, tile_flags, &mt->base.bo); if (ret) { - for (l = 0; l < pt->last_level; ++l) + for (l = 0; l <= pt->last_level; ++l) FREE(mt->level[l].image_offset); FREE(mt); return NULL; @@ -188,7 +199,7 @@ nv50_miptree_destroy(struct pipe_texture *pt) struct nv50_miptree *mt = nv50_miptree(pt); unsigned l; - for (l = 0; l < pt->last_level; ++l) + for (l = 0; l <= pt->last_level; ++l) FREE(mt->level[l].image_offset); nouveau_bo_ref(NULL, &mt->base.bo); diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 53f9f0adf31..acd695b39bc 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -92,6 +92,9 @@ struct nv50_reg { int rhw; /* result hw for FP outputs, or interpolant index */ int acc; /* instruction where this reg is last read (first insn == 1) */ + + int vtx; /* vertex index, for GP inputs (TGSI Dimension.Index) */ + int indirect[2]; /* index into pc->addr, or -1 */ }; #define NV50_MOD_NEG 1 @@ -135,9 +138,11 @@ struct nv50_pc { int immd_nr; struct nv50_reg **addr; int addr_nr; - uint8_t addr_alloc; /* set bit indicates used for TGSI_FILE_ADDRESS */ + struct nv50_reg *sysval; + int sysval_nr; struct nv50_reg *temp_temp[16]; + struct nv50_program_exec *temp_temp_exec[16]; unsigned temp_temp_nr; /* broadcast and destination replacement regs */ @@ -170,6 +175,8 @@ struct nv50_pc { uint8_t edgeflag_out; }; +static struct nv50_reg *get_address_reg(struct nv50_pc *, struct nv50_reg *); + static INLINE void ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) { @@ -178,7 +185,9 @@ ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw) reg->hw = hw; reg->mod = 0; reg->rhw = -1; + reg->vtx = -1; reg->acc = 0; + reg->indirect[0] = reg->indirect[1] = -1; } static INLINE unsigned @@ -196,7 +205,8 @@ terminate_mbb(struct nv50_pc *pc) /* remove records of temporary address register values */ for (i = 0; i < NV50_SU_MAX_ADDR; ++i) - pc->r_addr[i].rhw = -1; + if (pc->r_addr[i].index < 0) + pc->r_addr[i].acc = 0; } static void @@ -245,7 +255,8 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) } } - assert(0); + NOUVEAU_ERR("out of registers\n"); + abort(); } static INLINE struct nv50_reg * @@ -258,6 +269,7 @@ reg_instance(struct nv50_pc *pc, struct nv50_reg *reg) if (reg) { alloc_reg(pc, reg); *ri = *reg; + reg->indirect[0] = reg->indirect[1] = -1; reg->mod = 0; } return ri; @@ -285,7 +297,8 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst) } } - assert(0); + NOUVEAU_ERR("out of registers\n"); + abort(); return NULL; } @@ -347,23 +360,29 @@ free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4]) } static struct nv50_reg * -temp_temp(struct nv50_pc *pc) +temp_temp(struct nv50_pc *pc, struct nv50_program_exec *e) { if (pc->temp_temp_nr >= 16) assert(0); pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL); + pc->temp_temp_exec[pc->temp_temp_nr] = e; return pc->temp_temp[pc->temp_temp_nr++]; } +/* This *must* be called for all nv50_program_exec that have been + * given as argument to temp_temp, or the temps will be leaked ! + */ static void -kill_temp_temp(struct nv50_pc *pc) +kill_temp_temp(struct nv50_pc *pc, struct nv50_program_exec *e) { int i; for (i = 0; i < pc->temp_temp_nr; i++) - free_temp(pc, pc->temp_temp[i]); - pc->temp_temp_nr = 0; + if (pc->temp_temp_exec[i] == e) + free_temp(pc, pc->temp_temp[i]); + if (!e) + pc->temp_temp_nr = 0; } static int @@ -425,6 +444,8 @@ emit(struct nv50_pc *pc, struct nv50_program_exec *e) p->exec_head = e; p->exec_tail = e; p->exec_size += (e->inst[0] & 1) ? 2 : 1; + + kill_temp_temp(pc, e); } static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *); @@ -514,11 +535,33 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) static INLINE void set_addr(struct nv50_program_exec *e, struct nv50_reg *a) { + assert(a->type == P_ADDR); + assert(!(e->inst[0] & 0x0c000000)); assert(!(e->inst[1] & 0x00000004)); e->inst[0] |= (a->hw & 3) << 26; - e->inst[1] |= (a->hw >> 2) << 2; + e->inst[1] |= a->hw & 4; +} + +static void +emit_arl(struct nv50_pc *, struct nv50_reg *, struct nv50_reg *, uint8_t); + +static void +emit_shl_imm(struct nv50_pc *, struct nv50_reg *, struct nv50_reg *, int); + +static void +emit_mov_from_addr(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[1] = 0x40000000; + set_long(pc, e); + set_dst(pc, dst, e); + set_addr(e, src); + + emit(pc, e); } static void @@ -537,72 +580,6 @@ emit_add_addr_imm(struct nv50_pc *pc, struct nv50_reg *dst, emit(pc, e); } -static struct nv50_reg * -alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref) -{ - struct nv50_reg *a_tgsi = NULL, *a = NULL; - int i; - uint8_t avail = ~pc->addr_alloc; - - if (!ref) { - /* allocate for TGSI_FILE_ADDRESS */ - while (avail) { - i = ffs(avail) - 1; - - if (pc->r_addr[i].rhw < 0 || - pc->r_addr[i].acc != pc->insn_cur) { - pc->addr_alloc |= (1 << i); - - pc->r_addr[i].rhw = -1; - pc->r_addr[i].index = i; - return &pc->r_addr[i]; - } - avail &= ~(1 << i); - } - assert(0); - return NULL; - } - - /* Allocate and set an address reg so we can access 'ref'. - * - * If and r_addr->index will be -1 or the hw index the value - * value in rhw is relative to. If rhw < 0, the reg has not - * been initialized or is in use for TGSI_FILE_ADDRESS. - */ - while (avail) { /* only consider regs that are not TGSI */ - i = ffs(avail) - 1; - avail &= ~(1 << i); - - if ((!a || a->rhw >= 0) && pc->r_addr[i].rhw < 0) { - /* prefer an usused reg with low hw index */ - a = &pc->r_addr[i]; - continue; - } - if (!a && pc->r_addr[i].acc != pc->insn_cur) - a = &pc->r_addr[i]; - - if (ref->hw - pc->r_addr[i].rhw >= 128) - continue; - - if ((ref->acc >= 0 && pc->r_addr[i].index < 0) || - (ref->acc < 0 && pc->r_addr[i].index == ref->index)) { - pc->r_addr[i].acc = pc->insn_cur; - return &pc->r_addr[i]; - } - } - assert(a); - - if (ref->acc < 0) - a_tgsi = pc->addr[ref->index]; - - emit_add_addr_imm(pc, a, a_tgsi, (ref->hw & ~0x7f) * 4); - - a->rhw = ref->hw & ~0x7f; - a->acc = pc->insn_cur; - a->index = a_tgsi ? ref->index : -1; - return a; -} - #define INTERP_LINEAR 0 #define INTERP_FLAT 1 #define INTERP_PERSPECTIVE 2 @@ -646,12 +623,12 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, e->param.shift = s; e->param.mask = m << (s % 32); - if (src->hw > 127) - set_addr(e, alloc_addr(pc, src)); + if (src->hw < 0 || src->hw > 127) /* need (additional) address reg */ + set_addr(e, get_address_reg(pc, src)); else if (src->acc < 0) { assert(src->type == P_CONST); - set_addr(e, pc->addr[src->index]); + set_addr(e, pc->addr[src->indirect[0]]); } e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22); @@ -683,6 +660,12 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) if (src->type == P_ATTR) { set_long(pc, e); e->inst[1] |= 0x00200000; + + if (src->vtx >= 0) { + /* indirect (vertex base + c) load from p[] */ + e->inst[0] |= 0x01800000; + set_addr(e, get_address_reg(pc, src)); + } } alloc_reg(pc, src); @@ -780,7 +763,7 @@ set_src_0_restricted(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_reg *temp; if (src->type != P_TEMP) { - temp = temp_temp(pc); + temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; } @@ -797,9 +780,14 @@ set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) if (src->type == P_ATTR) { set_long(pc, e); e->inst[1] |= 0x00200000; + + if (src->vtx >= 0) { + e->inst[0] |= 0x01800000; /* src from p[] */ + set_addr(e, get_address_reg(pc, src)); + } } else if (src->type == P_CONST || src->type == P_IMMD) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; @@ -815,19 +803,19 @@ static void set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) { if (src->type == P_ATTR) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; } else if (src->type == P_CONST || src->type == P_IMMD) { - assert(!(e->inst[0] & 0x00800000)); - if (e->inst[0] & 0x01000000) { - struct nv50_reg *temp = temp_temp(pc); + if (e->inst[0] & 0x01800000) { + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; } else { + assert(!(e->inst[0] & 0x00800000)); set_data(pc, src, 0x7f, 16, e); e->inst[0] |= 0x00800000; } @@ -845,19 +833,19 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) set_long(pc, e); if (src->type == P_ATTR) { - struct nv50_reg *temp = temp_temp(pc); + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; } else if (src->type == P_CONST || src->type == P_IMMD) { - assert(!(e->inst[0] & 0x01000000)); - if (e->inst[0] & 0x00800000) { - struct nv50_reg *temp = temp_temp(pc); + if (e->inst[0] & 0x01800000) { + struct nv50_reg *temp = temp_temp(pc, e); emit_mov(pc, temp, src); src = temp; } else { + assert(!(e->inst[0] & 0x01000000)); set_data(pc, src, 0x7f, 32+14, e); e->inst[0] |= 0x01000000; } @@ -868,6 +856,26 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) } static void +set_half_src(struct nv50_pc *pc, struct nv50_reg *src, int lh, + struct nv50_program_exec *e, int pos) +{ + struct nv50_reg *r = src; + + alloc_reg(pc, r); + if (r->type != P_TEMP) { + r = temp_temp(pc, e); + emit_mov(pc, r, src); + } + + if (r->hw > (NV50_SU_MAX_TEMP / 2)) { + NOUVEAU_ERR("out of low GPRs\n"); + abort(); + } + + e->inst[pos / 32] |= ((src->hw * 2) + lh) << (pos % 32); +} + +static void emit_mov_from_pred(struct nv50_pc *pc, struct nv50_reg *dst, int pred) { struct nv50_program_exec *e = exec(pc); @@ -966,11 +974,125 @@ emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src, e->inst[0] |= dst->hw << 2; e->inst[0] |= s << 16; /* shift left */ - set_src_0_restricted(pc, src, e); + set_src_0(pc, src, e); emit(pc, e); } +static boolean +address_reg_suitable(struct nv50_reg *a, struct nv50_reg *r) +{ + if (!r) + return FALSE; + + if (r->vtx != a->vtx) + return FALSE; + if (r->vtx >= 0) + return (r->indirect[1] == a->indirect[1]); + + if (r->hw < a->rhw || (r->hw - a->rhw) >= 128) + return FALSE; + + if (a->index >= 0) + return (a->index == r->indirect[0]); + return (a->indirect[0] == r->indirect[0]); +} + +static void +load_vertex_base(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *a, int shift) +{ + struct nv50_reg mem, *temp; + + ctor_reg(&mem, P_ATTR, -1, dst->vtx); + + assert(dst->type == P_ADDR); + if (!a) { + emit_arl(pc, dst, &mem, 0); + return; + } + temp = alloc_temp(pc, NULL); + + if (shift) { + emit_mov_from_addr(pc, temp, a); + if (shift < 0) + emit_shl_imm(pc, temp, temp, shift); + emit_arl(pc, dst, temp, MAX2(shift, 0)); + } + emit_mov(pc, temp, &mem); + set_addr(pc->p->exec_tail, dst); + + emit_arl(pc, dst, temp, 0); + free_temp(pc, temp); +} + +/* case (ref == NULL): allocate address register for TGSI_FILE_ADDRESS + * case (vtx >= 0, acc >= 0): load vertex base from a[vtx * 4] to $aX + * case (vtx >= 0, acc < 0): load vertex base from s[$aY + vtx * 4] to $aX + * case (vtx < 0, acc >= 0): memory address too high to encode + * case (vtx < 0, acc < 0): get source register for TGSI_FILE_ADDRESS + */ +static struct nv50_reg * +get_address_reg(struct nv50_pc *pc, struct nv50_reg *ref) +{ + int i; + struct nv50_reg *a_ref, *a = NULL; + + for (i = 0; i < NV50_SU_MAX_ADDR; ++i) { + if (pc->r_addr[i].acc == 0) + a = &pc->r_addr[i]; /* an unused address reg */ + else + if (address_reg_suitable(&pc->r_addr[i], ref)) { + pc->r_addr[i].acc = pc->insn_cur; + return &pc->r_addr[i]; + } else + if (!a && pc->r_addr[i].index < 0 && + pc->r_addr[i].acc < pc->insn_cur) + a = &pc->r_addr[i]; + } + if (!a) { + /* We'll be able to spill address regs when this + * mess is replaced with a proper compiler ... + */ + NOUVEAU_ERR("out of address regs\n"); + abort(); + return NULL; + } + + /* initialize and reserve for this TGSI instruction */ + a->rhw = 0; + a->index = a->indirect[0] = a->indirect[1] = -1; + a->acc = pc->insn_cur; + + if (!ref) { + a->vtx = -1; + return a; + } + a->vtx = ref->vtx; + + /* now put in the correct value ... */ + + if (ref->vtx >= 0) { + a->indirect[1] = ref->indirect[1]; + + /* For an indirect vertex index, we need to shift address right + * by 2, the address register will contain vtx * 16, we need to + * load from a[vtx * 4]. + */ + load_vertex_base(pc, a, (ref->acc < 0) ? + pc->addr[ref->indirect[1]] : NULL, -2); + } else { + assert(ref->acc < 0 || ref->indirect[0] < 0); + + a->rhw = ref->hw & ~0x7f; + a->indirect[0] = ref->indirect[0]; + a_ref = (ref->acc < 0) ? pc->addr[ref->indirect[0]] : NULL; + + emit_add_addr_imm(pc, a, a_ref, a->rhw * 4); + } + return a; +} + #define NV50_MAX_F32 0x880 #define NV50_MAX_S32 0x08c #define NV50_MAX_U32 0x084 @@ -1050,6 +1172,20 @@ emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, } static void +emit_not(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xd0000000; + e->inst[1] = 0x0402c000; + set_long(pc, e); + set_dst(pc, dst, e); + set_src_1(pc, src, e); + + emit(pc, e); +} + +static void emit_shift(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, unsigned dir) { @@ -1078,6 +1214,27 @@ emit_shift(struct nv50_pc *pc, struct nv50_reg *dst, } static void +emit_shl_imm(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src, int s) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x30000000; + e->inst[1] = 0xc4100000; + if (s < 0) { + e->inst[1] |= 1 << 29; + s = -s; + } + e->inst[1] |= ((s & 0x7f) << 16); + + set_long(pc, e); + set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); +} + +static void emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2) { @@ -1321,7 +1478,7 @@ emit_add_b32(struct nv50_pc *pc, struct nv50_reg *dst, src1 = rsrc1; if (src0->mod & rsrc1->mod & NV50_MOD_NEG) { - src1 = alloc_temp(pc, NULL); + src1 = temp_temp(pc, e); emit_cvt(pc, src1, rsrc1, -1, CVT_S32_S32); } @@ -1350,9 +1507,43 @@ emit_add_b32(struct nv50_pc *pc, struct nv50_reg *dst, e->inst[0] |= 1 << 22; emit(pc, e); +} + +static void +emit_mad_u16(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, int lh_0, struct nv50_reg *src1, int lh_1, + struct nv50_reg *src2) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x60000000; + if (!pc->allow32) + set_long(pc, e); + set_dst(pc, dst, e); + + set_half_src(pc, src0, lh_0, e, 9); + set_half_src(pc, src1, lh_1, e, 16); + alloc_reg(pc, src2); + if (is_long(e) || (src2->type != P_TEMP) || (src2->hw != dst->hw)) + set_src_2(pc, src2, e); + + emit(pc, e); +} + +static void +emit_mul_u16(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, int lh_0, struct nv50_reg *src1, int lh_1) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0x40000000; + set_long(pc, e); + set_dst(pc, dst, e); + + set_half_src(pc, src0, lh_0, e, 9); + set_half_src(pc, src1, lh_1, e, 16); - if (src1 != rsrc1) - free_temp(pc, src1); + emit(pc, e); } static void @@ -1362,6 +1553,9 @@ emit_sad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e = exec(pc); e->inst[0] = 0x50000000; + if (!pc->allow32) + set_long(pc, e); + check_swap_src_0_1(pc, &src0, &src1); set_dst(pc, dst, e); set_src_0(pc, src0, e); set_src_1(pc, src1, e); @@ -1373,6 +1567,8 @@ emit_sad(struct nv50_pc *pc, struct nv50_reg *dst, e->inst[1] |= 0x0c << 24; else e->inst[0] |= 0x81 << 8; + + emit(pc, e); } static INLINE void @@ -1422,10 +1618,10 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, if (mask & (1 << 2)) { set_pred_wr(pc, 1, 0, pc->p->exec_tail); - tmp[1] = temp_temp(pc); + tmp[1] = temp_temp(pc, NULL); emit_minmax(pc, NV50_MAX_F32, tmp[1], src[1], zero); - tmp[3] = temp_temp(pc); + tmp[3] = temp_temp(pc, NULL); emit_minmax(pc, NV50_MAX_F32, tmp[3], src[3], neg128); emit_minmax(pc, NV50_MIN_F32, tmp[3], tmp[3], pos128); @@ -1524,6 +1720,18 @@ emit_ret(struct nv50_pc *pc, int pred, unsigned cc) emit_control_flow(pc, 0x3, pred, cc); } +static void +emit_prim_cmd(struct nv50_pc *pc, unsigned cmd) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xf0000000 | (cmd << 9); + e->inst[1] = 0xc0000000; + set_long(pc, e); + + emit(pc, e); +} + #define QOP_ADD 0 #define QOP_SUBR 1 #define QOP_SUB 2 @@ -1884,7 +2092,11 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e) case 0x5: /* SAD */ m = ~(0x81 << 8); - q = 0x0c << 24; + q = (0x0c << 24) | ((e->inst[0] & (0x7f << 2)) << 12); + break; + case 0x6: + /* MAD u16 */ + q = (e->inst[0] & (0x7f << 2)) << 12; break; case 0x8: /* INTERP (move centroid, perspective and flat bits) */ @@ -1964,8 +2176,11 @@ get_supported_mods(const struct tgsi_full_instruction *insn, int i) case TGSI_OPCODE_IMAX: case TGSI_OPCODE_IMIN: case TGSI_OPCODE_ISHR: + case TGSI_OPCODE_NOT: + case TGSI_OPCODE_UMAD: case TGSI_OPCODE_UMAX: case TGSI_OPCODE_UMIN: + case TGSI_OPCODE_UMUL: case TGSI_OPCODE_USHR: return NV50_MOD_I32; default: @@ -2059,14 +2274,19 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) { struct nv50_reg *r = pc->addr[dst->Register.Index * 4 + c]; if (!r) { - r = alloc_addr(pc, NULL); - pc->addr[dst->Register.Index * 4 + c] = r; + r = get_address_reg(pc, NULL); + r->index = dst->Register.Index * 4 + c; + pc->addr[r->index] = r; } assert(r); return r; } case TGSI_FILE_NULL: return NULL; + case TGSI_FILE_SYSTEM_VALUE: + assert(pc->sysval[dst->Register.Index].type == P_RESULT); + assert(c == 0); + return &pc->sysval[dst->Register.Index]; default: break; } @@ -2096,6 +2316,18 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, switch (src->Register.File) { case TGSI_FILE_INPUT: r = &pc->attr[src->Register.Index * 4 + c]; + + if (!src->Dimension.Dimension) + break; + r = reg_instance(pc, r); + r->vtx = src->Dimension.Index; + + if (!src->Dimension.Indirect) + break; + swz = tgsi_util_get_src_register_swizzle( + &src->DimIndirect, 0); + r->acc = -1; + r->indirect[1] = src->DimIndirect.Index * 4 + swz; break; case TGSI_FILE_TEMPORARY: r = &pc->temp[src->Register.Index * 4 + c]; @@ -2109,12 +2341,12 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, * use the index field to select the address reg. */ r = reg_instance(pc, NULL); + ctor_reg(r, P_CONST, -1, src->Register.Index * 4 + c); + swz = tgsi_util_get_src_register_swizzle( - &src->Indirect, 0); - ctor_reg(r, P_CONST, - src->Indirect.Index * 4 + swz, - src->Register.Index * 4 + c); + &src->Indirect, 0); r->acc = -1; + r->indirect[0] = src->Indirect.Index * 4 + swz; break; case TGSI_FILE_IMMEDIATE: r = &pc->immd[src->Register.Index * 4 + c]; @@ -2125,6 +2357,10 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, r = pc->addr[src->Register.Index * 4 + c]; assert(r); break; + case TGSI_FILE_SYSTEM_VALUE: + assert(c == 0); + r = &pc->sysval[src->Register.Index]; + break; default: assert(0); break; @@ -2153,7 +2389,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, } if ((r->mod & mod) != r->mod) { - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); emit_cvt(pc, temp, r, -1, cvn); r->mod = 0; r = temp; @@ -2161,7 +2397,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src, r->mod |= mod & NV50_MOD_I32; assert(r); - if (r->acc >= 0 && r != temp) + if (r->acc >= 0 && r->vtx < 0 && r != temp) return reg_instance(pc, r); /* will clear r->mod */ return r; } @@ -2341,7 +2577,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, brdc = temp = pc->r_brdc; if (brdc && brdc->type != P_TEMP) { - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); if (sat) brdc = temp; } else @@ -2350,7 +2586,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask & (1 << c)) || dst[c]->type == P_TEMP) continue; /* rdst[c] = dst[c]; */ /* done above */ - dst[c] = temp_temp(pc); + dst[c] = temp_temp(pc, NULL); } } @@ -2383,10 +2619,14 @@ nv50_program_tx_insn(struct nv50_pc *pc, } break; case TGSI_OPCODE_ARL: - assert(src[0][0]); - temp = temp_temp(pc); - emit_cvt(pc, temp, src[0][0], -1, CVT_FLOOR | CVT_S32_F32); - emit_arl(pc, dst[0], temp, 4); + temp = temp_temp(pc, NULL); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_cvt(pc, temp, src[0][c], -1, + CVT_FLOOR | CVT_S32_F32); + emit_arl(pc, dst[c], temp, 4); + } break; case TGSI_OPCODE_BGNLOOP: pc->loop_brka[pc->loop_lvl] = emit_breakaddr(pc); @@ -2441,7 +2681,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask &= 7)) break; if (temp == dst[3]) - temp = brdc = temp_temp(pc); + temp = brdc = temp_temp(pc, NULL); } emit_precossin(pc, temp, src[0][0]); emit_flop(pc, NV50_FLOP_COS, brdc, temp); @@ -2493,6 +2733,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->if_insn[pc->if_lvl++] = pc->p->exec_tail; terminate_mbb(pc); break; + case TGSI_OPCODE_EMIT: + emit_prim_cmd(pc, 1); + break; case TGSI_OPCODE_ENDIF: pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size; @@ -2516,8 +2759,12 @@ nv50_program_tx_insn(struct nv50_pc *pc, pc->loop_brka[pc->loop_lvl]->param.index = pc->p->exec_size; terminate_mbb(pc); break; + case TGSI_OPCODE_ENDPRIM: + emit_prim_cmd(pc, 2); + break; case TGSI_OPCODE_ENDSUB: assert(pc->in_subroutine); + terminate_mbb(pc); pc->in_subroutine = FALSE; break; case TGSI_OPCODE_EX2: @@ -2529,8 +2776,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, struct nv50_reg *t[2]; assert(!temp); - t[0] = temp_temp(pc); - t[1] = temp_temp(pc); + t[0] = temp_temp(pc, NULL); + t[1] = temp_temp(pc, NULL); if (mask & 0x6) emit_mov(pc, t[0], src[0][0]); @@ -2575,7 +2822,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, } break; case TGSI_OPCODE_FRC: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; @@ -2639,9 +2886,9 @@ nv50_program_tx_insn(struct nv50_pc *pc, { struct nv50_reg *t[2]; - t[0] = temp_temp(pc); + t[0] = temp_temp(pc, NULL); if (mask & (1 << 1)) - t[1] = temp_temp(pc); + t[1] = temp_temp(pc, NULL); else t[1] = t[0]; @@ -2664,7 +2911,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, } break; case TGSI_OPCODE_LRP: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); for (c = 0; c < 4; c++) { if (!(mask & (1 << c))) continue; @@ -2707,10 +2954,19 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_mul(pc, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_NOT: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_not(pc, dst[c], src[0][c]); + } + break; case TGSI_OPCODE_POW: emit_pow(pc, brdc, src[0][0], src[1][0]); break; case TGSI_OPCODE_RCP: + if (!sat && popcnt4(mask) == 1) + brdc = dst[ffs(mask) - 1]; emit_flop(pc, NV50_FLOP_RCP, brdc, src[0][0]); break; case TGSI_OPCODE_RET: @@ -2719,6 +2975,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_ret(pc, -1, 0); break; case TGSI_OPCODE_RSQ: + if (!sat && popcnt4(mask) == 1) + brdc = dst[ffs(mask) - 1]; src[0][0]->mod |= NV50_MOD_ABS; emit_flop(pc, NV50_FLOP_RSQ, brdc, src[0][0]); break; @@ -2730,7 +2988,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, } break; case TGSI_OPCODE_SCS: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); if (mask & 3) emit_precossin(pc, temp, src[0][0]); if (mask & (1 << 0)) @@ -2759,7 +3017,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, if (!(mask &= 7)) break; if (temp == dst[3]) - temp = brdc = temp_temp(pc); + temp = brdc = temp_temp(pc, NULL); } emit_precossin(pc, temp, src[0][0]); emit_flop(pc, NV50_FLOP_SIN, brdc, temp); @@ -2847,8 +3105,41 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_minmax(pc, 0x0a4, dst[c], src[0][c], src[1][c]); } break; + case TGSI_OPCODE_UMAD: + { + assert(!temp); + temp = temp_temp(pc, NULL); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mul_u16(pc, temp, src[0][c], 0, src[1][c], 1); + emit_mad_u16(pc, temp, src[0][c], 1, src[1][c], 0, + temp); + emit_shl_imm(pc, temp, temp, 16); + emit_mad_u16(pc, temp, src[0][c], 0, src[1][c], 0, + temp); + emit_add_b32(pc, dst[c], temp, src[2][c]); + } + } + break; + case TGSI_OPCODE_UMUL: + { + assert(!temp); + temp = temp_temp(pc, NULL); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mul_u16(pc, temp, src[0][c], 0, src[1][c], 1); + emit_mad_u16(pc, temp, src[0][c], 1, src[1][c], 0, + temp); + emit_shl_imm(pc, temp, temp, 16); + emit_mad_u16(pc, dst[c], src[0][c], 0, src[1][c], 0, + temp); + } + } + break; case TGSI_OPCODE_XPD: - temp = temp_temp(pc); + temp = temp_temp(pc, NULL); if (mask & (1 << 0)) { emit_mul(pc, temp, src[0][2], src[1][1]); emit_msb(pc, dst[0], src[0][1], src[1][2], temp); @@ -2876,6 +3167,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, emit_nop(pc); pc->p->exec_tail->inst[1] |= 1; /* set exit bit */ + + terminate_mbb(pc); break; default: NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); @@ -2902,7 +3195,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, } } - kill_temp_temp(pc); + kill_temp_temp(pc, NULL); pc->reg_instance_nr = 0; return TRUE; @@ -3101,7 +3394,7 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) if (is_scalar_op(insn.Instruction.Opcode)) { pc->r_brdc = tgsi_broadcast_dst(pc, fd, deqs); if (!pc->r_brdc) - pc->r_brdc = temp_temp(pc); + pc->r_brdc = temp_temp(pc, NULL); return nv50_program_tx_insn(pc, &insn); } pc->r_brdc = NULL; @@ -3171,17 +3464,40 @@ load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg) * value of 0 for back-facing, and 0xffffffff for front-facing. */ static void -load_frontfacing(struct nv50_pc *pc, struct nv50_reg *a) +load_frontfacing(struct nv50_pc *pc, struct nv50_reg *sv) { - struct nv50_reg *one = alloc_immd(pc, 1.0f); + struct nv50_reg *temp = alloc_temp(pc, NULL); + int r_pred = 0; - assert(a->rhw == -1); - alloc_reg(pc, a); /* do this before rhw is set */ - a->rhw = 255; - load_interpolant(pc, a); - emit_bitop2(pc, a, a, one, TGSI_OPCODE_AND); + temp->rhw = 255; + emit_interp(pc, temp, NULL, INTERP_FLAT); - FREE(one); + emit_cvt(pc, sv, temp, r_pred, CVT_ABS | CVT_F32_S32); + + emit_not(pc, temp, temp); + set_pred(pc, 0x2, r_pred, pc->p->exec_tail); + emit_cvt(pc, sv, temp, -1, CVT_F32_S32); + set_pred(pc, 0x2, r_pred, pc->p->exec_tail); + + free_temp(pc, temp); +} + +static void +copy_semantic_info(struct nv50_program *p) +{ + unsigned i, id; + + for (i = 0; i < p->cfg.in_nr; ++i) { + id = p->cfg.in[i].id; + p->cfg.in[i].sn = p->info.input_semantic_name[id]; + p->cfg.in[i].si = p->info.input_semantic_index[id]; + } + + for (i = 0; i < p->cfg.out_nr; ++i) { + id = p->cfg.out[i].id; + p->cfg.out[i].sn = p->info.output_semantic_name[id]; + p->cfg.out[i].si = p->info.output_semantic_index[id]; + } } static boolean @@ -3190,7 +3506,7 @@ nv50_program_tx_prep(struct nv50_pc *pc) struct tgsi_parse_context tp; struct nv50_program *p = pc->p; boolean ret = FALSE; - unsigned i, c, flat_nr = 0; + unsigned i, c, instance_id, vertex_id, flat_nr = 0; tgsi_parse_init(&tp, pc->p->pipe.tokens); while (!tgsi_parse_end_of_tokens(&tp)) { @@ -3230,13 +3546,13 @@ nv50_program_tx_prep(struct nv50_pc *pc) switch (d->Semantic.Name) { case TGSI_SEMANTIC_BCOLOR: p->cfg.two_side[si].hw = first; - if (p->cfg.io_nr > first) - p->cfg.io_nr = first; + if (p->cfg.out_nr > first) + p->cfg.out_nr = first; break; case TGSI_SEMANTIC_PSIZE: p->cfg.psiz = first; - if (p->cfg.io_nr > first) - p->cfg.io_nr = first; + if (p->cfg.out_nr > first) + p->cfg.out_nr = first; break; case TGSI_SEMANTIC_EDGEFLAG: pc->edgeflag_out = first; @@ -3276,6 +3592,37 @@ nv50_program_tx_prep(struct nv50_pc *pc) pc->interp_mode[i] = mode; } break; + case TGSI_FILE_SYSTEM_VALUE: + assert(d->Declaration.Semantic); + switch (d->Semantic.Name) { + case TGSI_SEMANTIC_FACE: + assert(p->type == PIPE_SHADER_FRAGMENT); + load_frontfacing(pc, + &pc->sysval[first]); + break; + case TGSI_SEMANTIC_INSTANCEID: + assert(p->type == PIPE_SHADER_VERTEX); + instance_id = first; + p->cfg.regs[0] |= (1 << 4); + break; + case TGSI_SEMANTIC_PRIMID: + assert(p->type != PIPE_SHADER_VERTEX); + p->cfg.prim_id = first; + break; + /* + case TGSI_SEMANTIC_PRIMIDIN: + assert(p->type == PIPE_SHADER_GEOMETRY); + pc->sysval[first].hw = 6; + p->cfg.regs[0] |= (1 << 8); + break; + case TGSI_SEMANTIC_VERTEXID: + assert(p->type == PIPE_SHADER_VERTEX); + vertex_id = first; + p->cfg.regs[0] |= (1 << 12) | (1 << 0); + break; + */ + } + break; case TGSI_FILE_ADDRESS: case TGSI_FILE_CONSTANT: case TGSI_FILE_SAMPLER: @@ -3296,68 +3643,96 @@ nv50_program_tx_prep(struct nv50_pc *pc) } } - if (p->type == PIPE_SHADER_VERTEX) { + if (p->type == PIPE_SHADER_VERTEX || p->type == PIPE_SHADER_GEOMETRY) { int rid = 0; - for (i = 0; i < pc->attr_nr * 4; ++i) { - if (pc->attr[i].acc) { - pc->attr[i].hw = rid++; - p->cfg.attr[i / 32] |= 1 << (i % 32); + if (p->type == PIPE_SHADER_GEOMETRY) { + for (i = 0; i < pc->attr_nr; ++i) { + p->cfg.in[i].hw = rid; + p->cfg.in[i].id = i; + + for (c = 0; c < 4; ++c) { + int n = i * 4 + c; + if (!pc->attr[n].acc) + continue; + pc->attr[n].hw = rid++; + p->cfg.in[i].mask |= 1 << c; + } + } + } else { + for (i = 0; i < pc->attr_nr * 4; ++i) { + if (pc->attr[i].acc) { + pc->attr[i].hw = rid++; + p->cfg.attr[i / 32] |= 1 << (i % 32); + } } + if (p->cfg.regs[0] & (1 << 0)) + pc->sysval[vertex_id].hw = rid++; + if (p->cfg.regs[0] & (1 << 4)) + pc->sysval[instance_id].hw = rid++; } for (i = 0, rid = 0; i < pc->result_nr; ++i) { - p->cfg.io[i].hw = rid; - p->cfg.io[i].id = i; + p->cfg.out[i].hw = rid; + p->cfg.out[i].id = i; for (c = 0; c < 4; ++c) { int n = i * 4 + c; if (!pc->result[n].acc) continue; pc->result[n].hw = rid++; - p->cfg.io[i].mask |= 1 << c; + p->cfg.out[i].mask |= 1 << c; } } + if (p->cfg.prim_id < 0x40) { + /* GP has to write to PrimitiveID */ + ctor_reg(&pc->sysval[p->cfg.prim_id], + P_RESULT, p->cfg.prim_id, rid); + p->cfg.prim_id = rid++; + } for (c = 0; c < 2; ++c) if (p->cfg.two_side[c].hw < 0x40) - p->cfg.two_side[c] = p->cfg.io[ + p->cfg.two_side[c] = p->cfg.out[ p->cfg.two_side[c].hw]; if (p->cfg.psiz < 0x40) - p->cfg.psiz = p->cfg.io[p->cfg.psiz].hw; + p->cfg.psiz = p->cfg.out[p->cfg.psiz].hw; + + copy_semantic_info(p); } else if (p->type == PIPE_SHADER_FRAGMENT) { - int rid, aid; + int rid, aid, base; unsigned n = 0, m = pc->attr_nr - flat_nr; pc->allow32 = TRUE; - int base = (TGSI_SEMANTIC_POSITION == - p->info.input_semantic_name[0]) ? 0 : 1; + base = (TGSI_SEMANTIC_POSITION == + p->info.input_semantic_name[0]) ? 0 : 1; /* non-flat interpolants have to be mapped to * the lower hardware IDs, so sort them: */ for (i = 0; i < pc->attr_nr; i++) { if (pc->interp_mode[i] == INTERP_FLAT) - p->cfg.io[m++].id = i; + p->cfg.in[m++].id = i; else { if (!(pc->interp_mode[i] & INTERP_PERSPECTIVE)) - p->cfg.io[n].linear = TRUE; - p->cfg.io[n++].id = i; + p->cfg.in[n].linear = TRUE; + p->cfg.in[n++].id = i; } } + copy_semantic_info(p); if (!base) /* set w-coordinate mask from perspective interp */ - p->cfg.io[0].mask |= p->cfg.regs[1] >> 24; + p->cfg.in[0].mask |= p->cfg.regs[1] >> 24; aid = popcnt4( /* if fcrd isn't contained in cfg.io */ - base ? (p->cfg.regs[1] >> 24) : p->cfg.io[0].mask); + base ? (p->cfg.regs[1] >> 24) : p->cfg.in[0].mask); for (n = 0; n < pc->attr_nr; ++n) { - p->cfg.io[n].hw = rid = aid; - i = p->cfg.io[n].id; + p->cfg.in[n].hw = rid = aid; + i = p->cfg.in[n].id; if (p->info.input_semantic_name[n] == TGSI_SEMANTIC_FACE) { @@ -3369,15 +3744,15 @@ nv50_program_tx_prep(struct nv50_pc *pc) if (!pc->attr[i * 4 + c].acc) continue; pc->attr[i * 4 + c].rhw = rid++; - p->cfg.io[n].mask |= 1 << c; + p->cfg.in[n].mask |= 1 << c; load_interpolant(pc, &pc->attr[i * 4 + c]); } - aid += popcnt4(p->cfg.io[n].mask); + aid += popcnt4(p->cfg.in[n].mask); } if (!base) - p->cfg.regs[1] |= p->cfg.io[0].mask << 24; + p->cfg.regs[1] |= p->cfg.in[0].mask << 24; m = popcnt4(p->cfg.regs[1] >> 24); @@ -3387,32 +3762,33 @@ nv50_program_tx_prep(struct nv50_pc *pc) p->cfg.regs[1] |= aid - m; if (flat_nr) { - i = p->cfg.io[pc->attr_nr - flat_nr].hw; + i = p->cfg.in[pc->attr_nr - flat_nr].hw; p->cfg.regs[1] |= (i - m) << 16; } else p->cfg.regs[1] |= p->cfg.regs[1] << 16; /* mark color semantic for light-twoside */ - n = 0x40; - for (i = 0; i < pc->attr_nr; i++) { - ubyte si, sn; - - sn = p->info.input_semantic_name[p->cfg.io[i].id]; - si = p->info.input_semantic_index[p->cfg.io[i].id]; - - if (sn == TGSI_SEMANTIC_COLOR) { - p->cfg.two_side[si] = p->cfg.io[i]; - - /* increase colour count */ - p->cfg.regs[0] += popcnt4( - p->cfg.two_side[si].mask) << 16; - - n = MIN2(n, p->cfg.io[i].hw - m); + n = 0x80; + for (i = 0; i < p->cfg.in_nr; i++) { + if (p->cfg.in[i].sn == TGSI_SEMANTIC_COLOR) { + n = MIN2(n, p->cfg.in[i].hw - m); + p->cfg.two_side[p->cfg.in[i].si] = p->cfg.in[i]; + + p->cfg.regs[0] += /* increase colour count */ + popcnt4(p->cfg.in[i].mask) << 16; } } - if (n < 0x40) + if (n < 0x80) p->cfg.regs[0] += n; + if (p->cfg.prim_id < 0x40) { + pc->sysval[p->cfg.prim_id].rhw = rid++; + emit_interp(pc, &pc->sysval[p->cfg.prim_id], NULL, + INTERP_FLAT); + /* increase FP_INTERPOLANT_CTRL_COUNT */ + p->cfg.regs[1] += 1; + } + /* Initialize FP results: * FragDepth is always first TGSI and last hw output */ @@ -3466,10 +3842,31 @@ free_nv50_pc(struct nv50_pc *pc) FREE(pc->attr); if (pc->temp) FREE(pc->temp); + if (pc->sysval) + FREE(pc->sysval); + if (pc->insn_pos) + FREE(pc->insn_pos); FREE(pc); } +static INLINE uint32_t +nv50_map_gs_output_prim(unsigned pprim) +{ + switch (pprim) { + case PIPE_PRIM_POINTS: + return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_POINTS; + case PIPE_PRIM_LINE_STRIP: + return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_LINE_STRIP; + case PIPE_PRIM_TRIANGLE_STRIP: + return NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE_TRIANGLE_STRIP; + default: + NOUVEAU_ERR("invalid GS_OUTPUT_PRIMITIVE: %u\n", pprim); + abort(); + return 0; + } +} + static boolean ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) { @@ -3483,25 +3880,55 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1; pc->addr_nr = p->info.file_max[TGSI_FILE_ADDRESS] + 1; assert(pc->addr_nr <= 2); + pc->sysval_nr = p->info.file_max[TGSI_FILE_SYSTEM_VALUE] + 1; p->cfg.high_temp = 4; p->cfg.two_side[0].hw = 0x40; p->cfg.two_side[1].hw = 0x40; + p->cfg.prim_id = 0x40; p->cfg.edgeflag_in = pc->edgeflag_out = 0xff; + for (i = 0; i < p->info.num_properties; ++i) { + unsigned *data = &p->info.properties[i].data[0]; + + switch (p->info.properties[i].name) { + case TGSI_PROPERTY_GS_OUTPUT_PRIM: + p->cfg.prim_type = nv50_map_gs_output_prim(data[0]); + break; + case TGSI_PROPERTY_GS_MAX_VERTICES: + p->cfg.vert_count = data[0]; + break; + default: + break; + } + } + switch (p->type) { case PIPE_SHADER_VERTEX: p->cfg.psiz = 0x40; p->cfg.clpd = 0x40; - p->cfg.io_nr = pc->result_nr; + p->cfg.out_nr = pc->result_nr; + break; + case PIPE_SHADER_GEOMETRY: + assert(p->cfg.prim_type); + assert(p->cfg.vert_count); + + p->cfg.psiz = 0x80; + p->cfg.clpd = 0x80; + p->cfg.prim_id = 0x80; + p->cfg.out_nr = pc->result_nr; + p->cfg.in_nr = pc->attr_nr; + + p->cfg.two_side[0].hw = 0x80; + p->cfg.two_side[1].hw = 0x80; break; case PIPE_SHADER_FRAGMENT: rtype[0] = rtype[1] = P_TEMP; p->cfg.regs[0] = 0x01000004; - p->cfg.io_nr = pc->attr_nr; + p->cfg.in_nr = pc->attr_nr; if (p->info.writes_z) { p->cfg.regs[2] |= 0x00000100; @@ -3559,7 +3986,16 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p) return FALSE; } for (i = 0; i < NV50_SU_MAX_ADDR; ++i) - ctor_reg(&pc->r_addr[i], P_ADDR, -256, i + 1); + ctor_reg(&pc->r_addr[i], P_ADDR, -1, i + 1); + + if (pc->sysval_nr) { + pc->sysval = CALLOC(pc->sysval_nr, sizeof(struct nv50_reg *)); + if (!pc->sysval) + return FALSE; + /* will only ever use SYSTEM_VALUE[i].x (hopefully) */ + for (i = 0; i < pc->sysval_nr; ++i) + ctor_reg(&pc->sysval[i], rtype[0], i, -1); + } return TRUE; } @@ -3721,13 +4157,17 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) if (p->param_nr) { unsigned cb; - uint32_t *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type], + uint32_t *map = pipe_buffer_map(pscreen, + nv50->constbuf[p->type], PIPE_BUFFER_USAGE_CPU_READ); - - if (p->type == PIPE_SHADER_VERTEX) + switch (p->type) { + case PIPE_SHADER_GEOMETRY: cb = NV50_CB_PGP; break; + case PIPE_SHADER_FRAGMENT: cb = NV50_CB_PFP; break; + default: cb = NV50_CB_PVP; - else - cb = NV50_CB_PFP; + assert(p->type == PIPE_SHADER_VERTEX); + break; + } nv50_program_upload_data(nv50, map, 0, p->param_nr, cb); pipe_buffer_unmap(pscreen, nv50->constbuf[p->type]); @@ -3821,19 +4261,18 @@ nv50_vertprog_validate(struct nv50_context *nv50) nv50_program_validate_data(nv50, p); nv50_program_validate_code(nv50, p); - so = so_new(5, 8, 2); + so = so_new(5, 7, 2); so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_HIGH, 0, 0); + NOUVEAU_BO_HIGH, 0, 0); so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | - NOUVEAU_BO_LOW, 0, 0); + NOUVEAU_BO_LOW, 0, 0); so_method(so, tesla, NV50TCL_VP_ATTR_EN_0, 2); so_data (so, p->cfg.attr[0]); so_data (so, p->cfg.attr[1]); so_method(so, tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1); so_data (so, p->cfg.high_result); - so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 2); - so_data (so, p->cfg.high_result); //8); + so_method(so, tesla, NV50TCL_VP_REG_ALLOC_TEMP, 1); so_data (so, p->cfg.high_temp); so_method(so, tesla, NV50TCL_VP_START_ID, 1); so_data (so, 0); /* program start offset */ @@ -3877,12 +4316,51 @@ nv50_fragprog_validate(struct nv50_context *nv50) so_ref(NULL, &so); } -static void +void +nv50_geomprog_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_program *p = nv50->geomprog; + struct nouveau_stateobj *so; + + if (!p->translated) { + nv50_program_validate(nv50, p); + if (!p->translated) + assert(0); + } + + nv50_program_validate_data(nv50, p); + nv50_program_validate_code(nv50, p); + + so = so_new(6, 7, 2); + so_method(so, tesla, NV50TCL_GP_ADDRESS_HIGH, 2); + so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, tesla, NV50TCL_GP_REG_ALLOC_TEMP, 1); + so_data (so, p->cfg.high_temp); + so_method(so, tesla, NV50TCL_GP_REG_ALLOC_RESULT, 1); + so_data (so, p->cfg.high_result); + so_method(so, tesla, NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE, 1); + so_data (so, p->cfg.prim_type); + so_method(so, tesla, NV50TCL_GP_VERTEX_OUTPUT_COUNT, 1); + so_data (so, p->cfg.vert_count); + so_method(so, tesla, NV50TCL_GP_START_ID, 1); + so_data (so, 0); + so_ref(so, &nv50->state.geomprog); + so_ref(NULL, &so); +} + +static uint32_t nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) { + struct nv50_program *vp; struct nv50_program *fp = nv50->fragprog; - struct nv50_program *vp = nv50->vertprog; unsigned i, c, m = base; + uint32_t origin = 0x00000010; + + vp = nv50->geomprog ? nv50->geomprog : nv50->vertprog; /* XXX: this might not work correctly in all cases yet - we'll * just assume that an FP generic input that is not written in @@ -3890,58 +4368,59 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base) */ memset(pntc, 0, 8 * sizeof(uint32_t)); - for (i = 0; i < fp->cfg.io_nr; i++) { - uint8_t sn, si; - uint8_t j, k = fp->cfg.io[i].id; - unsigned n = popcnt4(fp->cfg.io[i].mask); + for (i = 0; i < fp->cfg.in_nr; i++) { + unsigned j, n = popcnt4(fp->cfg.in[i].mask); - if (fp->info.input_semantic_name[k] != TGSI_SEMANTIC_GENERIC) { + if (fp->cfg.in[i].sn != TGSI_SEMANTIC_GENERIC) { m += n; continue; } - for (j = 0; j < vp->info.num_outputs; ++j) { - sn = vp->info.output_semantic_name[j]; - si = vp->info.output_semantic_index[j]; - - if (sn == fp->info.input_semantic_name[k] && - si == fp->info.input_semantic_index[k]) + for (j = 0; j < vp->cfg.out_nr; ++j) + if (vp->cfg.out[j].sn == fp->cfg.in[i].sn && + vp->cfg.out[j].si == fp->cfg.in[i].si) break; - } - if (j < vp->info.num_outputs) { - ubyte mode = - nv50->rasterizer->pipe.sprite_coord_mode[si]; + if (j < vp->cfg.out_nr) { + ubyte mode = nv50->rasterizer->pipe.sprite_coord_mode[ + vp->cfg.out[j].si]; if (mode == PIPE_SPRITE_COORD_NONE) { m += n; continue; - } + } else + if (mode == PIPE_SPRITE_COORD_LOWER_LEFT) + origin = 0; } /* this is either PointCoord or replaced by sprite coords */ for (c = 0; c < 4; c++) { - if (!(fp->cfg.io[i].mask & (1 << c))) + if (!(fp->cfg.in[i].mask & (1 << c))) continue; pntc[m / 8] |= (c + 1) << ((m % 8) * 4); ++m; } } + return origin; } static int -nv50_sreg4_map(uint32_t *p_map, int mid, uint32_t lin[4], - struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo) +nv50_vec4_map(uint32_t *map32, int mid, uint8_t zval, uint32_t lin[4], + struct nv50_sreg4 *fpi, struct nv50_sreg4 *vpo) { int c; uint8_t mv = vpo->mask, mf = fpi->mask, oid = vpo->hw; - uint8_t *map = (uint8_t *)p_map; + uint8_t *map = (uint8_t *)map32; for (c = 0; c < 4; ++c) { if (mf & 1) { if (fpi->linear == TRUE) lin[mid / 32] |= 1 << (mid % 32); - map[mid++] = (mv & 1) ? oid : ((c == 3) ? 0x41 : 0x40); + if (mv & 1) + map[mid] = oid; + else + map[mid] = (c == 3) ? (zval + 1) : zval; + ++mid; } oid += mv & 1; @@ -3953,34 +4432,42 @@ nv50_sreg4_map(uint32_t *p_map, int mid, uint32_t lin[4], } void -nv50_linkage_validate(struct nv50_context *nv50) +nv50_fp_linkage_validate(struct nv50_context *nv50) { struct nouveau_grobj *tesla = nv50->screen->tesla; struct nv50_program *vp = nv50->vertprog; struct nv50_program *fp = nv50->fragprog; struct nouveau_stateobj *so; - struct nv50_sreg4 dummy, *vpo; + struct nv50_sreg4 dummy; int i, n, c, m = 0; - uint32_t map[16], lin[4], reg[5], pcrd[8]; + uint32_t map[16], lin[4], reg[6], pcrd[8]; + uint8_t zval = 0x40; + if (nv50->geomprog) { + vp = nv50->geomprog; + zval = 0x80; + } memset(map, 0, sizeof(map)); memset(lin, 0, sizeof(lin)); reg[1] = 0x00000004; /* low and high clip distance map ids */ reg[2] = 0x00000000; /* layer index map id (disabled, GP only) */ reg[3] = 0x00000000; /* point size map id & enable */ + reg[5] = 0x00000000; /* primitive ID map slot */ reg[0] = fp->cfg.regs[0]; /* colour semantic reg */ reg[4] = fp->cfg.regs[1]; /* interpolant info */ dummy.linear = FALSE; dummy.mask = 0xf; /* map all components of HPOS */ - m = nv50_sreg4_map(map, m, lin, &dummy, &vp->cfg.io[0]); + m = nv50_vec4_map(map, m, zval, lin, &dummy, &vp->cfg.out[0]); dummy.mask = 0x0; if (vp->cfg.clpd < 0x40) { - for (c = 0; c < vp->cfg.clpd_nr; ++c) - map[m++] = vp->cfg.clpd + c; + for (c = 0; c < vp->cfg.clpd_nr; ++c) { + map[m / 4] |= (vp->cfg.clpd + c) << ((m % 4) * 8); + ++m; + } reg[1] = (m << 8); } @@ -3988,35 +4475,37 @@ nv50_linkage_validate(struct nv50_context *nv50) /* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */ if (nv50->rasterizer->pipe.light_twoside) { - vpo = &vp->cfg.two_side[0]; + struct nv50_sreg4 *vpo = &vp->cfg.two_side[0]; + struct nv50_sreg4 *fpi = &fp->cfg.two_side[0]; - m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[0], &vpo[0]); - m = nv50_sreg4_map(map, m, lin, &fp->cfg.two_side[1], &vpo[1]); + m = nv50_vec4_map(map, m, zval, lin, &fpi[0], &vpo[0]); + m = nv50_vec4_map(map, m, zval, lin, &fpi[1], &vpo[1]); } reg[0] += m - 4; /* adjust FFC0 id */ reg[4] |= m << 8; /* set mid where 'normal' FP inputs start */ - for (i = 0; i < fp->cfg.io_nr; i++) { - ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id]; - ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id]; - - /* position must be mapped first */ - assert(i == 0 || sn != TGSI_SEMANTIC_POSITION); - + for (i = 0; i < fp->cfg.in_nr; i++) { /* maybe even remove these from cfg.io */ - if (sn == TGSI_SEMANTIC_POSITION || sn == TGSI_SEMANTIC_FACE) + if (fp->cfg.in[i].sn == TGSI_SEMANTIC_POSITION || + fp->cfg.in[i].sn == TGSI_SEMANTIC_FACE) continue; - /* VP outputs and vp->cfg.io are in the same order */ - for (n = 0; n < vp->info.num_outputs; ++n) { - if (vp->info.output_semantic_name[n] == sn && - vp->info.output_semantic_index[n] == si) + for (n = 0; n < vp->cfg.out_nr; ++n) + if (vp->cfg.out[n].sn == fp->cfg.in[i].sn && + vp->cfg.out[n].si == fp->cfg.in[i].si) break; - } - vpo = (n < vp->info.num_outputs) ? &vp->cfg.io[n] : &dummy; - m = nv50_sreg4_map(map, m, lin, &fp->cfg.io[i], vpo); + m = nv50_vec4_map(map, m, zval, lin, &fp->cfg.in[i], + (n < vp->cfg.out_nr) ? + &vp->cfg.out[n] : &dummy); + } + /* PrimitiveID either is replaced by the system value, or + * written by the geometry shader into an output register + */ + if (fp->cfg.prim_id < 0x40) { + map[m / 4] |= vp->cfg.prim_id << ((m % 4) * 8); + reg[5] = m++; } if (nv50->rasterizer->pipe.point_size_per_vertex) { @@ -4024,14 +4513,28 @@ nv50_linkage_validate(struct nv50_context *nv50) reg[3] = (m++ << 4) | 1; } - /* now fill the stateobj */ - so = so_new(6, 58, 0); + /* now fill the stateobj (at most 28 so_data) */ + so = so_new(10, 54, 0); n = (m + 3) / 4; - so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); - so_data (so, m); - so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n); - so_datap (so, map, n); + assert(m <= 32); + if (vp->type == PIPE_SHADER_GEOMETRY) { + so_method(so, tesla, NV50TCL_GP_RESULT_MAP_SIZE, 1); + so_data (so, m); + so_method(so, tesla, NV50TCL_GP_RESULT_MAP(0), n); + so_datap (so, map, n); + } else { + so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1); + so_data (so, vp->cfg.regs[0]); + + so_method(so, tesla, NV50TCL_MAP_SEMANTIC_4, 1); + so_data (so, reg[5]); + + so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); + so_data (so, m); + so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), n); + so_datap (so, map, n); + } so_method(so, tesla, NV50TCL_MAP_SEMANTIC_0, 4); so_datap (so, reg, 4); @@ -4043,14 +4546,85 @@ nv50_linkage_validate(struct nv50_context *nv50) so_datap (so, lin, 4); if (nv50->rasterizer->pipe.point_sprite) { - nv50_pntc_replace(nv50, pcrd, (reg[4] >> 8) & 0xff); + so_method(so, tesla, NV50TCL_POINT_SPRITE_CTRL, 1); + so_data (so, + nv50_pntc_replace(nv50, pcrd, (reg[4] >> 8) & 0xff)); so_method(so, tesla, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8); so_datap (so, pcrd, 8); } - so_ref(so, &nv50->state.programs); - so_ref(NULL, &so); + so_method(so, tesla, NV50TCL_GP_ENABLE, 1); + so_data (so, (vp->type == PIPE_SHADER_GEOMETRY) ? 1 : 0); + + so_ref(so, &nv50->state.fp_linkage); + so_ref(NULL, &so); +} + +static int +construct_vp_gp_mapping(uint32_t *map32, int m, + struct nv50_program *vp, struct nv50_program *gp) +{ + uint8_t *map = (uint8_t *)map32; + int i, j, c; + + for (i = 0; i < gp->cfg.in_nr; ++i) { + uint8_t oid, mv = 0, mg = gp->cfg.in[i].mask; + + for (j = 0; j < vp->cfg.out_nr; ++j) { + if (vp->cfg.out[j].sn == gp->cfg.in[i].sn && + vp->cfg.out[j].si == gp->cfg.in[i].si) { + mv = vp->cfg.out[j].mask; + oid = vp->cfg.out[j].hw; + break; + } + } + + for (c = 0; c < 4; ++c, mv >>= 1, mg >>= 1) { + if (mg & mv & 1) + map[m++] = oid; + else + if (mg & 1) + map[m++] = (c == 3) ? 0x41 : 0x40; + oid += mv & 1; + } + } + return m; +} + +void +nv50_gp_linkage_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so; + struct nv50_program *vp = nv50->vertprog; + struct nv50_program *gp = nv50->geomprog; + uint32_t map[16]; + int m = 0; + + if (!gp) { + so_ref(NULL, &nv50->state.gp_linkage); + return; + } + memset(map, 0, sizeof(map)); + + m = construct_vp_gp_mapping(map, m, vp, gp); + + so = so_new(3, 24 - 3, 0); + + so_method(so, tesla, NV50TCL_VP_GP_BUILTIN_ATTR_EN, 1); + so_data (so, vp->cfg.regs[0] | gp->cfg.regs[0]); + + assert(m <= 32); + so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1); + so_data (so, m); + + m = (m + 3) / 4; + so_method(so, tesla, NV50TCL_VP_RESULT_MAP(0), m); + so_datap (so, map, m); + + so_ref(so, &nv50->state.gp_linkage); + so_ref(NULL, &so); } void diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 461fec1d89c..1e3ad6bff05 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -16,11 +16,13 @@ struct nv50_program_exec { }; struct nv50_sreg4 { - uint8_t hw; - uint8_t id; /* tgsi index, nv50 needs them sorted: flat ones last */ + uint8_t hw; /* hw index, nv50 wants flat FP inputs last */ + uint8_t id; /* tgsi index */ uint8_t mask; boolean linear; + + ubyte sn, si; /* semantic name & index */ }; struct nv50_program { @@ -49,16 +51,24 @@ struct nv50_program { uint32_t regs[4]; /* for VPs, io_nr doesn't count 'private' results (PSIZ etc.) */ - unsigned io_nr; - struct nv50_sreg4 io[PIPE_MAX_SHADER_OUTPUTS]; + unsigned in_nr, out_nr; + struct nv50_sreg4 in[PIPE_MAX_SHADER_INPUTS]; + struct nv50_sreg4 out[PIPE_MAX_SHADER_OUTPUTS]; /* FP colour inputs, VP/GP back colour outputs */ struct nv50_sreg4 two_side[2]; - /* VP only */ + /* GP only */ + unsigned vert_count; + uint8_t prim_type; + + /* VP & GP only */ uint8_t clpd, clpd_nr; uint8_t psiz; uint8_t edgeflag_in; + + /* FP & GP only */ + uint8_t prim_id; } cfg; }; diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index 5d9e18218ae..5a4ab3508b8 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -111,7 +111,7 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, if (!q->ready) { ret = nouveau_bo_map(q->bo, NOUVEAU_BO_RD | - wait ? 0 : NOUVEAU_BO_NOWAIT); + (wait ? 0 : NOUVEAU_BO_NOWAIT)); if (ret) return false; q->result = ((uint32_t *)q->bo->map)[1]; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 28e2b35deaa..73205652cc4 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -167,7 +167,7 @@ nv50_screen_destroy(struct pipe_screen *pscreen) struct nv50_screen *screen = nv50_screen(pscreen); unsigned i; - for (i = 0; i < 2; i++) { + for (i = 0; i < 3; i++) { if (screen->constbuf_parm[i]) nouveau_bo_ref(NULL, &screen->constbuf_parm[i]); } @@ -329,7 +329,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_ref(NULL, &so); /* Static tesla init */ - so = so_new(40, 84, 20); + so = so_new(44, 90, 22); so_method(so, screen->tesla, NV50TCL_COND_MODE, 1); so_data (so, NV50TCL_COND_MODE_ALWAYS); @@ -352,10 +352,11 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_data (so, 0xf); /* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */ - so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(0), 1); - so_data (so, 0x54); - so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(2), 1); - so_data (so, 0x54); + for (i = 0; i < 3; ++i) { + so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(i), 1); + so_data (so, 0x54); + } + /* origin is top left (set to 1 for bottom left) */ so_method(so, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1); so_data (so, 0); @@ -370,7 +371,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) return NULL; } - for (i = 0; i < 2; i++) { + for (i = 0; i < 3; i++) { ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (128 * 4) * 4, &screen->constbuf_parm[i]); if (ret) { @@ -406,22 +407,33 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000001 | (NV50_CB_PMISC << 12)); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); + so_data (so, 0x00000021 | (NV50_CB_PMISC << 12)); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000031 | (NV50_CB_PMISC << 12)); so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); - so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf_parm[0], 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_VERTEX], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_VERTEX], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, (NV50_CB_PVP << 16) | 0x00000800); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000101 | (NV50_CB_PVP << 12)); so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); - so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, screen->constbuf_parm[1], 0, NOUVEAU_BO_VRAM | - NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_GEOMETRY], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_GEOMETRY], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_PGP << 16) | 0x00000800); + so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); + so_data (so, 0x00000121 | (NV50_CB_PGP << 12)); + + so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_FRAGMENT], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf_parm[PIPE_SHADER_FRAGMENT], 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, (NV50_CB_PFP << 16) | 0x00000800); so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1); so_data (so, 0x00000131 | (NV50_CB_PFP << 12)); diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h index a038a4e3c2a..7a155ca9c45 100644 --- a/src/gallium/drivers/nv50/nv50_screen.h +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -18,10 +18,10 @@ struct nv50_screen { struct nouveau_notifier *sync; struct nouveau_bo *constbuf_misc[1]; - struct nouveau_bo *constbuf_parm[2]; + struct nouveau_bo *constbuf_parm[PIPE_SHADER_TYPES]; struct nouveau_resource *immd_heap[1]; - struct nouveau_resource *parm_heap[2]; + struct nouveau_resource *parm_heap[PIPE_SHADER_TYPES]; struct nouveau_bo *tic; struct nouveau_bo *tsc; diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 1f67df814b1..6ab33be663d 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -531,7 +531,7 @@ nv50_vp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv50_program *p = hwcso; nv50_program_destroy(nv50, p); - FREE((void*)p->pipe.tokens); + FREE((void *)p->pipe.tokens); FREE(p); } @@ -563,7 +563,39 @@ nv50_fp_state_delete(struct pipe_context *pipe, void *hwcso) struct nv50_program *p = hwcso; nv50_program_destroy(nv50, p); - FREE((void*)p->pipe.tokens); + FREE((void *)p->pipe.tokens); + FREE(p); +} + +static void * +nv50_gp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv50_program *p = CALLOC_STRUCT(nv50_program); + + p->pipe.tokens = tgsi_dup_tokens(cso->tokens); + p->type = PIPE_SHADER_GEOMETRY; + tgsi_scan_shader(p->pipe.tokens, &p->info); + return (void *)p; +} + +static void +nv50_gp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->fragprog = hwcso; + nv50->dirty |= NV50_NEW_GEOMPROG; +} + +static void +nv50_gp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_program *p = hwcso; + + nv50_program_destroy(nv50, p); + FREE((void *)p->pipe.tokens); FREE(p); } @@ -585,17 +617,21 @@ nv50_set_clip_state(struct pipe_context *pipe, static void nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, - const struct pipe_constant_buffer *buf ) + struct pipe_buffer *buf ) { struct nv50_context *nv50 = nv50_context(pipe); if (shader == PIPE_SHADER_VERTEX) { - nv50->constbuf[PIPE_SHADER_VERTEX] = buf->buffer; + nv50->constbuf[PIPE_SHADER_VERTEX] = buf; nv50->dirty |= NV50_NEW_VERTPROG_CB; } else if (shader == PIPE_SHADER_FRAGMENT) { - nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer; + nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf; nv50->dirty |= NV50_NEW_FRAGPROG_CB; + } else + if (shader == PIPE_SHADER_GEOMETRY) { + nv50->constbuf[PIPE_SHADER_GEOMETRY] = buf; + nv50->dirty |= NV50_NEW_GEOMPROG_CB; } } @@ -696,6 +732,10 @@ nv50_init_state_functions(struct nv50_context *nv50) nv50->pipe.bind_fs_state = nv50_fp_state_bind; nv50->pipe.delete_fs_state = nv50_fp_state_delete; + nv50->pipe.create_gs_state = nv50_gp_state_create; + nv50->pipe.bind_gs_state = nv50_gp_state_bind; + nv50->pipe.delete_gs_state = nv50_gp_state_delete; + nv50->pipe.set_blend_color = nv50_set_blend_color; nv50->pipe.set_clip_state = nv50_set_clip_state; nv50->pipe.set_constant_buffer = nv50_set_constant_buffer; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index f83232f43cf..fcd07b59cd6 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -199,6 +199,8 @@ nv50_state_emit(struct nv50_context *nv50) nv50->state.dirty |= NV50_NEW_VERTPROG; if (nv50->state.fragprog) nv50->state.dirty |= NV50_NEW_FRAGPROG; + if (nv50->state.geomprog) + nv50->state.dirty |= NV50_NEW_GEOMPROG; if (nv50->state.rast) nv50->state.dirty |= NV50_NEW_RASTERIZER; if (nv50->state.blend_colour) @@ -228,9 +230,14 @@ nv50_state_emit(struct nv50_context *nv50) so_emit(chan, nv50->state.vertprog); if (nv50->state.dirty & NV50_NEW_FRAGPROG) so_emit(chan, nv50->state.fragprog); + if (nv50->state.dirty & NV50_NEW_GEOMPROG && nv50->state.geomprog) + so_emit(chan, nv50->state.geomprog); if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | - NV50_NEW_RASTERIZER)) - so_emit(chan, nv50->state.programs); + NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER)) + so_emit(chan, nv50->state.fp_linkage); + if ((nv50->state.dirty & (NV50_NEW_VERTPROG | NV50_NEW_GEOMPROG)) + && nv50->state.gp_linkage) + so_emit(chan, nv50->state.gp_linkage); if (nv50->state.dirty & NV50_NEW_RASTERIZER) so_emit(chan, nv50->state.rast); if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR) @@ -291,9 +298,15 @@ nv50_state_validate(struct nv50_context *nv50) if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB)) nv50_fragprog_validate(nv50); + if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_GEOMPROG_CB)) + nv50_geomprog_validate(nv50); + if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG | - NV50_NEW_RASTERIZER)) - nv50_linkage_validate(nv50); + NV50_NEW_GEOMPROG | NV50_NEW_RASTERIZER)) + nv50_fp_linkage_validate(nv50); + + if (nv50->dirty & (NV50_NEW_GEOMPROG | NV50_NEW_VERTPROG)) + nv50_gp_linkage_validate(nv50); if (nv50->dirty & NV50_NEW_RASTERIZER) so_ref(nv50->rasterizer->so, &nv50->state.rast); @@ -400,8 +413,9 @@ viewport_uptodate: for (i = 0; i < PIPE_SHADER_TYPES; ++i) nr += nv50->sampler_nr[i]; - so = so_new(1+ 5 * PIPE_SHADER_TYPES, 1+ 19 * PIPE_SHADER_TYPES - + nr * 8, PIPE_SHADER_TYPES * 2); + so = so_new(1 + 5 * PIPE_SHADER_TYPES, + 1 + 19 * PIPE_SHADER_TYPES + nr * 8, + PIPE_SHADER_TYPES * 2); nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX); nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT); diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c index bef548b7286..871536dca9b 100644 --- a/src/gallium/drivers/nv50/nv50_tex.c +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -155,7 +155,7 @@ static boolean nv50_validate_textures(struct nv50_context *nv50, struct nouveau_stateobj *so, unsigned p) { - static const unsigned p_remap[PIPE_SHADER_TYPES] = { 0, 2 }; + static const unsigned p_remap[PIPE_SHADER_TYPES] = { 0, 2, 1 }; struct nouveau_grobj *eng2d = nv50->screen->eng2d; struct nouveau_grobj *tesla = nv50->screen->tesla; diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index f2e510fba61..89a94d2fe81 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -55,6 +55,14 @@ nv50_prim(unsigned mode) case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS; case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP; case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON; + case PIPE_PRIM_LINES_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_LINES_ADJACENCY; + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_LINE_STRIP_ADJACENCY; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_TRIANGLES_ADJACENCY; + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP_ADJACENCY; default: break; } |