diff options
author | Rhys Perry <[email protected]> | 2018-06-14 19:56:28 -0600 |
---|---|---|
committer | Brian Paul <[email protected]> | 2018-06-14 20:09:45 -0600 |
commit | 66ca7e400b8cf736943feddafef7f76adabf9120 (patch) | |
tree | a338e1c9fd3ced8b144bedbbebcba8fd52c91135 | |
parent | 9f217facbde04dd005b3f6b53bc97480b856d246 (diff) |
nvc0: add support for programmable sample locations
Signed-off-by: Rhys Perry <[email protected]>
10 files changed, 299 insertions, 46 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h index 3d0782f86b5..7c835ceab8d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h @@ -73,6 +73,7 @@ struct nv50_ir_prog_symbol #define NVISA_GK104_CHIPSET 0xe0 #define NVISA_GK20A_CHIPSET 0xea #define NVISA_GM107_CHIPSET 0x110 +#define NVISA_GM200_CHIPSET 0x120 struct nv50_ir_prog_info { @@ -145,6 +146,7 @@ struct nv50_ir_prog_info bool persampleInvocation; bool usesSampleMaskIn; bool readsFramebuffer; + bool readsSampleLocations; } fp; struct { uint32_t inputOffset; /* base address for user args */ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index 3c5bad05fe7..d7844d73816 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -1520,6 +1520,10 @@ void Source::scanInstructionSrc(const Instruction& insn, info->out[src.getIndex(0)].oread = 1; } } + if (src.getFile() == TGSI_FILE_SYSTEM_VALUE) { + if (info->sv[src.getIndex(0)].sn == TGSI_SEMANTIC_SAMPLEPOS) + info->prop.fp.readsSampleLocations = true; + } if (src.getFile() != TGSI_FILE_INPUT) return; @@ -1560,6 +1564,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) if (insn.getOpcode() == TGSI_OPCODE_FBFETCH) info->prop.fp.readsFramebuffer = true; + if (insn.getOpcode() == TGSI_OPCODE_INTERP_SAMPLE) + info->prop.fp.readsSampleLocations = true; + if (insn.dstCount()) { Instruction::DstRegister dst = insn.getDst(0); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 29f674b4514..5723847234e 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -2662,17 
+2662,33 @@ NVC0LoweringPass::handleRDSV(Instruction *i) ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; break; case SV_SAMPLE_POS: { - Value *off = new_LValue(func, FILE_GPR); - ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0)); + Value *sampleID = bld.getScratch(); + ld = bld.mkOp1(OP_PIXLD, TYPE_U32, sampleID, bld.mkImm(0)); ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; - bld.mkOp2(OP_SHL, TYPE_U32, off, i->getDef(0), bld.mkImm(3)); - bld.mkLoad(TYPE_F32, - i->getDef(0), - bld.mkSymbol( - FILE_MEMORY_CONST, prog->driver->io.auxCBSlot, - TYPE_U32, prog->driver->io.sampleInfoBase + - 4 * sym->reg.data.sv.index), - off); + Value *offset = calculateSampleOffset(sampleID); + + assert(prog->driver->prop.fp.readsSampleLocations); + + if (targ->getChipset() >= NVISA_GM200_CHIPSET) { + bld.mkLoad(TYPE_F32, + i->getDef(0), + bld.mkSymbol( + FILE_MEMORY_CONST, prog->driver->io.auxCBSlot, + TYPE_U32, prog->driver->io.sampleInfoBase), + offset); + bld.mkOp2(OP_EXTBF, TYPE_U32, i->getDef(0), i->getDef(0), + bld.mkImm(0x040c + sym->reg.data.sv.index * 16)); + bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(0), TYPE_U32, i->getDef(0)); + bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(0), i->getDef(0), bld.mkImm(1.0f / 16.0f)); + } else { + bld.mkLoad(TYPE_F32, + i->getDef(0), + bld.mkSymbol( + FILE_MEMORY_CONST, prog->driver->io.auxCBSlot, + TYPE_U32, prog->driver->io.sampleInfoBase + + 4 * sym->reg.data.sv.index), + offset); + } break; } case SV_SAMPLE_MASK: { @@ -2832,6 +2848,69 @@ NVC0LoweringPass::handleOUT(Instruction *i) return true; } +Value * +NVC0LoweringPass::calculateSampleOffset(Value *sampleID) +{ + Value *offset = bld.getScratch(); + if (targ->getChipset() >= NVISA_GM200_CHIPSET) { + // Sample location offsets (in bytes) are calculated like so: + // offset = (SV_POSITION.y % 4 * 2) + (SV_POSITION.x % 2) + // offset = offset * 32 + sampleID % 8 * 4; + // which is equivalent to: + // offset = ((SV_POSITION.y & 0x3) << 6) + ((SV_POSITION.x & 0x1) << 5); + // offset += (sampleID & 0x7) << 2 + +
// The second operand (src1) of the INSBF instructions is encoded like so: + // 0xssll where ss is the size and ll is the offset. + // so: dest = src2 | (src0 & ((1 << ss) - 1)) << ll + + // Add sample ID (offset = (sampleID & 0x7) << 2) + bld.mkOp3(OP_INSBF, TYPE_U32, offset, sampleID, bld.mkImm(0x0302), bld.mkImm(0x0)); + + Symbol *xSym = bld.mkSysVal(SV_POSITION, 0); + Symbol *ySym = bld.mkSysVal(SV_POSITION, 1); + Value *coord = bld.getScratch(); + + // Add X coordinate (offset |= (SV_POSITION.x & 0x1) << 5) + bld.mkInterp(NV50_IR_INTERP_LINEAR, coord, + targ->getSVAddress(FILE_SHADER_INPUT, xSym), NULL); + bld.mkCvt(OP_CVT, TYPE_U32, coord, TYPE_F32, coord) + ->rnd = ROUND_ZI; + bld.mkOp3(OP_INSBF, TYPE_U32, offset, coord, bld.mkImm(0x0105), offset); + + // Add Y coordinate (offset |= (SV_POSITION.y & 0x3) << 6) + bld.mkInterp(NV50_IR_INTERP_LINEAR, coord, + targ->getSVAddress(FILE_SHADER_INPUT, ySym), NULL); + bld.mkCvt(OP_CVT, TYPE_U32, coord, TYPE_F32, coord) + ->rnd = ROUND_ZI; + bld.mkOp3(OP_INSBF, TYPE_U32, offset, coord, bld.mkImm(0x0206), offset); + } else { + bld.mkOp2(OP_SHL, TYPE_U32, offset, sampleID, bld.mkImm(3)); + } + return offset; +} + +// Handle programmable sample locations for GM20x+ +void +NVC0LoweringPass::handlePIXLD(Instruction *i) +{ + if (i->subOp != NV50_IR_SUBOP_PIXLD_OFFSET) + return; + if (targ->getChipset() < NVISA_GM200_CHIPSET) + return; + + assert(prog->driver->prop.fp.readsSampleLocations); + + bld.mkLoad(TYPE_F32, + i->getDef(0), + bld.mkSymbol( + FILE_MEMORY_CONST, prog->driver->io.auxCBSlot, + TYPE_U32, prog->driver->io.sampleInfoBase), + calculateSampleOffset(i->getSrc(0))); + + bld.getBB()->remove(i); +} + // Generate a binary predicate if an instruction is predicated by // e.g. an f32 value.
void @@ -2931,6 +3010,9 @@ NVC0LoweringPass::visit(Instruction *i) case OP_BUFQ: handleBUFQ(i); break; + case OP_PIXLD: + handlePIXLD(i); + break; default: break; } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index 1b2b36d3cc9..91771fbf7e9 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -116,6 +116,7 @@ protected: void handleSharedATOMNVE4(Instruction *); void handleLDST(Instruction *); bool handleBUFQ(Instruction *); + void handlePIXLD(Instruction *); void checkPredicate(Instruction *); @@ -142,6 +143,7 @@ private: void processSurfaceCoordsNVE4(TexInstruction *); void processSurfaceCoordsNVC0(TexInstruction *); void convertSurfaceFormat(TexInstruction *); + Value *calculateSampleOffset(Value *sampleID); protected: Value *loadTexHandle(Value *ptr, unsigned int slot); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index 0729c88dffa..77237a3c0a3 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -62,6 +62,8 @@ #define NVC0_NEW_3D_DRIVERCONST (1 << 27) #define NVC0_NEW_3D_WINDOW_RECTS (1 << 28) +#define NVC0_NEW_3D_SAMPLE_LOCATIONS (1 << 29) + #define NVC0_NEW_CP_PROGRAM (1 << 0) #define NVC0_NEW_CP_SURFACES (1 << 1) #define NVC0_NEW_CP_TEXTURES (1 << 2) @@ -134,20 +136,21 @@ #define NVC0_CB_AUX_UBO_SIZE ((NVC0_MAX_PIPE_CONSTBUFS - 1) * 4 * 4) /* 8 sets of 32-bits integer pairs sample offsets */ #define NVC0_CB_AUX_SAMPLE_INFO 0x1a0 /* FP */ -#define NVC0_CB_AUX_SAMPLE_SIZE (8 * 4 * 2) +/* 256 bytes, though only 64 bytes used before GM200 */ +#define NVC0_CB_AUX_SAMPLE_SIZE (8 * 2 * 4 * 4) /* draw parameters (index bais, base instance, drawid) */ #define NVC0_CB_AUX_DRAW_INFO 0x1a0 /* VP */ /* 32 user buffers, at 4 32-bits integers each */ -#define 
NVC0_CB_AUX_BUF_INFO(i) 0x220 + (i) * 4 * 4 +#define NVC0_CB_AUX_BUF_INFO(i) 0x2a0 + (i) * 4 * 4 #define NVC0_CB_AUX_BUF_SIZE (NVC0_MAX_BUFFERS * 4 * 4) /* 8 surfaces, at 16 32-bits integers each */ -#define NVC0_CB_AUX_SU_INFO(i) 0x420 + (i) * 16 * 4 +#define NVC0_CB_AUX_SU_INFO(i) 0x4a0 + (i) * 16 * 4 #define NVC0_CB_AUX_SU_SIZE (NVC0_MAX_IMAGES * 16 * 4) /* 1 64-bits address and 1 32-bits sequence */ -#define NVC0_CB_AUX_MP_INFO 0x620 +#define NVC0_CB_AUX_MP_INFO 0x6a0 #define NVC0_CB_AUX_MP_SIZE 3 * 4 /* 512 64-byte blocks for bindless image handles */ -#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x630 + (i) * 16 * 4 +#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x6b0 + (i) * 16 * 4 #define NVC0_CB_AUX_BINDLESS_SIZE (NVE4_IMG_MAX_HANDLES * 16 * 4) /* 4 32-bits floats for the vertex runout, put at the end */ #define NVC0_CB_AUX_RUNOUT_INFO NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6) @@ -229,6 +232,8 @@ struct nvc0_context { struct list_head img_head; struct pipe_framebuffer_state framebuffer; + bool sample_locations_enabled; + uint8_t sample_locations[2 * 4 * 8]; struct pipe_blend_color blend_colour; struct pipe_stencil_ref stencil_ref; struct pipe_poly_stipple stipple; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c index 9520d984bb3..57d98753f45 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c @@ -481,6 +481,9 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) } } } + /* GM20x+ needs TGSI_SEMANTIC_POSITION to access sample locations */ + if (info->prop.fp.readsSampleLocations && info->target >= NVISA_GM200_CHIPSET) + fp->hdr[5] |= 0x30000000; for (i = 0; i < info->numOutputs; ++i) { if (info->out[i].sn == TGSI_SEMANTIC_COLOR) diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 8e9ede0736a..0efa5840207 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -271,6 +271,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES: case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES: case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE: + case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS: return class_3d >= GM200_3D_CLASS; case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES: return class_3d >= GP100_3D_CLASS; @@ -319,7 +320,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_CONSTBUF0_FLAGS: case PIPE_CAP_PACKED_UNIFORMS: case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES: - case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS: return 0; case PIPE_CAP_VENDOR_ID: @@ -546,6 +546,36 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen, } static void +nvc0_screen_get_sample_pixel_grid(struct pipe_screen *pscreen, + unsigned sample_count, + unsigned *width, unsigned *height) +{ + switch (sample_count) { + case 0: + case 1: + /* this could be 4x4, but the GL state tracker makes it difficult to + * create a 1x MSAA texture and smaller grids save CB space */ + *width = 2; + *height = 4; + break; + case 2: + *width = 2; + *height = 4; + break; + case 4: + *width = 2; + *height = 2; + break; + case 8: + *width = 1; + *height = 2; + break; + default: + assert(0); + } +} + +static void nvc0_screen_destroy(struct pipe_screen *pscreen) { struct nvc0_screen *screen = nvc0_screen(pscreen); @@ -871,6 +901,7 @@ nvc0_screen_create(struct nouveau_device *dev) pscreen->get_param = nvc0_screen_get_param; pscreen->get_shader_param = nvc0_screen_get_shader_param; pscreen->get_paramf = nvc0_screen_get_paramf; + pscreen->get_sample_pixel_grid = nvc0_screen_get_sample_pixel_grid; pscreen->get_driver_query_info = nvc0_screen_get_driver_query_info; pscreen->get_driver_query_group_info = nvc0_screen_get_driver_query_group_info; diff --git 
a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c index 37dbbe66c7c..d9ee62523b9 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c @@ -854,7 +854,21 @@ nvc0_set_framebuffer_state(struct pipe_context *pipe, util_copy_framebuffer_state(&nvc0->framebuffer, fb); - nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER; + nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER | NVC0_NEW_3D_SAMPLE_LOCATIONS; +} + +static void +nvc0_set_sample_locations(struct pipe_context *pipe, + size_t size, const uint8_t *locations) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + + nvc0->sample_locations_enabled = size && locations; + if (size > sizeof(nvc0->sample_locations)) + size = sizeof(nvc0->sample_locations); + memcpy(nvc0->sample_locations, locations, size); + + nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLE_LOCATIONS; } static void @@ -1407,6 +1421,7 @@ nvc0_init_state_functions(struct nvc0_context *nvc0) pipe->set_min_samples = nvc0_set_min_samples; pipe->set_constant_buffer = nvc0_set_constant_buffer; pipe->set_framebuffer_state = nvc0_set_framebuffer_state; + pipe->set_sample_locations = nvc0_set_sample_locations; pipe->set_polygon_stipple = nvc0_set_polygon_stipple; pipe->set_scissor_states = nvc0_set_scissor_states; pipe->set_viewport_states = nvc0_set_viewport_states; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index 8e2192d3de2..cc18f41c4bb 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -71,13 +71,132 @@ nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i, unsigned layers) PUSH_DATA (push, 0); // base layer } +static uint32_t +gm200_encode_cb_sample_location(uint8_t x, uint8_t y) +{ + static const uint8_t lut[] = { + 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7}; + uint32_t result = 
0; + /* S0.12 representation for TGSI_OPCODE_INTERP_SAMPLE */ + result |= lut[x] << 8 | lut[y] << 24; + /* fill in gaps with data in a representation for SV_SAMPLE_POS */ + result |= x << 12 | y << 28; + return result; +} + +static void +gm200_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_screen *screen = nvc0->screen; + unsigned grid_width, grid_height, hw_grid_width; + uint8_t sample_locations[16][2]; + unsigned cb[64]; + unsigned i, pixel, pixel_y, pixel_x, sample; + uint32_t packed_locations[4] = {}; + + screen->base.base.get_sample_pixel_grid( + &screen->base.base, ms, &grid_width, &grid_height); + + hw_grid_width = grid_width; + if (ms == 1) /* get_sample_pixel_grid() exposes 2x4 for 1x msaa */ + hw_grid_width = 4; + + if (nvc0->sample_locations_enabled) { + uint8_t locations[2 * 4 * 8]; + memcpy(locations, nvc0->sample_locations, sizeof(locations)); + util_sample_locations_flip_y( + &screen->base.base, nvc0->framebuffer.height, ms, locations); + + for (pixel = 0; pixel < hw_grid_width*grid_height; pixel++) { + for (sample = 0; sample < ms; sample++) { + unsigned pixel_x = pixel % hw_grid_width; + unsigned pixel_y = pixel / hw_grid_width; + unsigned wi = pixel * ms + sample; + unsigned ri = (pixel_y * grid_width + pixel_x % grid_width); + ri = ri * ms + sample; + sample_locations[wi][0] = locations[ri] & 0xf; + sample_locations[wi][1] = 16 - (locations[ri] >> 4); + } + } + } else { + const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms); + for (i = 0; i < 16; i++) { + sample_locations[i][0] = ptr[i % ms][0]; + sample_locations[i][1] = ptr[i % ms][1]; + } + } + + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, NVC0_CB_AUX_SIZE); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); + BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 64); + PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO); + for 
(pixel_y = 0; pixel_y < 4; pixel_y++) { + for (pixel_x = 0; pixel_x < 2; pixel_x++) { + for (sample = 0; sample < ms; sample++) { + unsigned write_index = (pixel_y * 2 + pixel_x) * 8 + sample; + unsigned read_index = pixel_y % grid_height * hw_grid_width; + uint8_t x, y; + read_index += pixel_x % grid_width; + read_index = read_index * ms + sample; + x = sample_locations[read_index][0]; + y = sample_locations[read_index][1]; + cb[write_index] = gm200_encode_cb_sample_location(x, y); + } + } + } + PUSH_DATAp(push, cb, 64); + + for (i = 0; i < 16; i++) { + packed_locations[i / 4] |= sample_locations[i][0] << ((i % 4) * 8); + packed_locations[i / 4] |= sample_locations[i][1] << ((i % 4) * 8 + 4); + } + + BEGIN_NVC0(push, SUBC_3D(0x11e0), 4); + PUSH_DATAp(push, packed_locations, 4); +} + +static void +nvc0_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_screen *screen = nvc0->screen; + unsigned i; + + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); + PUSH_DATA (push, NVC0_CB_AUX_SIZE); + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); + BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms); + PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO); + for (i = 0; i < ms; i++) { + float xy[2]; + nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy); + PUSH_DATAf(push, xy[0]); + PUSH_DATAf(push, xy[1]); + } +} + +static void +validate_sample_locations(struct nvc0_context *nvc0) +{ + unsigned ms = util_framebuffer_get_num_samples(&nvc0->framebuffer); + + if (nvc0->screen->base.class_3d >= GM200_3D_CLASS) + gm200_validate_sample_locations(nvc0, ms); + else + nvc0_validate_sample_locations(nvc0, ms); +} + static void nvc0_validate_fb(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; struct pipe_framebuffer_state *fb = &nvc0->framebuffer; - struct nvc0_screen *screen = nvc0->screen; - 
unsigned i, ms; + unsigned i; unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1; unsigned nr_cbufs = fb->nr_cbufs; bool serialize = false; @@ -197,33 +316,6 @@ nvc0_validate_fb(struct nvc0_context *nvc0) PUSH_DATA (push, (076543210 << 4) | nr_cbufs); IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode); - ms = 1 << ms_mode; - BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3); - PUSH_DATA (push, NVC0_CB_AUX_SIZE); - PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); - PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4)); - BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms); - PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO); - for (i = 0; i < ms; i++) { - float xy[2]; - nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy); - PUSH_DATAf(push, xy[0]); - PUSH_DATAf(push, xy[1]); - } - - if (screen->base.class_3d >= GM200_3D_CLASS) { - const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms); - uint32_t val[4] = {}; - - for (i = 0; i < 16; i++) { - val[i / 4] |= ptr[i % ms][0] << (((i % 4) * 8) + 0); - val[i / 4] |= ptr[i % ms][1] << (((i % 4) * 8) + 4); - } - - BEGIN_NVC0(push, SUBC_3D(0x11e0), 4); - PUSH_DATAp(push, val, 4); - } - if (serialize) IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0); @@ -879,6 +971,8 @@ validate_list_3d[] = { NVC0_NEW_3D_TEVLPROG | NVC0_NEW_3D_GMTYPROG }, { nvc0_validate_driverconst, NVC0_NEW_3D_DRIVERCONST }, + { validate_sample_locations, NVC0_NEW_3D_SAMPLE_LOCATIONS | + NVC0_NEW_3D_FRAMEBUFFER}, }; bool diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c index 0f86c11b7f4..39b1369758a 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c @@ -754,6 +754,16 @@ nvc0_clear(struct pipe_context *pipe, unsigned buffers, } } +static void +gm200_evaluate_depth_buffer(struct pipe_context *pipe) +{ + struct nvc0_context *nvc0 = nvc0_context(pipe); + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + + 
nvc0_state_validate_3d(nvc0, NVC0_NEW_3D_FRAMEBUFFER); + IMMED_NVC0(push, SUBC_3D(0x11fc), 1); +} + /* =============================== BLIT CODE =================================== */ @@ -1720,4 +1730,6 @@ nvc0_init_surface_functions(struct nvc0_context *nvc0) pipe->clear_depth_stencil = nvc0_clear_depth_stencil; pipe->clear_texture = nv50_clear_texture; pipe->clear_buffer = nvc0_clear_buffer; + if (nvc0->screen->base.class_3d >= GM200_3D_CLASS) + pipe->evaluate_depth_buffer = gm200_evaluate_depth_buffer; } |