diff options
Diffstat (limited to 'src/gallium')
43 files changed, 2114 insertions, 657 deletions
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index 900c64df4b9..c6069927b25 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -37,13 +37,7 @@ struct cso_cache { - struct cso_hash *blend_hash; - struct cso_hash *depth_stencil_hash; - struct cso_hash *fs_hash; - struct cso_hash *vs_hash; - struct cso_hash *rasterizer_hash; - struct cso_hash *sampler_hash; - struct cso_hash *velements_hash; + struct cso_hash *hashes[CSO_CACHE_MAX]; int max_size; cso_sanitize_callback sanitize_cb; @@ -86,34 +80,10 @@ unsigned cso_construct_key(void *item, int item_size) return hash_key((item), item_size); } -static struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_type type) +static INLINE struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_type type) { - struct cso_hash *hash = 0; - - switch(type) { - case CSO_BLEND: - hash = sc->blend_hash; - break; - case CSO_SAMPLER: - hash = sc->sampler_hash; - break; - case CSO_DEPTH_STENCIL_ALPHA: - hash = sc->depth_stencil_hash; - break; - case CSO_RASTERIZER: - hash = sc->rasterizer_hash; - break; - case CSO_FRAGMENT_SHADER: - hash = sc->fs_hash; - break; - case CSO_VERTEX_SHADER: - hash = sc->vs_hash; - break; - case CSO_VELEMENTS: - hash = sc->velements_hash; - break; - } - + struct cso_hash *hash; + hash = sc->hashes[type]; return hash; } @@ -298,17 +268,14 @@ void * cso_take_state(struct cso_cache *sc, struct cso_cache *cso_cache_create(void) { struct cso_cache *sc = MALLOC_STRUCT(cso_cache); + int i; if (sc == NULL) return NULL; sc->max_size = 4096; - sc->blend_hash = cso_hash_create(); - sc->sampler_hash = cso_hash_create(); - sc->depth_stencil_hash = cso_hash_create(); - sc->rasterizer_hash = cso_hash_create(); - sc->fs_hash = cso_hash_create(); - sc->vs_hash = cso_hash_create(); - sc->velements_hash = cso_hash_create(); + for (i = 0; i < CSO_CACHE_MAX; i++) + sc->hashes[i] = 
cso_hash_create(); + sc->sanitize_cb = sanitize_cb; sc->sanitize_data = 0; @@ -318,33 +285,9 @@ struct cso_cache *cso_cache_create(void) void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, cso_state_callback func, void *user_data) { - struct cso_hash *hash = 0; + struct cso_hash *hash = _cso_hash_for_type(sc, type); struct cso_hash_iter iter; - switch (type) { - case CSO_BLEND: - hash = sc->blend_hash; - break; - case CSO_SAMPLER: - hash = sc->sampler_hash; - break; - case CSO_DEPTH_STENCIL_ALPHA: - hash = sc->depth_stencil_hash; - break; - case CSO_RASTERIZER: - hash = sc->rasterizer_hash; - break; - case CSO_FRAGMENT_SHADER: - hash = sc->fs_hash; - break; - case CSO_VERTEX_SHADER: - hash = sc->vs_hash; - break; - case CSO_VELEMENTS: - hash = sc->velements_hash; - break; - } - iter = cso_hash_first_node(hash); while (!cso_hash_iter_is_null(iter)) { void *state = cso_hash_iter_data(iter); @@ -357,6 +300,7 @@ void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, void cso_cache_delete(struct cso_cache *sc) { + int i; assert(sc); if (!sc) @@ -371,28 +315,20 @@ void cso_cache_delete(struct cso_cache *sc) cso_for_each_state(sc, CSO_SAMPLER, delete_sampler_state, 0); cso_for_each_state(sc, CSO_VELEMENTS, delete_velements, 0); - cso_hash_delete(sc->blend_hash); - cso_hash_delete(sc->sampler_hash); - cso_hash_delete(sc->depth_stencil_hash); - cso_hash_delete(sc->rasterizer_hash); - cso_hash_delete(sc->fs_hash); - cso_hash_delete(sc->vs_hash); - cso_hash_delete(sc->velements_hash); + for (i = 0; i < CSO_CACHE_MAX; i++) + cso_hash_delete(sc->hashes[i]); + FREE(sc); } void cso_set_maximum_cache_size(struct cso_cache *sc, int number) { + int i; + sc->max_size = number; - sanitize_hash(sc, sc->blend_hash, CSO_BLEND, sc->max_size); - sanitize_hash(sc, sc->depth_stencil_hash, CSO_DEPTH_STENCIL_ALPHA, - sc->max_size); - sanitize_hash(sc, sc->fs_hash, CSO_FRAGMENT_SHADER, sc->max_size); - sanitize_hash(sc, sc->vs_hash, CSO_VERTEX_SHADER, 
sc->max_size); - sanitize_hash(sc, sc->rasterizer_hash, CSO_RASTERIZER, sc->max_size); - sanitize_hash(sc, sc->sampler_hash, CSO_SAMPLER, sc->max_size); - sanitize_hash(sc, sc->velements_hash, CSO_VELEMENTS, sc->max_size); + for (i = 0; i < CSO_CACHE_MAX; i++) + sanitize_hash(sc, sc->hashes[i], i, sc->max_size); } int cso_maximum_cache_size(const struct cso_cache *sc) diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h index fb09b83c623..1b17423c72b 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.h +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -86,13 +86,14 @@ extern "C" { #endif enum cso_cache_type { + CSO_RASTERIZER, CSO_BLEND, - CSO_SAMPLER, CSO_DEPTH_STENCIL_ALPHA, - CSO_RASTERIZER, CSO_FRAGMENT_SHADER, CSO_VERTEX_SHADER, - CSO_VELEMENTS + CSO_SAMPLER, + CSO_VELEMENTS, + CSO_CACHE_MAX, }; typedef void (*cso_state_callback)(void *ctx, void *obj); diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c index 4564ab81f99..a920741c36b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c @@ -72,7 +72,7 @@ struct ureg_tokens { #define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS #define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS #define UREG_MAX_CONSTANT_RANGE 32 -#define UREG_MAX_IMMEDIATE 32 +#define UREG_MAX_IMMEDIATE 256 #define UREG_MAX_TEMP 256 #define UREG_MAX_ADDR 2 #define UREG_MAX_PRED 1 diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 2ecade5f7e4..65a99fcb394 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -477,10 +477,13 @@ float_to_byte_tex(float f) static INLINE unsigned util_logbase2(unsigned n) { - unsigned log2 = 0; - while (n >>= 1) - ++log2; - return log2; + unsigned pos = 0; + if (n >= 1<<16) { n >>= 16; pos += 16; } + if (n >= 1<< 8) { n >>= 8; pos += 8; } + if (n >= 1<< 4) { n >>= 4; pos += 4; } + if (n >= 1<< 2) { n >>= 2; pos 
+= 2; } + if (n >= 1<< 1) { pos += 1; } + return pos; } diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h index 3c851f73401..ca7c67d7c53 100644 --- a/src/gallium/auxiliary/util/u_prim.h +++ b/src/gallium/auxiliary/util/u_prim.h @@ -78,55 +78,32 @@ static INLINE boolean u_validate_pipe_prim( unsigned pipe_prim, unsigned nr ) static INLINE boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr ) { boolean ok = TRUE; - - switch (pipe_prim) { - case PIPE_PRIM_POINTS: - ok = (*nr >= 1); - break; - case PIPE_PRIM_LINES: - ok = (*nr >= 2); - *nr -= (*nr % 2); - break; - case PIPE_PRIM_LINE_STRIP: - case PIPE_PRIM_LINE_LOOP: - ok = (*nr >= 2); - break; - case PIPE_PRIM_TRIANGLES: - ok = (*nr >= 3); - *nr -= (*nr % 3); - break; - case PIPE_PRIM_TRIANGLE_STRIP: - case PIPE_PRIM_TRIANGLE_FAN: - case PIPE_PRIM_POLYGON: - ok = (*nr >= 3); - break; - case PIPE_PRIM_QUADS: - ok = (*nr >= 4); - *nr -= (*nr % 4); - break; - case PIPE_PRIM_QUAD_STRIP: - ok = (*nr >= 4); - *nr -= (*nr % 2); - break; - case PIPE_PRIM_LINES_ADJACENCY: - ok = (*nr >= 4); - *nr -= (*nr % 4); - break; - case PIPE_PRIM_LINE_STRIP_ADJACENCY: - ok = (*nr >= 4); - break; - case PIPE_PRIM_TRIANGLES_ADJACENCY: - ok = (*nr >= 6); - *nr -= (*nr % 5); - break; - case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: - ok = (*nr >= 4); - break; - default: - ok = 0; - break; + const static int values[][2] = { + { 1, 0 }, /* PIPE_PRIM_POINTS */ + { 2, 2 }, /* PIPE_PRIM_LINES */ + { 2, 0 }, /* PIPE_PRIM_LINE_LOOP */ + { 2, 0 }, /* PIPE_PRIM_LINE_STRIP */ + { 3, 3 }, /* PIPE_PRIM_TRIANGLES */ + { 3, 0 }, /* PIPE_PRIM_TRIANGLE_STRIP */ + { 3, 0 }, /* PIPE_PRIM_TRIANGLE_FAN */ + { 4, 4 }, /* PIPE_PRIM_TRIANGLE_QUADS */ + { 4, 2 }, /* PIPE_PRIM_TRIANGLE_QUAD_STRIP */ + { 3, 0 }, /* PIPE_PRIM_TRIANGLE_POLYGON */ + { 4, 4 }, /* PIPE_PRIM_LINES_ADJACENCY */ + { 4, 0 }, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ + { 6, 5 }, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ + { 4, 0 }, /* 
PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ + }; + + if (unlikely(pipe_prim >= PIPE_PRIM_MAX)) { + *nr = 0; + return FALSE; } + ok = (*nr >= values[pipe_prim][0]); + if (values[pipe_prim][1]) + *nr -= (*nr % values[pipe_prim][1]); + if (!ok) *nr = 0; diff --git a/src/gallium/drivers/i915/i915_resource.h b/src/gallium/drivers/i915/i915_resource.h index 86620e6a123..c15ecdfc22a 100644 --- a/src/gallium/drivers/i915/i915_resource.h +++ b/src/gallium/drivers/i915/i915_resource.h @@ -45,8 +45,8 @@ struct i915_buffer { boolean free_on_destroy; }; -#define I915_MAX_TEXTURE_2D_LEVELS 11 /* max 1024x1024 */ -#define I915_MAX_TEXTURE_3D_LEVELS 8 /* max 128x128x128 */ +#define I915_MAX_TEXTURE_2D_LEVELS 12 /* max 2048x2048 */ +#define I915_MAX_TEXTURE_3D_LEVELS 9 /* max 256x256x256 */ struct offset_pair { diff --git a/src/gallium/drivers/nv50/nv50_pc_emit.c b/src/gallium/drivers/nv50/nv50_pc_emit.c index 252c58dd8ff..600850da011 100644 --- a/src/gallium/drivers/nv50/nv50_pc_emit.c +++ b/src/gallium/drivers/nv50/nv50_pc_emit.c @@ -744,8 +744,8 @@ emit_add_a16(struct nv_pc *pc, struct nv_instruction *i) set_pred(pc, i); - if (i->src[1]) - set_a16_bits(pc, SREG(i->src[1])->id + 1); + if (s && i->src[0]) + set_a16_bits(pc, SREG(i->src[0])->id); } static void diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c index 4ec77df8fb7..388ebcdbf32 100644 --- a/src/gallium/drivers/r300/r300_blit.c +++ b/src/gallium/drivers/r300/r300_blit.c @@ -30,14 +30,24 @@ enum r300_blitter_op /* bitmask */ { - R300_CLEAR = 1, - R300_CLEAR_SURFACE = 2, - R300_COPY = 4 + R300_STOP_QUERY = 1, + R300_SAVE_TEXTURES = 2, + R300_SAVE_FRAMEBUFFER = 4, + R300_IGNORE_RENDER_COND = 8, + + R300_CLEAR = R300_STOP_QUERY, + + R300_CLEAR_SURFACE = R300_STOP_QUERY | R300_SAVE_FRAMEBUFFER, + + R300_COPY = R300_STOP_QUERY | R300_SAVE_FRAMEBUFFER | + R300_SAVE_TEXTURES | R300_IGNORE_RENDER_COND, + + R300_DECOMPRESS = R300_STOP_QUERY | R300_IGNORE_RENDER_COND, }; static void 
r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op op) { - if (r300->query_current) { + if ((op & R300_STOP_QUERY) && r300->query_current) { r300->blitter_saved_query = r300->query_current; r300_stop_query(r300); } @@ -57,11 +67,11 @@ static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op o util_blitter_save_vertex_buffers(r300->blitter, r300->vbuf_mgr->nr_vertex_buffers, r300->vbuf_mgr->vertex_buffer); - if (op & (R300_CLEAR_SURFACE | R300_COPY)) { + if (op & R300_SAVE_FRAMEBUFFER) { util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state); } - if (op & R300_COPY) { + if (op & R300_SAVE_TEXTURES) { struct r300_textures_state* state = (struct r300_textures_state*)r300->textures_state.state; @@ -73,6 +83,14 @@ static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op o r300->blitter, state->sampler_view_count, (struct pipe_sampler_view**)state->sampler_views); } + + if (op & R300_IGNORE_RENDER_COND) { + /* Save the flag. */ + r300->blitter_saved_skip_rendering = r300->skip_rendering+1; + r300->skip_rendering = FALSE; + } else { + r300->blitter_saved_skip_rendering = 0; + } } static void r300_blitter_end(struct r300_context *r300) @@ -81,6 +99,11 @@ static void r300_blitter_end(struct r300_context *r300) r300_resume_query(r300, r300->blitter_saved_query); r300->blitter_saved_query = NULL; } + + if (r300->blitter_saved_skip_rendering) { + /* Restore the flag. */ + r300->skip_rendering = r300->blitter_saved_skip_rendering-1; + } } static uint32_t r300_depth_clear_cb_value(enum pipe_format format, @@ -234,6 +257,9 @@ static void r300_clear(struct pipe_context* pipe, /* Setup Hyper-Z clears. */ if (r300->hyperz_enabled) { + DBG(r300, DBG_HYPERZ, "r300: Clear memory: %s%s\n", + zmask_clear ? "ZMASK " : "", hiz_clear ? 
"HIZ" : ""); + if (zmask_clear) { hyperz_dcv = hyperz->zb_depthclearvalue = r300_depth_clear_value(fb->zsbuf->format, depth, stencil); @@ -371,7 +397,7 @@ void r300_decompress_zmask(struct r300_context *r300) r300->zmask_decompress = TRUE; r300_mark_atom_dirty(r300, &r300->hyperz_state); - r300_blitter_begin(r300, R300_CLEAR); + r300_blitter_begin(r300, R300_DECOMPRESS); util_blitter_clear_depth_custom(r300->blitter, fb->width, fb->height, 0, r300->dsa_decompress_zmask); r300_blitter_end(r300); diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 139dd210b8f..d71db0001a9 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -584,6 +584,8 @@ struct r300_context { uint32_t zbuffer_bpp; /* Whether rendering is conditional and should be skipped. */ boolean skip_rendering; + /* The flag above saved by blitter. */ + unsigned char blitter_saved_skip_rendering; /* Point sprites texcoord index, 1 bit per texcoord */ int sprite_coord_enable; /* Whether two-sided color selection is enabled (AKA light_twoside). 
*/ diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c index b60cfd1f248..9a0052ac6d1 100644 --- a/src/gallium/drivers/r300/r300_debug.c +++ b/src/gallium/drivers/r300/r300_debug.c @@ -27,7 +27,7 @@ #include <stdio.h> static const struct debug_named_value debug_options[] = { - { "info", DBG_INFO, "Print hardware info"}, + { "info", DBG_INFO, "Print hardware info (printed by default on debug builds"}, { "fp", DBG_FP, "Log fragment program compilation" }, { "vp", DBG_VP, "Log vertex program compilation" }, { "pstat", DBG_P_STAT, "Log vertex/fragment program stats" }, @@ -37,14 +37,11 @@ static const struct debug_named_value debug_options[] = { { "psc", DBG_PSC, "Log vertex stream registers" }, { "tex", DBG_TEX, "Log basic info about textures" }, { "texalloc", DBG_TEXALLOC, "Log texture mipmap tree info" }, - { "fall", DBG_FALL, "Log fallbacks" }, { "rs", DBG_RS, "Log rasterizer" }, { "fb", DBG_FB, "Log framebuffer" }, { "cbzb", DBG_CBZB, "Log fast color clear info" }, { "hyperz", DBG_HYPERZ, "Log HyperZ info" }, - { "upload", DBG_UPLOAD, "Log user buffer upload info" }, { "scissor", DBG_SCISSOR, "Log scissor info" }, - { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries" }, { "anisohq", DBG_ANISOHQ, "Use high quality anisotropic filtering" }, { "notiling", DBG_NO_TILING, "Disable tiling" }, { "noimmd", DBG_NO_IMMD, "Disable immediate mode" }, diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 874037ed9fd..d214af4cd5b 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -439,6 +439,19 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf->pitch_zmask); } + /* Set up a dummy zbuffer. Otherwise occlusion queries won't work. + * Use the first colorbuffer, we will disable writes in the DSA state + * so as not to corrupt it. 
*/ + } else if (fb->nr_cbufs) { + surf = r300_surface(fb->cbufs[0]); + + OUT_CS_REG(R300_ZB_FORMAT, R300_DEPTHFORMAT_16BIT_INT_Z); + + OUT_CS_REG(R300_ZB_DEPTHOFFSET, 0); + OUT_CS_RELOC(surf); + + OUT_CS_REG(R300_ZB_DEPTHPITCH, 4 | R300_DEPTHMICROTILE_TILED_SQUARE); + OUT_CS_RELOC(surf); } END_CS; diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c index e946d61d0ed..0f021e9f4e8 100644 --- a/src/gallium/drivers/r300/r300_hyperz.c +++ b/src/gallium/drivers/r300/r300_hyperz.c @@ -195,6 +195,7 @@ static void r300_update_hyperz(struct r300_context* r300) } return; } + DBG(r300, DBG_HYPERZ, "r300: Z-func: %i\n", dsa->dsa.depth.func); /* Set the HiZ function if needed. */ if (r300->hiz_func == HIZ_FUNC_NONE) { diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 9752a519491..782f041e926 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -156,75 +156,23 @@ static void r300_render_condition(struct pipe_context *pipe, uint64_t result = 0; boolean wait; + r300->skip_rendering = FALSE; + if (query) { wait = mode == PIPE_RENDER_COND_WAIT || mode == PIPE_RENDER_COND_BY_REGION_WAIT; - if (!r300_get_query_result(pipe, query, wait, &result)) { - r300->skip_rendering = FALSE; - } else { + if (r300_get_query_result(pipe, query, wait, &result)) { r300->skip_rendering = result == 0; } - } else { - r300->skip_rendering = FALSE; } } -/*************************************************************************** - * Fake occlusion queries (for debugging) - ***************************************************************************/ - -static unsigned r300_fake_query; - -static struct pipe_query *r300_fake_create_query(struct pipe_context *pipe, - unsigned query_type) -{ - return (struct pipe_query*)&r300_fake_query; -} - -static void r300_fake_destroy_query(struct pipe_context* pipe, - struct pipe_query* query) -{ -} - -static void r300_fake_begin_query(struct 
pipe_context* pipe, - struct pipe_query* query) -{ -} - -static void r300_fake_end_query(struct pipe_context* pipe, - struct pipe_query* query) -{ -} - -static boolean r300_fake_get_query_result(struct pipe_context* pipe, - struct pipe_query* query, - boolean wait, void* vresult) -{ - uint64_t *result = (uint64_t*)vresult; - *result = 1000000; - return TRUE; -} - -static void r300_fake_render_condition(struct pipe_context *pipe, - struct pipe_query *query, uint mode) -{ -} - void r300_init_query_functions(struct r300_context* r300) { - if (DBG_ON(r300, DBG_FAKE_OCC)) { - r300->context.create_query = r300_fake_create_query; - r300->context.destroy_query = r300_fake_destroy_query; - r300->context.begin_query = r300_fake_begin_query; - r300->context.end_query = r300_fake_end_query; - r300->context.get_query_result = r300_fake_get_query_result; - r300->context.render_condition = r300_fake_render_condition; - } else { - r300->context.create_query = r300_create_query; - r300->context.destroy_query = r300_destroy_query; - r300->context.begin_query = r300_begin_query; - r300->context.end_query = r300_end_query; - r300->context.get_query_result = r300_get_query_result; - r300->context.render_condition = r300_render_condition; - } + r300->context.create_query = r300_create_query; + r300->context.destroy_query = r300_destroy_query; + r300->context.begin_query = r300_begin_query; + r300->context.end_query = r300_end_query; + r300->context.get_query_result = r300_get_query_result; + r300->context.render_condition = r300_render_condition; } diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 429b85545f7..b24e7faa644 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -1200,6 +1200,9 @@ static void r300_blitter_draw_rectangle(struct blitter_context *blitter, const float zeros[4] = {0, 0, 0, 0}; CS_LOCALS(r300); + if (r300->skip_rendering) + return; + 
r300->context.set_vertex_buffers(&r300->context, 0, NULL); if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h index bca86edb1d7..e5c53bf3500 100644 --- a/src/gallium/drivers/r300/r300_screen.h +++ b/src/gallium/drivers/r300/r300_screen.h @@ -88,23 +88,20 @@ radeon_winsys(struct pipe_screen *screen) { #define DBG_TEX (1 << 5) #define DBG_TEXALLOC (1 << 6) #define DBG_RS (1 << 7) -#define DBG_FALL (1 << 8) -#define DBG_FB (1 << 9) -#define DBG_RS_BLOCK (1 << 10) -#define DBG_CBZB (1 << 11) -#define DBG_HYPERZ (1 << 12) -#define DBG_SCISSOR (1 << 13) -#define DBG_UPLOAD (1 << 14) -#define DBG_INFO (1 << 15) +#define DBG_FB (1 << 8) +#define DBG_RS_BLOCK (1 << 9) +#define DBG_CBZB (1 << 10) +#define DBG_HYPERZ (1 << 11) +#define DBG_SCISSOR (1 << 12) +#define DBG_INFO (1 << 13) /* Features. */ #define DBG_ANISOHQ (1 << 16) #define DBG_NO_TILING (1 << 17) #define DBG_NO_IMMD (1 << 18) -#define DBG_FAKE_OCC (1 << 19) -#define DBG_NO_OPT (1 << 20) -#define DBG_NO_CBZB (1 << 21) -#define DBG_NO_ZMASK (1 << 22) -#define DBG_NO_HIZ (1 << 23) +#define DBG_NO_OPT (1 << 19) +#define DBG_NO_CBZB (1 << 20) +#define DBG_NO_ZMASK (1 << 21) +#define DBG_NO_HIZ (1 << 22) /* Statistics. */ #define DBG_P_STAT (1 << 25) /*@}*/ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index bc6c67dd034..7127ea1ac16 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -549,6 +549,10 @@ static void* dsa->z_stencil_control |= (r300_translate_depth_stencil_function(state->depth.func) << R300_Z_FUNC_SHIFT); + } else { + /* We must enable depth test, otherwise occlusion queries won't work. */ + dsa->z_buffer_control |= R300_Z_ENABLE; + dsa->z_stencil_control |= R300_ZS_ALWAYS; } /* Stencil buffer setup. 
*/ @@ -632,11 +636,13 @@ static void* OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value); END_CB; + /* We must enable depth test, otherwise occlusion queries won't work. + * We setup a dummy zbuffer to silent the CS checker, see emit_fb_state. */ BEGIN_CB(dsa->cb_zb_no_readwrite, 10); OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function); OUT_CB_REG_SEQ(R300_ZB_CNTL, 3); - OUT_CB(0); - OUT_CB(0); + OUT_CB(R300_Z_ENABLE); + OUT_CB(R300_ZS_ALWAYS); OUT_CB(0); OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, 0); OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value); @@ -645,8 +651,8 @@ static void* BEGIN_CB(dsa->cb_fp16_zb_no_readwrite, 10); OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function_fp16); OUT_CB_REG_SEQ(R300_ZB_CNTL, 3); - OUT_CB(0); - OUT_CB(0); + OUT_CB(R300_Z_ENABLE); + OUT_CB(R300_ZS_ALWAYS); OUT_CB(0); OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, 0); OUT_CB_REG(R500_FG_ALPHA_VALUE, dsa->alpha_value); @@ -792,12 +798,14 @@ void r300_mark_fb_state_dirty(struct r300_context *r300, /* Now compute the fb_state atom size. */ r300->fb_state.size = 2 + (8 * state->nr_cbufs); - if (r300->cbzb_clear) + if (r300->cbzb_clear) { r300->fb_state.size += 10; - else if (state->zsbuf) { + } else if (state->zsbuf) { r300->fb_state.size += 10; if (r300->hyperz_enabled) r300->fb_state.size += 8; + } else if (state->nr_cbufs) { + r300->fb_state.size += 10; } /* The size of the rest of atoms stays the same. */ @@ -1457,6 +1465,8 @@ r300_create_sampler_view(struct pipe_context *pipe, boolean dxtc_swizzle = r300_screen(pipe->screen)->caps.dxtc_swizzle; if (view) { + unsigned hwformat; + view->base = *templ; view->base.reference.count = 1; view->base.context = pipe; @@ -1468,11 +1478,19 @@ r300_create_sampler_view(struct pipe_context *pipe, view->swizzle[2] = templ->swizzle_b; view->swizzle[3] = templ->swizzle_a; + hwformat = r300_translate_texformat(templ->format, + view->swizzle, + is_r500, + dxtc_swizzle); + + if (hwformat == ~0) { + fprintf(stderr, "r300: Ooops. 
Got unsupported format %s in %s.\n", + util_format_short_name(templ->format), __func__); + } + assert(hwformat != ~0); + view->format = tex->tx_format; - view->format.format1 |= r300_translate_texformat(templ->format, - view->swizzle, - is_r500, - dxtc_swizzle); + view->format.format1 |= hwformat; if (is_r500) { view->format.format2 |= r500_tx_format_msb_bit(templ->format); } diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index 3793b919dde..fb0b0f104bf 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -62,14 +62,17 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) | S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) | S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type); - bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | + bc->bytecode[id] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) | S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) | S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) | - S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) | - S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program); + S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst); + if (bc->chiprev == CHIPREV_EVERGREEN) /* no EOP on cayman */ + bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program); + id++; + break; case EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP: case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE: @@ -80,6 +83,7 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: case EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN: + case 
CM_V_SQ_CF_WORD1_SQ_CF_INST_END: bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1); bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) | S_SQ_CF_WORD1_BARRIER(1) | diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 187f00e1e52..54f5410c324 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -71,15 +71,19 @@ static void evergreen_set_blend_color(struct pipe_context *ctx, static void *evergreen_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_blend *blend = CALLOC_STRUCT(r600_pipe_blend); struct r600_pipe_state *rstate; u32 color_control, target_mask; /* FIXME there is more then 8 framebuffer */ unsigned blend_cntl[8]; + enum radeon_family family; if (blend == NULL) { return NULL; } + + family = r600_get_family(rctx->radeon); rstate = &blend->rstate; rstate->id = R600_PIPE_STATE_BLEND; @@ -102,9 +106,16 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, } } blend->cb_target_mask = target_mask; + r600_pipe_state_add_reg(rstate, R_028808_CB_COLOR_CONTROL, color_control, 0xFFFFFFFD, NULL); - r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL); + + if (family != CHIP_CAYMAN) + r600_pipe_state_add_reg(rstate, R_028C3C_PA_SC_AA_MASK, 0xFFFFFFFF, 0xFFFFFFFF, NULL); + else { + r600_pipe_state_add_reg(rstate, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 0xFFFFFFFF, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, 0xFFFFFFFF, 0xFFFFFFFF, NULL); + } for (int i = 0; i < 8; i++) { /* state->rt entries > 0 only written if independent blending */ @@ -143,6 +154,7 @@ static void *evergreen_create_blend_state(struct pipe_context *ctx, static void *evergreen_create_dsa_state(struct pipe_context *ctx, const struct pipe_depth_stencil_alpha_state *state) { + 
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_dsa *dsa = CALLOC_STRUCT(r600_pipe_dsa); unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control; unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control; @@ -229,11 +241,15 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx, static void *evergreen_create_rs_state(struct pipe_context *ctx, const struct pipe_rasterizer_state *state) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_rasterizer *rs = CALLOC_STRUCT(r600_pipe_rasterizer); struct r600_pipe_state *rstate; unsigned tmp; unsigned prov_vtx = 1, polygon_dual_mode; unsigned clip_rule; + enum radeon_family family; + + family = r600_get_family(rctx->radeon); if (rs == NULL) { return NULL; @@ -290,17 +306,30 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx, tmp = (unsigned)state->line_width * 8; r600_pipe_state_add_reg(rstate, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL); + if (family == CHIP_CAYMAN) { + r600_pipe_state_add_reg(rstate, CM_R_028BDC_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, CM_R_028BE4_PA_SU_VTX_CNTL, + S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules), + 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, 
NULL); + r600_pipe_state_add_reg(rstate, CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL, - S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules), - 0xFFFFFFFF, NULL); + } else { + r600_pipe_state_add_reg(rstate, R_028C00_PA_SC_LINE_CNTL, 0x00000400, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C10_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C18_PA_CL_GB_HORZ_DISC_ADJ, 0x3F800000, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028C08_PA_SU_VTX_CNTL, + S_028C08_PIX_CENTER_HALF(state->gl_rasterization_rules), + 0xFFFFFFFF, NULL); + } + r600_pipe_state_add_reg(rstate, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0x0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL); return rstate; } @@ -318,7 +347,7 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_SAMPLER; util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0, + r600_pipe_state_add_reg_noblock(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0, S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | @@ -328,21 +357,21 @@ static void *evergreen_create_sampler_state(struct pipe_context *ctx, S_03C000_MAX_ANISO(r600_tex_aniso_filter(state->max_anisotropy)) | 
S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0, + r600_pipe_state_add_reg_noblock(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0, S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, - S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | - (state->seamless_cube_map ? 0 : S_03C008_DISABLE_CUBE_WRAP(1)) | - S_03C008_TYPE(1), - 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg_noblock(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, + S_03C008_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | + (state->seamless_cube_map ? 0 : S_03C008_DISABLE_CUBE_WRAP(1)) | + S_03C008_TYPE(1), + 0xFFFFFFFF, NULL); if (uc.ui) { - r600_pipe_state_add_reg(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg_noblock(rstate, R_00A410_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL); } return rstate; } @@ -351,6 +380,7 @@ static struct 
pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte struct pipe_resource *texture, const struct pipe_sampler_view *state) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view); struct r600_pipe_state *rstate; const struct util_format_description *desc; @@ -832,10 +862,14 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate = CALLOC_STRUCT(r600_pipe_state); u32 shader_mask, tl, br, target_mask; + enum radeon_family family; + int tl_x, tl_y, br_x, br_y; if (rstate == NULL) return; + family = r600_get_family(rctx->radeon); + evergreen_context_flush_dest_caches(&rctx->ctx); rctx->ctx.num_dest_buffers = state->nr_cbufs; @@ -860,8 +894,22 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, target_mask ^= 0xf << (i * 4); shader_mask |= 0xf << (i * 4); } - tl = S_028240_TL_X(0) | S_028240_TL_Y(0); - br = S_028244_BR_X(state->width) | S_028244_BR_Y(state->height); + tl_x = 0; + tl_y = 0; + br_x = state->width; + br_y = state->height; + /* EG hw workaround */ + if (br_x == 0) + tl_x = 1; + if (br_y == 0) + tl_y = 1; + /* cayman hw workaround */ + if (family == CHIP_CAYMAN) { + if (br_x == 1 && br_y == 1) + br_x = 2; + } + tl = S_028240_TL_X(tl_x) | S_028240_TL_Y(tl_y); + br = S_028244_BR_X(br_x) | S_028244_BR_Y(br_y); r600_pipe_state_add_reg(rstate, R_028240_PA_SC_GENERIC_SCISSOR_TL, tl, @@ -898,10 +946,17 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, 0x00000000, target_mask, NULL); r600_pipe_state_add_reg(rstate, R_02823C_CB_SHADER_MASK, shader_mask, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, - 0x00000000, 0xFFFFFFFF, NULL); + + + if (family == CHIP_CAYMAN) { + 
r600_pipe_state_add_reg(rstate, CM_R_028BE0_PA_SC_AA_CONFIG, + 0x00000000, 0xFFFFFFFF, NULL); + } else { + r600_pipe_state_add_reg(rstate, R_028C04_PA_SC_AA_CONFIG, + 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX, + 0x00000000, 0xFFFFFFFF, NULL); + } free(rctx->states[R600_PIPE_STATE_FRAMEBUFFER]); rctx->states[R600_PIPE_STATE_FRAMEBUFFER] = rstate; @@ -968,6 +1023,85 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx) rctx->context.texture_barrier = evergreen_texture_barrier; } +static void cayman_init_config(struct r600_pipe_context *rctx) +{ + struct r600_pipe_state *rstate = &rctx->config; + unsigned tmp; + + tmp = 0x00000000; + tmp |= S_008C00_EXPORT_SRC_C(1); + r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, (4 << 28), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A4C_PA_SC_MODE_CNTL_1, 0x0, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028A10_VGT_OUTPUT_PATH_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A14_VGT_HOS_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A20_VGT_HOS_REUSE_DEPTH, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A24_VGT_GROUP_PRIM_TYPE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A28_VGT_GROUP_FIRST_DECR, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A2C_VGT_GROUP_DECR, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A30_VGT_GROUP_VECT_0_CNTL, 0x0, 0xFFFFFFFF, 
NULL); + r600_pipe_state_add_reg(rstate, R_028A34_VGT_GROUP_VECT_1_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028A40_VGT_GS_MODE, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B94_VGT_STRMOUT_CONFIG, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AB4_VGT_REUSE_OFF, 0x00000000, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028AB8_VGT_VTX_CNT_EN, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028380_SQ_VTX_SEMANTIC_0, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028384_SQ_VTX_SEMANTIC_1, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028388_SQ_VTX_SEMANTIC_2, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02838C_SQ_VTX_SEMANTIC_3, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028390_SQ_VTX_SEMANTIC_4, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028394_SQ_VTX_SEMANTIC_5, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028398_SQ_VTX_SEMANTIC_6, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_02839C_SQ_VTX_SEMANTIC_7, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283A0_SQ_VTX_SEMANTIC_8, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283A4_SQ_VTX_SEMANTIC_9, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283A8_SQ_VTX_SEMANTIC_10, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283AC_SQ_VTX_SEMANTIC_11, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283B0_SQ_VTX_SEMANTIC_12, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283B4_SQ_VTX_SEMANTIC_13, 0x0, 
0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283B8_SQ_VTX_SEMANTIC_14, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283BC_SQ_VTX_SEMANTIC_15, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283C0_SQ_VTX_SEMANTIC_16, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283C4_SQ_VTX_SEMANTIC_17, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283C8_SQ_VTX_SEMANTIC_18, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283CC_SQ_VTX_SEMANTIC_19, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283D0_SQ_VTX_SEMANTIC_20, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283D4_SQ_VTX_SEMANTIC_21, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283D8_SQ_VTX_SEMANTIC_22, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283DC_SQ_VTX_SEMANTIC_23, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283E0_SQ_VTX_SEMANTIC_24, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283E4_SQ_VTX_SEMANTIC_25, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283E8_SQ_VTX_SEMANTIC_26, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283EC_SQ_VTX_SEMANTIC_27, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283F0_SQ_VTX_SEMANTIC_28, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283F4_SQ_VTX_SEMANTIC_29, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283F8_SQ_VTX_SEMANTIC_30, 0x0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_0283FC_SQ_VTX_SEMANTIC_31, 0x0, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, R_028810_PA_CL_CLIP_CNTL, 0x0, 0xFFFFFFFF, NULL); + + r600_pipe_state_add_reg(rstate, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210, 0xffffffff, 0); + r600_pipe_state_add_reg(rstate, CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98, 0xffffffff, 0); + + r600_pipe_state_add_reg(rstate, CM_R_0288E8_SQ_LDS_ALLOC, 0, 0xffffffff, NULL); + r600_pipe_state_add_reg(rstate, 
R_0288EC_SQ_LDS_ALLOC_PS, 0, 0xffffffff, NULL); + + r600_pipe_state_add_reg(rstate, CM_R_028804_DB_EQAA, 0x110000, 0xffffffff, NULL); + r600_context_pipe_state_set(&rctx->ctx, rstate); +} + void evergreen_init_config(struct r600_pipe_context *rctx) { struct r600_pipe_state *rstate = &rctx->config; @@ -999,6 +1133,12 @@ void evergreen_init_config(struct r600_pipe_context *rctx) unsigned tmp; family = r600_get_family(rctx->radeon); + + if (family == CHIP_CAYMAN) { + cayman_init_config(rctx); + return; + } + ps_prio = 0; vs_prio = 1; gs_prio = 2; @@ -1115,6 +1255,48 @@ void evergreen_init_config(struct r600_pipe_context *rctx) num_hs_stack_entries = 42; num_ls_stack_entries = 42; break; + case CHIP_SUMO: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 96; + num_vs_threads = 25; + num_gs_threads = 25; + num_es_threads = 25; + num_hs_threads = 25; + num_ls_threads = 25; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_SUMO2: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 96; + num_vs_threads = 25; + num_gs_threads = 25; + num_es_threads = 25; + num_hs_threads = 25; + num_ls_threads = 25; + num_ps_stack_entries = 85; + num_vs_stack_entries = 85; + num_gs_stack_entries = 85; + num_es_stack_entries = 85; + num_hs_stack_entries = 85; + num_ls_stack_entries = 85; + break; case CHIP_BARTS: num_ps_gprs = 93; num_vs_gprs = 46; @@ -1184,6 +1366,8 @@ void evergreen_init_config(struct r600_pipe_context *rctx) switch (family) { case CHIP_CEDAR: case CHIP_PALM: + case CHIP_SUMO: + case CHIP_SUMO2: case CHIP_CAICOS: break; default: @@ -1374,6 +1558,7 @@ void evergreen_polygon_offset_update(struct r600_pipe_context 
*rctx) void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control; @@ -1502,6 +1687,7 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; unsigned spi_vs_out_id[10]; @@ -1545,8 +1731,10 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader 0xFFFFFFFF, NULL); } -void evergreen_fetch_shader(struct r600_vertex_element *ve) +void evergreen_fetch_shader(struct pipe_context *ctx, + struct r600_vertex_element *ve) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate = &ve->rstate; rstate->id = R600_PIPE_STATE_FETCH_SHADER; rstate->nregs = 0; @@ -1580,11 +1768,13 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) return rstate; } -void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx, - struct r600_pipe_state *rstate, - struct r600_resource *rbuffer, - unsigned offset, unsigned stride) +void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride) { + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, @@ -1607,3 +1797,17 @@ void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx, 
r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, 0xC0000000, 0xFFFFFFFF, NULL); } + + +void evergreen_pipe_mod_buffer_resource(struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride) +{ + rstate->nregs = 0; + r600_pipe_state_mod_reg_bo(rstate, offset, rbuffer->bo); + r600_pipe_state_mod_reg(rstate, rbuffer->bo_size - offset - 1); + r600_pipe_state_mod_reg(rstate, S_030008_ENDIAN_SWAP(r600_endian_swap(32)) | + S_030008_STRIDE(stride)); + rstate->nregs = 8; + +} diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 3e878106bea..ee0c7c9ed9b 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -33,15 +33,19 @@ #define EVERGREEN_CONTEXT_REG_END 0X00029000 #define EVERGREEN_RESOURCE_OFFSET 0x00030000 #define EVERGREEN_RESOURCE_END 0x00034000 +#define CAYMAN_RESOURCE_END 0x00038000 #define EVERGREEN_LOOP_CONST_OFFSET 0x0003A200 #define EVERGREEN_LOOP_CONST_END 0x0003A26C #define EVERGREEN_BOOL_CONST_OFFSET 0x0003A500 #define EVERGREEN_BOOL_CONST_END 0x0003A506 +#define CAYMAN_BOOL_CONST_END 0x0003A518 #define EVERGREEN_SAMPLER_OFFSET 0X0003C000 #define EVERGREEN_SAMPLER_END 0X0003CFF0 +#define CAYMAN_SAMPLER_END 0X0003C600 #define EVERGREEN_CTL_CONST_OFFSET 0x0003CFF0 #define EVERGREEN_CTL_CONST_END 0x0003E200 +#define CAYMAN_CTL_CONST_END 0x0003FF0C #define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10 #define EVENT_TYPE_ZPASS_DONE 0x15 @@ -1907,4 +1911,43 @@ #define ENDIAN_8IN32 2 #define ENDIAN_8IN64 3 +#define CM_R_0288E8_SQ_LDS_ALLOC 0x000288E8 + +#define CM_R_028804_DB_EQAA 0x00028804 + +#define CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0 0x00028BD4 +#define CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1 0x00028BD8 +#define CM_R_028BDC_PA_SC_LINE_CNTL 0x28bdc +#define CM_R_028BE0_PA_SC_AA_CONFIG 0x28be0 +#define CM_R_028BE4_PA_SU_VTX_CNTL 0x28be4 +#define CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ 0x28be8 +#define CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ 0x28bec 
+#define CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ 0x28bf0 +#define CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ 0x28bf4 + +#define CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 0x28bf8 +#define CM_R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1 0x28bfc +#define CM_R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2 0x28c00 +#define CM_R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3 0x28c04 + +#define CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0 0x28c08 +#define CM_R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1 0x28c0c +#define CM_R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2 0x28c10 +#define CM_R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3 0x28c14 + +#define CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0 0x28c18 +#define CM_R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1 0x28c1c +#define CM_R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2 0x28c20 +#define CM_R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3 0x28c24 + +#define CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0 0x28c28 +#define CM_R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1 0x28c2c +#define CM_R_028C30_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2 0x28c30 +#define CM_R_028C34_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3 0x28c34 + +#define CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0 0x28c38 +#define CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1 0x28c3c + +#define CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 0x00008C10 +#define CM_R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 0x00008C14 #endif diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 7b57fc80dc2..23e7181a86e 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -92,9 +92,12 @@ enum radeon_family { CHIP_CYPRESS, CHIP_HEMLOCK, CHIP_PALM, + CHIP_SUMO, + CHIP_SUMO2, CHIP_BARTS, CHIP_TURKS, CHIP_CAICOS, + CHIP_CAYMAN, CHIP_LAST, }; @@ -102,6 +105,7 @@ enum chip_class { R600, R700, EVERGREEN, + CAYMAN, }; struct r600_tiling_info { @@ -141,11 +145,23 @@ static INLINE unsigned r600_bo_offset(struct r600_bo *bo) #define R600_BLOCK_MAX_BO 32 #define R600_BLOCK_MAX_REG 128 +/* each range covers 9 bits of 
dword space = 512 dwords = 2k bytes */ +/* there is a block entry for each register so 512 blocks */ +/* we have no registers to read/write below 0x8000 (0x2000 in dw space) */ +/* we use some fake offsets at 0x40000 to do evergreen sampler borders so take 0x42000 as a max bound*/ +#define RANGE_OFFSET_START 0x8000 +#define HASH_SHIFT 9 +#define NUM_RANGES (0x42000 - RANGE_OFFSET_START) / (4 << HASH_SHIFT) /* 128 << 9 = 64k */ + +#define CTX_RANGE_ID(offset) ((((offset - RANGE_OFFSET_START) >> 2) >> HASH_SHIFT) & 255) +#define CTX_BLOCK_ID(offset) (((offset - RANGE_OFFSET_START) >> 2) & ((1 << HASH_SHIFT) - 1)) + struct r600_pipe_reg { - u32 offset; - u32 mask; u32 value; - struct r600_bo *bo; + u32 mask; + struct r600_block *block; + struct r600_bo *bo; + u32 id; }; struct r600_pipe_state { @@ -154,18 +170,6 @@ struct r600_pipe_state { struct r600_pipe_reg regs[R600_BLOCK_MAX_REG]; }; -static inline void r600_pipe_state_add_reg(struct r600_pipe_state *state, - u32 offset, u32 value, u32 mask, - struct r600_bo *bo) -{ - state->regs[state->nregs].offset = offset; - state->regs[state->nregs].value = value; - state->regs[state->nregs].mask = mask; - state->regs[state->nregs].bo = bo; - state->nregs++; - assert(state->nregs < R600_BLOCK_MAX_REG); -} - #define R600_BLOCK_STATUS_ENABLED (1 << 0) #define R600_BLOCK_STATUS_DIRTY (1 << 1) @@ -307,4 +311,30 @@ void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struc struct radeon *radeon_decref(struct radeon *radeon); +void _r600_pipe_state_add_reg(struct r600_context *ctx, + struct r600_pipe_state *state, + u32 offset, u32 value, u32 mask, + u32 range_id, u32 block_id, + struct r600_bo *bo); + +void r600_pipe_state_add_reg_noblock(struct r600_pipe_state *state, + u32 offset, u32 value, u32 mask, + struct r600_bo *bo); +#define r600_pipe_state_add_reg(state, offset, value, mask, bo) _r600_pipe_state_add_reg(&rctx->ctx, state, offset, value, mask, CTX_RANGE_ID(offset), CTX_BLOCK_ID(offset), bo) + 
+static inline void r600_pipe_state_mod_reg(struct r600_pipe_state *state, + u32 value) +{ + state->regs[state->nregs].value = value; + state->nregs++; +} + +static inline void r600_pipe_state_mod_reg_bo(struct r600_pipe_state *state, + u32 value, struct r600_bo *bo) +{ + state->regs[state->nregs].value = value; + state->regs[state->nregs].bo = bo; + state->nregs++; +} + #endif diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 033e84665f5..65e539eba35 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -94,6 +94,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r } break; case CHIPREV_EVERGREEN: + case CHIPREV_CAYMAN: switch (alu->inst) { case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: return 0; @@ -221,11 +222,16 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family) case CHIP_CYPRESS: case CHIP_HEMLOCK: case CHIP_PALM: + case CHIP_SUMO: + case CHIP_SUMO2: case CHIP_BARTS: case CHIP_TURKS: case CHIP_CAICOS: bc->chiprev = CHIPREV_EVERGREEN; break; + case CHIP_CAYMAN: + bc->chiprev = CHIPREV_CAYMAN; + break; default: R600_ERR("unknown family %d\n", bc->family); return -EINVAL; @@ -334,6 +340,7 @@ static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT); case CHIPREV_EVERGREEN: + case CHIPREV_CAYMAN: default: return !alu->is_op3 && ( alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE || @@ -384,6 +391,7 @@ static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4); case CHIPREV_EVERGREEN: + case CHIPREV_CAYMAN: default: return !alu->is_op3 && ( alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE || @@ -401,6 +409,7 @@ static int is_alu_cube_inst(struct r600_bc *bc, struct 
r600_bc_alu *alu) return !alu->is_op3 && alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; case CHIPREV_EVERGREEN: + case CHIPREV_CAYMAN: default: return !alu->is_op3 && alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE; @@ -417,6 +426,7 @@ static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu) alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR || alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); case CHIPREV_EVERGREEN: + case CHIPREV_CAYMAN: default: return !alu->is_op3 && ( alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT); @@ -469,6 +479,7 @@ static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2 || alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4; case CHIPREV_EVERGREEN: + case CHIPREV_CAYMAN: default: if (!alu->is_op3) /* Note that FLT_TO_INT_* instructions are vector-only instructions @@ -514,13 +525,16 @@ static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first, { struct r600_bc_alu *alu; unsigned i, chan, trans; + int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; - for (i = 0; i < 5; i++) + for (i = 0; i < max_slots; i++) assignment[i] = NULL; for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) { chan = alu->dst.chan; - if (is_alu_trans_unit_inst(bc, alu)) + if (max_slots == 4) + trans = 0; + else if (is_alu_trans_unit_inst(bc, alu)) trans = 1; else if (is_alu_vec_unit_inst(bc, alu)) trans = 0; @@ -719,8 +733,10 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, struct alu_bank_swizzle bs; int bank_swizzle[5]; int i, r = 0, forced = 0; - boolean scalar_only = true; - for (i = 0; i < 5; i++) { + boolean scalar_only = bc->chiprev == CHIPREV_CAYMAN ? false : true; + int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 
4 : 5; + + for (i = 0; i < max_slots; i++) { if (slots[i] && slots[i]->bank_swizzle_force) { slots[i]->bank_swizzle = slots[i]->bank_swizzle_force; forced = 1; @@ -737,6 +753,13 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, bank_swizzle[i] = SQ_ALU_VEC_012; bank_swizzle[4] = SQ_ALU_SCL_210; while(bank_swizzle[4] <= SQ_ALU_SCL_221) { + + if (max_slots == 4) { + for (i = 0; i < max_slots; i++) { + if (bank_swizzle[i] == SQ_ALU_VEC_210) + return -1; + } + } init_bank_swizzle(&bs); if (scalar_only == false) { for (i = 0; i < 4; i++) { @@ -749,11 +772,11 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, } else r = 0; - if (!r && slots[4]) { + if (!r && slots[4] && max_slots == 5) { r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]); } if (!r) { - for (i = 0; i < 5; i++) { + for (i = 0; i < max_slots; i++) { if (slots[i]) slots[i]->bank_swizzle = bank_swizzle[i]; } @@ -763,7 +786,7 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, if (scalar_only) { bank_swizzle[4]++; } else { - for (i = 0; i < 5; i++) { + for (i = 0; i < max_slots; i++) { bank_swizzle[i]++; if (bank_swizzle[i] <= SQ_ALU_VEC_210) break; @@ -783,12 +806,13 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, struct r600_bc_alu *prev[5]; int gpr[5], chan[5]; int i, j, r, src, num_src; + int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 
4 : 5; r = assign_alu_units(bc, alu_prev, prev); if (r) return r; - for (i = 0; i < 5; ++i) { + for (i = 0; i < max_slots; ++i) { if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) { gpr[i] = prev[i]->dst.sel; /* cube writes more than PV.X */ @@ -800,7 +824,7 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, gpr[i] = -1; } - for (i = 0; i < 5; ++i) { + for (i = 0; i < max_slots; ++i) { struct r600_bc_alu *alu = slots[i]; if(!alu) continue; @@ -810,11 +834,13 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc, if (!is_gpr(alu->src[src].sel) || alu->src[src].rel) continue; - if (alu->src[src].sel == gpr[4] && - alu->src[src].chan == chan[4]) { - alu->src[src].sel = V_SQ_ALU_SRC_PS; - alu->src[src].chan = 0; - continue; + if (bc->chiprev < CHIPREV_CAYMAN) { + if (alu->src[src].sel == gpr[4] && + alu->src[src].chan == chan[4]) { + alu->src[src].sel = V_SQ_ALU_SRC_PS; + alu->src[src].chan = 0; + continue; + } } for (j = 0; j < 4; ++j) { @@ -922,12 +948,13 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], int i, j, r, src, num_src; int num_once_inst = 0; int have_mova = 0, have_rel = 0; + int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5; r = assign_alu_units(bc, alu_prev, prev); if (r) return r; - for (i = 0; i < 5; ++i) { + for (i = 0; i < max_slots; ++i) { struct r600_bc_alu *alu; /* check number of literals */ @@ -951,7 +978,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], result[i] = prev[i]; continue; } else if (prev[i] && slots[i]) { - if (result[4] == NULL && prev[4] == NULL && slots[4] == NULL) { + if (max_slots == 5 && result[4] == NULL && prev[4] == NULL && slots[4] == NULL) { /* Trans unit is still free try to use it. 
*/ if (is_alu_any_unit_inst(bc, slots[i])) { result[i] = prev[i]; @@ -991,7 +1018,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], if (!is_gpr(alu->src[src].sel)) continue; - for (j = 0; j < 5; ++j) { + for (j = 0; j < max_slots; ++j) { if (!prev[j] || !prev[j]->dst.write) continue; @@ -1019,7 +1046,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], bc->cf_last->ndw -= align(prev_nliteral, 2); /* sort instructions */ - for (i = 0; i < 5; ++i) { + for (i = 0; i < max_slots; ++i) { slots[i] = result[i]; if (result[i]) { LIST_DEL(&result[i]->list); @@ -1032,7 +1059,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list)->last = 1; /* determine new first instruction */ - for (i = 0; i < 5; ++i) { + for (i = 0; i < max_slots; ++i) { if (result[i]) { bc->cf_last->curr_bs_head = result[i]; break; @@ -1225,6 +1252,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int uint32_t literal[4]; unsigned nliteral; struct r600_bc_alu *slots[5]; + int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 
4 : 5; r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots); if (r) return r; @@ -1245,7 +1273,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int if (r) return r; - for (i = 0, nliteral = 0; i < 5; i++) { + for (i = 0, nliteral = 0; i < max_slots; i++) { if (slots[i]) { r = r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral); if (r) @@ -1282,6 +1310,7 @@ static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc) return 16; case CHIPREV_EVERGREEN: + case CHIPREV_CAYMAN: return 64; default: @@ -1290,6 +1319,19 @@ static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc) } } +static inline boolean last_inst_was_vtx_fetch(struct r600_bc *bc) +{ + if (bc->chiprev == CHIPREV_CAYMAN) { + if (bc->cf_last->inst != CM_V_SQ_CF_WORD1_SQ_CF_INST_TC) + return TRUE; + } else { + if (bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX && + bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) + return TRUE; + } + return FALSE; +} + int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) { struct r600_bc_vtx *nvtx = r600_bc_vtx(); @@ -1301,15 +1343,17 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx) /* cf can contains only alu or only vtx or only tex */ if (bc->cf_last == NULL || - (bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX && - bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) || - bc->force_add_cf) { + last_inst_was_vtx_fetch(bc) || + bc->force_add_cf) { r = r600_bc_add_cf(bc); if (r) { free(nvtx); return r; } - bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX; + if (bc->chiprev == CHIPREV_CAYMAN) + bc->cf_last->inst = CM_V_SQ_CF_WORD1_SQ_CF_INST_TC; + else + bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX; } LIST_ADDTAIL(&nvtx->list, &bc->cf_last->vtx); /* each fetch use 4 dwords */ @@ -1379,14 +1423,21 @@ int r600_bc_add_cfinst(struct r600_bc *bc, int inst) return 0; } +int cm_bc_add_cf_end(struct r600_bc *bc) +{ + return r600_bc_add_cfinst(bc, 
CM_V_SQ_CF_WORD1_SQ_CF_INST_END); +} + /* common to all 3 families */ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) { - bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | + bc->bytecode[id] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | - S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x) | - S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); + S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x); + if (bc->chiprev < CHIPREV_CAYMAN) + bc->bytecode[id] |= S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count); + id++; bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) | S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) | S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) | @@ -1397,9 +1448,11 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) | S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) | S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr); - bc->bytecode[id++] = S_SQ_VTX_WORD2_OFFSET(vtx->offset) | - S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian) | - S_SQ_VTX_WORD2_MEGA_FETCH(1); + bc->bytecode[id] = S_SQ_VTX_WORD2_OFFSET(vtx->offset)| + S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian); + if (bc->chiprev < CHIPREV_CAYMAN) + bc->bytecode[id] |= S_SQ_VTX_WORD2_MEGA_FETCH(1); + id++; bc->bytecode[id++] = 0; return 0; } @@ -1601,6 +1654,7 @@ int r600_bc_build(struct r600_bc *bc) case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: + case CM_V_SQ_CF_WORD1_SQ_CF_INST_END: break; default: R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); @@ -1616,7 +1670,7 @@ int r600_bc_build(struct r600_bc *bc) return -ENOMEM; LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { addr = cf->addr; - if (bc->chiprev == CHIPREV_EVERGREEN) + if (bc->chiprev >= CHIPREV_EVERGREEN) r = eg_bc_cf_build(bc, cf); else r = r600_bc_cf_build(bc, cf); @@ -1640,6 +1694,7 
@@ int r600_bc_build(struct r600_bc *bc) break; case CHIPREV_R700: case CHIPREV_EVERGREEN: /* eg alu is same encoding as r700 */ + case CHIPREV_CAYMAN: /* eg alu is same encoding as r700 */ r = r700_bc_alu_build(bc, alu, addr); break; default: @@ -1668,6 +1723,14 @@ int r600_bc_build(struct r600_bc *bc) } break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: + if (bc->chiprev == CHIPREV_CAYMAN) { + LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { + r = r600_bc_vtx_build(bc, vtx, addr); + if (r) + return r; + addr += 4; + } + } LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { r = r600_bc_tex_build(bc, tex, addr); if (r) @@ -1688,6 +1751,7 @@ int r600_bc_build(struct r600_bc *bc) case V_SQ_CF_WORD1_SQ_CF_INST_POP: case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: + case CM_V_SQ_CF_WORD1_SQ_CF_INST_END: break; default: R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); @@ -1752,6 +1816,9 @@ void r600_bc_dump(struct r600_bc *bc) case 2: chip = 'E'; break; + case 3: + chip = 'C'; + break; case 0: default: chip = '6'; @@ -1818,6 +1885,7 @@ void r600_bc_dump(struct r600_bc *bc) case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: case V_SQ_CF_WORD1_SQ_CF_INST_RETURN: + case CM_V_SQ_CF_WORD1_SQ_CF_INST_END: fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]); fprintf(stderr, "ADDR:%d\n", cf->cf_addr); id++; @@ -1920,7 +1988,10 @@ void r600_bc_dump(struct r600_bc *bc) fprintf(stderr, "%04d %08X ", id, bc->bytecode[id]); fprintf(stderr, "SRC(GPR:%d ", vtx->src_gpr); fprintf(stderr, "SEL_X:%d) ", vtx->src_sel_x); - fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count); + if (bc->chiprev < CHIPREV_CAYMAN) + fprintf(stderr, "MEGA_FETCH_COUNT:%d ", vtx->mega_fetch_count); + else + fprintf(stderr, "SEL_Y:%d) ", 0); fprintf(stderr, "DST(GPR:%d ", vtx->dst_gpr); fprintf(stderr, "SEL_X:%d ", vtx->dst_sel_x); fprintf(stderr, "SEL_Y:%d ", vtx->dst_sel_y); @@ -2212,9 +2283,9 @@ int r600_vertex_elements_build_fetch_shader(struct 
r600_pipe_context *rctx, stru r600_bc_clear(&bc); if (rctx->family >= CHIP_CEDAR) - evergreen_fetch_shader(ve); + evergreen_fetch_shader(&rctx->context, ve); else - r600_fetch_shader(ve); + r600_fetch_shader(&rctx->context, ve); return 0; } diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 26d337fe125..540f45bbd06 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -205,6 +205,8 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg); void r600_bc_dump(struct r600_bc *bc); +int cm_bc_add_cf_end(struct r600_bc *bc); + int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve); /* r700_asm.c */ diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h index a85d0bbf1e1..184f32c9960 100644 --- a/src/gallium/drivers/r600/r600_opcodes.h +++ b/src/gallium/drivers/r600/r600_opcodes.h @@ -171,9 +171,12 @@ #define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT 0x00000027 #define V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE 0x00000028 +/* cayman doesn't have VTX */ #define EG_V_SQ_CF_WORD1_SQ_CF_INST_NOP 0x00000000 #define EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX 0x00000001 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_TC 0x00000001 #define EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX 0x00000002 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_2 0x00000002 #define EG_V_SQ_CF_WORD1_SQ_CF_INST_GDS 0x00000003 #define EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START 0x00000004 #define EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END 0x00000005 @@ -200,19 +203,36 @@ #define EG_V_SQ_CF_WORD1_SQ_CF_INST_WAIT_ACK 0x0000001a #define EG_V_SQ_CF_WORD1_SQ_CF_INST_TC_ACK 0x0000001b #define EG_V_SQ_CF_WORD1_SQ_CF_INST_VC_ACK 0x0000001c +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_28 0x0000001c #define EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMPTABLE 0x0000001d #define 
EG_V_SQ_CF_WORD1_SQ_CF_INST_GLOBAL_WAVE_SYNC 0x0000001e #define EG_V_SQ_CF_WORD1_SQ_CF_INST_HALT 0x0000001f +/* cayman extras */ +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_END 0x00000020 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_LDS_DEALLOC 0x00000021 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_PUSH_WQM 0x00000022 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_POP_WQM 0x00000023 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_ELSE_WQM 0x00000024 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_JUMP_ANY 0x00000025 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_REACTIVATE 0x00000026 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_REACTIVATE_WQM 0x00000027 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_INTERRUPT 0x00000028 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_INTERRUPT_AND_SLEEP 0x00000029 +#define CM_V_SQ_CF_WORD1_SQ_CF_INST_SET_PRIORITY 0x00000030 + #define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU 0x00000008 #define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE 0x00000009 #define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER 0x0000000A #define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER 0x0000000B #define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_EXTENDED 0x0000000C -#define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_CONTINUE 0x0000000D -#define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_BREAK 0x0000000E +#define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_CONTINUE 0x0000000D /* different on CAYMAN */ +#define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_BREAK 0x0000000E /* different on CAYMAN */ #define EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_ELSE_AFTER 0x0000000F +#define CM_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_REACTIVATE_BEFORE 0x0000000D +#define CM_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_VALID_PIXEL_MODE 0x0000000E + #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD 0x00000000 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL 0x00000001 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL_IEEE 0x00000002 @@ -299,11 +319,11 @@ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADDC_UINT 0x00000052 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SUBB_UINT 0x00000053 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_GROUP_BARRIER 
0x00000054 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_GROUP_SEQ_BEGIN 0x00000055 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_GROUP_SEQ_END 0x00000056 +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_GROUP_SEQ_BEGIN 0x00000055 /* not on CAYMAN */ +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_GROUP_SEQ_END 0x00000056 /* not on CAYMAN */ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_MODE 0x00000057 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_CF_IDX0 0x00000058 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_CF_IDX1 0x00000059 +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_CF_IDX0 0x00000058 /* not on CAYMAN */ +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_CF_IDX1 0x00000059 /* not on CAYMAN */ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SET_LDS_SIZE 0x0000005A #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE 0x00000081 @@ -322,8 +342,8 @@ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT 0x00000090 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT 0x00000091 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT 0x00000092 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT 0x00000093 -#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT 0x00000094 +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT 0x00000093 /* not on CAYMAN */ +#define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT 0x00000094 /* not on CAYMAN */ #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_64 0x00000095 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED_64 0x00000096 #define EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_64 0x00000097 @@ -393,9 +413,10 @@ #define CHIPREV_R600 0 #define CHIPREV_R700 1 #define CHIPREV_EVERGREEN 2 +#define CHIPREV_CAYMAN 3 -#define BC_INST(bc, x) ((bc)->chiprev == CHIPREV_EVERGREEN ? EG_##x : x) +#define BC_INST(bc, x) ((bc)->chiprev >= CHIPREV_EVERGREEN ? EG_##x : x) -#define CTX_INST(x) (ctx->bc->chiprev == CHIPREV_EVERGREEN ? EG_##x : x) +#define CTX_INST(x) (ctx->bc->chiprev >= CHIPREV_EVERGREEN ? 
EG_##x : x) #endif diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index ec13e48e14e..70e3619de4b 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -260,9 +260,12 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void case CHIP_CYPRESS: case CHIP_HEMLOCK: case CHIP_PALM: + case CHIP_SUMO: + case CHIP_SUMO2: case CHIP_BARTS: case CHIP_TURKS: case CHIP_CAICOS: + case CHIP_CAYMAN: evergreen_init_state_functions(rctx); if (evergreen_context_init(&rctx->ctx, rctx->radeon)) { r600_destroy_context(&rctx->context); @@ -334,9 +337,12 @@ static const char *r600_get_family_name(enum radeon_family family) case CHIP_CYPRESS: return "AMD CYPRESS"; case CHIP_HEMLOCK: return "AMD HEMLOCK"; case CHIP_PALM: return "AMD PALM"; + case CHIP_SUMO: return "AMD SUMO"; + case CHIP_SUMO2: return "AMD SUMO2"; case CHIP_BARTS: return "AMD BARTS"; case CHIP_TURKS: return "AMD TURKS"; case CHIP_CAICOS: return "AMD CAICOS"; + case CHIP_CAYMAN: return "AMD CAYMAN"; default: return "AMD unknown"; } } diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 0e4cfeb5b80..8002d943abd 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -65,6 +65,7 @@ enum r600_pipe_state_id { R600_PIPE_STATE_RESOURCE, R600_PIPE_STATE_POLYGON_OFFSET, R600_PIPE_STATE_FETCH_SHADER, + R600_PIPE_STATE_SPI, R600_PIPE_NSTATES }; @@ -188,6 +189,8 @@ struct r600_pipe_context { struct r600_pipe_state ps_const_buffer; struct r600_pipe_state ps_const_buffer_resource[R600_MAX_CONST_BUFFERS]; struct r600_pipe_rasterizer *rasterizer; + struct r600_pipe_state vgt; + struct r600_pipe_state spi; /* shader information */ unsigned sprite_coord_enable; bool flatshade; @@ -217,11 +220,14 @@ void evergreen_init_state_functions(struct r600_pipe_context *rctx); void evergreen_init_config(struct r600_pipe_context *rctx); void 
evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader); void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader); -void evergreen_fetch_shader(struct r600_vertex_element *ve); +void evergreen_fetch_shader(struct pipe_context *ctx, struct r600_vertex_element *ve); void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx); void evergreen_polygon_offset_update(struct r600_pipe_context *rctx); -void evergreen_pipe_set_buffer_resource(struct r600_pipe_context *rctx, - struct r600_pipe_state *rstate, +void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride); +void evergreen_pipe_mod_buffer_resource(struct r600_pipe_state *rstate, struct r600_resource *rbuffer, unsigned offset, unsigned stride); @@ -258,11 +264,14 @@ void r600_init_state_functions(struct r600_pipe_context *rctx); void r600_init_config(struct r600_pipe_context *rctx); void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shader); void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader); -void r600_fetch_shader(struct r600_vertex_element *ve); +void r600_fetch_shader(struct pipe_context *ctx, struct r600_vertex_element *ve); void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx); void r600_polygon_offset_update(struct r600_pipe_context *rctx); -void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx, - struct r600_pipe_state *rstate, +void r600_pipe_init_buffer_resource(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride); +void r600_pipe_mod_buffer_resource(struct r600_pipe_state *rstate, struct r600_resource *rbuffer, unsigned offset, unsigned stride); diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 
845d41ace02..39e6d85d7b4 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -36,11 +36,37 @@ #include <errno.h> #include <byteswap.h> +/* CAYMAN notes +Why CAYMAN got loops for lots of instructions is explained here. + +-These 8xx t-slot only ops are implemented in all vector slots. +MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT +These 8xx t-slot only opcodes become vector ops, with all four +slots expecting the arguments on sources a and b. Result is +broadcast to all channels. +MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT +These 8xx t-slot only opcodes become vector ops in the z, y, and +x slots. +EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64 +RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64 +SQRT_IEEE/_64 +SIN/COS +The w slot may have an independent co-issued operation, or if the +result is required to be in the w slot, the opcode above may be +issued in the w slot as well. +The compiler must issue the source argument to slots z, y, and x +*/ + + int r600_find_vs_semantic_index(struct r600_shader *vs, struct r600_shader *ps, int id) { struct r600_shader_io *input = &ps->input[id]; + /* position/face doesn't get/need a semantic index */ + if (input->name == TGSI_SEMANTIC_POSITION || input->name == TGSI_SEMANTIC_FACE) + return 0; + for (int i = 0; i < vs->noutput; i++) { if (input->name == vs->output[i].name && input->sid == vs->output[i].sid) { @@ -181,7 +207,7 @@ struct r600_shader_tgsi_instruction { int (*process)(struct r600_shader_ctx *ctx); }; -static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[]; +static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[]; static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx); static int tgsi_is_supported(struct r600_shader_ctx *ctx) @@ -292,7 +318,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) 
ctx->shader->input[i].interpolate = d->Declaration.Interpolate; ctx->shader->input[i].centroid = d->Declaration.Centroid; ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + i; - if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev == CHIPREV_EVERGREEN) { + if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->bc->chiprev >= CHIPREV_EVERGREEN) { /* turn input into interpolate on EG */ if (ctx->shader->input[i].name != TGSI_SEMANTIC_POSITION) { if (ctx->shader->input[i].interpolate > 0) { @@ -619,13 +645,13 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh } if (ctx.type == TGSI_PROCESSOR_VERTEX) { ctx.file_offset[TGSI_FILE_INPUT] = 1; - if (ctx.bc->chiprev == CHIPREV_EVERGREEN) { + if (ctx.bc->chiprev >= CHIPREV_EVERGREEN) { r600_bc_add_cfinst(ctx.bc, EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); } else { r600_bc_add_cfinst(ctx.bc, V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS); } } - if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev == CHIPREV_EVERGREEN) { + if (ctx.type == TGSI_PROCESSOR_FRAGMENT && ctx.bc->chiprev >= CHIPREV_EVERGREEN) { ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx); } ctx.file_offset[TGSI_FILE_OUTPUT] = ctx.file_offset[TGSI_FILE_INPUT] + @@ -679,7 +705,9 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh goto out_err; if ((r = tgsi_split_literal_constant(&ctx))) goto out_err; - if (ctx.bc->chiprev == CHIPREV_EVERGREEN) + if (ctx.bc->chiprev == CHIPREV_CAYMAN) + ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; + else if (ctx.bc->chiprev >= CHIPREV_EVERGREEN) ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; else ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; @@ -800,8 +828,10 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh } /* set export done on last export of each type */ for (i = noutput - 1, output_done = 0; i >= 0; i--) { - if (i == (noutput - 1)) { - output[i].end_of_program = 1; + if (ctx.bc->chiprev < 
CHIPREV_CAYMAN) { + if (i == (noutput - 1)) { + output[i].end_of_program = 1; + } } if (!(output_done & (1 << output[i].type))) { output_done |= (1 << output[i].type); @@ -814,6 +844,10 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh if (r) goto out_err; } + /* add program end */ + if (ctx.bc->chiprev == CHIPREV_CAYMAN) + cm_bc_add_cf_end(ctx.bc); + free(ctx.literals); tgsi_parse_free(&ctx.parse); return 0; @@ -933,6 +967,31 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx) return tgsi_op2_s(ctx, 1); } +static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + int i, j, r; + struct r600_bc_alu alu; + int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; + + for (i = 0 ; i < last_slot; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = ctx->inst_info->r600_opcode; + for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { + r600_bc_src(&alu.src[j], &ctx->src[j], 0); + } + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; + + if (i == last_slot - 1) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + /* * r600 - trunc to -PI..PI range * r700 - normalize by dividing by 2PI @@ -1013,6 +1072,37 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx) return 0; } +static int cayman_trig(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu; + int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 
4 : 3; + int i, r; + + r = tgsi_setup_trig(ctx); + if (r) + return r; + + + for (i = 0; i < last_slot; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = ctx->inst_info->r600_opcode; + alu.dst.chan = i; + + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; + + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + if (i == last_slot - 1) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + static int tgsi_trig(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; @@ -1060,7 +1150,7 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) { struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; - int r; + int i, r; /* We'll only need the trig stuff if we are going to write to the * X or Y components of the destination vector. @@ -1073,30 +1163,69 @@ static int tgsi_scs(struct r600_shader_ctx *ctx) /* dst.x = COS */ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); - tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0 ; i < 3; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + + if (i == 0) + alu.dst.write = 1; + else + alu.dst.write = 0; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + if (i == 2) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS); + tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 0; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.src[0].sel = 
ctx->temp_reg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } } /* dst.y = SIN */ if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); - tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0 ; i < 3; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (i == 1) + alu.dst.write = 1; + else + alu.dst.write = 0; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + if (i == 2) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN); + tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 0; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } } /* dst.z = 0.0; */ @@ -1216,16 +1345,36 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) { int chan; int sel; + int i; - /* dst.z = log(src.y) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); - r600_bc_src(&alu.src[0], &ctx->src[0], 1); - tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0; i < 3; i++) { + /* dst.z = log(src.y) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); + r600_bc_src(&alu.src[0], &ctx->src[0], 1); + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (i == 2) { + alu.dst.write = 1; + alu.last = 1; 
+ } else + alu.dst.write = 0; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + /* dst.z = log(src.y) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED); + r600_bc_src(&alu.src[0], &ctx->src[0], 1); + tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } chan = alu.dst.chan; sel = alu.dst.sel; @@ -1247,16 +1396,35 @@ static int tgsi_lit(struct r600_shader_ctx *ctx) if (r) return r; - /* dst.z = exp(tmp.x) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 0; - tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0; i < 3; i++) { + /* dst.z = exp(tmp.x) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + if (i == 2) { + alu.dst.write = 1; + alu.last = 1; + } else + alu.dst.write = 0; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + /* dst.z = exp(tmp.x) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } } return 0; } @@ -1332,6 +1500,56 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) return tgsi_helper_tempx_replicate(ctx); } +static int cayman_pow(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + int i, r; + struct r600_bc_alu alu; + int last_slot = 
(inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; + + for (i = 0; i < 3; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + alu.dst.write = 1; + if (i == 2) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + /* b * LOG2(a) */ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); + r600_bc_src(&alu.src[0], &ctx->src[1], 0); + alu.src[1].sel = ctx->temp_reg; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + + for (i = 0; i < last_slot; i++) { + /* POW(a,b) = EXP2(b * LOG2(a))*/ + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.src[0].sel = ctx->temp_reg; + + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; + if (i == last_slot - 1) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; +} + static int tgsi_pow(struct r600_shader_ctx *ctx) { struct r600_bc_alu alu; @@ -1550,24 +1768,46 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index; if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { + int out_chan; /* Add perspective divide */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 3); + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + out_chan = 2; + for (i = 0; i < 3; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 3); - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 3; - alu.last = 1; - 
alu.dst.write = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 2) + alu.last = 1; + if (out_chan == i) + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + + } else { + out_chan = 3; + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 3); + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = out_chan; + alu.last = 1; + alu.dst.write = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } for (i = 0; i < 3; i++) { memset(&alu, 0, sizeof(struct r600_bc_alu)); alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL); alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 3; + alu.src[0].chan = out_chan; r600_bc_src(&alu.src[1], &ctx->src[0], i); alu.dst.sel = ctx->temp_reg; alu.dst.chan = i; @@ -1612,18 +1852,37 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) } /* tmp1.z = RCP_e(|tmp1.z|) */ - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 2; - alu.src[0].abs = 1; - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 2; - alu.dst.write = 1; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0; i < 3; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 2; + alu.src[0].abs = 1; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 2) + alu.dst.write = 1; + if (i == 2) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 2; + alu.src[0].abs 
= 1; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 2; + alu.dst.write = 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x @@ -1958,6 +2217,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; + int i; /* result.x = 2^floor(src); */ if (inst->Dst[0].Register.WriteMask & 1) { @@ -1974,17 +2234,35 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) if (r) return r; - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 0; + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0; i < 3; i++) { + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 0; - alu.dst.write = 1; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 0) + alu.dst.write = 1; + if (i == 2) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 0; + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 0; + alu.dst.write = 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } } /* result.y = tmp - floor(tmp); */ @@ -2012,19 +2290,38 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) /* result.z = RoughApprox2ToX(tmp);*/ if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0; i < 3; i++) { + memset(&alu, 0, sizeof(struct 
r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); - alu.dst.sel = ctx->temp_reg; - alu.dst.write = 1; - alu.dst.chan = 2; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 2) { + alu.dst.write = 1; + alu.last = 1; + } - alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + alu.dst.chan = 2; + + alu.last = 1; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } } /* result.w = 1.0;*/ @@ -2051,21 +2348,42 @@ static int tgsi_log(struct r600_shader_ctx *ctx) struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct r600_bc_alu alu; int r; + int i; /* result.x = floor(log2(src)); */ if (inst->Dst[0].Register.WriteMask & 1) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0; i < 3; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 0) + alu.dst.write = 1; + if (i == 2) + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 0; - alu.dst.write = 1; - alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + } else { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 0; + 
alu.dst.write = 1; + alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR); alu.src[0].sel = ctx->temp_reg; @@ -2083,19 +2401,40 @@ static int tgsi_log(struct r600_shader_ctx *ctx) /* result.y = src.x / (2 ^ floor(log2(src.x))); */ if ((inst->Dst[0].Register.WriteMask >> 1) & 1) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0; i < 3; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 1; - alu.dst.write = 1; - alu.last = 1; + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 1) + alu.dst.write = 1; + if (i == 2) + alu.last = 1; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 1; + alu.dst.write = 1; + alu.last = 1; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -2112,35 +2451,73 @@ static int tgsi_log(struct r600_shader_ctx *ctx) if (r) return r; - memset(&alu, 0, sizeof(struct r600_bc_alu)); - - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 1; + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0; i < 3; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 1; + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + 
if (i == 1) + alu.dst.write = 1; + if (i == 2) + alu.last = 1; - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 1; - alu.dst.write = 1; - alu.last = 1; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 1; + alu.dst.write = 1; + alu.last = 1; - memset(&alu, 0, sizeof(struct r600_bc_alu)); + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); - alu.src[0].sel = ctx->temp_reg; - alu.src[0].chan = 1; + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0; i < 3; i++) { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 1; + + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = i; + if (i == 1) + alu.dst.write = 1; + if (i == 2) + alu.last = 1; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); + alu.src[0].sel = ctx->temp_reg; + alu.src[0].chan = 1; - alu.dst.sel = ctx->temp_reg; - alu.dst.chan = 1; - alu.dst.write = 1; - alu.last = 1; + alu.dst.sel = ctx->temp_reg; + alu.dst.chan = 1; + alu.dst.write = 1; + alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } memset(&alu, 0, sizeof(struct r600_bc_alu)); @@ -2163,19 +2540,39 @@ static int tgsi_log(struct r600_shader_ctx *ctx) /* result.z = log2(src);*/ if ((inst->Dst[0].Register.WriteMask >> 2) & 1) { - memset(&alu, 0, sizeof(struct r600_bc_alu)); + if (ctx->bc->chiprev == CHIPREV_CAYMAN) { + for (i = 0; i < 3; i++) { + 
memset(&alu, 0, sizeof(struct r600_bc_alu)); - alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); - r600_bc_src(&alu.src[0], &ctx->src[0], 0); + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); - alu.dst.sel = ctx->temp_reg; - alu.dst.write = 1; - alu.dst.chan = 2; - alu.last = 1; + alu.dst.sel = ctx->temp_reg; + if (i == 2) + alu.dst.write = 1; + alu.dst.chan = i; + if (i == 2) + alu.last = 1; - r = r600_bc_add_alu(ctx->bc, &alu); - if (r) - return r; + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } + } else { + memset(&alu, 0, sizeof(struct r600_bc_alu)); + + alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE); + r600_bc_src(&alu.src[0], &ctx->src[0], 0); + + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + alu.dst.chan = 2; + alu.last = 1; + + r = r600_bc_add_alu(ctx->bc, &alu); + if (r) + return r; + } } /* result.w = 1.0; */ @@ -2946,3 +3343,161 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, }; + +static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { + {TGSI_OPCODE_ARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, + {TGSI_OPCODE_MOV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, + {TGSI_OPCODE_LIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lit}, + {TGSI_OPCODE_RCP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE, cayman_emit_float_instr}, + {TGSI_OPCODE_RSQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE, cayman_emit_float_instr}, + {TGSI_OPCODE_EXP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_exp}, + {TGSI_OPCODE_LOG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_log}, + {TGSI_OPCODE_MUL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL, tgsi_op2}, + {TGSI_OPCODE_ADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, + 
{TGSI_OPCODE_DP3, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, + {TGSI_OPCODE_DP4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, + {TGSI_OPCODE_DST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_opdst}, + {TGSI_OPCODE_MIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN, tgsi_op2}, + {TGSI_OPCODE_MAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX, tgsi_op2}, + {TGSI_OPCODE_SLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2_swap}, + {TGSI_OPCODE_SGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2}, + {TGSI_OPCODE_MAD, 1, EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD, tgsi_op3}, + {TGSI_OPCODE_SUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD, tgsi_op2}, + {TGSI_OPCODE_LRP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_lrp}, + {TGSI_OPCODE_CND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {20, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DP2A, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {22, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {23, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FRC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT, tgsi_op2}, + {TGSI_OPCODE_CLAMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_FLR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR, tgsi_op2}, + {TGSI_OPCODE_ROUND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_EX2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE, cayman_emit_float_instr}, + {TGSI_OPCODE_LG2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE, cayman_emit_float_instr}, + {TGSI_OPCODE_POW, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, cayman_pow}, + {TGSI_OPCODE_XPD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_xpd}, + /* gap */ + {32, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ABS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, + {TGSI_OPCODE_RCC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, 
tgsi_unsupported}, + {TGSI_OPCODE_DPH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, + {TGSI_OPCODE_COS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS, cayman_trig}, + {TGSI_OPCODE_DDX, 0, SQ_TEX_INST_GET_GRADIENTS_H, tgsi_tex}, + {TGSI_OPCODE_DDY, 0, SQ_TEX_INST_GET_GRADIENTS_V, tgsi_tex}, + {TGSI_OPCODE_KILP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* predicated kill */ + {TGSI_OPCODE_PK2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PK4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE, tgsi_op2}, + {TGSI_OPCODE_SFL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SGT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT, tgsi_op2}, + {TGSI_OPCODE_SIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN, cayman_trig}, + {TGSI_OPCODE_SLE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE, tgsi_op2_swap}, + {TGSI_OPCODE_SNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE, tgsi_op2}, + {TGSI_OPCODE_STR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TEX, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, + {TGSI_OPCODE_TXD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXP, 0, SQ_TEX_INST_SAMPLE, tgsi_tex}, + {TGSI_OPCODE_UP2H, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP2US, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP4B, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UP4UB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_X2D, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARA, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ARR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_eg_arl}, + {TGSI_OPCODE_BRA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CAL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_RET, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SSG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_ssg}, + {TGSI_OPCODE_CMP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_cmp}, + {TGSI_OPCODE_SCS, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_scs}, + {TGSI_OPCODE_TXB, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, + {TGSI_OPCODE_NRM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DP2, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, + {TGSI_OPCODE_TXL, 0, SQ_TEX_INST_SAMPLE_L, tgsi_tex}, + {TGSI_OPCODE_BRK, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont}, + {TGSI_OPCODE_IF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, + /* gap */ + {75, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {76, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ELSE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, + {TGSI_OPCODE_ENDIF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, + /* gap */ + {79, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TRUNC, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, + {TGSI_OPCODE_SHL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {88, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_AND, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_OR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_MOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_XOR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXF, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_TXQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CONT, 0, EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont}, + {TGSI_OPCODE_EMIT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDPRIM, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BGNLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop}, + {TGSI_OPCODE_BGNSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDLOOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop}, + {TGSI_OPCODE_ENDSUB, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {103, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {104, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {105, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {106, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NOP, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + /* gap */ + {108, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {109, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {110, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {111, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_NRM4, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CALLNZ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IFC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_BREAKC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_KIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT, tgsi_kill}, /* conditional kill */ + {TGSI_OPCODE_END, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_end}, /* aka HALT */ + /* gap */ + {118, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_F2I, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_INEG, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ISGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ISHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ISLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_F2U, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_U2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UADD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UDIV, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMAD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMAX, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMIN, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMOD, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_UMUL, 0, 
EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USEQ, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USGE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USHR, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USLT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_USNE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_SWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_LAST, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, +}; diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 0a1fa723e13..46fdbfed34a 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -199,6 +199,7 @@ static void *r600_create_blend_state(struct pipe_context *ctx, static void *r600_create_dsa_state(struct pipe_context *ctx, const struct pipe_depth_stencil_alpha_state *state) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_dsa *dsa = CALLOC_STRUCT(r600_pipe_dsa); unsigned db_depth_control, alpha_test_control, alpha_ref, db_shader_control; unsigned stencil_ref_mask, stencil_ref_mask_bf, db_render_override, db_render_control; @@ -286,6 +287,7 @@ static void *r600_create_dsa_state(struct pipe_context *ctx, static void *r600_create_rs_state(struct pipe_context *ctx, const struct pipe_rasterizer_state *state) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_rasterizer *rs = CALLOC_STRUCT(r600_pipe_rasterizer); struct r600_pipe_state *rstate; unsigned tmp, cb; @@ -382,26 +384,26 @@ 
static void *r600_create_sampler_state(struct pipe_context *ctx, rstate->id = R600_PIPE_STATE_SAMPLER; util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc); - r600_pipe_state_add_reg(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0, - S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | - S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | - S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | - S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter) | aniso_flag_offset) | - S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter) | aniso_flag_offset) | - S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | - S_03C000_MAX_ANISO(r600_tex_aniso_filter(state->max_anisotropy)) | - S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | - S_03C000_BORDER_COLOR_TYPE(uc.ui ? V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0, - S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | - S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | - S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg_noblock(rstate, R_03C000_SQ_TEX_SAMPLER_WORD0_0, + S_03C000_CLAMP_X(r600_tex_wrap(state->wrap_s)) | + S_03C000_CLAMP_Y(r600_tex_wrap(state->wrap_t)) | + S_03C000_CLAMP_Z(r600_tex_wrap(state->wrap_r)) | + S_03C000_XY_MAG_FILTER(r600_tex_filter(state->mag_img_filter) | aniso_flag_offset) | + S_03C000_XY_MIN_FILTER(r600_tex_filter(state->min_img_filter) | aniso_flag_offset) | + S_03C000_MIP_FILTER(r600_tex_mipfilter(state->min_mip_filter)) | + S_03C000_MAX_ANISO(r600_tex_aniso_filter(state->max_anisotropy)) | + S_03C000_DEPTH_COMPARE_FUNCTION(r600_tex_compare(state->compare_func)) | + S_03C000_BORDER_COLOR_TYPE(uc.ui ? 
V_03C000_SQ_TEX_BORDER_COLOR_REGISTER : 0), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg_noblock(rstate, R_03C004_SQ_TEX_SAMPLER_WORD1_0, + S_03C004_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 6)) | + S_03C004_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 6)) | + S_03C004_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 6)), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg_noblock(rstate, R_03C008_SQ_TEX_SAMPLER_WORD2_0, S_03C008_TYPE(1), 0xFFFFFFFF, NULL); if (uc.ui) { - r600_pipe_state_add_reg(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg_noblock(rstate, R_00A400_TD_PS_SAMPLER0_BORDER_RED, fui(state->border_color[0]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg_noblock(rstate, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN, fui(state->border_color[1]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg_noblock(rstate, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE, fui(state->border_color[2]), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg_noblock(rstate, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA, fui(state->border_color[3]), 0xFFFFFFFF, NULL); } return rstate; } @@ -410,6 +412,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c struct pipe_resource *texture, const struct pipe_sampler_view *state) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view); struct r600_pipe_state *rstate; const struct util_format_description *desc; @@ -1285,6 +1288,7 @@ void r600_init_config(struct r600_pipe_context *rctx) void r600_pipe_shader_ps(struct pipe_context 
*ctx, struct r600_pipe_shader *shader) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; unsigned i, exports_ps, num_cout, spi_ps_in_control_0, spi_input_z, spi_ps_in_control_1, db_shader_control; @@ -1378,6 +1382,7 @@ void r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader *shad void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shader) { + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_state *rstate = &shader->rstate; struct r600_shader *rshader = &shader->shader; unsigned spi_vs_out_id[10]; @@ -1424,9 +1429,11 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad 0xFFFFFFFF, NULL); } -void r600_fetch_shader(struct r600_vertex_element *ve) +void r600_fetch_shader(struct pipe_context *ctx, + struct r600_vertex_element *ve) { struct r600_pipe_state *rstate; + struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; rstate = &ve->rstate; rstate->id = R600_PIPE_STATE_FETCH_SHADER; @@ -1478,11 +1485,13 @@ void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) return rstate; } -void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx, - struct r600_pipe_state *rstate, - struct r600_resource *rbuffer, - unsigned offset, unsigned stride) +void r600_pipe_init_buffer_resource(struct r600_pipe_context *rctx, + struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride) { + rstate->id = R600_PIPE_STATE_RESOURCE; + rstate->nregs = 0; r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, offset, 0xFFFFFFFF, rbuffer->bo); r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, @@ -1499,3 +1508,15 @@ void r600_pipe_set_buffer_resource(struct r600_pipe_context *rctx, r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, 0xC0000000, 0xFFFFFFFF, NULL); } + +void 
r600_pipe_mod_buffer_resource(struct r600_pipe_state *rstate, + struct r600_resource *rbuffer, + unsigned offset, unsigned stride) +{ + rstate->nregs = 0; + r600_pipe_state_mod_reg_bo(rstate, offset, rbuffer->bo); + r600_pipe_state_mod_reg(rstate, rbuffer->bo_size - offset - 1); + r600_pipe_state_mod_reg(rstate, S_038008_ENDIAN_SWAP(r600_endian_swap(32)) | + S_038008_STRIDE(stride)); + rstate->nregs = 7; +} diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index de1b811ce89..48ab15f9323 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -32,43 +32,33 @@ #include "r600_pipe.h" #include "r600d.h" +static void r600_spi_update(struct r600_pipe_context *rctx); + static int r600_conv_pipe_prim(unsigned pprim, unsigned *prim) { - switch (pprim) { - case PIPE_PRIM_POINTS: - *prim = V_008958_DI_PT_POINTLIST; - return 0; - case PIPE_PRIM_LINES: - *prim = V_008958_DI_PT_LINELIST; - return 0; - case PIPE_PRIM_LINE_STRIP: - *prim = V_008958_DI_PT_LINESTRIP; - return 0; - case PIPE_PRIM_LINE_LOOP: - *prim = V_008958_DI_PT_LINELOOP; - return 0; - case PIPE_PRIM_TRIANGLES: - *prim = V_008958_DI_PT_TRILIST; - return 0; - case PIPE_PRIM_TRIANGLE_STRIP: - *prim = V_008958_DI_PT_TRISTRIP; - return 0; - case PIPE_PRIM_TRIANGLE_FAN: - *prim = V_008958_DI_PT_TRIFAN; - return 0; - case PIPE_PRIM_POLYGON: - *prim = V_008958_DI_PT_POLYGON; - return 0; - case PIPE_PRIM_QUADS: - *prim = V_008958_DI_PT_QUADLIST; - return 0; - case PIPE_PRIM_QUAD_STRIP: - *prim = V_008958_DI_PT_QUADSTRIP; - return 0; - default: + static const int prim_conv[] = { + V_008958_DI_PT_POINTLIST, + V_008958_DI_PT_LINELIST, + V_008958_DI_PT_LINELOOP, + V_008958_DI_PT_LINESTRIP, + V_008958_DI_PT_TRILIST, + V_008958_DI_PT_TRISTRIP, + V_008958_DI_PT_TRIFAN, + V_008958_DI_PT_QUADLIST, + V_008958_DI_PT_QUADSTRIP, + V_008958_DI_PT_POLYGON, + -1, + -1, + -1, + -1 + }; + + *prim = prim_conv[pprim]; + if (*prim == 
-1) { fprintf(stderr, "%s:%d unsupported %d\n", __func__, __LINE__, pprim); return -1; } + return 0; } /* common state between evergreen and r600 */ @@ -121,6 +111,8 @@ void r600_bind_rs_state(struct pipe_context *ctx, void *state) } else { r600_polygon_offset_update(rctx); } + if (rctx->ps_shader && rctx->vs_shader) + r600_spi_update(rctx); } void r600_delete_rs_state(struct pipe_context *ctx, void *state) @@ -281,6 +273,8 @@ void r600_bind_ps_shader(struct pipe_context *ctx, void *state) if (state) { r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_shader->rstate); } + if (rctx->ps_shader && rctx->vs_shader) + r600_spi_update(rctx); } void r600_bind_vs_shader(struct pipe_context *ctx, void *state) @@ -292,6 +286,8 @@ void r600_bind_vs_shader(struct pipe_context *ctx, void *state) if (state) { r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_shader->rstate); } + if (rctx->ps_shader && rctx->vs_shader) + r600_spi_update(rctx); } void r600_delete_ps_shader(struct pipe_context *ctx, void *state) @@ -338,14 +334,27 @@ static void r600_update_alpha_ref(struct r600_pipe_context *rctx) } /* FIXME optimize away spi update when it's not needed */ -static void r600_spi_update(struct r600_pipe_context *rctx, unsigned prim) +static void r600_spi_block_init(struct r600_pipe_context *rctx, struct r600_pipe_state *rstate) +{ + int i; + rstate->nregs = 0; + rstate->id = R600_PIPE_STATE_SPI; + for (i = 0; i < 32; i++) { + r600_pipe_state_add_reg(rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, 0, 0xFFFFFFFF, NULL); + } +} + +static void r600_spi_update(struct r600_pipe_context *rctx) { struct r600_pipe_shader *shader = rctx->ps_shader; - struct r600_pipe_state rstate; + struct r600_pipe_state *rstate = &rctx->spi; struct r600_shader *rshader = &shader->shader; unsigned i, tmp; - rstate.nregs = 0; + if (rctx->spi.id == 0) + r600_spi_block_init(rctx, &rctx->spi); + + rstate->nregs = 0; for (i = 0; i < rshader->ninput; i++) { tmp = 
S_028644_SEMANTIC(r600_find_vs_semantic_index(&rctx->vs_shader->shader, rshader, i)); @@ -368,15 +377,10 @@ static void r600_spi_update(struct r600_pipe_context *rctx, unsigned prim) tmp |= S_028644_SEL_LINEAR(1); } - r600_pipe_state_add_reg(&rstate, R_028644_SPI_PS_INPUT_CNTL_0 + i * 4, tmp, 0xFFFFFFFF, NULL); + r600_pipe_state_mod_reg(rstate, tmp); } - if (prim == PIPE_PRIM_QUADS || prim == PIPE_PRIM_QUAD_STRIP || prim == PIPE_PRIM_POLYGON) { - r600_pipe_state_add_reg(&rstate, R_028814_PA_SU_SC_MODE_CNTL, - S_028814_PROVOKING_VTX_LAST(1), - S_028814_PROVOKING_VTX_LAST(1), NULL); - } - r600_context_pipe_state_set(&rctx->ctx, &rstate); + r600_context_pipe_state_set(&rctx->ctx, rstate); } void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, @@ -410,13 +414,19 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); rstate = &rctx->vs_const_buffer_resource[index]; - rstate->id = R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; + if (!rstate->id) { + if (rctx->family >= CHIP_CEDAR) { + evergreen_pipe_init_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + } else { + r600_pipe_init_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + } + } + if (rctx->family >= CHIP_CEDAR) { - evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); } else { - r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); r600_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); } break; @@ -432,13 +442,18 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); rstate = &rctx->ps_const_buffer_resource[index]; - rstate->id = 
R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; + if (!rstate->id) { + if (rctx->family >= CHIP_CEDAR) { + evergreen_pipe_init_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + } else { + r600_pipe_init_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + } + } if (rctx->family >= CHIP_CEDAR) { - evergreen_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + evergreen_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); } else { - r600_pipe_set_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + r600_pipe_mod_buffer_resource(rstate, &rbuffer->r, offset, 16); r600_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); } break; @@ -468,8 +483,6 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) for (i = 0 ; i < count; i++) { rstate = &rctx->fs_resource[i]; - rstate->id = R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; if (rctx->vertex_elements->vbuffer_need_offset) { /* one resource per vertex elements */ @@ -488,11 +501,19 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) continue; offset += vertex_buffer->buffer_offset + r600_bo_offset(rbuffer->bo); + if (!rstate->id) { + if (rctx->family >= CHIP_CEDAR) { + evergreen_pipe_init_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride); + } else { + r600_pipe_init_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride); + } + } + if (rctx->family >= CHIP_CEDAR) { - evergreen_pipe_set_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride); + evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride); evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); } else { - r600_pipe_set_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride); + r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride); r600_context_pipe_state_set_fs_resource(&rctx->ctx, 
rstate, i); } } @@ -504,7 +525,6 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) struct r600_resource *rbuffer; u32 vgt_dma_index_type, vgt_dma_swap_mode, vgt_draw_initiator, mask; struct r600_draw rdraw; - struct r600_pipe_state vgt; struct r600_drawl draw = {}; unsigned prim; @@ -576,23 +596,41 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) } r600_update_alpha_ref(rctx); - r600_spi_update(rctx, draw.info.mode); mask = 0; for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { mask |= (0xF << (i * 4)); } - vgt.id = R600_PIPE_STATE_VGT; - vgt.nregs = 0; - r600_pipe_state_add_reg(&vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, draw.info.start_instance, 0xFFFFFFFF, NULL); - r600_context_pipe_state_set(&rctx->ctx, &vgt); + if (rctx->vgt.id != R600_PIPE_STATE_VGT) { + rctx->vgt.id = R600_PIPE_STATE_VGT; + rctx->vgt.nregs = 0; + r600_pipe_state_add_reg(&rctx->vgt, R_008958_VGT_PRIMITIVE_TYPE, prim, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->vgt, R_028400_VGT_MAX_VTX_INDX, draw.info.max_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->vgt, R_028408_VGT_INDX_OFFSET, draw.info.index_bias, 0xFFFFFFFF, NULL); + 
r600_pipe_state_add_reg(&rctx->vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->vgt, R_03CFF4_SQ_VTX_START_INST_LOC, draw.info.start_instance, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(&rctx->vgt, R_028814_PA_SU_SC_MODE_CNTL, + 0, + S_028814_PROVOKING_VTX_LAST(1), NULL); + + } + + rctx->vgt.nregs = 0; + r600_pipe_state_mod_reg(&rctx->vgt, prim); + r600_pipe_state_mod_reg(&rctx->vgt, rctx->cb_target_mask & mask); + r600_pipe_state_mod_reg(&rctx->vgt, draw.info.max_index); + r600_pipe_state_mod_reg(&rctx->vgt, draw.info.min_index); + r600_pipe_state_mod_reg(&rctx->vgt, draw.info.index_bias); + r600_pipe_state_mod_reg(&rctx->vgt, 0); + r600_pipe_state_mod_reg(&rctx->vgt, draw.info.start_instance); + if (draw.info.mode == PIPE_PRIM_QUADS || draw.info.mode == PIPE_PRIM_QUAD_STRIP || draw.info.mode == PIPE_PRIM_POLYGON) { + r600_pipe_state_mod_reg(&rctx->vgt, S_028814_PROVOKING_VTX_LAST(1)); + } + + r600_context_pipe_state_set(&rctx->ctx, &rctx->vgt); rdraw.vgt_num_indices = draw.info.count; rdraw.vgt_num_instances = draw.info.instance_count; @@ -621,3 +659,39 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) u_vbuf_mgr_draw_end(rctx->vbuf_mgr); } + +void _r600_pipe_state_add_reg(struct r600_context *ctx, + struct r600_pipe_state *state, + u32 offset, u32 value, u32 mask, + u32 range_id, u32 block_id, + struct r600_bo *bo) +{ + struct r600_range *range; + struct r600_block *block; + + range = &ctx->range[range_id]; + block = range->blocks[block_id]; + state->regs[state->nregs].block = block; + state->regs[state->nregs].id = (offset - block->start_offset) >> 2; + + state->regs[state->nregs].value = value; + state->regs[state->nregs].mask = mask; + state->regs[state->nregs].bo = bo; + + state->nregs++; + assert(state->nregs < R600_BLOCK_MAX_REG); +} + +void r600_pipe_state_add_reg_noblock(struct r600_pipe_state *state, + u32 offset, u32 value, u32 mask, + struct r600_bo *bo) +{ + 
state->regs[state->nregs].id = offset; + state->regs[state->nregs].block = NULL; + state->regs[state->nregs].value = value; + state->regs[state->nregs].mask = mask; + state->regs[state->nregs].bo = bo; + + state->nregs++; + assert(state->nregs < R600_BLOCK_MAX_REG); +} diff --git a/src/gallium/drivers/softpipe/sp_fence.c b/src/gallium/drivers/softpipe/sp_fence.c index 7b79a0df4ea..c2897ed1ef8 100644 --- a/src/gallium/drivers/softpipe/sp_fence.c +++ b/src/gallium/drivers/softpipe/sp_fence.c @@ -36,8 +36,7 @@ softpipe_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **ptr, struct pipe_fence_handle *fence) { - assert(!*ptr); - assert(!fence); + *ptr = fence; } @@ -45,7 +44,7 @@ static boolean softpipe_fence_signalled(struct pipe_screen *screen, struct pipe_fence_handle *fence) { - assert(!fence); + assert(fence); return TRUE; } @@ -55,7 +54,7 @@ softpipe_fence_finish(struct pipe_screen *screen, struct pipe_fence_handle *fence, uint64_t timeout) { - assert(!fence); + assert(fence); return TRUE; } diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index 720fea83cb2..a2733e95878 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -96,9 +96,9 @@ softpipe_flush( struct pipe_context *pipe, ++frame_no; } #endif - + if (fence) - *fence = NULL; + *fence = (void*)(intptr_t)1; } void diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 4f0d6c4b97a..254976e099c 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -1422,6 +1422,40 @@ static void trace_redefine_user_buffer(struct pipe_context *_context, } +static void trace_render_condition(struct pipe_context *_context, + struct pipe_query *query, + uint mode) +{ + struct trace_context *tr_context = trace_context(_context); + struct pipe_context *context = tr_context->pipe; + + trace_dump_call_begin("pipe_context", 
"render_condition"); + + trace_dump_arg(ptr, context); + trace_dump_arg(ptr, query); + trace_dump_arg(uint, mode); + + trace_dump_call_end(); + + context->render_condition(context, query, mode); +} + + +static void trace_texture_barrier(struct pipe_context *_context) +{ + struct trace_context *tr_context = trace_context(_context); + struct pipe_context *context = tr_context->pipe; + + trace_dump_call_begin("pipe_context", "texture_barrier"); + + trace_dump_arg(ptr, context); + + trace_dump_call_end(); + + context->texture_barrier(context); +} + + static const struct debug_named_value rbug_blocker_flags[] = { {"before", 1, NULL}, {"after", 2, NULL}, @@ -1499,6 +1533,8 @@ trace_context_create(struct trace_screen *tr_scr, tr_ctx->base.clear_render_target = trace_context_clear_render_target; tr_ctx->base.clear_depth_stencil = trace_context_clear_depth_stencil; tr_ctx->base.flush = trace_context_flush; + tr_ctx->base.render_condition = pipe->render_condition ? trace_render_condition : NULL; + tr_ctx->base.texture_barrier = pipe->texture_barrier ? 
trace_texture_barrier : NULL; tr_ctx->base.get_transfer = trace_context_get_transfer; tr_ctx->base.transfer_destroy = trace_context_transfer_destroy; diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h index 1818c8b9e9a..40f6f2bcb5f 100644 --- a/src/gallium/include/pipe/p_config.h +++ b/src/gallium/include/pipe/p_config.h @@ -46,7 +46,7 @@ #ifndef P_CONFIG_H_ #define P_CONFIG_H_ - +#include <limits.h> /* * Compiler */ diff --git a/src/gallium/state_trackers/d3d1x/d3d1xshader/src/sm4_parse.cpp b/src/gallium/state_trackers/d3d1x/d3d1xshader/src/sm4_parse.cpp index 2c0f8269af3..30962900122 100644 --- a/src/gallium/state_trackers/d3d1x/d3d1xshader/src/sm4_parse.cpp +++ b/src/gallium/state_trackers/d3d1x/d3d1xshader/src/sm4_parse.cpp @@ -91,6 +91,7 @@ struct sm4_parser break; case SM4_OPERAND_COMPNUM_1: op.comps = 1; + op.swizzle[1] = op.swizzle[2] = op.swizzle[3] = 0; break; case SM4_OPERAND_COMPNUM_4: op.comps = 4; @@ -143,7 +144,7 @@ struct sm4_parser repr = optok.index2_repr; else fail("Unhandled operand index representation"); - op.indices[0].disp = 0; + op.indices[i].disp = 0; // TODO: is disp supposed to be signed here?? 
switch(repr) { diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.c b/src/gallium/state_trackers/wgl/stw_framebuffer.c index 7a689f9977d..f595efe03f4 100644 --- a/src/gallium/state_trackers/wgl/stw_framebuffer.c +++ b/src/gallium/state_trackers/wgl/stw_framebuffer.c @@ -92,6 +92,8 @@ stw_framebuffer_destroy_locked( stw_st_destroy_framebuffer_locked(fb->stfb); + ReleaseDC(fb->hWnd, fb->hDC); + pipe_mutex_unlock( fb->mutex ); pipe_mutex_destroy( fb->mutex ); @@ -168,6 +170,7 @@ stw_framebuffer_get_size( struct stw_framebuffer *fb ) #if 0 debug_printf("\n"); + debug_printf("%s: hwnd = %p\n", __FUNCTION__, fb->hWnd); debug_printf("%s: client_position = (%li, %li)\n", __FUNCTION__, client_pos.x, client_pos.y); debug_printf("%s: window_rect = (%li, %li) - (%li, %li)\n", @@ -251,7 +254,11 @@ stw_framebuffer_create( if (fb == NULL) return NULL; - fb->hDC = hdc; + /* Applications use, create, destroy device contexts, so the hdc passed is. We create our own DC + * because we need one for single buffered visuals. + */ + fb->hDC = GetDC(hWnd); + fb->hWnd = hWnd; fb->iPixelFormat = iPixelFormat; @@ -378,24 +385,13 @@ stw_framebuffer_from_hdc_locked( HDC hdc ) { HWND hwnd; - struct stw_framebuffer *fb; - /* - * Some applications create and use several HDCs for the same window, so - * looking up the framebuffer by the HDC is not reliable. Use HWND whenever - * possible. 
- */ hwnd = WindowFromDC(hdc); - if(hwnd) - return stw_framebuffer_from_hwnd_locked(hwnd); - - for (fb = stw_dev->fb_head; fb != NULL; fb = fb->next) - if (fb->hDC == hdc) { - pipe_mutex_lock(fb->mutex); - break; - } + if (!hwnd) { + return NULL; + } - return fb; + return stw_framebuffer_from_hwnd_locked(hwnd); } @@ -607,7 +603,7 @@ DrvSwapBuffers( stw_flush_current_locked(fb); - return stw_st_swap_framebuffer_locked(fb->stfb); + return stw_st_swap_framebuffer_locked(hdc, fb->stfb); } diff --git a/src/gallium/state_trackers/wgl/stw_st.c b/src/gallium/state_trackers/wgl/stw_st.c index b58d91673b7..9174533fc06 100644 --- a/src/gallium/state_trackers/wgl/stw_st.c +++ b/src/gallium/state_trackers/wgl/stw_st.c @@ -154,7 +154,8 @@ stw_st_framebuffer_validate(struct st_framebuffer_iface *stfb, * Present an attachment of the framebuffer. */ static boolean -stw_st_framebuffer_present_locked(struct st_framebuffer_iface *stfb, +stw_st_framebuffer_present_locked(HDC hdc, + struct st_framebuffer_iface *stfb, enum st_attachment_type statt) { struct stw_st_framebuffer *stwfb = stw_st_framebuffer(stfb); @@ -162,7 +163,7 @@ stw_st_framebuffer_present_locked(struct st_framebuffer_iface *stfb, resource = stwfb->textures[statt]; if (resource) { - stw_framebuffer_present_locked(stwfb->fb->hDC, stwfb->fb, resource); + stw_framebuffer_present_locked(hdc, stwfb->fb, resource); } return TRUE; @@ -176,7 +177,7 @@ stw_st_framebuffer_flush_front(struct st_framebuffer_iface *stfb, pipe_mutex_lock(stwfb->fb->mutex); - return stw_st_framebuffer_present_locked(&stwfb->base, statt); + return stw_st_framebuffer_present_locked(stwfb->fb->hDC, &stwfb->base, statt); } /** @@ -220,7 +221,7 @@ stw_st_destroy_framebuffer_locked(struct st_framebuffer_iface *stfb) * Swap the buffers of the given framebuffer. 
 */ boolean -stw_st_swap_framebuffer_locked(struct st_framebuffer_iface *stfb) +stw_st_swap_framebuffer_locked(HDC hdc, struct st_framebuffer_iface *stfb) { struct stw_st_framebuffer *stwfb = stw_st_framebuffer(stfb); unsigned front = ST_ATTACHMENT_FRONT_LEFT, back = ST_ATTACHMENT_BACK_LEFT; @@ -245,7 +246,7 @@ stw_st_swap_framebuffer_locked(struct st_framebuffer_iface *stfb) stwfb->texture_mask = mask; front = ST_ATTACHMENT_FRONT_LEFT; - return stw_st_framebuffer_present_locked(&stwfb->base, front); + return stw_st_framebuffer_present_locked(hdc, &stwfb->base, front); } /** diff --git a/src/gallium/state_trackers/wgl/stw_st.h b/src/gallium/state_trackers/wgl/stw_st.h index 23771d8bef6..945d3508b48 100644 --- a/src/gallium/state_trackers/wgl/stw_st.h +++ b/src/gallium/state_trackers/wgl/stw_st.h @@ -28,6 +28,8 @@ #ifndef STW_ST_H #define STW_ST_H +#include <windows.h> + #include "state_tracker/st_api.h" struct stw_framebuffer; @@ -42,6 +44,6 @@ void stw_st_destroy_framebuffer_locked(struct st_framebuffer_iface *stfb); boolean -stw_st_swap_framebuffer_locked(struct st_framebuffer_iface *stfb); +stw_st_swap_framebuffer_locked(HDC hdc, struct st_framebuffer_iface *stfb); #endif /* STW_ST_H */ diff --git a/src/gallium/targets/Makefile.dri b/src/gallium/targets/Makefile.dri index 3fb4cc6b861..6c6ad184fab 100644 --- a/src/gallium/targets/Makefile.dri +++ b/src/gallium/targets/Makefile.dri @@ -75,7 +75,7 @@ LIBNAME_STAGING = $(TOP)/$(LIB_DIR)/gallium/$(LIBNAME) default: depend symlinks $(TOP)/$(LIB_DIR)/gallium $(LIBNAME) $(LIBNAME_STAGING) $(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(PIPE_DRIVERS) Makefile \ - $(TOP)/src/mesa/drivers/dri/Makefile.template $(TOP)/src/mesa/drivers/dri/common/dri_test.o + $(TOP)/src/mesa/drivers/dri/Makefile.targets $(TOP)/src/mesa/drivers/dri/common/dri_test.o $(MKLIB) -o $@.tmp -noprefix -linker '$(CXX)' -ldflags '$(LDFLAGS)' \ $(OBJECTS) $(PIPE_DRIVERS) \ -Wl,--start-group $(MESA_MODULES) -Wl,--end-group \ diff --git 
a/src/gallium/targets/egl/Makefile b/src/gallium/targets/egl/Makefile index a455b61af98..dd566bd9a06 100644 --- a/src/gallium/targets/egl/Makefile +++ b/src/gallium/targets/egl/Makefile @@ -52,7 +52,7 @@ egl_LIBS += $(TOP)/src/gallium/winsys/sw/wayland/libws_wayland.a egl_LIBS += $(TOP)/src/egl/wayland/wayland-drm/libwayland-drm.a endif ifneq ($(findstring drm, $(EGL_PLATFORMS)),) -egl_SYS += $(LIBDRM_LIB) +egl_SYS += $(LIBUDEV_LIBS) $(LIBDRM_LIB) endif ifneq ($(findstring fbdev, $(EGL_PLATFORMS)),) egl_LIBS += $(TOP)/src/gallium/winsys/sw/fbdev/libfbdev.a diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index 2a2c37ff606..cf8ae5185b4 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -57,6 +57,19 @@ static const struct r600_reg evergreen_config_reg_list[] = { {R_00913C_SPI_CONFIG_CNTL_1, 0, 0, 0}, }; + +static const struct r600_reg cayman_config_reg_list[] = { + {R_008958_VGT_PRIMITIVE_TYPE, 0, 0, 0}, + {R_008A14_PA_CL_ENHANCE, 0, 0, 0}, + {R_008C00_SQ_CONFIG, 0, 0, 0}, + {R_008C04_SQ_GPR_RESOURCE_MGMT_1, 0, 0, 0}, + {CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0, 0}, + {CM_R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0, 0}, + {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0, 0, 0}, + {R_009100_SPI_CONFIG_CNTL, 0, 0, 0}, + {R_00913C_SPI_CONFIG_CNTL_1, 0, 0, 0}, +}; + static const struct r600_reg evergreen_ctl_const_list[] = { {R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0, 0}, {R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0, 0}, @@ -189,6 +202,7 @@ static const struct r600_reg evergreen_context_reg_list[] = { {R_028610_PA_CL_UCP5_Y, 0, 0, 0}, {R_028614_PA_CL_UCP5_Z, 0, 0, 0}, {R_028618_PA_CL_UCP5_W, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, {R_02861C_SPI_VS_OUT_ID_0, 0, 0, 0}, {R_028620_SPI_VS_OUT_ID_1, 0, 0, 0}, {R_028624_SPI_VS_OUT_ID_2, 0, 0, 0}, @@ -199,6 +213,7 @@ static const struct r600_reg evergreen_context_reg_list[] = { 
{R_028638_SPI_VS_OUT_ID_7, 0, 0, 0}, {R_02863C_SPI_VS_OUT_ID_8, 0, 0, 0}, {R_028640_SPI_VS_OUT_ID_9, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, {R_028644_SPI_PS_INPUT_CNTL_0, 0, 0, 0}, {R_028648_SPI_PS_INPUT_CNTL_1, 0, 0, 0}, {R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0, 0}, @@ -231,6 +246,7 @@ static const struct r600_reg evergreen_context_reg_list[] = { {R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0, 0}, {R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0, 0}, {R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, {R_0286C4_SPI_VS_OUT_CONFIG, 0, 0, 0}, {R_0286C8_SPI_THREAD_GROUPING, 0, 0, 0}, {R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0, 0}, @@ -421,6 +437,385 @@ static const struct r600_reg evergreen_context_reg_list[] = { {R_028EAC_CB_COLOR11_DIM, 0, 0, 0}, }; +static const struct r600_reg cayman_context_reg_list[] = { + {R_028000_DB_RENDER_CONTROL, 0, 0, 0}, + {R_028004_DB_COUNT_CONTROL, 0, 0, 0}, + {R_028008_DB_DEPTH_VIEW, 0, 0, 0}, + {R_02800C_DB_RENDER_OVERRIDE, 0, 0, 0}, + {R_028010_DB_RENDER_OVERRIDE2, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {R_028014_DB_HTILE_DATA_BASE, REG_FLAG_NEED_BO, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {R_028028_DB_STENCIL_CLEAR, 0, 0, 0}, + {R_02802C_DB_DEPTH_CLEAR, 0, 0, 0}, + {R_028030_PA_SC_SCREEN_SCISSOR_TL, 0, 0, 0}, + {R_028034_PA_SC_SCREEN_SCISSOR_BR, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {R_028040_DB_Z_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {R_028044_DB_STENCIL_INFO, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {R_028048_DB_Z_READ_BASE, REG_FLAG_NEED_BO, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {R_02804C_DB_STENCIL_READ_BASE, REG_FLAG_NEED_BO, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {R_028050_DB_Z_WRITE_BASE, REG_FLAG_NEED_BO, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {R_028054_DB_STENCIL_WRITE_BASE, REG_FLAG_NEED_BO, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {R_028058_DB_DEPTH_SIZE, 0, 0, 0}, + {R_02805C_DB_DEPTH_SLICE, 0, 0, 0}, + 
{R_028140_ALU_CONST_BUFFER_SIZE_PS_0, REG_FLAG_DIRTY_ALWAYS, 0, 0}, + {R_028180_ALU_CONST_BUFFER_SIZE_VS_0, REG_FLAG_DIRTY_ALWAYS, 0, 0}, + {R_028200_PA_SC_WINDOW_OFFSET, 0, 0, 0}, + {R_028204_PA_SC_WINDOW_SCISSOR_TL, 0, 0, 0}, + {R_028208_PA_SC_WINDOW_SCISSOR_BR, 0, 0, 0}, + {R_02820C_PA_SC_CLIPRECT_RULE, 0, 0, 0}, + {R_028210_PA_SC_CLIPRECT_0_TL, 0, 0, 0}, + {R_028214_PA_SC_CLIPRECT_0_BR, 0, 0, 0}, + {R_028218_PA_SC_CLIPRECT_1_TL, 0, 0, 0}, + {R_02821C_PA_SC_CLIPRECT_1_BR, 0, 0, 0}, + {R_028220_PA_SC_CLIPRECT_2_TL, 0, 0, 0}, + {R_028224_PA_SC_CLIPRECT_2_BR, 0, 0, 0}, + {R_028228_PA_SC_CLIPRECT_3_TL, 0, 0, 0}, + {R_02822C_PA_SC_CLIPRECT_3_BR, 0, 0, 0}, + {R_028230_PA_SC_EDGERULE, 0, 0, 0}, + {R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0, 0, 0}, + {R_028238_CB_TARGET_MASK, 0, 0, 0}, + {R_02823C_CB_SHADER_MASK, 0, 0, 0}, + {R_028240_PA_SC_GENERIC_SCISSOR_TL, 0, 0, 0}, + {R_028244_PA_SC_GENERIC_SCISSOR_BR, 0, 0, 0}, + {R_028250_PA_SC_VPORT_SCISSOR_0_TL, 0, 0, 0}, + {R_028254_PA_SC_VPORT_SCISSOR_0_BR, 0, 0, 0}, + {R_028350_SX_MISC, 0, 0, 0}, + {R_028380_SQ_VTX_SEMANTIC_0, 0, 0, 0}, + {R_028384_SQ_VTX_SEMANTIC_1, 0, 0, 0}, + {R_028388_SQ_VTX_SEMANTIC_2, 0, 0, 0}, + {R_02838C_SQ_VTX_SEMANTIC_3, 0, 0, 0}, + {R_028390_SQ_VTX_SEMANTIC_4, 0, 0, 0}, + {R_028394_SQ_VTX_SEMANTIC_5, 0, 0, 0}, + {R_028398_SQ_VTX_SEMANTIC_6, 0, 0, 0}, + {R_02839C_SQ_VTX_SEMANTIC_7, 0, 0, 0}, + {R_0283A0_SQ_VTX_SEMANTIC_8, 0, 0, 0}, + {R_0283A4_SQ_VTX_SEMANTIC_9, 0, 0, 0}, + {R_0283A8_SQ_VTX_SEMANTIC_10, 0, 0, 0}, + {R_0283AC_SQ_VTX_SEMANTIC_11, 0, 0, 0}, + {R_0283B0_SQ_VTX_SEMANTIC_12, 0, 0, 0}, + {R_0283B4_SQ_VTX_SEMANTIC_13, 0, 0, 0}, + {R_0283B8_SQ_VTX_SEMANTIC_14, 0, 0, 0}, + {R_0283BC_SQ_VTX_SEMANTIC_15, 0, 0, 0}, + {R_0283C0_SQ_VTX_SEMANTIC_16, 0, 0, 0}, + {R_0283C4_SQ_VTX_SEMANTIC_17, 0, 0, 0}, + {R_0283C8_SQ_VTX_SEMANTIC_18, 0, 0, 0}, + {R_0283CC_SQ_VTX_SEMANTIC_19, 0, 0, 0}, + {R_0283D0_SQ_VTX_SEMANTIC_20, 0, 0, 0}, + {R_0283D4_SQ_VTX_SEMANTIC_21, 0, 0, 0}, + {R_0283D8_SQ_VTX_SEMANTIC_22, 0, 
0, 0}, + {R_0283DC_SQ_VTX_SEMANTIC_23, 0, 0, 0}, + {R_0283E0_SQ_VTX_SEMANTIC_24, 0, 0, 0}, + {R_0283E4_SQ_VTX_SEMANTIC_25, 0, 0, 0}, + {R_0283E8_SQ_VTX_SEMANTIC_26, 0, 0, 0}, + {R_0283EC_SQ_VTX_SEMANTIC_27, 0, 0, 0}, + {R_0283F0_SQ_VTX_SEMANTIC_28, 0, 0, 0}, + {R_0283F4_SQ_VTX_SEMANTIC_29, 0, 0, 0}, + {R_0283F8_SQ_VTX_SEMANTIC_30, 0, 0, 0}, + {R_0283FC_SQ_VTX_SEMANTIC_31, 0, 0, 0}, + {R_0282D0_PA_SC_VPORT_ZMIN_0, 0, 0, 0}, + {R_0282D4_PA_SC_VPORT_ZMAX_0, 0, 0, 0}, + {R_028400_VGT_MAX_VTX_INDX, 0, 0, 0}, + {R_028404_VGT_MIN_VTX_INDX, 0, 0, 0}, + {R_028408_VGT_INDX_OFFSET, 0, 0, 0}, + {R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0, 0}, + {R_028410_SX_ALPHA_TEST_CONTROL, 0, 0, 0}, + {R_028414_CB_BLEND_RED, 0, 0, 0}, + {R_028418_CB_BLEND_GREEN, 0, 0, 0}, + {R_02841C_CB_BLEND_BLUE, 0, 0, 0}, + {R_028420_CB_BLEND_ALPHA, 0, 0, 0}, + {R_028430_DB_STENCILREFMASK, 0, 0, 0}, + {R_028434_DB_STENCILREFMASK_BF, 0, 0, 0}, + {R_028438_SX_ALPHA_REF, 0, 0, 0}, + {R_02843C_PA_CL_VPORT_XSCALE_0, 0, 0, 0}, + {R_028440_PA_CL_VPORT_XOFFSET_0, 0, 0, 0}, + {R_028444_PA_CL_VPORT_YSCALE_0, 0, 0, 0}, + {R_028448_PA_CL_VPORT_YOFFSET_0, 0, 0, 0}, + {R_02844C_PA_CL_VPORT_ZSCALE_0, 0, 0, 0}, + {R_028450_PA_CL_VPORT_ZOFFSET_0, 0, 0, 0}, + {R_0285BC_PA_CL_UCP0_X, 0, 0, 0}, + {R_0285C0_PA_CL_UCP0_Y, 0, 0, 0}, + {R_0285C4_PA_CL_UCP0_Z, 0, 0, 0}, + {R_0285C8_PA_CL_UCP0_W, 0, 0, 0}, + {R_0285CC_PA_CL_UCP1_X, 0, 0, 0}, + {R_0285D0_PA_CL_UCP1_Y, 0, 0, 0}, + {R_0285D4_PA_CL_UCP1_Z, 0, 0, 0}, + {R_0285D8_PA_CL_UCP1_W, 0, 0, 0}, + {R_0285DC_PA_CL_UCP2_X, 0, 0, 0}, + {R_0285E0_PA_CL_UCP2_Y, 0, 0, 0}, + {R_0285E4_PA_CL_UCP2_Z, 0, 0, 0}, + {R_0285E8_PA_CL_UCP2_W, 0, 0, 0}, + {R_0285EC_PA_CL_UCP3_X, 0, 0, 0}, + {R_0285F0_PA_CL_UCP3_Y, 0, 0, 0}, + {R_0285F4_PA_CL_UCP3_Z, 0, 0, 0}, + {R_0285F8_PA_CL_UCP3_W, 0, 0, 0}, + {R_0285FC_PA_CL_UCP4_X, 0, 0, 0}, + {R_028600_PA_CL_UCP4_Y, 0, 0, 0}, + {R_028604_PA_CL_UCP4_Z, 0, 0, 0}, + {R_028608_PA_CL_UCP4_W, 0, 0, 0}, + {R_02860C_PA_CL_UCP5_X, 0, 0, 0}, + 
{R_028610_PA_CL_UCP5_Y, 0, 0, 0}, + {R_028614_PA_CL_UCP5_Z, 0, 0, 0}, + {R_028618_PA_CL_UCP5_W, 0, 0, 0}, + {R_02861C_SPI_VS_OUT_ID_0, 0, 0, 0}, + {R_028620_SPI_VS_OUT_ID_1, 0, 0, 0}, + {R_028624_SPI_VS_OUT_ID_2, 0, 0, 0}, + {R_028628_SPI_VS_OUT_ID_3, 0, 0, 0}, + {R_02862C_SPI_VS_OUT_ID_4, 0, 0, 0}, + {R_028630_SPI_VS_OUT_ID_5, 0, 0, 0}, + {R_028634_SPI_VS_OUT_ID_6, 0, 0, 0}, + {R_028638_SPI_VS_OUT_ID_7, 0, 0, 0}, + {R_02863C_SPI_VS_OUT_ID_8, 0, 0, 0}, + {R_028640_SPI_VS_OUT_ID_9, 0, 0, 0}, + {R_028644_SPI_PS_INPUT_CNTL_0, 0, 0, 0}, + {R_028648_SPI_PS_INPUT_CNTL_1, 0, 0, 0}, + {R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0, 0}, + {R_028650_SPI_PS_INPUT_CNTL_3, 0, 0, 0}, + {R_028654_SPI_PS_INPUT_CNTL_4, 0, 0, 0}, + {R_028658_SPI_PS_INPUT_CNTL_5, 0, 0, 0}, + {R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0, 0}, + {R_028660_SPI_PS_INPUT_CNTL_7, 0, 0, 0}, + {R_028664_SPI_PS_INPUT_CNTL_8, 0, 0, 0}, + {R_028668_SPI_PS_INPUT_CNTL_9, 0, 0, 0}, + {R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0, 0}, + {R_028670_SPI_PS_INPUT_CNTL_11, 0, 0, 0}, + {R_028674_SPI_PS_INPUT_CNTL_12, 0, 0, 0}, + {R_028678_SPI_PS_INPUT_CNTL_13, 0, 0, 0}, + {R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0, 0}, + {R_028680_SPI_PS_INPUT_CNTL_15, 0, 0, 0}, + {R_028684_SPI_PS_INPUT_CNTL_16, 0, 0, 0}, + {R_028688_SPI_PS_INPUT_CNTL_17, 0, 0, 0}, + {R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0, 0}, + {R_028690_SPI_PS_INPUT_CNTL_19, 0, 0, 0}, + {R_028694_SPI_PS_INPUT_CNTL_20, 0, 0, 0}, + {R_028698_SPI_PS_INPUT_CNTL_21, 0, 0, 0}, + {R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0, 0}, + {R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0, 0}, + {R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0, 0}, + {R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0, 0}, + {R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0, 0}, + {R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0, 0}, + {R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0, 0}, + {R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0, 0}, + {R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0, 0}, + {R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0, 0}, + {R_0286C4_SPI_VS_OUT_CONFIG, 0, 0, 0}, + {R_0286C8_SPI_THREAD_GROUPING, 0, 0, 0}, + 
{R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0, 0}, + {R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0, 0}, + {R_0286D4_SPI_INTERP_CONTROL_0, 0, 0, 0}, + {R_0286D8_SPI_INPUT_Z, 0, 0, 0}, + {R_0286DC_SPI_FOG_CNTL, 0, 0, 0}, + {R_0286E0_SPI_BARYC_CNTL, 0, 0, 0}, + {R_0286E4_SPI_PS_IN_CONTROL_2, 0, 0, 0}, + {R_0286E8_SPI_COMPUTE_INPUT_CNTL, 0, 0, 0}, + {R_028780_CB_BLEND0_CONTROL, 0, 0, 0}, + {R_028784_CB_BLEND1_CONTROL, 0, 0, 0}, + {R_028788_CB_BLEND2_CONTROL, 0, 0, 0}, + {R_02878C_CB_BLEND3_CONTROL, 0, 0, 0}, + {R_028790_CB_BLEND4_CONTROL, 0, 0, 0}, + {R_028794_CB_BLEND5_CONTROL, 0, 0, 0}, + {R_028798_CB_BLEND6_CONTROL, 0, 0, 0}, + {R_02879C_CB_BLEND7_CONTROL, 0, 0, 0}, + {R_028800_DB_DEPTH_CONTROL, 0, 0, 0}, + {CM_R_028804_DB_EQAA, 0, 0, 0}, + {R_028808_CB_COLOR_CONTROL, 0, 0, 0}, + {R_02880C_DB_SHADER_CONTROL, 0, 0, 0}, + {R_028810_PA_CL_CLIP_CNTL, 0, 0, 0}, + {R_028814_PA_SU_SC_MODE_CNTL, 0, 0, 0}, + {R_028818_PA_CL_VTE_CNTL, 0, 0, 0}, + {R_02881C_PA_CL_VS_OUT_CNTL, 0, 0, 0}, + {R_028820_PA_CL_NANINF_CNTL, 0, 0, 0}, + {R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, 0, 0, 0}, + {R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {R_028844_SQ_PGM_RESOURCES_PS, 0, 0, 0}, + {R_028848_SQ_PGM_RESOURCES_2_PS, 0, 0, 0}, + {R_02884C_SQ_PGM_EXPORTS_PS, 0, 0, 0}, + {R_02885C_SQ_PGM_START_VS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {R_028860_SQ_PGM_RESOURCES_VS, 0, 0, 0}, + {R_028864_SQ_PGM_RESOURCES_2_VS, 0, 0, 0}, + {R_0288A4_SQ_PGM_START_FS, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {R_0288A8_SQ_PGM_RESOURCES_FS, 0, 0, 0}, + {CM_R_0288E8_SQ_LDS_ALLOC, 0, 0, 0}, + {R_0288EC_SQ_LDS_ALLOC_PS, 0, 0, 0}, + {R_028900_SQ_ESGS_RING_ITEMSIZE, 0, 0, 0}, + {R_028904_SQ_GSVS_RING_ITEMSIZE, 0, 0, 0}, + {R_028908_SQ_ESTMP_RING_ITEMSIZE, 0, 0, 0}, + {R_02890C_SQ_GSTMP_RING_ITEMSIZE, 0, 0, 0}, + {R_028910_SQ_VSTMP_RING_ITEMSIZE, 0, 0, 0}, + {R_028914_SQ_PSTMP_RING_ITEMSIZE, 0, 0, 0}, + {R_02891C_SQ_GS_VERT_ITEMSIZE, 0, 0, 0}, + 
{R_028920_SQ_GS_VERT_ITEMSIZE_1, 0, 0, 0}, + {R_028924_SQ_GS_VERT_ITEMSIZE_2, 0, 0, 0}, + {R_028928_SQ_GS_VERT_ITEMSIZE_3, 0, 0, 0}, + {R_028940_ALU_CONST_CACHE_PS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {R_028980_ALU_CONST_CACHE_VS_0, REG_FLAG_NEED_BO, S_0085F0_SH_ACTION_ENA(1), 0xFFFFFFFF}, + {R_028A00_PA_SU_POINT_SIZE, 0, 0, 0}, + {R_028A04_PA_SU_POINT_MINMAX, 0, 0, 0}, + {R_028A08_PA_SU_LINE_CNTL, 0, 0, 0}, + {R_028A10_VGT_OUTPUT_PATH_CNTL, 0, 0, 0}, + {R_028A14_VGT_HOS_CNTL, 0, 0, 0}, + {R_028A18_VGT_HOS_MAX_TESS_LEVEL, 0, 0, 0}, + {R_028A1C_VGT_HOS_MIN_TESS_LEVEL, 0, 0, 0}, + {R_028A20_VGT_HOS_REUSE_DEPTH, 0, 0, 0}, + {R_028A24_VGT_GROUP_PRIM_TYPE, 0, 0, 0}, + {R_028A28_VGT_GROUP_FIRST_DECR, 0, 0, 0}, + {R_028A2C_VGT_GROUP_DECR, 0, 0, 0}, + {R_028A30_VGT_GROUP_VECT_0_CNTL, 0, 0, 0}, + {R_028A34_VGT_GROUP_VECT_1_CNTL, 0, 0, 0}, + {R_028A38_VGT_GROUP_VECT_0_FMT_CNTL, 0, 0, 0}, + {R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL, 0, 0, 0}, + {R_028A40_VGT_GS_MODE, 0, 0, 0}, + {R_028A48_PA_SC_MODE_CNTL_0, 0, 0, 0}, + {R_028A4C_PA_SC_MODE_CNTL_1, 0, 0, 0}, + {R_028AB4_VGT_REUSE_OFF, 0, 0, 0}, + {R_028AB8_VGT_VTX_CNT_EN, 0, 0, 0}, + {R_028ABC_DB_HTILE_SURFACE, 0, 0, 0}, + {R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0, 0, 0}, + {R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0, 0, 0}, + {R_028AC8_DB_PRELOAD_CONTROL, 0, 0, 0}, + {R_028B54_VGT_SHADER_STAGES_EN, 0, 0, 0}, + {R_028B70_DB_ALPHA_TO_MASK, 0, 0, 0}, + {R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 0, 0, 0}, + {R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 0, 0, 0}, + {R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, 0, 0, 0}, + {R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, 0, 0, 0}, + {R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, 0, 0, 0}, + {R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, 0, 0, 0}, + {R_028B94_VGT_STRMOUT_CONFIG, 0, 0, 0}, + {R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0, 0, 0}, + {CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0, 0, 0}, + {CM_R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0, 0, 0}, + {CM_R_028BDC_PA_SC_LINE_CNTL, 0, 0, 0}, + 
{CM_R_028BE0_PA_SC_AA_CONFIG, 0, 0, 0}, + {CM_R_028BE4_PA_SU_VTX_CNTL, 0, 0, 0}, + {CM_R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0, 0, 0}, + {CM_R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0, 0, 0}, + {CM_R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0, 0, 0}, + {CM_R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, 0, 0, 0}, + {CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0, 0, 0}, + {CM_R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, 0, 0, 0}, + {CM_R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2, 0, 0, 0}, + {CM_R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3, 0, 0, 0}, + {CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0, 0, 0}, + {CM_R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, 0, 0, 0}, + {CM_R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2, 0, 0, 0}, + {CM_R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3, 0, 0, 0}, + {CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0, 0, 0}, + {CM_R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, 0, 0, 0}, + {CM_R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2, 0, 0, 0}, + {CM_R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3, 0, 0, 0}, + {CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0, 0, 0}, + {CM_R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, 0, 0, 0}, + {CM_R_028C30_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2, 0, 0, 0}, + {CM_R_028C34_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3, 0, 0, 0}, + {CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 0, 0, 0}, + {CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {R_028C60_CB_COLOR0_BASE, REG_FLAG_NEED_BO, 0, 0}, + {R_028C64_CB_COLOR0_PITCH, 0, 0, 0}, + {R_028C68_CB_COLOR0_SLICE, 0, 0, 0}, + {R_028C6C_CB_COLOR0_VIEW, 0, 0, 0}, + {R_028C70_CB_COLOR0_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, + {R_028C74_CB_COLOR0_ATTRIB, REG_FLAG_NEED_BO, 0, 0}, + {R_028C78_CB_COLOR0_DIM, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {R_028C9C_CB_COLOR1_BASE, REG_FLAG_NEED_BO, 0, 0}, + {R_028CA0_CB_COLOR1_PITCH, 0, 0, 0}, + {R_028CA4_CB_COLOR1_SLICE, 0, 0, 0}, + {R_028CA8_CB_COLOR1_VIEW, 0, 0, 0}, + {R_028CAC_CB_COLOR1_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, + {R_028CB0_CB_COLOR1_ATTRIB, 
REG_FLAG_NEED_BO, 0, 0}, + {R_028CB4_CB_COLOR1_DIM, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {R_028CD8_CB_COLOR2_BASE, REG_FLAG_NEED_BO, 0, 0}, + {R_028CDC_CB_COLOR2_PITCH, 0, 0, 0}, + {R_028CE0_CB_COLOR2_SLICE, 0, 0, 0}, + {R_028CE4_CB_COLOR2_VIEW, 0, 0, 0}, + {R_028CE8_CB_COLOR2_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, + {R_028CEC_CB_COLOR2_ATTRIB, REG_FLAG_NEED_BO, 0, 0}, + {R_028CF0_CB_COLOR2_DIM, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {R_028D14_CB_COLOR3_BASE, REG_FLAG_NEED_BO, 0, 0}, + {R_028D18_CB_COLOR3_PITCH, 0, 0, 0}, + {R_028D1C_CB_COLOR3_SLICE, 0, 0, 0}, + {R_028D20_CB_COLOR3_VIEW, 0, 0, 0}, + {R_028D24_CB_COLOR3_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, + {R_028D28_CB_COLOR3_ATTRIB, REG_FLAG_NEED_BO, 0, 0}, + {R_028D2C_CB_COLOR3_DIM, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {R_028D50_CB_COLOR4_BASE, REG_FLAG_NEED_BO, 0, 0}, + {R_028D54_CB_COLOR4_PITCH, 0, 0, 0}, + {R_028D58_CB_COLOR4_SLICE, 0, 0, 0}, + {R_028D5C_CB_COLOR4_VIEW, 0, 0, 0}, + {R_028D60_CB_COLOR4_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, + {R_028D64_CB_COLOR4_ATTRIB, REG_FLAG_NEED_BO, 0, 0}, + {R_028D68_CB_COLOR4_DIM, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {R_028D8C_CB_COLOR5_BASE, REG_FLAG_NEED_BO, 0, 0}, + {R_028D90_CB_COLOR5_PITCH, 0, 0, 0}, + {R_028D94_CB_COLOR5_SLICE, 0, 0, 0}, + {R_028D98_CB_COLOR5_VIEW, 0, 0, 0}, + {R_028D9C_CB_COLOR5_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, + {R_028DA0_CB_COLOR5_ATTRIB, REG_FLAG_NEED_BO, 0, 0}, + {R_028DA4_CB_COLOR5_DIM, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {R_028DC8_CB_COLOR6_BASE, REG_FLAG_NEED_BO, 0, 0}, + {R_028DCC_CB_COLOR6_PITCH, 0, 0, 0}, + {R_028DD0_CB_COLOR6_SLICE, 0, 0, 0}, + {R_028DD4_CB_COLOR6_VIEW, 0, 0, 0}, + {R_028DD8_CB_COLOR6_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, + {R_028DDC_CB_COLOR6_ATTRIB, REG_FLAG_NEED_BO, 0, 0}, + {R_028DE0_CB_COLOR6_DIM, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {R_028E04_CB_COLOR7_BASE, REG_FLAG_NEED_BO, 0, 0}, + {R_028E08_CB_COLOR7_PITCH, 0, 0, 0}, + 
{R_028E0C_CB_COLOR7_SLICE, 0, 0, 0}, + {R_028E10_CB_COLOR7_VIEW, 0, 0, 0}, + {R_028E14_CB_COLOR7_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, + {R_028E18_CB_COLOR7_ATTRIB, REG_FLAG_NEED_BO, 0, 0}, + {R_028E1C_CB_COLOR7_DIM, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {R_028E40_CB_COLOR8_BASE, REG_FLAG_NEED_BO, 0, 0}, + {R_028E44_CB_COLOR8_PITCH, 0, 0, 0}, + {R_028E48_CB_COLOR8_SLICE, 0, 0, 0}, + {R_028E4C_CB_COLOR8_VIEW, 0, 0, 0}, + {R_028E50_CB_COLOR8_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, + {R_028E54_CB_COLOR8_ATTRIB, REG_FLAG_NEED_BO, 0, 0}, + {R_028E58_CB_COLOR8_DIM, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {R_028E5C_CB_COLOR9_BASE, REG_FLAG_NEED_BO, 0, 0}, + {R_028E60_CB_COLOR9_PITCH, 0, 0, 0}, + {R_028E64_CB_COLOR9_SLICE, 0, 0, 0}, + {R_028E68_CB_COLOR9_VIEW, 0, 0, 0}, + {R_028E6C_CB_COLOR9_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, + {R_028E70_CB_COLOR9_ATTRIB, REG_FLAG_NEED_BO, 0, 0}, + {R_028E74_CB_COLOR9_DIM, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {R_028E78_CB_COLOR10_BASE, REG_FLAG_NEED_BO, 0, 0}, + {R_028E7C_CB_COLOR10_PITCH, 0, 0, 0}, + {R_028E80_CB_COLOR10_SLICE, 0, 0, 0}, + {R_028E84_CB_COLOR10_VIEW, 0, 0, 0}, + {R_028E88_CB_COLOR10_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, + {R_028E8C_CB_COLOR10_ATTRIB, REG_FLAG_NEED_BO, 0, 0}, + {R_028E90_CB_COLOR10_DIM, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, + {R_028E94_CB_COLOR11_BASE, REG_FLAG_NEED_BO, 0, 0}, + {R_028E98_CB_COLOR11_PITCH, 0, 0, 0}, + {R_028E9C_CB_COLOR11_SLICE, 0, 0, 0}, + {R_028EA0_CB_COLOR11_VIEW, 0, 0, 0}, + {R_028EA4_CB_COLOR11_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF}, + {R_028EA8_CB_COLOR11_ATTRIB, REG_FLAG_NEED_BO, 0, 0}, + {R_028EAC_CB_COLOR11_DIM, 0, 0, 0}, +}; + /* SHADER RESOURCE R600/R700 */ static int evergreen_state_resource_init(struct r600_context *ctx, u32 offset) { @@ -483,8 +878,8 @@ static int evergreen_state_sampler_border_init(struct r600_context *ctx, u32 off return r; } /* set proper offset */ - range = &ctx->range[CTX_RANGE_ID(ctx, 
r600_shader_sampler_border[0].offset)]; - block = range->blocks[CTX_BLOCK_ID(ctx, r600_shader_sampler_border[0].offset)]; + range = &ctx->range[CTX_RANGE_ID(r600_shader_sampler_border[0].offset)]; + block = range->blocks[CTX_BLOCK_ID(r600_shader_sampler_border[0].offset)]; block->pm4[1] = (offset - EVERGREEN_CONFIG_REG_OFFSET) >> 2; return 0; } @@ -519,12 +914,20 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) } /* add blocks */ - r = r600_context_add_block(ctx, evergreen_config_reg_list, - Elements(evergreen_config_reg_list), PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET); + if (r600_get_family(radeon) == CHIP_CAYMAN) + r = r600_context_add_block(ctx, cayman_config_reg_list, + Elements(cayman_config_reg_list), PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET); + else + r = r600_context_add_block(ctx, evergreen_config_reg_list, + Elements(evergreen_config_reg_list), PKT3_SET_CONFIG_REG, EVERGREEN_CONFIG_REG_OFFSET); if (r) goto out_err; - r = r600_context_add_block(ctx, evergreen_context_reg_list, - Elements(evergreen_context_reg_list), PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET); + if (r600_get_family(radeon) == CHIP_CAYMAN) + r = r600_context_add_block(ctx, cayman_context_reg_list, + Elements(cayman_context_reg_list), PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET); + else + r = r600_context_add_block(ctx, evergreen_context_reg_list, + Elements(evergreen_context_reg_list), PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET); if (r) goto out_err; r = r600_context_add_block(ctx, evergreen_ctl_const_list, @@ -603,6 +1006,8 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) r = -ENOMEM; goto out_err; } + + r600_init_cs(ctx); /* save 16dwords space for fence mecanism */ ctx->pm4_ndwords -= 16; @@ -646,8 +1051,8 @@ static inline void evergreen_context_pipe_state_set_sampler(struct r600_context int i; int dirty; - range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; - block = 
range->blocks[CTX_BLOCK_ID(ctx, offset)]; + range = &ctx->range[CTX_RANGE_ID(offset)]; + block = range->blocks[CTX_BLOCK_ID(offset)]; if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); LIST_DELINIT(&block->list); @@ -684,8 +1089,8 @@ static inline void evergreen_context_pipe_state_set_sampler_border(struct r600_c int i; int dirty; - range = &ctx->range[CTX_RANGE_ID(ctx, fake_offset)]; - block = range->blocks[CTX_BLOCK_ID(ctx, fake_offset)]; + range = &ctx->range[CTX_RANGE_ID(fake_offset)]; + block = range->blocks[CTX_BLOCK_ID(fake_offset)]; if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); LIST_DELINIT(&block->list); diff --git a/src/gallium/winsys/r600/drm/r600_drm.c b/src/gallium/winsys/r600/drm/r600_drm.c index 311324f4f71..03fe385334c 100644 --- a/src/gallium/winsys/r600/drm/r600_drm.c +++ b/src/gallium/winsys/r600/drm/r600_drm.c @@ -320,6 +320,8 @@ static struct radeon *radeon_new(int fd, unsigned device) case CHIP_CYPRESS: case CHIP_HEMLOCK: case CHIP_PALM: + case CHIP_SUMO: + case CHIP_SUMO2: case CHIP_BARTS: case CHIP_TURKS: case CHIP_CAICOS: @@ -327,6 +329,11 @@ static struct radeon *radeon_new(int fd, unsigned device) /* set default group bytes, overridden by tiling info ioctl */ radeon->tiling_info.group_bytes = 512; break; + case CHIP_CAYMAN: + radeon->chip_class = CAYMAN; + /* set default group bytes, overridden by tiling info ioctl */ + radeon->tiling_info.group_bytes = 512; + break; default: fprintf(stderr, "%s unknown or unsupported chipset 0x%04X\n", __func__, radeon->device); diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index 0618d2329e6..af80aa67a44 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -40,6 +40,19 @@ #define GROUP_FORCE_NEW_BLOCK 0 +void r600_init_cs(struct r600_context *ctx) +{ + /* R6xx requires this packet at the start 
of each command buffer */ + if (ctx->radeon->family < CHIP_RV770) { + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_START_3D_CMDBUF, 0, 0); + ctx->pm4[ctx->pm4_cdwords++] = 0x00000000; + } + /* All asics require this one */ + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_CONTEXT_CONTROL, 1, 0); + ctx->pm4[ctx->pm4_cdwords++] = 0x80000000; + ctx->pm4[ctx->pm4_cdwords++] = 0x80000000; +} + static void INLINE r600_context_update_fenced_list(struct r600_context *ctx) { for (int i = 0; i < ctx->creloc; i++) { @@ -82,6 +95,12 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, continue; } + /* ignore regs not on R600 on R600 */ + if ((reg[i].flags & REG_FLAG_NOT_R600) && ctx->radeon->family == CHIP_R600) { + n = 1; + continue; + } + /* register that need relocation are in their own group */ /* find number of consecutive registers */ n = 0; @@ -102,14 +121,14 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, } ctx->nblocks++; for (int j = 0; j < n; j++) { - range = &ctx->range[CTX_RANGE_ID(ctx, reg[i + j].offset)]; + range = &ctx->range[CTX_RANGE_ID(reg[i + j].offset)]; /* create block table if it doesn't exist */ if (!range->blocks) range->blocks = calloc(1 << HASH_SHIFT, sizeof(void *)); if (!range->blocks) return -1; - range->blocks[CTX_BLOCK_ID(ctx, reg[i + j].offset)] = block; + range->blocks[CTX_BLOCK_ID(reg[i + j].offset)] = block; } /* initialize block */ @@ -321,14 +340,14 @@ static const struct r600_reg r600_context_reg_list[] = { {R_0286DC_SPI_FOG_CNTL, 0, 0, 0}, {R_0286E0_SPI_FOG_FUNC_SCALE, 0, 0, 0}, {R_0286E4_SPI_FOG_FUNC_BIAS, 0, 0, 0}, - {R_028780_CB_BLEND0_CONTROL, 0, 0, 0}, - {R_028784_CB_BLEND1_CONTROL, 0, 0, 0}, - {R_028788_CB_BLEND2_CONTROL, 0, 0, 0}, - {R_02878C_CB_BLEND3_CONTROL, 0, 0, 0}, - {R_028790_CB_BLEND4_CONTROL, 0, 0, 0}, - {R_028794_CB_BLEND5_CONTROL, 0, 0, 0}, - {R_028798_CB_BLEND6_CONTROL, 0, 0, 0}, - {R_02879C_CB_BLEND7_CONTROL, 0, 0, 0}, + {R_028780_CB_BLEND0_CONTROL, 
REG_FLAG_NOT_R600, 0, 0}, + {R_028784_CB_BLEND1_CONTROL, REG_FLAG_NOT_R600, 0, 0}, + {R_028788_CB_BLEND2_CONTROL, REG_FLAG_NOT_R600, 0, 0}, + {R_02878C_CB_BLEND3_CONTROL, REG_FLAG_NOT_R600, 0, 0}, + {R_028790_CB_BLEND4_CONTROL, REG_FLAG_NOT_R600, 0, 0}, + {R_028794_CB_BLEND5_CONTROL, REG_FLAG_NOT_R600, 0, 0}, + {R_028798_CB_BLEND6_CONTROL, REG_FLAG_NOT_R600, 0, 0}, + {R_02879C_CB_BLEND7_CONTROL, REG_FLAG_NOT_R600, 0, 0}, {R_0287A0_CB_SHADER_CONTROL, 0, 0, 0}, {R_028800_DB_DEPTH_CONTROL, 0, 0, 0}, {R_028804_CB_BLEND_CONTROL, 0, 0, 0}, @@ -624,8 +643,8 @@ void r600_context_fini(struct r600_context *ctx) block = ctx->range[i].blocks[j]; if (block) { for (int k = 0, offset = block->start_offset; k < block->nreg; k++, offset += 4) { - range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; - range->blocks[CTX_BLOCK_ID(ctx, offset)] = NULL; + range = &ctx->range[CTX_RANGE_ID(offset)]; + range->blocks[CTX_BLOCK_ID(offset)] = NULL; } for (int k = 1; k <= block->nbo; k++) { r600_bo_reference(ctx->radeon, &block->reloc[k].bo, NULL); @@ -774,6 +793,8 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) r = -ENOMEM; goto out_err; } + + r600_init_cs(ctx); /* save 16dwords space for fence mecanism */ ctx->pm4_ndwords -= 16; @@ -886,8 +907,8 @@ void r600_context_reg(struct r600_context *ctx, unsigned new_val; int dirty; - range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; - block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; + range = &ctx->range[CTX_RANGE_ID(offset)]; + block = range->blocks[CTX_BLOCK_ID(offset)]; id = (offset - block->start_offset) >> 2; dirty = block->status & R600_BLOCK_STATUS_DIRTY; @@ -919,7 +940,6 @@ void r600_context_dirty_block(struct r600_context *ctx, struct r600_block *block void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state) { - struct r600_range *range; struct r600_block *block; unsigned new_val; int dirty; @@ -927,9 +947,8 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct 
r600_pipe_stat unsigned id, reloc_id; struct r600_pipe_reg *reg = &state->regs[i]; - range = &ctx->range[CTX_RANGE_ID(ctx, reg->offset)]; - block = range->blocks[CTX_BLOCK_ID(ctx, reg->offset)]; - id = (reg->offset - block->start_offset) >> 2; + block = reg->block; + id = reg->id; dirty = block->status & R600_BLOCK_STATUS_DIRTY; @@ -963,8 +982,8 @@ void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_ int dirty; int num_regs = ctx->radeon->chip_class >= EVERGREEN ? 8 : 7; - range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; - block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; + range = &ctx->range[CTX_RANGE_ID(offset)]; + block = range->blocks[CTX_BLOCK_ID(offset)]; if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); if (block->reloc[1].bo) @@ -1057,8 +1076,8 @@ static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx, int i; int dirty; - range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; - block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; + range = &ctx->range[CTX_RANGE_ID(offset)]; + block = range->blocks[CTX_BLOCK_ID(offset)]; if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); LIST_DELINIT(&block->list); @@ -1093,8 +1112,8 @@ static inline void r600_context_pipe_state_set_sampler_border(struct r600_contex int i; int dirty; - range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; - block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; + range = &ctx->range[CTX_RANGE_ID(offset)]; + block = range->blocks[CTX_BLOCK_ID(offset)]; if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); LIST_DELINIT(&block->list); @@ -1146,8 +1165,8 @@ struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset) struct r600_block *block; unsigned id; - range = &ctx->range[CTX_RANGE_ID(ctx, offset)]; - block = range->blocks[CTX_BLOCK_ID(ctx, offset)]; + range = &ctx->range[CTX_RANGE_ID(offset)]; + block = 
range->blocks[CTX_BLOCK_ID(offset)]; offset -= block->start_offset; id = block->pm4_bo_index[offset >> 2]; if (block->reloc[id].bo) { @@ -1159,11 +1178,16 @@ struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset) void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block) { int id; + int optional = block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS); + int cp_dwords = block->pm4_ndwords, start_dword; + int new_dwords; - if (block->nreg_dirty == 0 && block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS)) { + if (block->nreg_dirty == 0 && optional) { goto out; } + optional &= (block->nreg_dirty != block->nreg); + ctx->flags |= R600_CONTEXT_CHECK_EVENT_FLUSH; for (int j = 0; j < block->nreg; j++) { if (block->pm4_bo_index[j]) { @@ -1181,18 +1205,22 @@ void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block * } } ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; - memcpy(&ctx->pm4[ctx->pm4_cdwords], block->pm4, block->pm4_ndwords * 4); - ctx->pm4_cdwords += block->pm4_ndwords; - - if (block->nreg_dirty != block->nreg && block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS)) { - int new_dwords = block->nreg_dirty; - uint32_t oldword, newword; - ctx->pm4_cdwords -= block->pm4_ndwords; - newword = oldword = ctx->pm4[ctx->pm4_cdwords]; + + if (optional) { + new_dwords = block->nreg_dirty; + start_dword = ctx->pm4_cdwords; + cp_dwords = new_dwords + 2; + } + memcpy(&ctx->pm4[ctx->pm4_cdwords], block->pm4, cp_dwords * 4); + ctx->pm4_cdwords += cp_dwords; + + if (optional) { + uint32_t newword; + + newword = ctx->pm4[start_dword]; newword &= PKT_COUNT_C; newword |= PKT_COUNT_S(new_dwords); - ctx->pm4[ctx->pm4_cdwords] = newword; - ctx->pm4_cdwords += new_dwords + 2; + ctx->pm4[start_dword] = newword; } out: block->status ^= R600_BLOCK_STATUS_DIRTY; @@ -1231,7 +1259,7 @@ void r600_context_flush_dest_caches(struct r600_context *ctx) 0, cb[i]); } if (db) { - r600_context_bo_flush(ctx, 
S_0085F0_DB_ACTION_ENA(1), 0, db); + r600_context_bo_flush(ctx, S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1), 0, db); } ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY; @@ -1381,6 +1409,8 @@ void r600_context_flush(struct r600_context *ctx) ctx->pm4_cdwords = 0; ctx->flags = 0; + r600_init_cs(ctx); + /* resume queries */ r600_context_queries_resume(ctx); diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 78b8190d6f5..9be5c358f85 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -65,6 +65,7 @@ struct radeon { #define REG_FLAG_NEED_BO 1 #define REG_FLAG_DIRTY_ALWAYS 2 #define REG_FLAG_RV6XX_SBU 4 +#define REG_FLAG_NOT_R600 8 struct r600_reg { unsigned offset; @@ -165,6 +166,7 @@ int r600_setup_block_table(struct r600_context *ctx); void r600_context_reg(struct r600_context *ctx, unsigned offset, unsigned value, unsigned mask); +void r600_init_cs(struct r600_context *ctx); /* * r600_bo.c */ @@ -187,16 +189,6 @@ struct r600_bo *r600_bomgr_bo_create(struct r600_bomgr *mgr, * helpers */ -/* each range covers 9 bits of dword space = 512 dwords = 2k bytes */ -/* there is a block entry for each register so 512 blocks */ -/* we have no registers to read/write below 0x8000 (0x2000 in dw space) */ -/* we use some fake offsets at 0x40000 to do evergreen sampler borders so take 0x42000 as a max bound*/ -#define RANGE_OFFSET_START 0x8000 -#define HASH_SHIFT 9 -#define NUM_RANGES (0x42000 - RANGE_OFFSET_START) / (4 << HASH_SHIFT) /* 128 << 9 = 64k */ - -#define CTX_RANGE_ID(ctx, offset) ((((offset - RANGE_OFFSET_START) >> 2) >> HASH_SHIFT) & 255) -#define CTX_BLOCK_ID(ctx, offset) (((offset - RANGE_OFFSET_START) >> 2) & ((1 << HASH_SHIFT) - 1)) /* * radeon_bo.c diff --git a/src/gallium/winsys/r600/drm/radeon_pciid.c b/src/gallium/winsys/r600/drm/radeon_pciid.c index 35db37aa1fd..5c41a10bdba 100644 --- 
a/src/gallium/winsys/r600/drm/radeon_pciid.c +++ b/src/gallium/winsys/r600/drm/radeon_pciid.c @@ -439,6 +439,17 @@ static const struct pci_id radeon_pci_id[] = { {0x1002, 0x9614, CHIP_RS780}, {0x1002, 0x9615, CHIP_RS780}, {0x1002, 0x9616, CHIP_RS780}, + {0x1002, 0x9640, CHIP_SUMO}, + {0x1002, 0x9641, CHIP_SUMO}, + {0x1002, 0x9642, CHIP_SUMO2}, + {0x1002, 0x9643, CHIP_SUMO2}, + {0x1002, 0x9644, CHIP_SUMO2}, + {0x1002, 0x9645, CHIP_SUMO2}, + {0x1002, 0x9647, CHIP_SUMO}, + {0x1002, 0x9648, CHIP_SUMO}, + {0x1002, 0x964a, CHIP_SUMO}, + {0x1002, 0x964e, CHIP_SUMO}, + {0x1002, 0x964f, CHIP_SUMO}, {0x1002, 0x9710, CHIP_RS880}, {0x1002, 0x9711, CHIP_RS880}, {0x1002, 0x9712, CHIP_RS880}, @@ -451,6 +462,21 @@ static const struct pci_id radeon_pci_id[] = { {0x1002, 0x9805, CHIP_PALM}, {0x1002, 0x9806, CHIP_PALM}, {0x1002, 0x9807, CHIP_PALM}, + {0x1002, 0x6700, CHIP_CAYMAN}, + {0x1002, 0x6701, CHIP_CAYMAN}, + {0x1002, 0x6702, CHIP_CAYMAN}, + {0x1002, 0x6703, CHIP_CAYMAN}, + {0x1002, 0x6704, CHIP_CAYMAN}, + {0x1002, 0x6705, CHIP_CAYMAN}, + {0x1002, 0x6706, CHIP_CAYMAN}, + {0x1002, 0x6707, CHIP_CAYMAN}, + {0x1002, 0x6708, CHIP_CAYMAN}, + {0x1002, 0x6709, CHIP_CAYMAN}, + {0x1002, 0x6718, CHIP_CAYMAN}, + {0x1002, 0x6719, CHIP_CAYMAN}, + {0x1002, 0x671C, CHIP_CAYMAN}, + {0x1002, 0x671D, CHIP_CAYMAN}, + {0x1002, 0x671F, CHIP_CAYMAN}, {0x1002, 0x6720, CHIP_BARTS}, {0x1002, 0x6721, CHIP_BARTS}, {0x1002, 0x6722, CHIP_BARTS}, |