34 files changed, 1040 insertions, 1047 deletions
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 792f2827ee6..66e9a0b7819 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -297,109 +297,6 @@ struct radeon_saved_cs {
 	unsigned			bo_count;
 };
 
-struct r600_common_context {
-	struct pipe_context b; /* base class */
-
-	struct radeon_winsys		*ws;
-	struct radeon_winsys_ctx	*ctx;
-	enum radeon_family		family;
-	enum chip_class			chip_class;
-	struct radeon_winsys_cs		*gfx_cs;
-	struct radeon_winsys_cs		*dma_cs;
-	struct pipe_fence_handle	*last_gfx_fence;
-	struct pipe_fence_handle	*last_sdma_fence;
-	struct r600_resource		*eop_bug_scratch;
-	struct u_upload_mgr		*cached_gtt_allocator;
-	unsigned			num_gfx_cs_flushes;
-	unsigned			initial_gfx_cs_size;
-	unsigned			gpu_reset_counter;
-	unsigned			last_dirty_tex_counter;
-	unsigned			last_compressed_colortex_counter;
-	unsigned			last_num_draw_calls;
-
-	struct threaded_context		*tc;
-	struct u_suballocator		*allocator_zeroed_memory;
-	struct slab_child_pool		pool_transfers;
-	struct slab_child_pool		pool_transfers_unsync; /* for threaded_context */
-
-	/* Current unaccounted memory usage. */
-	uint64_t			vram;
-	uint64_t			gtt;
-
-	/* Additional context states. */
-	unsigned flags; /* flush flags */
-
-	/* Queries. */
-	/* Maintain the list of active queries for pausing between IBs. */
-	int				num_occlusion_queries;
-	int				num_perfect_occlusion_queries;
-	struct list_head		active_queries;
-	unsigned			num_cs_dw_queries_suspend;
-	/* Misc stats. */
-	unsigned			num_draw_calls;
-	unsigned			num_decompress_calls;
-	unsigned			num_mrt_draw_calls;
-	unsigned			num_prim_restart_calls;
-	unsigned			num_spill_draw_calls;
-	unsigned			num_compute_calls;
-	unsigned			num_spill_compute_calls;
-	unsigned			num_dma_calls;
-	unsigned			num_cp_dma_calls;
-	unsigned			num_vs_flushes;
-	unsigned			num_ps_flushes;
-	unsigned			num_cs_flushes;
-	unsigned			num_cb_cache_flushes;
-	unsigned			num_db_cache_flushes;
-	unsigned			num_L2_invalidates;
-	unsigned			num_L2_writebacks;
-	unsigned			num_resident_handles;
-	uint64_t			num_alloc_tex_transfer_bytes;
-	unsigned			last_tex_ps_draw_ratio; /* for query */
-
-	/* Render condition. */
-	struct r600_atom		render_cond_atom;
-	struct pipe_query		*render_cond;
-	unsigned			render_cond_mode;
-	bool				render_cond_invert;
-	bool				render_cond_force_off; /* for u_blitter */
-
-	/* Statistics gathering for the DCC enablement heuristic. It can't be
-	 * in r600_texture because r600_texture can be shared by multiple
-	 * contexts. This is for back buffers only. We shouldn't get too many
-	 * of those.
-	 *
-	 * X11 DRI3 rotates among a finite set of back buffers. They should
-	 * all fit in this array. If they don't, separate DCC might never be
-	 * enabled by DCC stat gathering.
-	 */
-	struct {
-		struct r600_texture		*tex;
-		/* Query queue: 0 = usually active, 1 = waiting, 2 = readback. */
-		struct pipe_query		*ps_stats[3];
-		/* If all slots are used and another slot is needed,
-		 * the least recently used slot is evicted based on this. */
-		int64_t				last_use_timestamp;
-		bool				query_active;
-	} dcc_stats[5];
-
-	struct pipe_device_reset_callback device_reset_callback;
-	struct u_log_context		*log;
-
-	void				*query_result_shader;
-
-	/* Copy one resource to another using async DMA. */
-	void (*dma_copy)(struct pipe_context *ctx,
-			 struct pipe_resource *dst,
-			 unsigned dst_level,
-			 unsigned dst_x, unsigned dst_y, unsigned dst_z,
-			 struct pipe_resource *src,
-			 unsigned src_level,
-			 const struct pipe_box *src_box);
-
-	void (*dma_clear_buffer)(struct si_context *sctx, struct pipe_resource *dst,
-				 uint64_t offset, uint64_t size, unsigned value);
-};
-
 /* r600_perfcounters.c */
 void si_perfcounters_destroy(struct si_screen *sscreen);
 
diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c
index 3a680a32449..92336ed377d 100644
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -1364,7 +1364,7 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte
 						      ruvd_set_dtb set_dtb)
 {
 	struct si_context *sctx = (struct si_context*)context;
-	struct radeon_winsys *ws = sctx->b.ws;
+	struct radeon_winsys *ws = sctx->ws;
 	unsigned dpb_size;
 	unsigned width = templ->width, height = templ->height;
 	unsigned bs_buf_size;
@@ -1411,12 +1411,12 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte
 	dec->base.end_frame = ruvd_end_frame;
 	dec->base.flush = ruvd_flush;
 
-	dec->stream_type = profile2stream_type(dec, sctx->b.family);
+	dec->stream_type = profile2stream_type(dec, sctx->family);
 	dec->set_dtb = set_dtb;
 	dec->stream_handle = si_vid_alloc_stream_handle();
 	dec->screen = context->screen;
 	dec->ws = ws;
-	dec->cs = ws->cs_create(sctx->b.ctx, RING_UVD, NULL, NULL);
+	dec->cs = ws->cs_create(sctx->ctx, RING_UVD, NULL, NULL);
 	if (!dec->cs) {
 		RVID_ERR("Can't get command submission context.\n");
 		goto error;
@@ -1424,7 +1424,7 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte
 
 	for (i = 0; i < 16; i++)
 		 dec->render_pic_list[i] = NULL;
-	dec->fb_size = (sctx->b.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA :
+	dec->fb_size = (sctx->family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA :
 			FB_BUFFER_SIZE;
 	bs_buf_size = width * height * (512 / (16 * 16));
 	for (i = 0; i < NUM_BUFFERS; ++i) {
@@ -1457,7 +1457,7 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte
 		si_vid_clear_buffer(context, &dec->dpb);
 	}
 
-	if (dec->stream_type == RUVD_CODEC_H264_PERF && sctx->b.family >= CHIP_POLARIS10) {
+	if (dec->stream_type == RUVD_CODEC_H264_PERF && sctx->family >= CHIP_POLARIS10) {
 		unsigned ctx_size = calc_ctx_size_h264_perf(dec);
 		if (!si_vid_create_buffer(dec->screen, &dec->ctx, ctx_size, PIPE_USAGE_DEFAULT)) {
 			RVID_ERR("Can't allocated context buffer.\n");
@@ -1466,7 +1466,7 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte
 		si_vid_clear_buffer(context, &dec->ctx);
 	}
 
-	if (sctx->b.family >= CHIP_POLARIS10 && sctx->screen->info.drm_minor >= 3) {
+	if (sctx->family >= CHIP_POLARIS10 && sctx->screen->info.drm_minor >= 3) {
 		if (!si_vid_create_buffer(dec->screen, &dec->sessionctx,
 					UVD_SESSION_CONTEXT_SIZE,
 					PIPE_USAGE_DEFAULT)) {
@@ -1476,7 +1476,7 @@ struct pipe_video_codec *si_common_uvd_create_decoder(struct pipe_context *conte
 		si_vid_clear_buffer(context, &dec->sessionctx);
 	}
 
-	if (sctx->b.family >= CHIP_VEGA10) {
+	if (sctx->family >= CHIP_VEGA10) {
 		dec->reg.data0 = RUVD_GPCOM_VCPU_DATA0_SOC15;
 		dec->reg.data1 = RUVD_GPCOM_VCPU_DATA1_SOC15;
 		dec->reg.cmd = RUVD_GPCOM_VCPU_CMD_SOC15;
diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c b/src/gallium/drivers/radeon/radeon_uvd_enc.c
index 94bd26a08b5..521d08f304c 100644
--- a/src/gallium/drivers/radeon/radeon_uvd_enc.c
+++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c
@@ -285,7 +285,7 @@ radeon_uvd_create_encoder(struct pipe_context *context,
                           radeon_uvd_enc_get_buffer get_buffer)
 {
    struct si_screen *sscreen = (struct si_screen *) context->screen;
-   struct r600_common_context *rctx = (struct r600_common_context *) context;
+   struct si_context *sctx = (struct si_context *) context;
    struct radeon_uvd_encoder *enc;
    struct pipe_video_buffer *tmp_buf, templat = { };
    struct radeon_surf *tmp_surf;
@@ -314,7 +314,7 @@ radeon_uvd_create_encoder(struct pipe_context *context,
    enc->screen = context->screen;
    enc->ws = ws;
    enc->cs =
-      ws->cs_create(rctx->ctx, RING_UVD_ENC, radeon_uvd_enc_cs_flush, enc);
+      ws->cs_create(sctx->ctx, RING_UVD_ENC, radeon_uvd_enc_cs_flush, enc);
 
    if (!enc->cs) {
       RVID_ERR("Can't get command submission context.\n");
diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c
index 9eac217c1c0..57b9d349582 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -437,7 +437,7 @@ struct pipe_video_codec *si_vce_create_encoder(struct pipe_context *context,
 
 	enc->screen = context->screen;
 	enc->ws = ws;
-	enc->cs = ws->cs_create(sctx->b.ctx, RING_VCE, rvce_cs_flush, enc);
+	enc->cs = ws->cs_create(sctx->ctx, RING_VCE, rvce_cs_flush, enc);
 	if (!enc->cs) {
 		RVID_ERR("Can't get command submission context.\n");
 		goto error;
diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec.c b/src/gallium/drivers/radeon/radeon_vcn_dec.c
index db4e24f9d76..48d79ffb9f9 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_dec.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_dec.c
@@ -1199,7 +1199,7 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
 					     const struct pipe_video_codec *templ)
 {
 	struct si_context *sctx = (struct si_context*)context;
-	struct radeon_winsys *ws = sctx->b.ws;
+	struct radeon_winsys *ws = sctx->ws;
 	unsigned width = templ->width, height = templ->height;
 	unsigned dpb_size, bs_buf_size, stream_type = 0;
 	struct radeon_decoder *dec;
@@ -1253,7 +1253,7 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
 	dec->stream_handle = si_vid_alloc_stream_handle();
 	dec->screen = context->screen;
 	dec->ws = ws;
-	dec->cs = ws->cs_create(sctx->b.ctx, RING_VCN_DEC, NULL, NULL);
+	dec->cs = ws->cs_create(sctx->ctx, RING_VCN_DEC, NULL, NULL);
 	if (!dec->cs) {
 		RVID_ERR("Can't get command submission context.\n");
 		goto error;
diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c b/src/gallium/drivers/radeon/radeon_vcn_enc.c
index 388a33388b3..8f9af73c664 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c
@@ -262,7 +262,7 @@ struct pipe_video_codec *radeon_create_encoder(struct pipe_context *context,
 		radeon_enc_get_buffer get_buffer)
 {
 	struct si_screen *sscreen = (struct si_screen *)context->screen;
-	struct r600_common_context *rctx = (struct r600_common_context*)context;
+	struct si_context *sctx = (struct si_context*)context;
 	struct radeon_encoder *enc;
 	struct pipe_video_buffer *tmp_buf, templat = {};
 	struct radeon_surf *tmp_surf;
@@ -286,7 +286,7 @@ struct pipe_video_codec *radeon_create_encoder(struct pipe_context *context,
 	enc->bits_in_shifter = 0;
 	enc->screen = context->screen;
 	enc->ws = ws;
-	enc->cs = ws->cs_create(rctx->ctx, RING_VCN_ENC, radeon_enc_cs_flush, enc);
+	enc->cs = ws->cs_create(sctx->ctx, RING_VCN_ENC, radeon_enc_cs_flush, enc);
 
 	if (!enc->cs) {
 		RVID_ERR("Can't get command submission context.\n");
diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c
index b2238fb113e..a2947df9590 100644
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -121,8 +121,8 @@ void si_vid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffe
 {
 	struct si_context *sctx = (struct si_context*)context;
 
-	sctx->b.dma_clear_buffer(sctx, &buffer->res->b.b, 0,
-				 buffer->res->buf->size, 0);
+	sctx->dma_clear_buffer(sctx, &buffer->res->b.b, 0,
+			       buffer->res->buf->size, 0);
 	context->flush(context, NULL, 0);
 }
 
@@ -134,7 +134,7 @@ void si_vid_join_surfaces(struct si_context *sctx,
 			  struct pb_buffer** buffers[VL_NUM_COMPONENTS],
 			  struct radeon_surf *surfaces[VL_NUM_COMPONENTS])
 {
-	struct radeon_winsys *ws = sctx->b.ws;;
+	struct radeon_winsys *ws = sctx->ws;
 	unsigned best_tiling, best_wh, off;
 	unsigned size, alignment;
 	struct pb_buffer *pb;
@@ -146,7 +146,7 @@ void si_vid_join_surfaces(struct si_context *sctx,
 		if (!surfaces[i])
 			continue;
 
-		if (sctx->b.chip_class < GFX9) {
+		if (sctx->chip_class < GFX9) {
 			/* choose the smallest bank w/h for now */
 			wh = surfaces[i]->u.legacy.bankw * surfaces[i]->u.legacy.bankh;
 			if (wh < best_wh) {
@@ -163,7 +163,7 @@ void si_vid_join_surfaces(struct si_context *sctx,
 		/* adjust the texture layer offsets */
 		off = align(off, surfaces[i]->surf_alignment);
 
-		if (sctx->b.chip_class < GFX9) {
+		if (sctx->chip_class < GFX9) {
 			/* copy the tiling parameters */
 			surfaces[i]->u.legacy.bankw = surfaces[best_tiling]->u.legacy.bankw;
 			surfaces[i]->u.legacy.bankh = surfaces[best_tiling]->u.legacy.bankh;
diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
index 8aaf489936f..690e7ff5499 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -33,7 +33,7 @@ static void cik_sdma_copy_buffer(struct si_context *ctx,
 				 uint64_t src_offset,
 				 uint64_t size)
 {
-	struct radeon_winsys_cs *cs = ctx->b.dma_cs;
+	struct radeon_winsys_cs *cs = ctx->dma_cs;
 	unsigned i, ncopy, csize;
 	struct r600_resource *rdst = r600_resource(dst);
 	struct r600_resource *rsrc = r600_resource(src);
@@ -55,7 +55,7 @@ static void cik_sdma_copy_buffer(struct si_context *ctx,
 		radeon_emit(cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
 						CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
 						0));
-		radeon_emit(cs, ctx->b.chip_class >= GFX9 ? csize - 1 : csize);
+		radeon_emit(cs, ctx->chip_class >= GFX9 ? csize - 1 : csize);
 		radeon_emit(cs, 0); /* src/dst endian swap */
 		radeon_emit(cs, src_offset);
 		radeon_emit(cs, src_offset >> 32);
@@ -73,13 +73,13 @@ static void cik_sdma_clear_buffer(struct si_context *sctx,
 				  uint64_t size,
 				  unsigned clear_value)
 {
-	struct radeon_winsys_cs *cs = sctx->b.dma_cs;
+	struct radeon_winsys_cs *cs = sctx->dma_cs;
 	unsigned i, ncopy, csize;
 	struct r600_resource *rdst = r600_resource(dst);
 
 	if (!cs || offset % 4 != 0 || size % 4 != 0 ||
 	    dst->flags & PIPE_RESOURCE_FLAG_SPARSE) {
-		sctx->b.b.clear_buffer(&sctx->b.b, dst, offset, size, &clear_value, 4);
+		sctx->b.clear_buffer(&sctx->b, dst, offset, size, &clear_value, 4);
 		return;
 	}
 
@@ -101,7 +101,7 @@ static void cik_sdma_clear_buffer(struct si_context *sctx,
 		radeon_emit(cs, offset);
 		radeon_emit(cs, offset >> 32);
 		radeon_emit(cs, clear_value);
-		radeon_emit(cs, sctx->b.chip_class >= GFX9 ? csize - 1 : csize);
+		radeon_emit(cs, sctx->chip_class >= GFX9 ? csize - 1 : csize);
 		offset += csize;
 		size -= csize;
 	}
@@ -221,16 +221,16 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
 	    copy_height <= (1 << 14) &&
 	    copy_depth <= (1 << 11) &&
 	    /* HW limitation - CIK: */
-	    (sctx->b.chip_class != CIK ||
+	    (sctx->chip_class != CIK ||
 	     (copy_width < (1 << 14) &&
 	      copy_height < (1 << 14) &&
 	      copy_depth < (1 << 11))) &&
 	    /* HW limitation - some CIK parts: */
-	    ((sctx->b.family != CHIP_BONAIRE &&
-	      sctx->b.family != CHIP_KAVERI) ||
+	    ((sctx->family != CHIP_BONAIRE &&
+	      sctx->family != CHIP_KAVERI) ||
 	     (srcx + copy_width != (1 << 14) &&
 	      srcy + copy_height != (1 << 14)))) {
-		struct radeon_winsys_cs *cs = sctx->b.dma_cs;
+		struct radeon_winsys_cs *cs = sctx->dma_cs;
 
 		si_need_dma_space(sctx, 13, &rdst->resource, &rsrc->resource);
 
@@ -247,7 +247,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
 		radeon_emit(cs, dstx | (dsty << 16));
 		radeon_emit(cs, dstz | ((dst_pitch - 1) << 16));
 		radeon_emit(cs, dst_slice_pitch - 1);
-		if (sctx->b.chip_class == CIK) {
+		if (sctx->chip_class == CIK) {
 			radeon_emit(cs, copy_width | (copy_height << 16));
 			radeon_emit(cs, copy_depth);
 		} else {
@@ -298,22 +298,22 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
 			copy_width_aligned = align(copy_width, xalign);
 
 		/* HW limitations. */
-		if ((sctx->b.family == CHIP_BONAIRE ||
-		     sctx->b.family == CHIP_KAVERI) &&
+		if ((sctx->family == CHIP_BONAIRE ||
+		     sctx->family == CHIP_KAVERI) &&
 		    linear_pitch - 1 == 0x3fff &&
 		    bpp == 16)
 			return false;
 
-		if (sctx->b.chip_class == CIK &&
+		if (sctx->chip_class == CIK &&
 		    (copy_width_aligned == (1 << 14) ||
 		     copy_height == (1 << 14) ||
 		     copy_depth == (1 << 11)))
 			return false;
 
-		if ((sctx->b.family == CHIP_BONAIRE ||
-		     sctx->b.family == CHIP_KAVERI ||
-		     sctx->b.family == CHIP_KABINI ||
-		     sctx->b.family == CHIP_MULLINS) &&
+		if ((sctx->family == CHIP_BONAIRE ||
+		     sctx->family == CHIP_KAVERI ||
+		     sctx->family == CHIP_KABINI ||
+		     sctx->family == CHIP_MULLINS) &&
 		    (tiled_x + copy_width == (1 << 14) ||
 		     tiled_y + copy_height == (1 << 14)))
 			return false;
@@ -392,7 +392,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
 		    copy_width_aligned <= (1 << 14) &&
 		    copy_height <= (1 << 14) &&
 		    copy_depth <= (1 << 11)) {
-			struct radeon_winsys_cs *cs = sctx->b.dma_cs;
+			struct radeon_winsys_cs *cs = sctx->dma_cs;
 			uint32_t direction = linear == rdst ? 1u << 31 : 0;
 
 			si_need_dma_space(sctx, 14, &rdst->resource, &rsrc->resource);
@@ -411,7 +411,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
 			radeon_emit(cs, linear_x | (linear_y << 16));
 			radeon_emit(cs, linear_z | ((linear_pitch - 1) << 16));
 			radeon_emit(cs, linear_slice_pitch - 1);
-			if (sctx->b.chip_class == CIK) {
+			if (sctx->chip_class == CIK) {
 				radeon_emit(cs, copy_width_aligned | (copy_height << 16));
 				radeon_emit(cs, copy_depth);
 			} else {
@@ -436,7 +436,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
 	    srcy % 8 == 0 &&
 	    /* this can either be equal, or display->rotated (VI+ only) */
 	    (src_micro_mode == dst_micro_mode ||
-	     (sctx->b.chip_class >= VI &&
+	     (sctx->chip_class >= VI &&
 	      src_micro_mode == V_009910_ADDR_SURF_DISPLAY_MICRO_TILING &&
 	      dst_micro_mode == V_009910_ADDR_SURF_ROTATED_MICRO_TILING))) {
 		assert(src_pitch % 8 == 0);
@@ -475,19 +475,19 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
 		    copy_width_aligned % 8 == 0 &&
 		    copy_height_aligned % 8 == 0 &&
 		    /* HW limitation - CIK: */
-		    (sctx->b.chip_class != CIK ||
+		    (sctx->chip_class != CIK ||
 		     (copy_width_aligned < (1 << 14) &&
 		      copy_height_aligned < (1 << 14) &&
 		      copy_depth < (1 << 11))) &&
 		    /* HW limitation - some CIK parts: */
-		    ((sctx->b.family != CHIP_BONAIRE &&
-		      sctx->b.family != CHIP_KAVERI &&
-		      sctx->b.family != CHIP_KABINI &&
-		      sctx->b.family != CHIP_MULLINS) ||
+		    ((sctx->family != CHIP_BONAIRE &&
+		      sctx->family != CHIP_KAVERI &&
+		      sctx->family != CHIP_KABINI &&
+		      sctx->family != CHIP_MULLINS) ||
 		     (srcx + copy_width_aligned != (1 << 14) &&
 		      srcy + copy_height_aligned != (1 << 14) &&
 		      dstx + copy_width != (1 << 14)))) {
-			struct radeon_winsys_cs *cs = sctx->b.dma_cs;
+			struct radeon_winsys_cs *cs = sctx->dma_cs;
 
 			si_need_dma_space(sctx, 15, &rdst->resource, &rsrc->resource);
 
@@ -505,7 +505,7 @@ static bool cik_sdma_copy_texture(struct si_context *sctx,
 			radeon_emit(cs, dstz | (dst_pitch_tile_max << 16));
 			radeon_emit(cs, dst_slice_tile_max);
 			radeon_emit(cs, encode_tile_info(sctx, rdst, dst_level, false));
-			if (sctx->b.chip_class == CIK) {
+			if (sctx->chip_class == CIK) {
 				radeon_emit(cs, copy_width_aligned |
 						(copy_height_aligned << 16));
 				radeon_emit(cs, copy_depth);
@@ -531,7 +531,7 @@ static void cik_sdma_copy(struct pipe_context *ctx,
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
-	if (!sctx->b.dma_cs ||
+	if (!sctx->dma_cs ||
 	    src->flags & PIPE_RESOURCE_FLAG_SPARSE ||
 	    dst->flags & PIPE_RESOURCE_FLAG_SPARSE)
 		goto fallback;
@@ -541,7 +541,7 @@ static void cik_sdma_copy(struct pipe_context *ctx,
 		return;
 	}
 
-	if ((sctx->b.chip_class == CIK || sctx->b.chip_class == VI) &&
+	if ((sctx->chip_class == CIK || sctx->chip_class == VI) &&
 	    cik_sdma_copy_texture(sctx, dst, dst_level, dstx, dsty, dstz,
 				  src, src_level, src_box))
 		return;
@@ -553,6 +553,6 @@ fallback:
 
 void cik_init_sdma_functions(struct si_context *sctx)
 {
-	sctx->b.dma_copy = cik_sdma_copy;
-	sctx->b.dma_clear_buffer = cik_sdma_clear_buffer;
+	sctx->dma_copy = cik_sdma_copy;
+	sctx->dma_clear_buffer = cik_sdma_clear_buffer;
 }
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index a88a5e22ca4..40f512a682d 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -74,12 +74,12 @@ void si_blitter_begin(struct si_context *sctx, enum si_blitter_op op)
 	}
 
 	if (op & SI_DISABLE_RENDER_COND)
-		sctx->b.render_cond_force_off = true;
+		sctx->render_cond_force_off = true;
 }
 
 void si_blitter_end(struct si_context *sctx)
 {
-	sctx->b.render_cond_force_off = false;
+	sctx->render_cond_force_off = false;
 
 	/* Restore shader pointers because the VS blit shader changed all
 	 * non-global VS user SGPRs. */
@@ -132,10 +132,10 @@ si_blit_dbcb_copy(struct si_context *sctx,
 			surf_tmpl.u.tex.first_layer = layer;
 			surf_tmpl.u.tex.last_layer = layer;
 
-			zsurf = sctx->b.b.create_surface(&sctx->b.b, &src->resource.b.b, &surf_tmpl);
+			zsurf = sctx->b.create_surface(&sctx->b, &src->resource.b.b, &surf_tmpl);
 
 			surf_tmpl.format = dst->resource.b.b.format;
-			cbsurf = sctx->b.b.create_surface(&sctx->b.b, &dst->resource.b.b, &surf_tmpl);
+			cbsurf = sctx->b.create_surface(&sctx->b, &dst->resource.b.b, &surf_tmpl);
 
 			for (sample = first_sample; sample <= last_sample; sample++) {
 				if (sample != sctx->dbcb_copy_sample) {
@@ -230,7 +230,7 @@ si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
 			surf_tmpl.u.tex.first_layer = layer;
 			surf_tmpl.u.tex.last_layer = layer;
 
-			zsurf = sctx->b.b.create_surface(&sctx->b.b, &texture->resource.b.b, &surf_tmpl);
+			zsurf = sctx->b.create_surface(&sctx->b, &texture->resource.b.b, &surf_tmpl);
 
 			si_blitter_begin(sctx, SI_DECOMPRESS);
 			util_blitter_custom_depth_stencil(sctx->blitter, zsurf, NULL, ~0,
@@ -330,8 +330,8 @@ si_decompress_depth(struct si_context *sctx,
 		}
 	}
 
-	if (unlikely(sctx->b.log))
-		u_log_printf(sctx->b.log,
+	if (unlikely(sctx->log))
+		u_log_printf(sctx->log,
 			     "\n------------------------------------------------\n"
 			     "Decompress Depth (levels %u - %u, levels Z: 0x%x S: 0x%x)\n\n",
 			     first_level, last_level, levels_z, levels_s);
@@ -341,7 +341,7 @@ si_decompress_depth(struct si_context *sctx,
 	 */
 	if (copy_planes &&
 	    (tex->flushed_depth_texture ||
-	     si_init_flushed_depth_texture(&sctx->b.b, &tex->resource.b.b, NULL))) {
+	     si_init_flushed_depth_texture(&sctx->b, &tex->resource.b.b, NULL))) {
 		struct r600_texture *dst = tex->flushed_depth_texture;
 		unsigned fully_copied_levels;
 		unsigned levels = 0;
@@ -454,8 +454,8 @@ static void si_blit_decompress_color(struct si_context *sctx,
 	if (!level_mask)
 		return;
 
-	if (unlikely(sctx->b.log))
-		u_log_printf(sctx->b.log,
+	if (unlikely(sctx->log))
+		u_log_printf(sctx->log,
 			     "\n------------------------------------------------\n"
 			     "Decompress Color (levels %u - %u, mask 0x%x)\n\n",
 			     first_level, last_level, level_mask);
@@ -493,12 +493,12 @@ static void si_blit_decompress_color(struct si_context *sctx,
 			surf_tmpl.u.tex.level = level;
 			surf_tmpl.u.tex.first_layer = layer;
 			surf_tmpl.u.tex.last_layer = layer;
-			cbsurf = sctx->b.b.create_surface(&sctx->b.b, &rtex->resource.b.b, &surf_tmpl);
+			cbsurf = sctx->b.create_surface(&sctx->b, &rtex->resource.b.b, &surf_tmpl);
 
 			/* Required before and after FMASK and DCC_DECOMPRESS. */
 			if (custom_blend == sctx->custom_blend_fmask_decompress ||
 			    custom_blend == sctx->custom_blend_dcc_decompress)
-				sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
+				sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
 
 			si_blitter_begin(sctx, SI_DECOMPRESS);
 			util_blitter_custom_color(sctx->blitter, cbsurf, custom_blend);
@@ -506,7 +506,7 @@ static void si_blit_decompress_color(struct si_context *sctx,
 
 			if (custom_blend == sctx->custom_blend_fmask_decompress ||
 			    custom_blend == sctx->custom_blend_dcc_decompress)
-				sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
+				sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
 
 			pipe_surface_reference(&cbsurf, NULL);
 		}
@@ -775,8 +775,8 @@ void si_decompress_textures(struct si_context *sctx, unsigned shader_mask)
 
 	/* Update the compressed_colortex_mask if necessary. */
 	compressed_colortex_counter = p_atomic_read(&sctx->screen->compressed_colortex_counter);
-	if (compressed_colortex_counter != sctx->b.last_compressed_colortex_counter) {
-		sctx->b.last_compressed_colortex_counter = compressed_colortex_counter;
+	if (compressed_colortex_counter != sctx->last_compressed_colortex_counter) {
+		sctx->last_compressed_colortex_counter = compressed_colortex_counter;
 		si_update_needs_color_decompress_masks(sctx);
 	}
 
@@ -1067,7 +1067,7 @@ static void si_do_CB_resolve(struct si_context *sctx,
 			     enum pipe_format format)
 {
 	/* Required before and after CB_RESOLVE. */
-	sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
+	sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
 
 	si_blitter_begin(sctx, SI_COLOR_RESOLVE |
 			 (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
@@ -1147,7 +1147,7 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
 		 */
 		if (vi_dcc_enabled(dst, info->dst.level)) {
 			/* TODO: Implement per-level DCC clears for GFX9. */
-			if (sctx->b.chip_class >= GFX9 &&
+			if (sctx->chip_class >= GFX9 &&
 			    info->dst.resource->last_level != 0)
 				goto resolve_to_temp;
 
@@ -1225,9 +1225,9 @@ static void si_blit(struct pipe_context *ctx,
 	 * on failure (recursion).
 	 */
 	if (rdst->surface.is_linear &&
-	    sctx->b.dma_copy &&
+	    sctx->dma_copy &&
 	    util_can_blit_via_copy_region(info, false)) {
-		sctx->b.dma_copy(ctx, info->dst.resource, info->dst.level,
+		sctx->dma_copy(ctx, info->dst.resource, info->dst.level,
 				 info->dst.box.x, info->dst.box.y,
 				 info->dst.box.z,
 				 info->src.resource, info->src.level,
@@ -1335,8 +1335,8 @@ void si_decompress_dcc(struct si_context *sctx, struct r600_texture *rtex)
 
 void si_init_blit_functions(struct si_context *sctx)
 {
-	sctx->b.b.resource_copy_region = si_resource_copy_region;
-	sctx->b.b.blit = si_blit;
-	sctx->b.b.flush_resource = si_flush_resource;
-	sctx->b.b.generate_mipmap = si_generate_mipmap;
+	sctx->b.resource_copy_region = si_resource_copy_region;
+	sctx->b.blit = si_blit;
+	sctx->b.flush_resource = si_flush_resource;
+	sctx->b.generate_mipmap = si_generate_mipmap;
 }
diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c
index 0af97f4c427..1420702d8d4 100644
--- a/src/gallium/drivers/radeonsi/si_buffer.c
+++ b/src/gallium/drivers/radeonsi/si_buffer.c
@@ -32,11 +32,11 @@ bool si_rings_is_buffer_referenced(struct si_context *sctx,
 				   struct pb_buffer *buf,
 				   enum radeon_bo_usage usage)
 {
-	if (sctx->b.ws->cs_is_buffer_referenced(sctx->b.gfx_cs, buf, usage)) {
+	if (sctx->ws->cs_is_buffer_referenced(sctx->gfx_cs, buf, usage)) {
 		return true;
 	}
-	if (radeon_emitted(sctx->b.dma_cs, 0) &&
-	    sctx->b.ws->cs_is_buffer_referenced(sctx->b.dma_cs, buf, usage)) {
+	if (radeon_emitted(sctx->dma_cs, 0) &&
+	    sctx->ws->cs_is_buffer_referenced(sctx->dma_cs, buf, usage)) {
 		return true;
 	}
 	return false;
@@ -52,7 +52,7 @@ void *si_buffer_map_sync_with_rings(struct si_context *sctx,
 	assert(!(resource->flags & RADEON_FLAG_SPARSE));
 
 	if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
-		return sctx->b.ws->buffer_map(resource->buf, NULL, usage);
+		return sctx->ws->buffer_map(resource->buf, NULL, usage);
 	}
 
 	if (!(usage & PIPE_TRANSFER_WRITE)) {
@@ -60,8 +60,8 @@ void *si_buffer_map_sync_with_rings(struct si_context *sctx,
 		rusage = RADEON_USAGE_WRITE;
 	}
 
-	if (radeon_emitted(sctx->b.gfx_cs, sctx->b.initial_gfx_cs_size) &&
-	    sctx->b.ws->cs_is_buffer_referenced(sctx->b.gfx_cs,
+	if (radeon_emitted(sctx->gfx_cs, sctx->initial_gfx_cs_size) &&
+	    sctx->ws->cs_is_buffer_referenced(sctx->gfx_cs,
 						resource->buf, rusage)) {
 		if (usage & PIPE_TRANSFER_DONTBLOCK) {
 			si_flush_gfx_cs(sctx, PIPE_FLUSH_ASYNC, NULL);
@@ -71,8 +71,8 @@ void *si_buffer_map_sync_with_rings(struct si_context *sctx,
 			busy = true;
 		}
 	}
-	if (radeon_emitted(sctx->b.dma_cs, 0) &&
-	    sctx->b.ws->cs_is_buffer_referenced(sctx->b.dma_cs,
+	if (radeon_emitted(sctx->dma_cs, 0) &&
+	    sctx->ws->cs_is_buffer_referenced(sctx->dma_cs,
 						resource->buf, rusage)) {
 		if (usage & PIPE_TRANSFER_DONTBLOCK) {
 			si_flush_dma_cs(sctx, PIPE_FLUSH_ASYNC, NULL);
@@ -83,20 +83,20 @@ void *si_buffer_map_sync_with_rings(struct si_context *sctx,
 		}
 	}
 
-	if (busy || !sctx->b.ws->buffer_wait(resource->buf, 0, rusage)) {
+	if (busy || !sctx->ws->buffer_wait(resource->buf, 0, rusage)) {
 		if (usage & PIPE_TRANSFER_DONTBLOCK) {
 			return NULL;
 		} else {
 			/* We will be wait for the GPU. Wait for any offloaded
 			 * CS flush to complete to avoid busy-waiting in the winsys. */
-			sctx->b.ws->cs_sync_flush(sctx->b.gfx_cs);
-			if (sctx->b.dma_cs)
-				sctx->b.ws->cs_sync_flush(sctx->b.dma_cs);
+			sctx->ws->cs_sync_flush(sctx->gfx_cs);
+			if (sctx->dma_cs)
+				sctx->ws->cs_sync_flush(sctx->dma_cs);
 		}
 	}
 
 	/* Setting the CS to NULL will prevent doing checks we have done already. */
-	return sctx->b.ws->buffer_map(resource->buf, NULL, usage);
+	return sctx->ws->buffer_map(resource->buf, NULL, usage);
 }
 
 void si_init_resource_fields(struct si_screen *sscreen,
@@ -280,7 +280,7 @@ si_invalidate_buffer(struct si_context *sctx,
 
 	/* Check if mapping this buffer would cause waiting for the GPU. */
 	if (si_rings_is_buffer_referenced(sctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
-	    !sctx->b.ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
+	    !sctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
 		uint64_t old_va = rbuffer->gpu_address;
 
 		/* Reallocate the buffer in the same pipe_resource. */
@@ -342,9 +342,9 @@ static void *si_buffer_get_transfer(struct pipe_context *ctx,
 	struct r600_transfer *transfer;
 
 	if (usage & TC_TRANSFER_MAP_THREADED_UNSYNC)
-		transfer = slab_alloc(&sctx->b.pool_transfers_unsync);
+		transfer = slab_alloc(&sctx->pool_transfers_unsync);
 	else
-		transfer = slab_alloc(&sctx->b.pool_transfers);
+		transfer = slab_alloc(&sctx->pool_transfers);
 
 	transfer->b.b.resource = NULL;
 	pipe_resource_reference(&transfer->b.b.resource, resource);
@@ -445,7 +445,7 @@ static void *si_buffer_transfer_map(struct pipe_context *ctx,
 		if (rbuffer->flags & RADEON_FLAG_SPARSE ||
 		    force_discard_range ||
 		    si_rings_is_buffer_referenced(sctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
-		    !sctx->b.ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
+		    !sctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
 			/* Do a wait-free write-only transfer using a temporary buffer. */
 			unsigned offset;
 			struct r600_resource *staging = NULL;
@@ -482,7 +482,7 @@ static void *si_buffer_transfer_map(struct pipe_context *ctx,
 				box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT));
 		if (staging) {
 			/* Copy the VRAM buffer to the staging buffer. */
-			sctx->b.dma_copy(ctx, &staging->b.b, 0,
+			sctx->dma_copy(ctx, &staging->b.b, 0,
 				       box->x % SI_MAP_BUFFER_ALIGNMENT,
 				       0, 0, resource, 0, box);
 
@@ -568,7 +568,7 @@ static void si_buffer_transfer_unmap(struct pipe_context *ctx,
 
 	/* Don't use pool_transfers_unsync. We are always in the driver
 	 * thread. */
-	slab_free(&sctx->b.pool_transfers, transfer);
+	slab_free(&sctx->pool_transfers, transfer);
 }
 
 static void si_buffer_subdata(struct pipe_context *ctx,
@@ -722,23 +722,23 @@ static bool si_resource_commit(struct pipe_context *pctx,
 	 * (b) wait for threaded submit to finish, including those that were
 	 *     triggered by some other, earlier operation.
 	 */
-	if (radeon_emitted(ctx->b.gfx_cs, ctx->b.initial_gfx_cs_size) &&
-	    ctx->b.ws->cs_is_buffer_referenced(ctx->b.gfx_cs,
+	if (radeon_emitted(ctx->gfx_cs, ctx->initial_gfx_cs_size) &&
+	    ctx->ws->cs_is_buffer_referenced(ctx->gfx_cs,
 					       res->buf, RADEON_USAGE_READWRITE)) {
 		si_flush_gfx_cs(ctx, PIPE_FLUSH_ASYNC, NULL);
 	}
-	if (radeon_emitted(ctx->b.dma_cs, 0) &&
-	    ctx->b.ws->cs_is_buffer_referenced(ctx->b.dma_cs,
+	if (radeon_emitted(ctx->dma_cs, 0) &&
+	    ctx->ws->cs_is_buffer_referenced(ctx->dma_cs,
 					       res->buf, RADEON_USAGE_READWRITE)) {
 		si_flush_dma_cs(ctx, PIPE_FLUSH_ASYNC, NULL);
 	}
 
-	ctx->b.ws->cs_sync_flush(ctx->b.dma_cs);
-	ctx->b.ws->cs_sync_flush(ctx->b.gfx_cs);
+	ctx->ws->cs_sync_flush(ctx->dma_cs);
+	ctx->ws->cs_sync_flush(ctx->gfx_cs);
 
 	assert(resource->target == PIPE_BUFFER);
 
-	return ctx->b.ws->buffer_commit(res->buf, box->x, box->width, commit);
+	return ctx->ws->buffer_commit(res->buf, box->x, box->width, commit);
 }
 
 void si_init_screen_buffer_functions(struct si_screen *sscreen)
@@ -750,11 +750,11 @@ void si_init_screen_buffer_functions(struct si_screen *sscreen)
 
 void si_init_buffer_functions(struct si_context *sctx)
 {
-	sctx->b.b.invalidate_resource = si_invalidate_resource;
-	sctx->b.b.transfer_map = u_transfer_map_vtbl;
-	sctx->b.b.transfer_flush_region = u_transfer_flush_region_vtbl;
-	sctx->b.b.transfer_unmap = u_transfer_unmap_vtbl;
-	sctx->b.b.texture_subdata = u_default_texture_subdata;
-	sctx->b.b.buffer_subdata = si_buffer_subdata;
-	sctx->b.b.resource_commit = si_resource_commit;
+	sctx->b.invalidate_resource = si_invalidate_resource;
+	sctx->b.transfer_map = u_transfer_map_vtbl;
+	sctx->b.transfer_flush_region = u_transfer_flush_region_vtbl;
+	sctx->b.transfer_unmap = u_transfer_unmap_vtbl;
+	sctx->b.texture_subdata = u_default_texture_subdata;
+	sctx->b.buffer_subdata = si_buffer_subdata;
+	sctx->b.resource_commit = si_resource_commit;
 }
diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c
index d2529971861..5be78ca3e6f 100644
--- a/src/gallium/drivers/radeonsi/si_clear.c
+++ b/src/gallium/drivers/radeonsi/si_clear.c
@@ -208,7 +208,7 @@ void vi_dcc_clear_level(struct si_context *sctx,
 		dcc_offset = rtex->dcc_offset;
 	}
 
-	if (sctx->b.chip_class >= GFX9) {
+	if (sctx->chip_class >= GFX9) {
 		/* Mipmap level clears aren't implemented. */
 		assert(rtex->resource.b.b.last_level == 0);
 		/* MSAA needs a different clear size. */
@@ -354,7 +354,7 @@ static void si_do_fast_color_clear(struct si_context *sctx,
 	return;
 #endif
 
-	if (sctx->b.render_cond)
+	if (sctx->render_cond)
 		return;
 
 	for (i = 0; i < fb->nr_cbufs; i++) {
@@ -396,7 +396,7 @@ static void si_do_fast_color_clear(struct si_context *sctx,
 			continue;
 
 		/* fast color clear with 1D tiling doesn't work on old kernels and CIK */
-		if (sctx->b.chip_class == CIK &&
+		if (sctx->chip_class == CIK &&
 		    tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
 		    sctx->screen->info.drm_major == 2 &&
 		    sctx->screen->info.drm_minor < 38) {
@@ -406,7 +406,7 @@ static void si_do_fast_color_clear(struct si_context *sctx,
 		/* Fast clear is the most appropriate place to enable DCC for
 		 * displayable surfaces.
 		 */
-		if (sctx->b.chip_class >= VI &&
+		if (sctx->chip_class >= VI &&
 		    !(sctx->screen->debug_flags & DBG(NO_DCC_FB))) {
 			vi_separate_dcc_try_enable(sctx, tex);
 
@@ -416,7 +416,7 @@ static void si_do_fast_color_clear(struct si_context *sctx,
 			 * enable separate DCC.
 			 */
 			if (tex->dcc_gather_statistics &&
-			    sctx->b.family == CHIP_STONEY)
+			    sctx->family == CHIP_STONEY)
 				tex->num_slow_clears++;
 		}
 
@@ -441,7 +441,7 @@ static void si_do_fast_color_clear(struct si_context *sctx,
 				continue;
 
 			/* This can only occur with MSAA. */
-			if (sctx->b.chip_class == VI &&
+			if (sctx->chip_class == VI &&
 			    !tex->surface.u.legacy.level[level].dcc_fast_clear_size)
 				continue;
 
@@ -481,7 +481,7 @@ static void si_do_fast_color_clear(struct si_context *sctx,
 			}
 
 			/* RB+ doesn't work with CMASK fast clear on Stoney. */
-			if (sctx->b.family == CHIP_STONEY)
+			if (sctx->family == CHIP_STONEY)
 				continue;
 
 			/* ensure CMASK is enabled */
@@ -601,7 +601,7 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
 		 * This hack decreases back-to-back ClearDepth performance.
 		 */
 		if (sctx->screen->clear_db_cache_before_clear) {
-			sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
+			sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
 		}
 	}
 
@@ -727,8 +727,8 @@ static void si_clear_texture(struct pipe_context *pipe,
 
 void si_init_clear_functions(struct si_context *sctx)
 {
-	sctx->b.b.clear = si_clear;
-	sctx->b.b.clear_render_target = si_clear_render_target;
-	sctx->b.b.clear_depth_stencil = si_clear_depth_stencil;
-	sctx->b.b.clear_texture = si_clear_texture;
+	sctx->b.clear = si_clear;
+	sctx->b.clear_render_target = si_clear_render_target;
+	sctx->b.clear_depth_stencil = si_clear_depth_stencil;
+	sctx->b.clear_texture = si_clear_texture;
 }
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index c2e2d613773..dfede47605f 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -292,7 +292,7 @@ static void si_set_global_binding(
 
 static void si_initialize_compute(struct si_context *sctx)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	uint64_t bc_va;
 
 	radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2);
@@ -300,7 +300,7 @@ static void si_initialize_compute(struct si_context *sctx)
 	radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
 	radeon_emit(cs, S_00B85C_SH0_CU_EN(0xffff) | S_00B85C_SH1_CU_EN(0xffff));
 
-	if (sctx->b.chip_class >= CIK) {
+	if (sctx->chip_class >= CIK) {
 		/* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */
 		radeon_set_sh_reg_seq(cs,
 		                     R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2);
@@ -315,7 +315,7 @@ static void si_initialize_compute(struct si_context *sctx)
 	 * kernel if we want to use something other than the default value,
 	 * which is now 0x22f.
 	 */
-	if (sctx->b.chip_class <= SI) {
+	if (sctx->chip_class <= SI) {
 		/* XXX: This should be:
 		 * (number of compute units) * 4 * (waves per simd) - 1 */
 
@@ -326,7 +326,7 @@ static void si_initialize_compute(struct si_context *sctx)
 	/* Set the pointer to border colors. */
 	bc_va = sctx->border_color_buffer->gpu_address;
 
-	if (sctx->b.chip_class >= CIK) {
+	if (sctx->chip_class >= CIK) {
 		radeon_set_uconfig_reg_seq(cs, R_030E00_TA_CS_BC_BASE_ADDR, 2);
 		radeon_emit(cs, bc_va >> 8);  /* R_030E00_TA_CS_BC_BASE_ADDR */
 		radeon_emit(cs, S_030E04_ADDRESS(bc_va >> 40)); /* R_030E04_TA_CS_BC_BASE_ADDR_HI */
@@ -387,7 +387,7 @@ static bool si_switch_compute_shader(struct si_context *sctx,
 				     const amd_kernel_code_t *code_object,
 				     unsigned offset)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	struct si_shader_config inline_config = {0};
 	struct si_shader_config *config;
 	uint64_t shader_va;
@@ -414,7 +414,7 @@ static bool si_switch_compute_shader(struct si_context *sctx,
 		* allocated in the shader and 4 bytes allocated by the state
 		* tracker, then we will set LDS_SIZE to 512 bytes rather than 256.
 		*/
-		if (sctx->b.chip_class <= SI) {
+		if (sctx->chip_class <= SI) {
 			lds_blocks += align(program->local_size, 256) >> 8;
 		} else {
 			lds_blocks += align(program->local_size, 512) >> 9;
@@ -437,7 +437,7 @@ static bool si_switch_compute_shader(struct si_context *sctx,
 			    config->scratch_bytes_per_wave *
 			    sctx->scratch_waves);
 
-		radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs,
+		radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 			      shader->scratch_bo, RADEON_USAGE_READWRITE,
 			      RADEON_PRIO_SCRATCH_BUFFER);
 	}
@@ -449,7 +449,7 @@ static bool si_switch_compute_shader(struct si_context *sctx,
 	 * command. However, that would add more complexity and we're likely
 	 * to get a shader state change in that case anyway.
 	 */
-	if (sctx->b.chip_class >= CIK) {
+	if (sctx->chip_class >= CIK) {
 		cik_prefetch_TC_L2_async(sctx, &program->shader.bo->b.b,
 					 0, program->shader.bo->b.b.width0);
 	}
@@ -461,7 +461,7 @@ static bool si_switch_compute_shader(struct si_context *sctx,
 		shader_va += sizeof(amd_kernel_code_t);
 	}
 
-	radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs, shader->bo,
+	radeon_add_to_buffer_list(sctx, sctx->gfx_cs, shader->bo,
 	                          RADEON_USAGE_READ, RADEON_PRIO_SHADER_BINARY);
 
 	radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2);
@@ -491,7 +491,7 @@ static void setup_scratch_rsrc_user_sgprs(struct si_context *sctx,
 					  const amd_kernel_code_t *code_object,
 					  unsigned user_sgpr)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	uint64_t scratch_va = sctx->compute_scratch_buffer->gpu_address;
 
 	unsigned max_private_element_size = AMD_HSA_BITS_GET(
@@ -509,12 +509,12 @@ static void setup_scratch_rsrc_user_sgprs(struct si_context *sctx,
 		S_008F0C_INDEX_STRIDE(3) |
 		S_008F0C_ADD_TID_ENABLE(1);
 
-	if (sctx->b.chip_class >= GFX9) {
+	if (sctx->chip_class >= GFX9) {
 		assert(max_private_element_size == 1); /* always 4 bytes on GFX9 */
 	} else {
 		scratch_dword3 |= S_008F0C_ELEMENT_SIZE(max_private_element_size);
 
-		if (sctx->b.chip_class < VI) {
+		if (sctx->chip_class < VI) {
 			/* BUF_DATA_FORMAT is ignored, but it cannot be
 			 * BUF_DATA_FORMAT_INVALID. */
 			scratch_dword3 |=
@@ -536,7 +536,7 @@ static void si_setup_user_sgprs_co_v2(struct si_context *sctx,
 				      uint64_t kernel_args_va)
 {
 	struct si_compute *program = sctx->cs_shader_state.program;
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 
 	static const enum amd_code_property_mask_t workgroup_count_masks [] = {
 		AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X,
@@ -577,7 +577,7 @@ static void si_setup_user_sgprs_co_v2(struct si_context *sctx,
 
 		dispatch.kernarg_address = kernel_args_va;
 
-		u_upload_data(sctx->b.b.const_uploader, 0, sizeof(dispatch),
+		u_upload_data(sctx->b.const_uploader, 0, sizeof(dispatch),
                               256, &dispatch, &dispatch_offset,
                               (struct pipe_resource**)&dispatch_buf);
 
@@ -585,7 +585,7 @@ static void si_setup_user_sgprs_co_v2(struct si_context *sctx,
 			fprintf(stderr, "Error: Failed to allocate dispatch "
 					"packet.");
 		}
-		radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs, dispatch_buf,
+		radeon_add_to_buffer_list(sctx, sctx->gfx_cs, dispatch_buf,
 				  RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER);
 
 		dispatch_va = dispatch_buf->gpu_address + dispatch_offset;
@@ -625,7 +625,7 @@ static bool si_upload_compute_input(struct si_context *sctx,
 				    const amd_kernel_code_t *code_object,
 				    const struct pipe_grid_info *info)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	struct si_compute *program = sctx->cs_shader_state.program;
 	struct r600_resource *input_buffer = NULL;
 	unsigned kernel_args_size;
@@ -639,7 +639,7 @@ static bool si_upload_compute_input(struct si_context *sctx,
 	/* The extra num_work_size_bytes are for work group / work item size information */
 	kernel_args_size = program->input_size + num_work_size_bytes;
 
-	u_upload_alloc(sctx->b.b.const_uploader, 0, kernel_args_size,
+	u_upload_alloc(sctx->b.const_uploader, 0, kernel_args_size,
 		       sctx->screen->info.tcc_cache_line_size,
 		       &kernel_args_offset,
 		       (struct pipe_resource**)&input_buffer, &kernel_args_ptr);
@@ -668,7 +668,7 @@ static bool si_upload_compute_input(struct si_context *sctx,
 	}
 
 
-	radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs, input_buffer,
+	radeon_add_to_buffer_list(sctx, sctx->gfx_cs, input_buffer,
 				  RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER);
 
 	if (code_object) {
@@ -689,7 +689,7 @@ static void si_setup_tgsi_grid(struct si_context *sctx,
                                 const struct pipe_grid_info *info)
 {
 	struct si_compute *program = sctx->cs_shader_state.program;
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	unsigned grid_size_reg = R_00B900_COMPUTE_USER_DATA_0 +
 				 4 * SI_NUM_RESOURCE_SGPRS;
 	unsigned block_size_reg = grid_size_reg +
@@ -702,7 +702,7 @@ static void si_setup_tgsi_grid(struct si_context *sctx,
 			uint64_t va = base_va + info->indirect_offset;
 			int i;
 
-			radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs,
+			radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 					 (struct r600_resource *)info->indirect,
 					 RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
 
@@ -736,14 +736,14 @@ static void si_emit_dispatch_packets(struct si_context *sctx,
                                      const struct pipe_grid_info *info)
 {
 	struct si_screen *sscreen = sctx->screen;
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
-	bool render_cond_bit = sctx->b.render_cond && !sctx->b.render_cond_force_off;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
+	bool render_cond_bit = sctx->render_cond && !sctx->render_cond_force_off;
 	unsigned waves_per_threadgroup =
 		DIV_ROUND_UP(info->block[0] * info->block[1] * info->block[2], 64);
 	unsigned compute_resource_limits =
 		S_00B854_SIMD_DEST_CNTL(waves_per_threadgroup % 4 == 0);
 
-	if (sctx->b.chip_class >= CIK) {
+	if (sctx->chip_class >= CIK) {
 		unsigned num_cu_per_se = sscreen->info.num_good_compute_units /
 					 sscreen->info.max_se;
 
@@ -768,12 +768,12 @@ static void si_emit_dispatch_packets(struct si_context *sctx,
 		S_00B800_FORCE_START_AT_000(1) |
 		/* If the KMD allows it (there is a KMD hw register for it),
 		 * allow launching waves out-of-order. (same as Vulkan) */
-		S_00B800_ORDER_MODE(sctx->b.chip_class >= CIK);
+		S_00B800_ORDER_MODE(sctx->chip_class >= CIK);
 
 	if (info->indirect) {
 		uint64_t base_va = r600_resource(info->indirect)->gpu_address;
 
-		radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs,
+		radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 		                 (struct r600_resource *)info->indirect,
 		                 RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
 
@@ -813,22 +813,22 @@ static void si_launch_grid(
 	 * Only SI and certain CIK chips are affected.
 	 */
 	bool cs_regalloc_hang =
-		(sctx->b.chip_class == SI ||
-		 sctx->b.family == CHIP_BONAIRE ||
-		 sctx->b.family == CHIP_KABINI) &&
+		(sctx->chip_class == SI ||
+		 sctx->family == CHIP_BONAIRE ||
+		 sctx->family == CHIP_KABINI) &&
 		info->block[0] * info->block[1] * info->block[2] > 256;
 
 	if (cs_regalloc_hang)
-		sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+		sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
 				 SI_CONTEXT_CS_PARTIAL_FLUSH;
 
 	if (program->ir_type != PIPE_SHADER_IR_NATIVE &&
 	    program->shader.compilation_failed)
 		return;
 
-	if (sctx->b.last_num_draw_calls != sctx->b.num_draw_calls) {
+	if (sctx->last_num_draw_calls != sctx->num_draw_calls) {
 		si_update_fb_dirtiness_after_rendering(sctx);
-		sctx->b.last_num_draw_calls = sctx->b.num_draw_calls;
+		sctx->last_num_draw_calls = sctx->num_draw_calls;
 	}
 
 	si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
@@ -841,9 +841,9 @@ static void si_launch_grid(
 		si_context_add_resource_size(sctx, info->indirect);
 
 		/* Indirect buffers use TC L2 on GFX9, but not older hw. */
-		if (sctx->b.chip_class <= VI &&
+		if (sctx->chip_class <= VI &&
 		    r600_resource(info->indirect)->TC_L2_dirty) {
-			sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+			sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
 			r600_resource(info->indirect)->TC_L2_dirty = false;
 		}
 	}
@@ -853,7 +853,7 @@ static void si_launch_grid(
 	if (!sctx->cs_shader_state.initialized)
 		si_initialize_compute(sctx);
 
-	if (sctx->b.flags)
+	if (sctx->flags)
 		si_emit_cache_flush(sctx);
 
 	if (!si_switch_compute_shader(sctx, program, &program->shader,
@@ -882,7 +882,7 @@ static void si_launch_grid(
 		if (!buffer) {
 			continue;
 		}
-		radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs, buffer,
+		radeon_add_to_buffer_list(sctx, sctx->gfx_cs, buffer,
 					  RADEON_USAGE_READWRITE,
 					  RADEON_PRIO_COMPUTE_GLOBAL);
 	}
@@ -894,16 +894,16 @@ static void si_launch_grid(
 
 	if (unlikely(sctx->current_saved_cs)) {
 		si_trace_emit(sctx);
-		si_log_compute_state(sctx, sctx->b.log);
+		si_log_compute_state(sctx, sctx->log);
 	}
 
 	sctx->compute_is_busy = true;
-	sctx->b.num_compute_calls++;
+	sctx->num_compute_calls++;
 	if (sctx->cs_shader_state.uses_scratch)
-		sctx->b.num_spill_compute_calls++;
+		sctx->num_spill_compute_calls++;
 
 	if (cs_regalloc_hang)
-		sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
+		sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
 }
 
 void si_destroy_compute(struct si_compute *program)
@@ -940,11 +940,11 @@ static void si_set_compute_resources(struct pipe_context * ctx_,
 
 void si_init_compute_functions(struct si_context *sctx)
 {
-	sctx->b.b.create_compute_state = si_create_compute_state;
-	sctx->b.b.delete_compute_state = si_delete_compute_state;
-	sctx->b.b.bind_compute_state = si_bind_compute_state;
+	sctx->b.create_compute_state = si_create_compute_state;
+	sctx->b.delete_compute_state = si_delete_compute_state;
+	sctx->b.bind_compute_state = si_bind_compute_state;
 /*	 ctx->context.create_sampler_view = evergreen_compute_create_sampler_view; */
-	sctx->b.b.set_compute_resources = si_set_compute_resources;
-	sctx->b.b.set_global_binding = si_set_global_binding;
-	sctx->b.b.launch_grid = si_launch_grid;
+	sctx->b.set_compute_resources = si_set_compute_resources;
+	sctx->b.set_global_binding = si_set_global_binding;
+	sctx->b.launch_grid = si_launch_grid;
 }
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index b6274af10d9..e2d261d7e09 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -45,7 +45,7 @@
 /* The max number of bytes that can be copied per packet. */
 static inline unsigned cp_dma_max_byte_count(struct si_context *sctx)
 {
-	unsigned max = sctx->b.chip_class >= GFX9 ?
+	unsigned max = sctx->chip_class >= GFX9 ?
 			       S_414_BYTE_COUNT_GFX9(~0u) :
 			       S_414_BYTE_COUNT_GFX6(~0u);
 
@@ -62,13 +62,13 @@ static void si_emit_cp_dma(struct si_context *sctx, uint64_t dst_va,
 			   uint64_t src_va, unsigned size, unsigned flags,
 			   enum si_coherency coher)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	uint32_t header = 0, command = 0;
 
 	assert(size);
 	assert(size <= cp_dma_max_byte_count(sctx));
 
-	if (sctx->b.chip_class >= GFX9)
+	if (sctx->chip_class >= GFX9)
 		command |= S_414_BYTE_COUNT_GFX9(size);
 	else
 		command |= S_414_BYTE_COUNT_GFX6(size);
@@ -77,7 +77,7 @@ static void si_emit_cp_dma(struct si_context *sctx, uint64_t dst_va,
 	if (flags & CP_DMA_SYNC)
 		header |= S_411_CP_SYNC(1);
 	else {
-		if (sctx->b.chip_class >= GFX9)
+		if (sctx->chip_class >= GFX9)
 			command |= S_414_DISABLE_WR_CONFIRM_GFX9(1);
 		else
 			command |= S_414_DISABLE_WR_CONFIRM_GFX6(1);
@@ -87,7 +87,7 @@ static void si_emit_cp_dma(struct si_context *sctx, uint64_t dst_va,
 		command |= S_414_RAW_WAIT(1);
 
 	/* Src and dst flags. */
-	if (sctx->b.chip_class >= GFX9 && !(flags & CP_DMA_CLEAR) &&
+	if (sctx->chip_class >= GFX9 && !(flags & CP_DMA_CLEAR) &&
 	    src_va == dst_va)
 		header |= S_411_DSL_SEL(V_411_NOWHERE); /* prefetch only */
 	else if (flags & CP_DMA_USE_L2)
@@ -98,7 +98,7 @@ static void si_emit_cp_dma(struct si_context *sctx, uint64_t dst_va,
 	else if (flags & CP_DMA_USE_L2)
 		header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
 
-	if (sctx->b.chip_class >= CIK) {
+	if (sctx->chip_class >= CIK) {
 		radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
 		radeon_emit(cs, header);
 		radeon_emit(cs, src_va);	/* SRC_ADDR_LO [31:0] */
@@ -137,7 +137,7 @@ static unsigned get_flush_flags(struct si_context *sctx, enum si_coherency coher
 	case SI_COHERENCY_SHADER:
 		return SI_CONTEXT_INV_SMEM_L1 |
 		       SI_CONTEXT_INV_VMEM_L1 |
-		       (sctx->b.chip_class == SI ? SI_CONTEXT_INV_GLOBAL_L2 : 0);
+		       (sctx->chip_class == SI ? SI_CONTEXT_INV_GLOBAL_L2 : 0);
 	case SI_COHERENCY_CB_META:
 		return SI_CONTEXT_FLUSH_AND_INV_CB;
 	}
@@ -145,8 +145,8 @@ static unsigned get_flush_flags(struct si_context *sctx, enum si_coherency coher
 
 static unsigned get_tc_l2_flag(struct si_context *sctx, enum si_coherency coher)
 {
-	if ((sctx->b.chip_class >= GFX9 && coher == SI_COHERENCY_CB_META) ||
-	    (sctx->b.chip_class >= CIK && coher == SI_COHERENCY_SHADER))
+	if ((sctx->chip_class >= GFX9 && coher == SI_COHERENCY_CB_META) ||
+	    (sctx->chip_class >= CIK && coher == SI_COHERENCY_SHADER))
 		return CP_DMA_USE_L2;
 
 	return 0;
@@ -175,11 +175,11 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst
 
 	/* This must be done after need_cs_space. */
 	if (!(user_flags & SI_CPDMA_SKIP_BO_LIST_UPDATE)) {
-		radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs,
+		radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 					  (struct r600_resource*)dst,
 					  RADEON_USAGE_WRITE, RADEON_PRIO_CP_DMA);
 		if (src)
-			radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs,
+			radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 						  (struct r600_resource*)src,
 						  RADEON_USAGE_READ, RADEON_PRIO_CP_DMA);
 	}
@@ -187,7 +187,7 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst
 	/* Flush the caches for the first copy only.
 	 * Also wait for the previous CP DMA operations.
 	 */
-	if (!(user_flags & SI_CPDMA_SKIP_GFX_SYNC) && sctx->b.flags)
+	if (!(user_flags & SI_CPDMA_SKIP_GFX_SYNC) && sctx->flags)
 		si_emit_cache_flush(sctx);
 
 	if (!(user_flags & SI_CPDMA_SKIP_SYNC_BEFORE) && *is_first)
@@ -207,7 +207,7 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
 		     uint64_t offset, uint64_t size, unsigned value,
 		     enum si_coherency coher)
 {
-	struct radeon_winsys *ws = sctx->b.ws;
+	struct radeon_winsys *ws = sctx->ws;
 	struct r600_resource *rdst = r600_resource(dst);
 	unsigned tc_l2_flag = get_tc_l2_flag(sctx, coher);
 	unsigned flush_flags = get_flush_flags(sctx, coher);
@@ -227,7 +227,7 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
 
 	/* dma_clear_buffer can use clear_buffer on failure. Make sure that
 	 * doesn't happen. We don't want an infinite recursion: */
-	if (sctx->b.dma_cs &&
+	if (sctx->dma_cs &&
 	    !(dst->flags & PIPE_RESOURCE_FLAG_SPARSE) &&
 	    (offset % 4 == 0) &&
 	    /* CP DMA is very slow. Always use SDMA for big clears. This
@@ -239,9 +239,9 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
 	      * si_emit_framebuffer_state (in a draw call) adds them.
 	      * For example, DeusEx:MD has 21 buffer clears per frame and all
 	      * of them are moved to SDMA thanks to this. */
-	     !ws->cs_is_buffer_referenced(sctx->b.gfx_cs, rdst->buf,
+	     !ws->cs_is_buffer_referenced(sctx->gfx_cs, rdst->buf,
 				          RADEON_USAGE_READWRITE))) {
-		sctx->b.dma_clear_buffer(sctx, dst, offset, dma_clear_size, value);
+		sctx->dma_clear_buffer(sctx, dst, offset, dma_clear_size, value);
 
 		offset += dma_clear_size;
 		size -= dma_clear_size;
@@ -252,7 +252,7 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
 		size -= dma_clear_size;
 
 		/* Flush the caches. */
-		sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+		sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
 				 SI_CONTEXT_CS_PARTIAL_FLUSH | flush_flags;
 
 		while (dma_clear_size) {
@@ -274,7 +274,7 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
 
 		/* If it's not a framebuffer fast clear... */
 		if (coher == SI_COHERENCY_SHADER)
-			sctx->b.num_cp_dma_calls++;
+			sctx->num_cp_dma_calls++;
 	}
 
 	if (size) {
@@ -285,7 +285,7 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
 		assert(dst->target == PIPE_BUFFER);
 		assert(size < 4);
 
-		pipe_buffer_write(&sctx->b.b, dst, offset, size, &value);
+		pipe_buffer_write(&sctx->b, dst, offset, size, &value);
 	}
 }
 
@@ -422,8 +422,8 @@ void si_copy_buffer(struct si_context *sctx,
 	src_offset += r600_resource(src)->gpu_address;
 
 	/* The workarounds aren't needed on Fiji and beyond. */
-	if (sctx->b.family <= CHIP_CARRIZO ||
-	    sctx->b.family == CHIP_STONEY) {
+	if (sctx->family <= CHIP_CARRIZO ||
+	    sctx->family == CHIP_STONEY) {
 		/* If the size is not aligned, we must add a dummy copy at the end
 		 * just to align the internal counter. Otherwise, the DMA engine
 		 * would slow down by an order of magnitude for following copies.
@@ -445,7 +445,7 @@ void si_copy_buffer(struct si_context *sctx,
 
 	/* Flush the caches. */
 	if (!(user_flags & SI_CPDMA_SKIP_GFX_SYNC))
-		sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+		sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
 				 SI_CONTEXT_CS_PARTIAL_FLUSH | flush_flags;
 
 	/* This is the main part doing the copying. Src is always aligned. */
@@ -490,13 +490,13 @@ void si_copy_buffer(struct si_context *sctx,
 
 	/* If it's not a prefetch... */
 	if (dst_offset != src_offset)
-		sctx->b.num_cp_dma_calls++;
+		sctx->num_cp_dma_calls++;
 }
 
 void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf,
 			      uint64_t offset, unsigned size)
 {
-	assert(sctx->b.chip_class >= CIK);
+	assert(sctx->chip_class >= CIK);
 
 	si_copy_buffer(sctx, buf, buf, offset, offset, size, SI_CPDMA_SKIP_ALL);
 }
@@ -523,7 +523,7 @@ static void cik_prefetch_VBO_descriptors(struct si_context *sctx)
 void cik_emit_prefetch_L2(struct si_context *sctx)
 {
 	/* Prefetch shaders and VBO descriptors to TC L2. */
-	if (sctx->b.chip_class >= GFX9) {
+	if (sctx->chip_class >= GFX9) {
 		/* Choose the right spot for the VBO prefetch. */
 		if (sctx->tes_shader.cso) {
 			if (sctx->prefetch_L2_mask & SI_PREFETCH_HS)
@@ -588,5 +588,5 @@ void cik_emit_prefetch_L2(struct si_context *sctx)
 
 void si_init_cp_dma_functions(struct si_context *sctx)
 {
-	sctx->b.b.clear_buffer = si_pipe_clear_buffer;
+	sctx->b.clear_buffer = si_pipe_clear_buffer;
 }
diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index f2c32e1484e..00e0722c821 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -284,11 +284,11 @@ file_error:
 static void si_dump_mmapped_reg(struct si_context *sctx, FILE *f,
 				unsigned offset)
 {
-	struct radeon_winsys *ws = sctx->b.ws;
+	struct radeon_winsys *ws = sctx->ws;
 	uint32_t value;
 
 	if (ws->read_registers(ws, offset, 1, &value))
-		ac_dump_reg(f, sctx->b.chip_class, offset, value, ~0);
+		ac_dump_reg(f, sctx->chip_class, offset, value, ~0);
 }
 
 static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
@@ -314,7 +314,7 @@ static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
 	si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3);
 	si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG);
 	si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG);
-	if (sctx->b.chip_class <= VI) {
+	if (sctx->chip_class <= VI) {
 		si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
 		si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
 		si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
@@ -399,7 +399,7 @@ static void si_log_chunk_type_cs_print(void *data, FILE *f)
 	 * waited for the context, so this buffer should be idle.
 	 * If the GPU is hung, there is no point in waiting for it.
 	 */
-	uint32_t *map = ctx->b.ws->buffer_map(scs->trace_buf->buf,
+	uint32_t *map = ctx->ws->buffer_map(scs->trace_buf->buf,
 					      NULL,
 					      PIPE_TRANSFER_UNSYNCHRONIZED |
 					      PIPE_TRANSFER_READ);
@@ -410,25 +410,25 @@ static void si_log_chunk_type_cs_print(void *data, FILE *f)
 		if (chunk->gfx_begin == 0) {
 			if (ctx->init_config)
 				ac_parse_ib(f, ctx->init_config->pm4, ctx->init_config->ndw,
-					    NULL, 0, "IB2: Init config", ctx->b.chip_class,
+					    NULL, 0, "IB2: Init config", ctx->chip_class,
 					    NULL, NULL);
 
 			if (ctx->init_config_gs_rings)
 				ac_parse_ib(f, ctx->init_config_gs_rings->pm4,
 					    ctx->init_config_gs_rings->ndw,
-					    NULL, 0, "IB2: Init GS rings", ctx->b.chip_class,
+					    NULL, 0, "IB2: Init GS rings", ctx->chip_class,
 					    NULL, NULL);
 		}
 
 		if (scs->flushed) {
 			ac_parse_ib(f, scs->gfx.ib + chunk->gfx_begin,
 				    chunk->gfx_end - chunk->gfx_begin,
-				    &last_trace_id, map ? 1 : 0, "IB", ctx->b.chip_class,
+				    &last_trace_id, map ? 1 : 0, "IB", ctx->chip_class,
 				    NULL, NULL);
 		} else {
-			si_parse_current_ib(f, ctx->b.gfx_cs, chunk->gfx_begin,
+			si_parse_current_ib(f, ctx->gfx_cs, chunk->gfx_begin,
 					    chunk->gfx_end, &last_trace_id, map ? 1 : 0,
-					    "IB", ctx->b.chip_class);
+					    "IB", ctx->chip_class);
 		}
 	}
 
@@ -451,7 +451,7 @@ static void si_log_cs(struct si_context *ctx, struct u_log_context *log,
 	assert(ctx->current_saved_cs);
 
 	struct si_saved_cs *scs = ctx->current_saved_cs;
-	unsigned gfx_cur = ctx->b.gfx_cs->prev_dw + ctx->b.gfx_cs->current.cdw;
+	unsigned gfx_cur = ctx->gfx_cs->prev_dw + ctx->gfx_cs->current.cdw;
 
 	if (!dump_bo_list &&
 	    gfx_cur == scs->gfx_last_dw)
@@ -478,10 +478,10 @@ void si_auto_log_cs(void *data, struct u_log_context *log)
 
 void si_log_hw_flush(struct si_context *sctx)
 {
-	if (!sctx->b.log)
+	if (!sctx->log)
 		return;
 
-	si_log_cs(sctx, sctx->b.log, true);
+	si_log_cs(sctx, sctx->log, true);
 }
 
 static const char *priority_to_string(enum radeon_bo_priority priority)
@@ -1044,8 +1044,8 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
 {
 	struct si_context *sctx = (struct si_context*)ctx;
 
-	if (sctx->b.log)
-		u_log_flush(sctx->b.log);
+	if (sctx->log)
+		u_log_flush(sctx->log);
 
 	if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
 		si_dump_debug_registers(sctx, f);
@@ -1112,12 +1112,12 @@ static void si_dump_dma(struct si_context *sctx,
 void si_check_vm_faults(struct si_context *sctx,
 			struct radeon_saved_cs *saved, enum ring_type ring)
 {
-	struct pipe_screen *screen = sctx->b.b.screen;
+	struct pipe_screen *screen = sctx->b.screen;
 	FILE *f;
 	uint64_t addr;
 	char cmd_line[4096];
 
-	if (!ac_vm_fault_occured(sctx->b.chip_class,
+	if (!ac_vm_fault_occured(sctx->chip_class,
 				 &sctx->dmesg_timestamp, &addr))
 		return;
 
@@ -1166,12 +1166,12 @@ void si_check_vm_faults(struct si_context *sctx,
 
 void si_init_debug_functions(struct si_context *sctx)
 {
-	sctx->b.b.dump_debug_state = si_dump_debug_state;
+	sctx->b.dump_debug_state = si_dump_debug_state;
 
 	/* Set the initial dmesg timestamp for this context, so that
 	 * only new messages will be checked for VM faults.
 	 */
 	if (sctx->screen->debug_flags & DBG(CHECK_VM))
-		ac_vm_fault_occured(sctx->b.chip_class,
+		ac_vm_fault_occured(sctx->chip_class,
 				    &sctx->dmesg_timestamp, NULL);
 }
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 223724b29b8..4beeb2db6c2 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -168,7 +168,7 @@ static bool si_upload_descriptors(struct si_context *sctx,
 
 	uint32_t *ptr;
 	unsigned buffer_offset;
-	u_upload_alloc(sctx->b.b.const_uploader, first_slot_offset, upload_size,
+	u_upload_alloc(sctx->b.const_uploader, first_slot_offset, upload_size,
 		       si_optimal_tcc_alignment(sctx, upload_size),
 		       &buffer_offset, (struct pipe_resource**)&desc->buffer,
 		       (void**)&ptr);
@@ -181,7 +181,7 @@ static bool si_upload_descriptors(struct si_context *sctx,
 				upload_size);
 	desc->gpu_list = ptr - first_slot_offset / 4;
 
-	radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs, desc->buffer,
+	radeon_add_to_buffer_list(sctx, sctx->gfx_cs, desc->buffer,
                             RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
 
 	/* The shader pointer should point to slot 0. */
@@ -202,7 +202,7 @@ si_descriptors_begin_new_cs(struct si_context *sctx, struct si_descriptors *desc
 	if (!desc->buffer)
 		return;
 
-	radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs, desc->buffer,
+	radeon_add_to_buffer_list(sctx, sctx->gfx_cs, desc->buffer,
 				  RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
 }
 
@@ -733,7 +733,7 @@ static void si_set_shader_image_desc(struct si_context *ctx,
 				si_decompress_dcc(ctx, tex);
 		}
 
-		if (ctx->b.chip_class >= GFX9) {
+		if (ctx->chip_class >= GFX9) {
 			/* Always set the base address. The swizzle modes don't
 			 * allow setting mipmap level offsets as the base.
 			 */
@@ -926,7 +926,7 @@ void si_update_ps_colorbuf0_slot(struct si_context *sctx)
 		si_set_shader_image_desc(sctx, &view, true, desc, desc + 8);
 
 		pipe_resource_reference(&buffers->buffers[slot], &tex->resource.b.b);
-		radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs,
+		radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 					  &tex->resource, RADEON_USAGE_READ,
 					  RADEON_PRIO_SHADER_RW_IMAGE);
 		buffers->enabled_mask |= 1u << slot;
@@ -1031,7 +1031,7 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
 	while (mask) {
 		int i = u_bit_scan(&mask);
 
-		radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs,
+		radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 			r600_resource(buffers->buffers[i]),
 			i < SI_NUM_SHADER_BUFFERS ? buffers->shader_usage :
 						    buffers->shader_usage_constbuf,
@@ -1076,14 +1076,14 @@ static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
 		if (!sctx->vertex_buffer[vb].buffer.resource)
 			continue;
 
-		radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs,
+		radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 				      (struct r600_resource*)sctx->vertex_buffer[vb].buffer.resource,
 				      RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
 	}
 
 	if (!sctx->vb_descriptors_buffer)
 		return;
-	radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs,
+	radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 				  sctx->vb_descriptors_buffer, RADEON_USAGE_READ,
 				  RADEON_PRIO_DESCRIPTORS);
 }
@@ -1111,7 +1111,7 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
 	 * directly through a staging buffer and don't go through
 	 * the fine-grained upload path.
 	 */
-	u_upload_alloc(sctx->b.b.const_uploader, 0,
+	u_upload_alloc(sctx->b.const_uploader, 0,
 		       desc_list_byte_size,
 		       si_optimal_tcc_alignment(sctx, desc_list_byte_size),
 		       &sctx->vb_descriptors_offset,
@@ -1124,7 +1124,7 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
 	}
 
 	sctx->vb_descriptors_gpu_list = ptr;
-	radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs,
+	radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 				  sctx->vb_descriptors_buffer, RADEON_USAGE_READ,
 				  RADEON_PRIO_DESCRIPTORS);
 
@@ -1148,7 +1148,7 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
 		uint64_t va = rbuffer->gpu_address + offset;
 
 		int64_t num_records = (int64_t)rbuffer->b.b.width0 - offset;
-		if (sctx->b.chip_class != VI && vb->stride) {
+		if (sctx->chip_class != VI && vb->stride) {
 			/* Round up by rounding down and adding 1 */
 			num_records = (num_records - velems->format_size[i]) /
 				      vb->stride + 1;
@@ -1162,7 +1162,7 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
 		desc[3] = velems->rsrc_word3[i];
 
 		if (first_vb_use_mask & (1 << i)) {
-			radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs,
+			radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 					      (struct r600_resource*)vb->buffer.resource,
 					      RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
 		}
@@ -1200,7 +1200,7 @@ void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuf
 {
 	void *tmp;
 
-	u_upload_alloc(sctx->b.b.const_uploader, 0, size,
+	u_upload_alloc(sctx->b.const_uploader, 0, size,
 		       si_optimal_tcc_alignment(sctx, size),
 		       const_offset,
 		       (struct pipe_resource**)rbuffer, &tmp);
@@ -1219,7 +1219,7 @@ static void si_set_constant_buffer(struct si_context *sctx,
 
 	/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
 	 * with a NULL buffer). We need to use a dummy buffer instead. */
-	if (sctx->b.chip_class == CIK &&
+	if (sctx->chip_class == CIK &&
 	    (!input || (!input->buffer && !input->user_buffer)))
 		input = &sctx->null_const_buf;
 
@@ -1448,7 +1448,7 @@ void si_set_ring_buffer(struct si_context *sctx, uint slot,
 			break;
 		}
 
-		if (sctx->b.chip_class >= VI && stride)
+		if (sctx->chip_class >= VI && stride)
 			num_records *= stride;
 
 		/* Set the descriptor. */
@@ -1467,13 +1467,13 @@ void si_set_ring_buffer(struct si_context *sctx, uint slot,
 			  S_008F0C_INDEX_STRIDE(index_stride) |
 			  S_008F0C_ADD_TID_ENABLE(add_tid);
 
-		if (sctx->b.chip_class >= GFX9)
+		if (sctx->chip_class >= GFX9)
 			assert(!swizzle || element_size == 1); /* always 4 bytes on GFX9 */
 		else
 			desc[3] |= S_008F0C_ELEMENT_SIZE(element_size);
 
 		pipe_resource_reference(&buffers->buffers[slot], buffer);
-		radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs,
+		radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 				      (struct r600_resource*)buffer,
 				      buffers->shader_usage, buffers->priority);
 		buffers->enabled_mask |= 1u << slot;
@@ -1809,7 +1809,7 @@ static void si_upload_bindless_descriptor(struct si_context *sctx,
 					  unsigned num_dwords)
 {
 	struct si_descriptors *desc = &sctx->bindless_descriptors;
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	unsigned desc_slot_offset = desc_slot * 16;
 	uint32_t *data;
 	uint64_t va;
@@ -1834,7 +1834,7 @@ static void si_upload_bindless_descriptors(struct si_context *sctx)
 	/* Wait for graphics/compute to be idle before updating the resident
 	 * descriptors directly in memory, in case the GPU is using them.
 	 */
-	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+	sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
 			 SI_CONTEXT_CS_PARTIAL_FLUSH;
 	si_emit_cache_flush(sctx);
 
@@ -1861,7 +1861,7 @@ static void si_upload_bindless_descriptors(struct si_context *sctx)
 	}
 
 	/* Invalidate L1 because it doesn't know that L2 changed. */
-	sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1;
+	sctx->flags |= SI_CONTEXT_INV_SMEM_L1;
 	si_emit_cache_flush(sctx);
 
 	sctx->bindless_descriptors_dirty = false;
@@ -2027,7 +2027,7 @@ void si_shader_change_notify(struct si_context *sctx)
 {
 	/* VS can be bound as VS, ES, or LS. */
 	if (sctx->tes_shader.cso) {
-		if (sctx->b.chip_class >= GFX9) {
+		if (sctx->chip_class >= GFX9) {
 			si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
 					      R_00B430_SPI_SHADER_USER_DATA_LS_0);
 		} else {
@@ -2079,7 +2079,7 @@ static void si_emit_shader_pointer(struct si_context *sctx,
 				   struct si_descriptors *desc,
 				   unsigned sh_base)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	unsigned sh_offset = sh_base + desc->shader_userdata_offset;
 
 	si_emit_shader_pointer_head(cs, sh_offset, 1);
@@ -2093,7 +2093,7 @@ static void si_emit_consecutive_shader_pointers(struct si_context *sctx,
 	if (!sh_base)
 		return;
 
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	unsigned mask = sctx->shader_pointers_dirty & pointer_mask;
 
 	while (mask) {
@@ -2117,7 +2117,7 @@ static void si_emit_disjoint_shader_pointers(struct si_context *sctx,
 	if (!sh_base)
 		return;
 
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	unsigned mask = sctx->shader_pointers_dirty & pointer_mask;
 
 	while (mask) {
@@ -2132,7 +2132,7 @@ static void si_emit_disjoint_shader_pointers(struct si_context *sctx,
 static void si_emit_global_shader_pointers(struct si_context *sctx,
 					   struct si_descriptors *descs)
 {
-	if (sctx->b.chip_class == GFX9) {
+	if (sctx->chip_class == GFX9) {
 		/* Broadcast it to all shader stages. */
 		si_emit_shader_pointer(sctx, descs,
 				       R_00B530_SPI_SHADER_USER_DATA_COMMON_0);
@@ -2169,7 +2169,7 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx,
 					    sh_base[PIPE_SHADER_TESS_EVAL]);
 	si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(FRAGMENT),
 					    sh_base[PIPE_SHADER_FRAGMENT]);
-	if (HAVE_32BIT_POINTERS || sctx->b.chip_class <= VI) {
+	if (HAVE_32BIT_POINTERS || sctx->chip_class <= VI) {
 		si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL),
 						    sh_base[PIPE_SHADER_TESS_CTRL]);
 		si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
@@ -2185,13 +2185,13 @@ void si_emit_graphics_shader_pointers(struct si_context *sctx,
 		~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);
 
 	if (sctx->vertex_buffer_pointer_dirty) {
-		struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+		struct radeon_winsys_cs *cs = sctx->gfx_cs;
 
 		/* Find the location of the VB descriptor pointer. */
 		/* TODO: In the future, the pointer will be packed in unused
 		 *       bits of the first 2 VB descriptors. */
 		unsigned sh_dw_offset = SI_VS_NUM_USER_SGPR;
-		if (sctx->b.chip_class >= GFX9) {
+		if (sctx->chip_class >= GFX9) {
 			if (sctx->tes_shader.cso)
 				sh_dw_offset = GFX9_TCS_NUM_USER_SGPR;
 			else if (sctx->gs_shader.cso)
@@ -2660,7 +2660,7 @@ void si_all_resident_buffers_begin_new_cs(struct si_context *sctx)
 					   false, false);
 	}
 
-	sctx->b.num_resident_handles += num_resident_tex_handles +
+	sctx->num_resident_handles += num_resident_tex_handles +
 					num_resident_img_handles;
 }
 
@@ -2675,7 +2675,7 @@ void si_init_all_descriptors(struct si_context *sctx)
 #endif
 
 	for (i = 0; i < SI_NUM_SHADERS; i++) {
-		bool is_2nd = sctx->b.chip_class >= GFX9 &&
+		bool is_2nd = sctx->chip_class >= GFX9 &&
 				     (i == PIPE_SHADER_TESS_CTRL ||
 				      i == PIPE_SHADER_GEOMETRY);
 		unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS;
@@ -2748,18 +2748,18 @@ void si_init_all_descriptors(struct si_context *sctx)
 	sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
 
 	/* Set pipe_context functions. */
-	sctx->b.b.bind_sampler_states = si_bind_sampler_states;
-	sctx->b.b.set_shader_images = si_set_shader_images;
-	sctx->b.b.set_constant_buffer = si_pipe_set_constant_buffer;
-	sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
-	sctx->b.b.set_shader_buffers = si_set_shader_buffers;
-	sctx->b.b.set_sampler_views = si_set_sampler_views;
-	sctx->b.b.create_texture_handle = si_create_texture_handle;
-	sctx->b.b.delete_texture_handle = si_delete_texture_handle;
-	sctx->b.b.make_texture_handle_resident = si_make_texture_handle_resident;
-	sctx->b.b.create_image_handle = si_create_image_handle;
-	sctx->b.b.delete_image_handle = si_delete_image_handle;
-	sctx->b.b.make_image_handle_resident = si_make_image_handle_resident;
+	sctx->b.bind_sampler_states = si_bind_sampler_states;
+	sctx->b.set_shader_images = si_set_shader_images;
+	sctx->b.set_constant_buffer = si_pipe_set_constant_buffer;
+	sctx->b.set_polygon_stipple = si_set_polygon_stipple;
+	sctx->b.set_shader_buffers = si_set_shader_buffers;
+	sctx->b.set_sampler_views = si_set_sampler_views;
+	sctx->b.create_texture_handle = si_create_texture_handle;
+	sctx->b.delete_texture_handle = si_delete_texture_handle;
+	sctx->b.make_texture_handle_resident = si_make_texture_handle_resident;
+	sctx->b.create_image_handle = si_create_image_handle;
+	sctx->b.delete_image_handle = si_delete_image_handle;
+	sctx->b.make_image_handle_resident = si_make_image_handle_resident;
 
 	/* Shader user data. */
 	si_init_atom(sctx, &sctx->shader_pointers.atom, &sctx->atoms.s.shader_pointers,
@@ -2768,7 +2768,7 @@ void si_init_all_descriptors(struct si_context *sctx)
 	/* Set default and immutable mappings. */
 	si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
 
-	if (sctx->b.chip_class >= GFX9) {
+	if (sctx->chip_class >= GFX9) {
 		si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
 				      R_00B430_SPI_SHADER_USER_DATA_LS_0);
 		si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c
index 106fbd544cf..e3b5bb46208 100644
--- a/src/gallium/drivers/radeonsi/si_dma.c
+++ b/src/gallium/drivers/radeonsi/si_dma.c
@@ -35,7 +35,7 @@ static void si_dma_copy_buffer(struct si_context *ctx,
 				uint64_t src_offset,
 				uint64_t size)
 {
-	struct radeon_winsys_cs *cs = ctx->b.dma_cs;
+	struct radeon_winsys_cs *cs = ctx->dma_cs;
 	unsigned i, ncopy, count, max_size, sub_cmd, shift;
 	struct r600_resource *rdst = (struct r600_resource*)dst;
 	struct r600_resource *rsrc = (struct r600_resource*)src;
@@ -83,13 +83,13 @@ static void si_dma_clear_buffer(struct si_context *sctx,
 				uint64_t size,
 				unsigned clear_value)
 {
-	struct radeon_winsys_cs *cs = sctx->b.dma_cs;
+	struct radeon_winsys_cs *cs = sctx->dma_cs;
 	unsigned i, ncopy, csize;
 	struct r600_resource *rdst = r600_resource(dst);
 
 	if (!cs || offset % 4 != 0 || size % 4 != 0 ||
 	    dst->flags & PIPE_RESOURCE_FLAG_SPARSE) {
-		sctx->b.b.clear_buffer(&sctx->b.b, dst, offset, size, &clear_value, 4);
+		sctx->b.clear_buffer(&sctx->b, dst, offset, size, &clear_value, 4);
 		return;
 	}
 
@@ -131,7 +131,7 @@ static void si_dma_copy_tile(struct si_context *ctx,
 			     unsigned pitch,
 			     unsigned bpp)
 {
-	struct radeon_winsys_cs *cs = ctx->b.dma_cs;
+	struct radeon_winsys_cs *cs = ctx->dma_cs;
 	struct r600_texture *rsrc = (struct r600_texture*)src;
 	struct r600_texture *rdst = (struct r600_texture*)dst;
 	unsigned dst_mode = rdst->surface.u.legacy.level[dst_level].mode;
@@ -232,7 +232,7 @@ static void si_dma_copy(struct pipe_context *ctx,
 	unsigned src_x, src_y;
 	unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
 
-	if (sctx->b.dma_cs == NULL ||
+	if (sctx->dma_cs == NULL ||
 	    src->flags & PIPE_RESOURCE_FLAG_SPARSE ||
 	    dst->flags & PIPE_RESOURCE_FLAG_SPARSE) {
 		goto fallback;
@@ -324,6 +324,6 @@ fallback:
 
 void si_init_dma_functions(struct si_context *sctx)
 {
-	sctx->b.dma_copy = si_dma_copy;
-	sctx->b.dma_clear_buffer = si_dma_clear_buffer;
+	sctx->dma_copy = si_dma_copy;
+	sctx->dma_clear_buffer = si_dma_clear_buffer;
 }
diff --git a/src/gallium/drivers/radeonsi/si_dma_cs.c b/src/gallium/drivers/radeonsi/si_dma_cs.c
index 10d6d62c52e..7af7c5623b7 100644
--- a/src/gallium/drivers/radeonsi/si_dma_cs.c
+++ b/src/gallium/drivers/radeonsi/si_dma_cs.c
@@ -26,10 +26,10 @@
 
 static void si_dma_emit_wait_idle(struct si_context *sctx)
 {
-	struct radeon_winsys_cs *cs = sctx->b.dma_cs;
+	struct radeon_winsys_cs *cs = sctx->dma_cs;
 
 	/* NOP waits for idle on Evergreen and later. */
-	if (sctx->b.chip_class >= CIK)
+	if (sctx->chip_class >= CIK)
 		radeon_emit(cs, 0x00000000); /* NOP */
 	else
 		radeon_emit(cs, 0xf0000000); /* NOP */
@@ -38,8 +38,8 @@ static void si_dma_emit_wait_idle(struct si_context *sctx)
 void si_need_dma_space(struct si_context *ctx, unsigned num_dw,
 		       struct r600_resource *dst, struct r600_resource *src)
 {
-	uint64_t vram = ctx->b.dma_cs->used_vram;
-	uint64_t gtt = ctx->b.dma_cs->used_gart;
+	uint64_t vram = ctx->dma_cs->used_vram;
+	uint64_t gtt = ctx->dma_cs->used_gart;
 
 	if (dst) {
 		vram += dst->vram_usage;
@@ -51,12 +51,12 @@ void si_need_dma_space(struct si_context *ctx, unsigned num_dw,
 	}
 
 	/* Flush the GFX IB if DMA depends on it. */
-	if (radeon_emitted(ctx->b.gfx_cs, ctx->b.initial_gfx_cs_size) &&
+	if (radeon_emitted(ctx->gfx_cs, ctx->initial_gfx_cs_size) &&
 	    ((dst &&
-	      ctx->b.ws->cs_is_buffer_referenced(ctx->b.gfx_cs, dst->buf,
+	      ctx->ws->cs_is_buffer_referenced(ctx->gfx_cs, dst->buf,
 						 RADEON_USAGE_READWRITE)) ||
 	     (src &&
-	      ctx->b.ws->cs_is_buffer_referenced(ctx->b.gfx_cs, src->buf,
+	      ctx->ws->cs_is_buffer_referenced(ctx->gfx_cs, src->buf,
 						 RADEON_USAGE_WRITE))))
 		si_flush_gfx_cs(ctx, PIPE_FLUSH_ASYNC, NULL);
 
@@ -73,64 +73,64 @@ void si_need_dma_space(struct si_context *ctx, unsigned num_dw,
 	 * engine busy while uploads are being submitted.
 	 */
 	num_dw++; /* for emit_wait_idle below */
-	if (!ctx->b.ws->cs_check_space(ctx->b.dma_cs, num_dw) ||
-	    ctx->b.dma_cs->used_vram + ctx->b.dma_cs->used_gart > 64 * 1024 * 1024 ||
-	    !radeon_cs_memory_below_limit(ctx->screen, ctx->b.dma_cs, vram, gtt)) {
+	if (!ctx->ws->cs_check_space(ctx->dma_cs, num_dw) ||
+	    ctx->dma_cs->used_vram + ctx->dma_cs->used_gart > 64 * 1024 * 1024 ||
+	    !radeon_cs_memory_below_limit(ctx->screen, ctx->dma_cs, vram, gtt)) {
 		si_flush_dma_cs(ctx, PIPE_FLUSH_ASYNC, NULL);
-		assert((num_dw + ctx->b.dma_cs->current.cdw) <= ctx->b.dma_cs->current.max_dw);
+		assert((num_dw + ctx->dma_cs->current.cdw) <= ctx->dma_cs->current.max_dw);
 	}
 
 	/* Wait for idle if either buffer has been used in the IB before to
 	 * prevent read-after-write hazards.
 	 */
 	if ((dst &&
-	     ctx->b.ws->cs_is_buffer_referenced(ctx->b.dma_cs, dst->buf,
+	     ctx->ws->cs_is_buffer_referenced(ctx->dma_cs, dst->buf,
 						RADEON_USAGE_READWRITE)) ||
 	    (src &&
-	     ctx->b.ws->cs_is_buffer_referenced(ctx->b.dma_cs, src->buf,
+	     ctx->ws->cs_is_buffer_referenced(ctx->dma_cs, src->buf,
 						RADEON_USAGE_WRITE)))
 		si_dma_emit_wait_idle(ctx);
 
 	if (dst) {
-		radeon_add_to_buffer_list(ctx, ctx->b.dma_cs, dst,
+		radeon_add_to_buffer_list(ctx, ctx->dma_cs, dst,
 					  RADEON_USAGE_WRITE,
 					  RADEON_PRIO_SDMA_BUFFER);
 	}
 	if (src) {
-		radeon_add_to_buffer_list(ctx, ctx->b.dma_cs, src,
+		radeon_add_to_buffer_list(ctx, ctx->dma_cs, src,
 					  RADEON_USAGE_READ,
 					  RADEON_PRIO_SDMA_BUFFER);
 	}
 
 	/* this function is called before all DMA calls, so increment this. */
-	ctx->b.num_dma_calls++;
+	ctx->num_dma_calls++;
 }
 
 void si_flush_dma_cs(struct si_context *ctx, unsigned flags,
 		     struct pipe_fence_handle **fence)
 {
-	struct radeon_winsys_cs *cs = ctx->b.dma_cs;
+	struct radeon_winsys_cs *cs = ctx->dma_cs;
 	struct radeon_saved_cs saved;
 	bool check_vm = (ctx->screen->debug_flags & DBG(CHECK_VM)) != 0;
 
 	if (!radeon_emitted(cs, 0)) {
 		if (fence)
-			ctx->b.ws->fence_reference(fence, ctx->b.last_sdma_fence);
+			ctx->ws->fence_reference(fence, ctx->last_sdma_fence);
 		return;
 	}
 
 	if (check_vm)
-		si_save_cs(ctx->b.ws, cs, &saved, true);
+		si_save_cs(ctx->ws, cs, &saved, true);
 
-	ctx->b.ws->cs_flush(cs, flags, &ctx->b.last_sdma_fence);
+	ctx->ws->cs_flush(cs, flags, &ctx->last_sdma_fence);
 	if (fence)
-		ctx->b.ws->fence_reference(fence, ctx->b.last_sdma_fence);
+		ctx->ws->fence_reference(fence, ctx->last_sdma_fence);
 
 	if (check_vm) {
 		/* Use conservative timeout 800ms, after which we won't wait any
 		 * longer and assume the GPU is hung.
 		 */
-		ctx->b.ws->fence_wait(ctx->b.ws, ctx->b.last_sdma_fence, 800*1000*1000);
+		ctx->ws->fence_wait(ctx->ws, ctx->last_sdma_fence, 800*1000*1000);
 
 		si_check_vm_faults(ctx, &saved, RING_DMA);
 		si_clear_saved_cs(&saved);
@@ -143,7 +143,7 @@ void si_screen_clear_buffer(struct si_screen *sscreen, struct pipe_resource *dst
 	struct si_context *ctx = (struct si_context*)sscreen->aux_context;
 
 	mtx_lock(&sscreen->aux_context_lock);
-	ctx->b.dma_clear_buffer(ctx, dst, offset, size, value);
+	ctx->dma_clear_buffer(ctx, dst, offset, size, value);
 	sscreen->aux_context->flush(sscreen->aux_context, NULL, 0);
 	mtx_unlock(&sscreen->aux_context_lock);
 }
diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c
index 7aa7b1c4a32..26d6c43b34d 100644
--- a/src/gallium/drivers/radeonsi/si_fence.c
+++ b/src/gallium/drivers/radeonsi/si_fence.c
@@ -70,7 +70,7 @@ void si_gfx_write_event_eop(struct si_context *ctx,
 			    struct r600_resource *buf, uint64_t va,
 			    uint32_t new_fence, unsigned query_type)
 {
-	struct radeon_winsys_cs *cs = ctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = ctx->gfx_cs;
 	unsigned op = EVENT_TYPE(event) |
 		      EVENT_INDEX(5) |
 		      event_flags;
@@ -81,7 +81,7 @@ void si_gfx_write_event_eop(struct si_context *ctx,
 	if (data_sel != EOP_DATA_SEL_DISCARD)
 		sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM);
 
-	if (ctx->b.chip_class >= GFX9) {
+	if (ctx->chip_class >= GFX9) {
 		/* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
 		 * counters) must immediately precede every timestamp event to
 		 * prevent a GPU hang on GFX9.
@@ -89,11 +89,11 @@ void si_gfx_write_event_eop(struct si_context *ctx,
 		 * Occlusion queries don't need to do it here, because they
 		 * always do ZPASS_DONE before the timestamp.
 		 */
-		if (ctx->b.chip_class == GFX9 &&
+		if (ctx->chip_class == GFX9 &&
 		    query_type != PIPE_QUERY_OCCLUSION_COUNTER &&
 		    query_type != PIPE_QUERY_OCCLUSION_PREDICATE &&
 		    query_type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
-			struct r600_resource *scratch = ctx->b.eop_bug_scratch;
+			struct r600_resource *scratch = ctx->eop_bug_scratch;
 
 			assert(16 * ctx->screen->info.num_render_backends <=
 			       scratch->b.b.width0);
@@ -102,7 +102,7 @@ void si_gfx_write_event_eop(struct si_context *ctx,
 			radeon_emit(cs, scratch->gpu_address);
 			radeon_emit(cs, scratch->gpu_address >> 32);
 
-			radeon_add_to_buffer_list(ctx, ctx->b.gfx_cs, scratch,
+			radeon_add_to_buffer_list(ctx, ctx->gfx_cs, scratch,
 						  RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
 		}
 
@@ -115,9 +115,9 @@ void si_gfx_write_event_eop(struct si_context *ctx,
 		radeon_emit(cs, 0); /* immediate data hi */
 		radeon_emit(cs, 0); /* unused */
 	} else {
-		if (ctx->b.chip_class == CIK ||
-		    ctx->b.chip_class == VI) {
-			struct r600_resource *scratch = ctx->b.eop_bug_scratch;
+		if (ctx->chip_class == CIK ||
+		    ctx->chip_class == VI) {
+			struct r600_resource *scratch = ctx->eop_bug_scratch;
 			uint64_t va = scratch->gpu_address;
 
 			/* Two EOP events are required to make all engines go idle
@@ -131,7 +131,7 @@ void si_gfx_write_event_eop(struct si_context *ctx,
 			radeon_emit(cs, 0); /* immediate data */
 			radeon_emit(cs, 0); /* unused */
 
-			radeon_add_to_buffer_list(ctx, ctx->b.gfx_cs, scratch,
+			radeon_add_to_buffer_list(ctx, ctx->gfx_cs, scratch,
 						  RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
 		}
 
@@ -144,7 +144,7 @@ void si_gfx_write_event_eop(struct si_context *ctx,
 	}
 
 	if (buf) {
-		radeon_add_to_buffer_list(ctx, ctx->b.gfx_cs, buf, RADEON_USAGE_WRITE,
+		radeon_add_to_buffer_list(ctx, ctx->gfx_cs, buf, RADEON_USAGE_WRITE,
 					  RADEON_PRIO_QUERY);
 	}
 }
@@ -163,7 +163,7 @@ unsigned si_gfx_write_fence_dwords(struct si_screen *screen)
 void si_gfx_wait_fence(struct si_context *ctx,
 		       uint64_t va, uint32_t ref, uint32_t mask)
 {
-	struct radeon_winsys_cs *cs = ctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = ctx->gfx_cs;
 
 	radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
 	radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
@@ -177,17 +177,17 @@ void si_gfx_wait_fence(struct si_context *ctx,
 static void si_add_fence_dependency(struct si_context *sctx,
 				    struct pipe_fence_handle *fence)
 {
-	struct radeon_winsys *ws = sctx->b.ws;
+	struct radeon_winsys *ws = sctx->ws;
 
-	if (sctx->b.dma_cs)
-		ws->cs_add_fence_dependency(sctx->b.dma_cs, fence);
-	ws->cs_add_fence_dependency(sctx->b.gfx_cs, fence);
+	if (sctx->dma_cs)
+		ws->cs_add_fence_dependency(sctx->dma_cs, fence);
+	ws->cs_add_fence_dependency(sctx->gfx_cs, fence);
 }
 
 static void si_add_syncobj_signal(struct si_context *sctx,
 				  struct pipe_fence_handle *fence)
 {
-	sctx->b.ws->cs_add_syncobj_signal(sctx->b.gfx_cs, fence);
+	sctx->ws->cs_add_syncobj_signal(sctx->gfx_cs, fence);
 }
 
 static void si_fence_reference(struct pipe_screen *screen,
@@ -254,7 +254,7 @@ static void si_fine_fence_set(struct si_context *ctx,
 	assert(util_bitcount(flags & (PIPE_FLUSH_TOP_OF_PIPE | PIPE_FLUSH_BOTTOM_OF_PIPE)) == 1);
 
 	/* Use uncached system memory for the fence. */
-	u_upload_alloc(ctx->b.cached_gtt_allocator, 0, 4, 4,
+	u_upload_alloc(ctx->cached_gtt_allocator, 0, 4, 4,
 		       &fine->offset, (struct pipe_resource **)&fine->buf, (void **)&fence_ptr);
 	if (!fine->buf)
 		return;
@@ -263,10 +263,10 @@ static void si_fine_fence_set(struct si_context *ctx,
 
 	uint64_t fence_va = fine->buf->gpu_address + fine->offset;
 
-	radeon_add_to_buffer_list(ctx, ctx->b.gfx_cs, fine->buf,
+	radeon_add_to_buffer_list(ctx, ctx->gfx_cs, fine->buf,
 				  RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
 	if (flags & PIPE_FLUSH_TOP_OF_PIPE) {
-		struct radeon_winsys_cs *cs = ctx->b.gfx_cs;
+		struct radeon_winsys_cs *cs = ctx->gfx_cs;
 		radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
 		radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
 			S_370_WR_CONFIRM(1) |
@@ -350,7 +350,7 @@ static boolean si_fence_finish(struct pipe_screen *screen,
 
 		sctx = (struct si_context *)threaded_context_unwrap_unsync(ctx);
 		if (rfence->gfx_unflushed.ctx == sctx &&
-		    rfence->gfx_unflushed.ib_index == sctx->b.num_gfx_cs_flushes) {
+		    rfence->gfx_unflushed.ib_index == sctx->num_gfx_cs_flushes) {
 			/* Section 4.1.2 (Signaling) of the OpenGL 4.6 (Core profile)
 			 * spec says:
 			 *
@@ -495,7 +495,7 @@ static void si_flush_from_st(struct pipe_context *ctx,
 {
 	struct pipe_screen *screen = ctx->screen;
 	struct si_context *sctx = (struct si_context *)ctx;
-	struct radeon_winsys *ws = sctx->b.ws;
+	struct radeon_winsys *ws = sctx->ws;
 	struct pipe_fence_handle *gfx_fence = NULL;
 	struct pipe_fence_handle *sdma_fence = NULL;
 	bool deferred_fence = false;
@@ -513,14 +513,14 @@ static void si_flush_from_st(struct pipe_context *ctx,
 	}
 
 	/* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */
-	if (sctx->b.dma_cs)
+	if (sctx->dma_cs)
 		si_flush_dma_cs(sctx, rflags, fence ? &sdma_fence : NULL);
 
-	if (!radeon_emitted(sctx->b.gfx_cs, sctx->b.initial_gfx_cs_size)) {
+	if (!radeon_emitted(sctx->gfx_cs, sctx->initial_gfx_cs_size)) {
 		if (fence)
-			ws->fence_reference(&gfx_fence, sctx->b.last_gfx_fence);
+			ws->fence_reference(&gfx_fence, sctx->last_gfx_fence);
 		if (!(flags & PIPE_FLUSH_DEFERRED))
-			ws->cs_sync_flush(sctx->b.gfx_cs);
+			ws->cs_sync_flush(sctx->gfx_cs);
 	} else {
 		/* Instead of flushing, create a deferred fence. Constraints:
 		 * - The state tracker must allow a deferred flush.
@@ -531,7 +531,7 @@ static void si_flush_from_st(struct pipe_context *ctx,
 		if (flags & PIPE_FLUSH_DEFERRED &&
 		    !(flags & PIPE_FLUSH_FENCE_FD) &&
 		    fence) {
-			gfx_fence = sctx->b.ws->cs_get_next_fence(sctx->b.gfx_cs);
+			gfx_fence = sctx->ws->cs_get_next_fence(sctx->gfx_cs);
 			deferred_fence = true;
 		} else {
 			si_flush_gfx_cs(sctx, rflags, fence ? &gfx_fence : NULL);
@@ -563,7 +563,7 @@ static void si_flush_from_st(struct pipe_context *ctx,
 
 		if (deferred_fence) {
 			multi_fence->gfx_unflushed.ctx = sctx;
-			multi_fence->gfx_unflushed.ib_index = sctx->b.num_gfx_cs_flushes;
+			multi_fence->gfx_unflushed.ib_index = sctx->num_gfx_cs_flushes;
 		}
 
 		multi_fence->fine = fine;
@@ -577,9 +577,9 @@ static void si_flush_from_st(struct pipe_context *ctx,
 	assert(!fine.buf);
 finish:
 	if (!(flags & PIPE_FLUSH_DEFERRED)) {
-		if (sctx->b.dma_cs)
-			ws->cs_sync_flush(sctx->b.dma_cs);
-		ws->cs_sync_flush(sctx->b.gfx_cs);
+		if (sctx->dma_cs)
+			ws->cs_sync_flush(sctx->dma_cs);
+		ws->cs_sync_flush(sctx->gfx_cs);
 	}
 }
 
@@ -638,10 +638,10 @@ static void si_fence_server_sync(struct pipe_context *ctx,
 
 void si_init_fence_functions(struct si_context *ctx)
 {
-	ctx->b.b.flush = si_flush_from_st;
-	ctx->b.b.create_fence_fd = si_create_fence_fd;
-	ctx->b.b.fence_server_sync = si_fence_server_sync;
-	ctx->b.b.fence_server_signal = si_fence_server_signal;
+	ctx->b.flush = si_flush_from_st;
+	ctx->b.create_fence_fd = si_create_fence_fd;
+	ctx->b.fence_server_sync = si_fence_server_sync;
+	ctx->b.fence_server_signal = si_fence_server_signal;
 }
 
 void si_init_screen_fence_functions(struct si_screen *screen)
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index b607becd3f6..f99bc324c98 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -30,7 +30,7 @@
 /* initialize */
 void si_need_gfx_cs_space(struct si_context *ctx)
 {
-	struct radeon_winsys_cs *cs = ctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = ctx->gfx_cs;
 
 	/* There is no need to flush the DMA IB here, because
 	 * r600_need_dma_space always flushes the GFX IB if there is
@@ -43,15 +43,15 @@ void si_need_gfx_cs_space(struct si_context *ctx)
 	 * that have been added (cs_add_buffer) and two counters in the pipe
 	 * driver for those that haven't been added yet.
 	 */
-	if (unlikely(!radeon_cs_memory_below_limit(ctx->screen, ctx->b.gfx_cs,
-						   ctx->b.vram, ctx->b.gtt))) {
-		ctx->b.gtt = 0;
-		ctx->b.vram = 0;
+	if (unlikely(!radeon_cs_memory_below_limit(ctx->screen, ctx->gfx_cs,
+						   ctx->vram, ctx->gtt))) {
+		ctx->gtt = 0;
+		ctx->vram = 0;
 		si_flush_gfx_cs(ctx, PIPE_FLUSH_ASYNC, NULL);
 		return;
 	}
-	ctx->b.gtt = 0;
-	ctx->b.vram = 0;
+	ctx->gtt = 0;
+	ctx->vram = 0;
 
 	/* If the IB is sufficiently large, don't count the space needed
 	 * and just flush if there is not enough space left.
@@ -59,21 +59,21 @@ void si_need_gfx_cs_space(struct si_context *ctx)
 	 * Also reserve space for stopping queries at the end of IB, because
 	 * the number of active queries is mostly unlimited.
 	 */
-	unsigned need_dwords = 2048 + ctx->b.num_cs_dw_queries_suspend;
-	if (!ctx->b.ws->cs_check_space(cs, need_dwords))
+	unsigned need_dwords = 2048 + ctx->num_cs_dw_queries_suspend;
+	if (!ctx->ws->cs_check_space(cs, need_dwords))
 		si_flush_gfx_cs(ctx, PIPE_FLUSH_ASYNC, NULL);
 }
 
 void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
 		     struct pipe_fence_handle **fence)
 {
-	struct radeon_winsys_cs *cs = ctx->b.gfx_cs;
-	struct radeon_winsys *ws = ctx->b.ws;
+	struct radeon_winsys_cs *cs = ctx->gfx_cs;
+	struct radeon_winsys *ws = ctx->ws;
 
 	if (ctx->gfx_flush_in_progress)
 		return;
 
-	if (!radeon_emitted(cs, ctx->b.initial_gfx_cs_size))
+	if (!radeon_emitted(cs, ctx->initial_gfx_cs_size))
 		return;
 
 	if (si_check_device_reset(ctx))
@@ -87,14 +87,14 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
 	 * This code is only needed when the driver flushes the GFX IB
 	 * internally, and it never asks for a fence handle.
 	 */
-	if (radeon_emitted(ctx->b.dma_cs, 0)) {
+	if (radeon_emitted(ctx->dma_cs, 0)) {
 		assert(fence == NULL); /* internal flushes only */
 		si_flush_dma_cs(ctx, flags, NULL);
 	}
 
 	ctx->gfx_flush_in_progress = true;
 
-	if (!LIST_IS_EMPTY(&ctx->b.active_queries))
+	if (!LIST_IS_EMPTY(&ctx->active_queries))
 		si_suspend_queries(ctx);
 
 	ctx->streamout.suspended = false;
@@ -103,12 +103,12 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
 		ctx->streamout.suspended = true;
 	}
 
-	ctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
+	ctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
 			SI_CONTEXT_PS_PARTIAL_FLUSH;
 
 	/* DRM 3.1.0 doesn't flush TC for VI correctly. */
-	if (ctx->b.chip_class == VI && ctx->screen->info.drm_minor <= 1)
-		ctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2 |
+	if (ctx->chip_class == VI && ctx->screen->info.drm_minor <= 1)
+		ctx->flags |= SI_CONTEXT_INV_GLOBAL_L2 |
 				SI_CONTEXT_INV_VMEM_L1;
 
 	si_emit_cache_flush(ctx);
@@ -124,20 +124,20 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
 	}
 
 	/* Flush the CS. */
-	ws->cs_flush(cs, flags, &ctx->b.last_gfx_fence);
+	ws->cs_flush(cs, flags, &ctx->last_gfx_fence);
 	if (fence)
-		ws->fence_reference(fence, ctx->b.last_gfx_fence);
+		ws->fence_reference(fence, ctx->last_gfx_fence);
 
 	/* This must be after cs_flush returns, since the context's API
 	 * thread can concurrently read this value in si_fence_finish. */
-	ctx->b.num_gfx_cs_flushes++;
+	ctx->num_gfx_cs_flushes++;
 
 	/* Check VM faults if needed. */
 	if (ctx->screen->debug_flags & DBG(CHECK_VM)) {
 		/* Use conservative timeout 800ms, after which we won't wait any
 		 * longer and assume the GPU is hung.
 		 */
-		ctx->b.ws->fence_wait(ctx->b.ws, ctx->b.last_gfx_fence, 800*1000*1000);
+		ctx->ws->fence_wait(ctx->ws, ctx->last_gfx_fence, 800*1000*1000);
 
 		si_check_vm_faults(ctx, &ctx->current_saved_cs->gfx, RING_GFX);
 	}
@@ -161,7 +161,7 @@ static void si_begin_gfx_cs_debug(struct si_context *ctx)
 	pipe_reference_init(&ctx->current_saved_cs->reference, 1);
 
 	ctx->current_saved_cs->trace_buf = (struct r600_resource*)
-				 pipe_buffer_create(ctx->b.b.screen, 0,
+				 pipe_buffer_create(ctx->b.screen, 0,
 						    PIPE_USAGE_STAGING, 8);
 	if (!ctx->current_saved_cs->trace_buf) {
 		free(ctx->current_saved_cs);
@@ -169,13 +169,13 @@ static void si_begin_gfx_cs_debug(struct si_context *ctx)
 		return;
 	}
 
-	pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->current_saved_cs->trace_buf->b.b,
+	pipe_buffer_write_nooverlap(&ctx->b, &ctx->current_saved_cs->trace_buf->b.b,
 				    0, sizeof(zeros), zeros);
 	ctx->current_saved_cs->trace_id = 0;
 
 	si_trace_emit(ctx);
 
-	radeon_add_to_buffer_list(ctx, ctx->b.gfx_cs, ctx->current_saved_cs->trace_buf,
+	radeon_add_to_buffer_list(ctx, ctx->gfx_cs, ctx->current_saved_cs->trace_buf,
 			      RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
 }
 
@@ -185,11 +185,11 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
 		si_begin_gfx_cs_debug(ctx);
 
 	/* Flush read caches at the beginning of CS not flushed by the kernel. */
-	if (ctx->b.chip_class >= CIK)
-		ctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
+	if (ctx->chip_class >= CIK)
+		ctx->flags |= SI_CONTEXT_INV_SMEM_L1 |
 				SI_CONTEXT_INV_ICACHE;
 
-	ctx->b.flags |= SI_CONTEXT_START_PIPELINE_STATS;
+	ctx->flags |= SI_CONTEXT_START_PIPELINE_STATS;
 
 	/* set all valid group as dirty so they get reemited on
 	 * next draw command
@@ -246,12 +246,12 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
 	if (!has_clear_state || ctx->blend_color.any_nonzeros)
 		si_mark_atom_dirty(ctx, &ctx->blend_color.atom);
 	si_mark_atom_dirty(ctx, &ctx->db_render_state);
-	if (ctx->b.chip_class >= GFX9)
+	if (ctx->chip_class >= GFX9)
 		si_mark_atom_dirty(ctx, &ctx->dpbb_state);
 	si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
 	si_mark_atom_dirty(ctx, &ctx->spi_map);
 	si_mark_atom_dirty(ctx, &ctx->streamout.enable_atom);
-	si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);
+	si_mark_atom_dirty(ctx, &ctx->render_cond_atom);
 	si_all_descriptors_begin_new_cs(ctx);
 	si_all_resident_buffers_begin_new_cs(ctx);
 
@@ -271,11 +271,11 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
 		si_streamout_buffers_dirty(ctx);
 	}
 
-	if (!LIST_IS_EMPTY(&ctx->b.active_queries))
+	if (!LIST_IS_EMPTY(&ctx->active_queries))
 		si_resume_queries(ctx);
 
-	assert(!ctx->b.gfx_cs->prev_dw);
-	ctx->b.initial_gfx_cs_size = ctx->b.gfx_cs->current.cdw;
+	assert(!ctx->gfx_cs->prev_dw);
+	ctx->initial_gfx_cs_size = ctx->gfx_cs->current.cdw;
 
 	/* Invalidate various draw states so that they are emitted before
 	 * the first draw call. */
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index 7ab995cacc9..ad62e55f227 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -425,7 +425,7 @@ static struct si_pc_block groups_gfx9[] = {
 static void si_pc_emit_instance(struct si_context *sctx,
 				int se, int instance)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	unsigned value = S_030800_SH_BROADCAST_WRITES(1);
 
 	if (se >= 0) {
@@ -446,7 +446,7 @@ static void si_pc_emit_instance(struct si_context *sctx,
 static void si_pc_emit_shaders(struct si_context *sctx,
 			       unsigned shaders)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 
 	radeon_set_uconfig_reg_seq(cs, R_036780_SQ_PERFCOUNTER_CTRL, 2);
 	radeon_emit(cs, shaders & 0x7f);
@@ -459,7 +459,7 @@ static void si_pc_emit_select(struct si_context *sctx,
 {
 	struct si_pc_block *sigroup = (struct si_pc_block *)group->data;
 	struct si_pc_block_base *regs = sigroup->b;
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	unsigned idx;
 	unsigned layout_multi = regs->layout & SI_PC_MULTI_MASK;
 	unsigned dw;
@@ -552,9 +552,9 @@ static void si_pc_emit_select(struct si_context *sctx,
 static void si_pc_emit_start(struct si_context *sctx,
 			     struct r600_resource *buffer, uint64_t va)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 
-	radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs, buffer,
+	radeon_add_to_buffer_list(sctx, sctx->gfx_cs, buffer,
 				  RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
 
 	radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
@@ -578,7 +578,7 @@ static void si_pc_emit_start(struct si_context *sctx,
 static void si_pc_emit_stop(struct si_context *sctx,
 			    struct r600_resource *buffer, uint64_t va)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 
 	si_gfx_write_event_eop(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
 			       EOP_DATA_SEL_VALUE_32BIT,
@@ -601,7 +601,7 @@ static void si_pc_emit_read(struct si_context *sctx,
 {
 	struct si_pc_block *sigroup = (struct si_pc_block *)group->data;
 	struct si_pc_block_base *regs = sigroup->b;
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	unsigned idx;
 	unsigned reg = regs->counter0_lo;
 	unsigned reg_delta = 8;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 43afcf318dd..4928e6f5f3a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -136,69 +136,69 @@ static void si_destroy_context(struct pipe_context *context)
 		si_pm4_delete_state(sctx, vgt_shader_config, sctx->vgt_shader_config[i]);
 
 	if (sctx->fixed_func_tcs_shader.cso)
-		sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader.cso);
+		sctx->b.delete_tcs_state(&sctx->b, sctx->fixed_func_tcs_shader.cso);
 	if (sctx->custom_dsa_flush)
-		sctx->b.b.delete_depth_stencil_alpha_state(&sctx->b.b, sctx->custom_dsa_flush);
+		sctx->b.delete_depth_stencil_alpha_state(&sctx->b, sctx->custom_dsa_flush);
 	if (sctx->custom_blend_resolve)
-		sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_resolve);
+		sctx->b.delete_blend_state(&sctx->b, sctx->custom_blend_resolve);
 	if (sctx->custom_blend_fmask_decompress)
-		sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_fmask_decompress);
+		sctx->b.delete_blend_state(&sctx->b, sctx->custom_blend_fmask_decompress);
 	if (sctx->custom_blend_eliminate_fastclear)
-		sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_eliminate_fastclear);
+		sctx->b.delete_blend_state(&sctx->b, sctx->custom_blend_eliminate_fastclear);
 	if (sctx->custom_blend_dcc_decompress)
-		sctx->b.b.delete_blend_state(&sctx->b.b, sctx->custom_blend_dcc_decompress);
+		sctx->b.delete_blend_state(&sctx->b, sctx->custom_blend_dcc_decompress);
 	if (sctx->vs_blit_pos)
-		sctx->b.b.delete_vs_state(&sctx->b.b, sctx->vs_blit_pos);
+		sctx->b.delete_vs_state(&sctx->b, sctx->vs_blit_pos);
 	if (sctx->vs_blit_pos_layered)
-		sctx->b.b.delete_vs_state(&sctx->b.b, sctx->vs_blit_pos_layered);
+		sctx->b.delete_vs_state(&sctx->b, sctx->vs_blit_pos_layered);
 	if (sctx->vs_blit_color)
-		sctx->b.b.delete_vs_state(&sctx->b.b, sctx->vs_blit_color);
+		sctx->b.delete_vs_state(&sctx->b, sctx->vs_blit_color);
 	if (sctx->vs_blit_color_layered)
-		sctx->b.b.delete_vs_state(&sctx->b.b, sctx->vs_blit_color_layered);
+		sctx->b.delete_vs_state(&sctx->b, sctx->vs_blit_color_layered);
 	if (sctx->vs_blit_texcoord)
-		sctx->b.b.delete_vs_state(&sctx->b.b, sctx->vs_blit_texcoord);
+		sctx->b.delete_vs_state(&sctx->b, sctx->vs_blit_texcoord);
 
 	if (sctx->blitter)
 		util_blitter_destroy(sctx->blitter);
 
 	/* Release DCC stats. */
-	for (int i = 0; i < ARRAY_SIZE(sctx->b.dcc_stats); i++) {
-		assert(!sctx->b.dcc_stats[i].query_active);
+	for (int i = 0; i < ARRAY_SIZE(sctx->dcc_stats); i++) {
+		assert(!sctx->dcc_stats[i].query_active);
 
-		for (int j = 0; j < ARRAY_SIZE(sctx->b.dcc_stats[i].ps_stats); j++)
-			if (sctx->b.dcc_stats[i].ps_stats[j])
-				sctx->b.b.destroy_query(&sctx->b.b,
-							sctx->b.dcc_stats[i].ps_stats[j]);
+		for (int j = 0; j < ARRAY_SIZE(sctx->dcc_stats[i].ps_stats); j++)
+			if (sctx->dcc_stats[i].ps_stats[j])
+				sctx->b.destroy_query(&sctx->b,
+							sctx->dcc_stats[i].ps_stats[j]);
 
-		r600_texture_reference(&sctx->b.dcc_stats[i].tex, NULL);
+		r600_texture_reference(&sctx->dcc_stats[i].tex, NULL);
 	}
 
-	if (sctx->b.query_result_shader)
-		sctx->b.b.delete_compute_state(&sctx->b.b, sctx->b.query_result_shader);
+	if (sctx->query_result_shader)
+		sctx->b.delete_compute_state(&sctx->b, sctx->query_result_shader);
 
-	if (sctx->b.gfx_cs)
-		sctx->b.ws->cs_destroy(sctx->b.gfx_cs);
-	if (sctx->b.dma_cs)
-		sctx->b.ws->cs_destroy(sctx->b.dma_cs);
-	if (sctx->b.ctx)
-		sctx->b.ws->ctx_destroy(sctx->b.ctx);
+	if (sctx->gfx_cs)
+		sctx->ws->cs_destroy(sctx->gfx_cs);
+	if (sctx->dma_cs)
+		sctx->ws->cs_destroy(sctx->dma_cs);
+	if (sctx->ctx)
+		sctx->ws->ctx_destroy(sctx->ctx);
 
-	if (sctx->b.b.stream_uploader)
-		u_upload_destroy(sctx->b.b.stream_uploader);
-	if (sctx->b.b.const_uploader)
-		u_upload_destroy(sctx->b.b.const_uploader);
-	if (sctx->b.cached_gtt_allocator)
-		u_upload_destroy(sctx->b.cached_gtt_allocator);
+	if (sctx->b.stream_uploader)
+		u_upload_destroy(sctx->b.stream_uploader);
+	if (sctx->b.const_uploader)
+		u_upload_destroy(sctx->b.const_uploader);
+	if (sctx->cached_gtt_allocator)
+		u_upload_destroy(sctx->cached_gtt_allocator);
 
-	slab_destroy_child(&sctx->b.pool_transfers);
-	slab_destroy_child(&sctx->b.pool_transfers_unsync);
+	slab_destroy_child(&sctx->pool_transfers);
+	slab_destroy_child(&sctx->pool_transfers_unsync);
 
-	if (sctx->b.allocator_zeroed_memory)
-		u_suballocator_destroy(sctx->b.allocator_zeroed_memory);
+	if (sctx->allocator_zeroed_memory)
+		u_suballocator_destroy(sctx->allocator_zeroed_memory);
 
-	sctx->b.ws->fence_reference(&sctx->b.last_gfx_fence, NULL);
-	sctx->b.ws->fence_reference(&sctx->b.last_sdma_fence, NULL);
-	r600_resource_reference(&sctx->b.eop_bug_scratch, NULL);
+	sctx->ws->fence_reference(&sctx->last_gfx_fence, NULL);
+	sctx->ws->fence_reference(&sctx->last_sdma_fence, NULL);
+	r600_resource_reference(&sctx->eop_bug_scratch, NULL);
 
 	LLVMDisposeTargetMachine(sctx->tm);
 
@@ -220,19 +220,19 @@ si_amdgpu_get_reset_status(struct pipe_context *ctx)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
-	return sctx->b.ws->ctx_query_reset_status(sctx->b.ctx);
+	return sctx->ws->ctx_query_reset_status(sctx->ctx);
 }
 
 static enum pipe_reset_status si_get_reset_status(struct pipe_context *ctx)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
-	unsigned latest = sctx->b.ws->query_value(sctx->b.ws,
+	unsigned latest = sctx->ws->query_value(sctx->ws,
 						  RADEON_GPU_RESET_COUNTER);
 
-	if (sctx->b.gpu_reset_counter == latest)
+	if (sctx->gpu_reset_counter == latest)
 		return PIPE_NO_RESET;
 
-	sctx->b.gpu_reset_counter = latest;
+	sctx->gpu_reset_counter = latest;
 	return PIPE_UNKNOWN_CONTEXT_RESET;
 }
 
@@ -242,27 +242,27 @@ static void si_set_device_reset_callback(struct pipe_context *ctx,
 	struct si_context *sctx = (struct si_context *)ctx;
 
 	if (cb)
-		sctx->b.device_reset_callback = *cb;
+		sctx->device_reset_callback = *cb;
 	else
-		memset(&sctx->b.device_reset_callback, 0,
-		       sizeof(sctx->b.device_reset_callback));
+		memset(&sctx->device_reset_callback, 0,
+		       sizeof(sctx->device_reset_callback));
 }
 
 bool si_check_device_reset(struct si_context *sctx)
 {
 	enum pipe_reset_status status;
 
-	if (!sctx->b.device_reset_callback.reset)
+	if (!sctx->device_reset_callback.reset)
 		return false;
 
-	if (!sctx->b.b.get_device_reset_status)
+	if (!sctx->b.get_device_reset_status)
 		return false;
 
-	status = sctx->b.b.get_device_reset_status(&sctx->b.b);
+	status = sctx->b.get_device_reset_status(&sctx->b);
 	if (status == PIPE_NO_RESET)
 		return false;
 
-	sctx->b.device_reset_callback.reset(sctx->b.device_reset_callback.data, status);
+	sctx->device_reset_callback.reset(sctx->device_reset_callback.data, status);
 	return true;
 }
 
@@ -281,8 +281,8 @@ static void si_emit_string_marker(struct pipe_context *ctx,
 
 	dd_parse_apitrace_marker(string, len, &sctx->apitrace_call_number);
 
-	if (sctx->b.log)
-		u_log_printf(sctx->b.log, "\nString marker: %*s\n", len, string);
+	if (sctx->log)
+		u_log_printf(sctx->log, "\nString marker: %*s\n", len, string);
 }
 
 static LLVMTargetMachineRef
@@ -316,7 +316,7 @@ static void si_set_log_context(struct pipe_context *ctx,
 			       struct u_log_context *log)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
-	sctx->b.log = log;
+	sctx->log = log;
 
 	if (log)
 		u_log_add_auto_logger(log, si_auto_log_cs, sctx);
@@ -336,81 +336,81 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 	if (flags & PIPE_CONTEXT_DEBUG)
 		sscreen->record_llvm_ir = true; /* racy but not critical */
 
-	sctx->b.b.screen = screen; /* this must be set first */
-	sctx->b.b.priv = NULL;
-	sctx->b.b.destroy = si_destroy_context;
-	sctx->b.b.emit_string_marker = si_emit_string_marker;
-	sctx->b.b.set_debug_callback = si_set_debug_callback;
-	sctx->b.b.set_log_context = si_set_log_context;
+	sctx->b.screen = screen; /* this must be set first */
+	sctx->b.priv = NULL;
+	sctx->b.destroy = si_destroy_context;
+	sctx->b.emit_string_marker = si_emit_string_marker;
+	sctx->b.set_debug_callback = si_set_debug_callback;
+	sctx->b.set_log_context = si_set_log_context;
 	sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
 	sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
 
-	slab_create_child(&sctx->b.pool_transfers, &sscreen->pool_transfers);
-	slab_create_child(&sctx->b.pool_transfers_unsync, &sscreen->pool_transfers);
+	slab_create_child(&sctx->pool_transfers, &sscreen->pool_transfers);
+	slab_create_child(&sctx->pool_transfers_unsync, &sscreen->pool_transfers);
 
-	sctx->b.ws = sscreen->ws;
-	sctx->b.family = sscreen->info.family;
-	sctx->b.chip_class = sscreen->info.chip_class;
+	sctx->ws = sscreen->ws;
+	sctx->family = sscreen->info.family;
+	sctx->chip_class = sscreen->info.chip_class;
 
 	if (sscreen->info.drm_major == 2 && sscreen->info.drm_minor >= 43) {
-		sctx->b.b.get_device_reset_status = si_get_reset_status;
-		sctx->b.gpu_reset_counter =
-				sctx->b.ws->query_value(sctx->b.ws,
+		sctx->b.get_device_reset_status = si_get_reset_status;
+		sctx->gpu_reset_counter =
+				sctx->ws->query_value(sctx->ws,
 							RADEON_GPU_RESET_COUNTER);
 	}
 
-	sctx->b.b.set_device_reset_callback = si_set_device_reset_callback;
+	sctx->b.set_device_reset_callback = si_set_device_reset_callback;
 
 	si_init_context_texture_functions(sctx);
 	si_init_query_functions(sctx);
 
-	if (sctx->b.chip_class == CIK ||
-	    sctx->b.chip_class == VI ||
-	    sctx->b.chip_class == GFX9) {
-		sctx->b.eop_bug_scratch = (struct r600_resource*)
+	if (sctx->chip_class == CIK ||
+	    sctx->chip_class == VI ||
+	    sctx->chip_class == GFX9) {
+		sctx->eop_bug_scratch = (struct r600_resource*)
 					  pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT,
 							     16 * sscreen->info.num_render_backends);
-		if (!sctx->b.eop_bug_scratch)
+		if (!sctx->eop_bug_scratch)
 			goto fail;
 	}
 
-	sctx->b.allocator_zeroed_memory =
-			u_suballocator_create(&sctx->b.b, sscreen->info.gart_page_size,
+	sctx->allocator_zeroed_memory =
+			u_suballocator_create(&sctx->b, sscreen->info.gart_page_size,
 					      0, PIPE_USAGE_DEFAULT, 0, true);
-	if (!sctx->b.allocator_zeroed_memory)
+	if (!sctx->allocator_zeroed_memory)
 		goto fail;
 
-	sctx->b.b.stream_uploader = u_upload_create(&sctx->b.b, 1024 * 1024,
+	sctx->b.stream_uploader = u_upload_create(&sctx->b, 1024 * 1024,
 						    0, PIPE_USAGE_STREAM,
 						    SI_RESOURCE_FLAG_READ_ONLY);
-	if (!sctx->b.b.stream_uploader)
+	if (!sctx->b.stream_uploader)
 		goto fail;
 
-	sctx->b.b.const_uploader = u_upload_create(&sctx->b.b, 128 * 1024,
+	sctx->b.const_uploader = u_upload_create(&sctx->b, 128 * 1024,
 						   0, PIPE_USAGE_DEFAULT,
 						   SI_RESOURCE_FLAG_32BIT |
 						   (sscreen->cpdma_prefetch_writes_memory ?
 							    0 : SI_RESOURCE_FLAG_READ_ONLY));
-	if (!sctx->b.b.const_uploader)
+	if (!sctx->b.const_uploader)
 		goto fail;
 
-	sctx->b.cached_gtt_allocator = u_upload_create(&sctx->b.b, 16 * 1024,
+	sctx->cached_gtt_allocator = u_upload_create(&sctx->b, 16 * 1024,
 						       0, PIPE_USAGE_STAGING, 0);
-	if (!sctx->b.cached_gtt_allocator)
+	if (!sctx->cached_gtt_allocator)
 		goto fail;
 
-	sctx->b.ctx = sctx->b.ws->ctx_create(sctx->b.ws);
-	if (!sctx->b.ctx)
+	sctx->ctx = sctx->ws->ctx_create(sctx->ws);
+	if (!sctx->ctx)
 		goto fail;
 
 	if (sscreen->info.num_sdma_rings && !(sscreen->debug_flags & DBG(NO_ASYNC_DMA))) {
-		sctx->b.dma_cs = sctx->b.ws->cs_create(sctx->b.ctx, RING_DMA,
+		sctx->dma_cs = sctx->ws->cs_create(sctx->ctx, RING_DMA,
 						       (void*)si_flush_dma_cs,
 						       sctx);
 	}
 
 	if (sscreen->info.drm_major == 3)
-		sctx->b.b.get_device_reset_status = si_amdgpu_get_reset_status;
+		sctx->b.get_device_reset_status = si_amdgpu_get_reset_status;
 
 	si_init_buffer_functions(sctx);
 	si_init_clear_functions(sctx);
@@ -422,14 +422,14 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 	si_init_streamout_functions(sctx);
 
 	if (sscreen->info.has_hw_decode) {
-		sctx->b.b.create_video_codec = si_uvd_create_decoder;
-		sctx->b.b.create_video_buffer = si_video_buffer_create;
+		sctx->b.create_video_codec = si_uvd_create_decoder;
+		sctx->b.create_video_buffer = si_video_buffer_create;
 	} else {
-		sctx->b.b.create_video_codec = vl_create_decoder;
-		sctx->b.b.create_video_buffer = vl_video_buffer_create;
+		sctx->b.create_video_codec = vl_create_decoder;
+		sctx->b.create_video_buffer = vl_video_buffer_create;
 	}
 
-	sctx->b.gfx_cs = ws->cs_create(sctx->b.ctx, RING_GFX,
+	sctx->gfx_cs = ws->cs_create(sctx->ctx, RING_GFX,
 				       (void*)si_flush_gfx_cs, sctx);
 
 	/* Border colors. */
@@ -458,15 +458,15 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 	si_init_viewport_functions(sctx);
 	si_init_ia_multi_vgt_param_table(sctx);
 
-	if (sctx->b.chip_class >= CIK)
+	if (sctx->chip_class >= CIK)
 		cik_init_sdma_functions(sctx);
 	else
 		si_init_dma_functions(sctx);
 
 	if (sscreen->debug_flags & DBG(FORCE_DMA))
-		sctx->b.b.resource_copy_region = sctx->b.dma_copy;
+		sctx->b.resource_copy_region = sctx->dma_copy;
 
-	sctx->blitter = util_blitter_create(&sctx->b.b);
+	sctx->blitter = util_blitter_create(&sctx->b);
 	if (sctx->blitter == NULL)
 		goto fail;
 	sctx->blitter->draw_rectangle = si_draw_rectangle;
@@ -477,14 +477,14 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 	/* these must be last */
 	si_begin_new_gfx_cs(sctx);
 
-	if (sctx->b.chip_class >= GFX9) {
+	if (sctx->chip_class >= GFX9) {
 		sctx->wait_mem_scratch = (struct r600_resource*)
 			pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4);
 		if (!sctx->wait_mem_scratch)
 			goto fail;
 
 		/* Initialize the memory. */
-		struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+		struct radeon_winsys_cs *cs = sctx->gfx_cs;
 		radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
 		radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
 			    S_370_WR_CONFIRM(1) |
@@ -496,7 +496,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 
 	/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads
 	 * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */
-	if (sctx->b.chip_class == CIK) {
+	if (sctx->chip_class == CIK) {
 		sctx->null_const_buf.buffer =
 			si_aligned_buffer_create(screen,
 						 SI_RESOURCE_FLAG_32BIT,
@@ -508,7 +508,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 
 		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
 			for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
-				sctx->b.b.set_constant_buffer(&sctx->b.b, shader, i,
+				sctx->b.set_constant_buffer(&sctx->b, shader, i,
 							      &sctx->null_const_buf);
 			}
 		}
@@ -564,10 +564,10 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 	util_dynarray_init(&sctx->resident_img_needs_color_decompress, NULL);
 	util_dynarray_init(&sctx->resident_tex_needs_depth_decompress, NULL);
 
-	return &sctx->b.b;
+	return &sctx->b;
 fail:
 	fprintf(stderr, "radeonsi: Failed to create a context.\n");
-	si_destroy_context(&sctx->b.b);
+	si_destroy_context(&sctx->b);
 	return NULL;
 }
 
@@ -599,7 +599,7 @@ static struct pipe_context *si_pipe_create_context(struct pipe_screen *screen,
 	return threaded_context_create(ctx, &sscreen->pool_transfers,
 				       si_replace_buffer_storage,
 				       sscreen->info.drm_major >= 3 ? si_create_fence : NULL,
-				       &((struct si_context*)ctx)->b.tc);
+				       &((struct si_context*)ctx)->tc);
 }
 
 /*
@@ -744,7 +744,7 @@ static void si_test_vmfault(struct si_screen *sscreen)
 		puts("VM fault test: CP - done.");
 	}
 	if (sscreen->debug_flags & DBG(TEST_VMFAULT_SDMA)) {
-		sctx->b.dma_clear_buffer(sctx, buf, 0, 4, 0);
+		sctx->dma_clear_buffer(sctx, buf, 0, 4, 0);
 		ctx->flush(ctx, NULL, 0);
 		puts("VM fault test: SDMA - done.");
 	}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 29ee25abf56..a794633d14b 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -506,7 +506,26 @@ struct si_saved_cs {
 };
 
 struct si_context {
-	struct r600_common_context	b;
+	struct pipe_context		b; /* base class */
+
+	enum radeon_family		family;
+	enum chip_class			chip_class;
+
+	struct radeon_winsys		*ws;
+	struct radeon_winsys_ctx	*ctx;
+	struct radeon_winsys_cs		*gfx_cs;
+	struct radeon_winsys_cs		*dma_cs;
+	struct pipe_fence_handle	*last_gfx_fence;
+	struct pipe_fence_handle	*last_sdma_fence;
+	struct r600_resource		*eop_bug_scratch;
+	struct u_upload_mgr		*cached_gtt_allocator;
+	struct threaded_context		*tc;
+	struct u_suballocator		*allocator_zeroed_memory;
+	struct slab_child_pool		pool_transfers;
+	struct slab_child_pool		pool_transfers_unsync; /* for threaded_context */
+	struct pipe_device_reset_callback device_reset_callback;
+	struct u_log_context		*log;
+	void				*query_result_shader;
 	struct blitter_context		*blitter;
 	void				*custom_dsa_flush;
 	void				*custom_blend_resolve;
@@ -529,6 +548,17 @@ struct si_context {
 	bool				gfx_flush_in_progress:1;
 	bool				compute_is_busy:1;
 
+	unsigned			num_gfx_cs_flushes;
+	unsigned			initial_gfx_cs_size;
+	unsigned			gpu_reset_counter;
+	unsigned			last_dirty_tex_counter;
+	unsigned			last_compressed_colortex_counter;
+	unsigned			last_num_draw_calls;
+	unsigned			flags; /* flush flags */
+	/* Current unaccounted memory usage. */
+	uint64_t			vram;
+	uint64_t			gtt;
+
 	/* Atoms (direct states). */
 	union si_state_atoms		atoms;
 	unsigned			dirty_atoms; /* mask */
@@ -713,6 +743,72 @@ struct si_context {
 	float			sample_locations_4x[4][2];
 	float			sample_locations_8x[8][2];
 	float			sample_locations_16x[16][2];
+
+	/* Misc stats. */
+	unsigned			num_draw_calls;
+	unsigned			num_decompress_calls;
+	unsigned			num_mrt_draw_calls;
+	unsigned			num_prim_restart_calls;
+	unsigned			num_spill_draw_calls;
+	unsigned			num_compute_calls;
+	unsigned			num_spill_compute_calls;
+	unsigned			num_dma_calls;
+	unsigned			num_cp_dma_calls;
+	unsigned			num_vs_flushes;
+	unsigned			num_ps_flushes;
+	unsigned			num_cs_flushes;
+	unsigned			num_cb_cache_flushes;
+	unsigned			num_db_cache_flushes;
+	unsigned			num_L2_invalidates;
+	unsigned			num_L2_writebacks;
+	unsigned			num_resident_handles;
+	uint64_t			num_alloc_tex_transfer_bytes;
+	unsigned			last_tex_ps_draw_ratio; /* for query */
+
+	/* Queries. */
+	/* Maintain the list of active queries for pausing between IBs. */
+	int				num_occlusion_queries;
+	int				num_perfect_occlusion_queries;
+	struct list_head		active_queries;
+	unsigned			num_cs_dw_queries_suspend;
+
+	/* Render condition. */
+	struct r600_atom		render_cond_atom;
+	struct pipe_query		*render_cond;
+	unsigned			render_cond_mode;
+	bool				render_cond_invert;
+	bool				render_cond_force_off; /* for u_blitter */
+
+	/* Statistics gathering for the DCC enablement heuristic. It can't be
+	 * in r600_texture because r600_texture can be shared by multiple
+	 * contexts. This is for back buffers only. We shouldn't get too many
+	 * of those.
+	 *
+	 * X11 DRI3 rotates among a finite set of back buffers. They should
+	 * all fit in this array. If they don't, separate DCC might never be
+	 * enabled by DCC stat gathering.
+	 */
+	struct {
+		struct r600_texture		*tex;
+		/* Query queue: 0 = usually active, 1 = waiting, 2 = readback. */
+		struct pipe_query		*ps_stats[3];
+		/* If all slots are used and another slot is needed,
+		 * the least recently used slot is evicted based on this. */
+		int64_t				last_use_timestamp;
+		bool				query_active;
+	} dcc_stats[5];
+
+	/* Copy one resource to another using async DMA. */
+	void (*dma_copy)(struct pipe_context *ctx,
+			 struct pipe_resource *dst,
+			 unsigned dst_level,
+			 unsigned dst_x, unsigned dst_y, unsigned dst_z,
+			 struct pipe_resource *src,
+			 unsigned src_level,
+			 const struct pipe_box *src_box);
+
+	void (*dma_clear_buffer)(struct si_context *sctx, struct pipe_resource *dst,
+				 uint64_t offset, uint64_t size, unsigned value);
 };
 
 /* cik_sdma.c */
@@ -955,8 +1051,8 @@ si_context_add_resource_size(struct si_context *sctx, struct pipe_resource *r)
 
 	if (res) {
 		/* Add memory usage for need_gfx_cs_space */
-		sctx->b.vram += res->vram_usage;
-		sctx->b.gtt += res->gart_usage;
+		sctx->vram += res->vram_usage;
+		sctx->gtt += res->gart_usage;
 	}
 }
 
@@ -1067,21 +1163,21 @@ static inline void
 si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples,
 			   bool shaders_read_metadata)
 {
-	sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
-			 SI_CONTEXT_INV_VMEM_L1;
+	sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
+		       SI_CONTEXT_INV_VMEM_L1;
 
-	if (sctx->b.chip_class >= GFX9) {
+	if (sctx->chip_class >= GFX9) {
 		/* Single-sample color is coherent with shaders on GFX9, but
 		 * L2 metadata must be flushed if shaders read metadata.
 		 * (DCC, CMASK).
 		 */
 		if (num_samples >= 2)
-			sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+			sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2;
 		else if (shaders_read_metadata)
-			sctx->b.flags |= SI_CONTEXT_INV_L2_METADATA;
+			sctx->flags |= SI_CONTEXT_INV_L2_METADATA;
 	} else {
 		/* SI-CI-VI */
-		sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+		sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2;
 	}
 }
 
@@ -1089,21 +1185,21 @@ static inline void
 si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples,
 			   bool include_stencil, bool shaders_read_metadata)
 {
-	sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB |
-			 SI_CONTEXT_INV_VMEM_L1;
+	sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB |
+		       SI_CONTEXT_INV_VMEM_L1;
 
-	if (sctx->b.chip_class >= GFX9) {
+	if (sctx->chip_class >= GFX9) {
 		/* Single-sample depth (not stencil) is coherent with shaders
 		 * on GFX9, but L2 metadata must be flushed if shaders read
 		 * metadata.
 		 */
 		if (num_samples >= 2 || include_stencil)
-			sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+			sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2;
 		else if (shaders_read_metadata)
-			sctx->b.flags |= SI_CONTEXT_INV_L2_METADATA;
+			sctx->flags |= SI_CONTEXT_INV_L2_METADATA;
 	} else {
 		/* SI-CI-VI */
-		sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+		sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2;
 	}
 }
 
@@ -1192,7 +1288,7 @@ static inline void radeon_add_to_buffer_list(struct si_context *sctx,
 					     enum radeon_bo_priority priority)
 {
 	assert(usage);
-	sctx->b.ws->cs_add_buffer(
+	sctx->ws->cs_add_buffer(
 		cs, rbo->buf,
 		(enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED),
 		rbo->domains, priority);
@@ -1223,12 +1319,12 @@ radeon_add_to_gfx_buffer_list_check_mem(struct si_context *sctx,
 					bool check_mem)
 {
 	if (check_mem &&
-	    !radeon_cs_memory_below_limit(sctx->screen, sctx->b.gfx_cs,
-					  sctx->b.vram + rbo->vram_usage,
-					  sctx->b.gtt + rbo->gart_usage))
+	    !radeon_cs_memory_below_limit(sctx->screen, sctx->gfx_cs,
+					  sctx->vram + rbo->vram_usage,
+					  sctx->gtt + rbo->gart_usage))
 		si_flush_gfx_cs(sctx, PIPE_FLUSH_ASYNC, NULL);
 
-	radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs, rbo, usage, priority);
+	radeon_add_to_buffer_list(sctx, sctx->gfx_cs, rbo, usage, priority);
 }
 
 #define PRINT_ERR(fmt, args...) \
diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c
index a05cc25c05c..d68a38375f5 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.c
+++ b/src/gallium/drivers/radeonsi/si_pm4.c
@@ -123,10 +123,10 @@ void si_pm4_free_state(struct si_context *sctx,
 
 void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 
 	for (int i = 0; i < state->nbo; ++i) {
-		radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs, state->bo[i],
+		radeon_add_to_buffer_list(sctx, sctx->gfx_cs, state->bo[i],
 				      state->bo_usage[i], state->bo_priority[i]);
 	}
 
@@ -135,7 +135,7 @@ void si_pm4_emit(struct si_context *sctx, struct si_pm4_state *state)
 	} else {
 		struct r600_resource *ib = state->indirect_buffer;
 
-		radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs, ib,
+		radeon_add_to_buffer_list(sctx, sctx->gfx_cs, ib,
 					  RADEON_USAGE_READ,
                                           RADEON_PRIO_IB2);
 
@@ -155,11 +155,11 @@ void si_pm4_reset_emitted(struct si_context *sctx)
 void si_pm4_upload_indirect_buffer(struct si_context *sctx,
 				   struct si_pm4_state *state)
 {
-	struct pipe_screen *screen = sctx->b.b.screen;
+	struct pipe_screen *screen = sctx->b.screen;
 	unsigned aligned_ndw = align(state->ndw, 8);
 
 	/* only supported on CIK and later */
-	if (sctx->b.chip_class < CIK)
+	if (sctx->chip_class < CIK)
 		return;
 
 	assert(state->ndw);
@@ -183,6 +183,6 @@ void si_pm4_upload_indirect_buffer(struct si_context *sctx,
 			state->pm4[i] = 0xffff1000; /* type3 nop packet */
 	}
 
-	pipe_buffer_write(&sctx->b.b, &state->indirect_buffer->b.b,
+	pipe_buffer_write(&sctx->b, &state->indirect_buffer->b.b,
 			  0, aligned_ndw *4, state->pm4);
 }
diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c
index 67c323aa959..3de47c056b2 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -103,64 +103,64 @@ static bool si_query_sw_begin(struct si_context *sctx,
 	case PIPE_QUERY_GPU_FINISHED:
 		break;
 	case SI_QUERY_DRAW_CALLS:
-		query->begin_result = sctx->b.num_draw_calls;
+		query->begin_result = sctx->num_draw_calls;
 		break;
 	case SI_QUERY_DECOMPRESS_CALLS:
-		query->begin_result = sctx->b.num_decompress_calls;
+		query->begin_result = sctx->num_decompress_calls;
 		break;
 	case SI_QUERY_MRT_DRAW_CALLS:
-		query->begin_result = sctx->b.num_mrt_draw_calls;
+		query->begin_result = sctx->num_mrt_draw_calls;
 		break;
 	case SI_QUERY_PRIM_RESTART_CALLS:
-		query->begin_result = sctx->b.num_prim_restart_calls;
+		query->begin_result = sctx->num_prim_restart_calls;
 		break;
 	case SI_QUERY_SPILL_DRAW_CALLS:
-		query->begin_result = sctx->b.num_spill_draw_calls;
+		query->begin_result = sctx->num_spill_draw_calls;
 		break;
 	case SI_QUERY_COMPUTE_CALLS:
-		query->begin_result = sctx->b.num_compute_calls;
+		query->begin_result = sctx->num_compute_calls;
 		break;
 	case SI_QUERY_SPILL_COMPUTE_CALLS:
-		query->begin_result = sctx->b.num_spill_compute_calls;
+		query->begin_result = sctx->num_spill_compute_calls;
 		break;
 	case SI_QUERY_DMA_CALLS:
-		query->begin_result = sctx->b.num_dma_calls;
+		query->begin_result = sctx->num_dma_calls;
 		break;
 	case SI_QUERY_CP_DMA_CALLS:
-		query->begin_result = sctx->b.num_cp_dma_calls;
+		query->begin_result = sctx->num_cp_dma_calls;
 		break;
 	case SI_QUERY_NUM_VS_FLUSHES:
-		query->begin_result = sctx->b.num_vs_flushes;
+		query->begin_result = sctx->num_vs_flushes;
 		break;
 	case SI_QUERY_NUM_PS_FLUSHES:
-		query->begin_result = sctx->b.num_ps_flushes;
+		query->begin_result = sctx->num_ps_flushes;
 		break;
 	case SI_QUERY_NUM_CS_FLUSHES:
-		query->begin_result = sctx->b.num_cs_flushes;
+		query->begin_result = sctx->num_cs_flushes;
 		break;
 	case SI_QUERY_NUM_CB_CACHE_FLUSHES:
-		query->begin_result = sctx->b.num_cb_cache_flushes;
+		query->begin_result = sctx->num_cb_cache_flushes;
 		break;
 	case SI_QUERY_NUM_DB_CACHE_FLUSHES:
-		query->begin_result = sctx->b.num_db_cache_flushes;
+		query->begin_result = sctx->num_db_cache_flushes;
 		break;
 	case SI_QUERY_NUM_L2_INVALIDATES:
-		query->begin_result = sctx->b.num_L2_invalidates;
+		query->begin_result = sctx->num_L2_invalidates;
 		break;
 	case SI_QUERY_NUM_L2_WRITEBACKS:
-		query->begin_result = sctx->b.num_L2_writebacks;
+		query->begin_result = sctx->num_L2_writebacks;
 		break;
 	case SI_QUERY_NUM_RESIDENT_HANDLES:
-		query->begin_result = sctx->b.num_resident_handles;
+		query->begin_result = sctx->num_resident_handles;
 		break;
 	case SI_QUERY_TC_OFFLOADED_SLOTS:
-		query->begin_result = sctx->b.tc ? sctx->b.tc->num_offloaded_slots : 0;
+		query->begin_result = sctx->tc ? sctx->tc->num_offloaded_slots : 0;
 		break;
 	case SI_QUERY_TC_DIRECT_SLOTS:
-		query->begin_result = sctx->b.tc ? sctx->b.tc->num_direct_slots : 0;
+		query->begin_result = sctx->tc ? sctx->tc->num_direct_slots : 0;
 		break;
 	case SI_QUERY_TC_NUM_SYNCS:
-		query->begin_result = sctx->b.tc ? sctx->b.tc->num_syncs : 0;
+		query->begin_result = sctx->tc ? sctx->tc->num_syncs : 0;
 		break;
 	case SI_QUERY_REQUESTED_VRAM:
 	case SI_QUERY_REQUESTED_GTT:
@@ -184,23 +184,23 @@ static bool si_query_sw_begin(struct si_context *sctx,
 	case SI_QUERY_NUM_EVICTIONS:
 	case SI_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: {
 		enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
-		query->begin_result = sctx->b.ws->query_value(sctx->b.ws, ws_id);
+		query->begin_result = sctx->ws->query_value(sctx->ws, ws_id);
 		break;
 	}
 	case SI_QUERY_GFX_BO_LIST_SIZE:
 		ws_id = winsys_id_from_type(query->b.type);
-		query->begin_result = sctx->b.ws->query_value(sctx->b.ws, ws_id);
-		query->begin_time = sctx->b.ws->query_value(sctx->b.ws,
+		query->begin_result = sctx->ws->query_value(sctx->ws, ws_id);
+		query->begin_time = sctx->ws->query_value(sctx->ws,
 							  RADEON_NUM_GFX_IBS);
 		break;
 	case SI_QUERY_CS_THREAD_BUSY:
 		ws_id = winsys_id_from_type(query->b.type);
-		query->begin_result = sctx->b.ws->query_value(sctx->b.ws, ws_id);
+		query->begin_result = sctx->ws->query_value(sctx->ws, ws_id);
 		query->begin_time = os_time_get_nano();
 		break;
 	case SI_QUERY_GALLIUM_THREAD_BUSY:
 		query->begin_result =
-			sctx->b.tc ? util_queue_get_thread_time_nano(&sctx->b.tc->queue, 0) : 0;
+			sctx->tc ? util_queue_get_thread_time_nano(&sctx->tc->queue, 0) : 0;
 		query->begin_time = os_time_get_nano();
 		break;
 	case SI_QUERY_GPU_LOAD:
@@ -260,67 +260,67 @@ static bool si_query_sw_end(struct si_context *sctx,
 	case PIPE_QUERY_TIMESTAMP_DISJOINT:
 		break;
 	case PIPE_QUERY_GPU_FINISHED:
-		sctx->b.b.flush(&sctx->b.b, &query->fence, PIPE_FLUSH_DEFERRED);
+		sctx->b.flush(&sctx->b, &query->fence, PIPE_FLUSH_DEFERRED);
 		break;
 	case SI_QUERY_DRAW_CALLS:
-		query->end_result = sctx->b.num_draw_calls;
+		query->end_result = sctx->num_draw_calls;
 		break;
 	case SI_QUERY_DECOMPRESS_CALLS:
-		query->end_result = sctx->b.num_decompress_calls;
+		query->end_result = sctx->num_decompress_calls;
 		break;
 	case SI_QUERY_MRT_DRAW_CALLS:
-		query->end_result = sctx->b.num_mrt_draw_calls;
+		query->end_result = sctx->num_mrt_draw_calls;
 		break;
 	case SI_QUERY_PRIM_RESTART_CALLS:
-		query->end_result = sctx->b.num_prim_restart_calls;
+		query->end_result = sctx->num_prim_restart_calls;
 		break;
 	case SI_QUERY_SPILL_DRAW_CALLS:
-		query->end_result = sctx->b.num_spill_draw_calls;
+		query->end_result = sctx->num_spill_draw_calls;
 		break;
 	case SI_QUERY_COMPUTE_CALLS:
-		query->end_result = sctx->b.num_compute_calls;
+		query->end_result = sctx->num_compute_calls;
 		break;
 	case SI_QUERY_SPILL_COMPUTE_CALLS:
-		query->end_result = sctx->b.num_spill_compute_calls;
+		query->end_result = sctx->num_spill_compute_calls;
 		break;
 	case SI_QUERY_DMA_CALLS:
-		query->end_result = sctx->b.num_dma_calls;
+		query->end_result = sctx->num_dma_calls;
 		break;
 	case SI_QUERY_CP_DMA_CALLS:
-		query->end_result = sctx->b.num_cp_dma_calls;
+		query->end_result = sctx->num_cp_dma_calls;
 		break;
 	case SI_QUERY_NUM_VS_FLUSHES:
-		query->end_result = sctx->b.num_vs_flushes;
+		query->end_result = sctx->num_vs_flushes;
 		break;
 	case SI_QUERY_NUM_PS_FLUSHES:
-		query->end_result = sctx->b.num_ps_flushes;
+		query->end_result = sctx->num_ps_flushes;
 		break;
 	case SI_QUERY_NUM_CS_FLUSHES:
-		query->end_result = sctx->b.num_cs_flushes;
+		query->end_result = sctx->num_cs_flushes;
 		break;
 	case SI_QUERY_NUM_CB_CACHE_FLUSHES:
-		query->end_result = sctx->b.num_cb_cache_flushes;
+		query->end_result = sctx->num_cb_cache_flushes;
 		break;
 	case SI_QUERY_NUM_DB_CACHE_FLUSHES:
-		query->end_result = sctx->b.num_db_cache_flushes;
+		query->end_result = sctx->num_db_cache_flushes;
 		break;
 	case SI_QUERY_NUM_L2_INVALIDATES:
-		query->end_result = sctx->b.num_L2_invalidates;
+		query->end_result = sctx->num_L2_invalidates;
 		break;
 	case SI_QUERY_NUM_L2_WRITEBACKS:
-		query->end_result = sctx->b.num_L2_writebacks;
+		query->end_result = sctx->num_L2_writebacks;
 		break;
 	case SI_QUERY_NUM_RESIDENT_HANDLES:
-		query->end_result = sctx->b.num_resident_handles;
+		query->end_result = sctx->num_resident_handles;
 		break;
 	case SI_QUERY_TC_OFFLOADED_SLOTS:
-		query->end_result = sctx->b.tc ? sctx->b.tc->num_offloaded_slots : 0;
+		query->end_result = sctx->tc ? sctx->tc->num_offloaded_slots : 0;
 		break;
 	case SI_QUERY_TC_DIRECT_SLOTS:
-		query->end_result = sctx->b.tc ? sctx->b.tc->num_direct_slots : 0;
+		query->end_result = sctx->tc ? sctx->tc->num_direct_slots : 0;
 		break;
 	case SI_QUERY_TC_NUM_SYNCS:
-		query->end_result = sctx->b.tc ? sctx->b.tc->num_syncs : 0;
+		query->end_result = sctx->tc ? sctx->tc->num_syncs : 0;
 		break;
 	case SI_QUERY_REQUESTED_VRAM:
 	case SI_QUERY_REQUESTED_GTT:
@@ -341,23 +341,23 @@ static bool si_query_sw_end(struct si_context *sctx,
 	case SI_QUERY_NUM_EVICTIONS:
 	case SI_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: {
 		enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
-		query->end_result = sctx->b.ws->query_value(sctx->b.ws, ws_id);
+		query->end_result = sctx->ws->query_value(sctx->ws, ws_id);
 		break;
 	}
 	case SI_QUERY_GFX_BO_LIST_SIZE:
 		ws_id = winsys_id_from_type(query->b.type);
-		query->end_result = sctx->b.ws->query_value(sctx->b.ws, ws_id);
-		query->end_time = sctx->b.ws->query_value(sctx->b.ws,
+		query->end_result = sctx->ws->query_value(sctx->ws, ws_id);
+		query->end_time = sctx->ws->query_value(sctx->ws,
 							RADEON_NUM_GFX_IBS);
 		break;
 	case SI_QUERY_CS_THREAD_BUSY:
 		ws_id = winsys_id_from_type(query->b.type);
-		query->end_result = sctx->b.ws->query_value(sctx->b.ws, ws_id);
+		query->end_result = sctx->ws->query_value(sctx->ws, ws_id);
 		query->end_time = os_time_get_nano();
 		break;
 	case SI_QUERY_GALLIUM_THREAD_BUSY:
 		query->end_result =
-			sctx->b.tc ? util_queue_get_thread_time_nano(&sctx->b.tc->queue, 0) : 0;
+			sctx->tc ? util_queue_get_thread_time_nano(&sctx->tc->queue, 0) : 0;
 		query->end_time = os_time_get_nano();
 		break;
 	case SI_QUERY_GPU_LOAD:
@@ -393,7 +393,7 @@ static bool si_query_sw_end(struct si_context *sctx,
 		query->end_result = p_atomic_read(&sctx->screen->num_shaders_created);
 		break;
 	case SI_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
-		query->end_result = sctx->b.last_tex_ps_draw_ratio;
+		query->end_result = sctx->last_tex_ps_draw_ratio;
 		break;
 	case SI_QUERY_NUM_SHADER_CACHE_HITS:
 		query->end_result =
@@ -427,8 +427,8 @@ static bool si_query_sw_get_result(struct si_context *sctx,
 		result->timestamp_disjoint.disjoint = false;
 		return true;
 	case PIPE_QUERY_GPU_FINISHED: {
-		struct pipe_screen *screen = sctx->b.b.screen;
-		struct pipe_context *ctx = rquery->b.flushed ? NULL : &sctx->b.b;
+		struct pipe_screen *screen = sctx->b.screen;
+		struct pipe_context *ctx = rquery->b.flushed ? NULL : &sctx->b;
 
 		result->b = screen->fence_finish(screen, ctx, query->fence,
 						 wait ? PIPE_TIMEOUT_INFINITE : 0);
@@ -697,21 +697,21 @@ static void si_update_occlusion_query_state(struct si_context *sctx,
 	if (type == PIPE_QUERY_OCCLUSION_COUNTER ||
 	    type == PIPE_QUERY_OCCLUSION_PREDICATE ||
 	    type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
-		bool old_enable = sctx->b.num_occlusion_queries != 0;
+		bool old_enable = sctx->num_occlusion_queries != 0;
 		bool old_perfect_enable =
-			sctx->b.num_perfect_occlusion_queries != 0;
+			sctx->num_perfect_occlusion_queries != 0;
 		bool enable, perfect_enable;
 
-		sctx->b.num_occlusion_queries += diff;
-		assert(sctx->b.num_occlusion_queries >= 0);
+		sctx->num_occlusion_queries += diff;
+		assert(sctx->num_occlusion_queries >= 0);
 
 		if (type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
-			sctx->b.num_perfect_occlusion_queries += diff;
-			assert(sctx->b.num_perfect_occlusion_queries >= 0);
+			sctx->num_perfect_occlusion_queries += diff;
+			assert(sctx->num_perfect_occlusion_queries >= 0);
 		}
 
-		enable = sctx->b.num_occlusion_queries != 0;
-		perfect_enable = sctx->b.num_perfect_occlusion_queries != 0;
+		enable = sctx->num_occlusion_queries != 0;
+		perfect_enable = sctx->num_perfect_occlusion_queries != 0;
 
 		if (enable != old_enable || perfect_enable != old_perfect_enable) {
 			si_set_occlusion_query_state(sctx, old_perfect_enable);
@@ -744,7 +744,7 @@ static void si_query_hw_do_emit_start(struct si_context *sctx,
 					struct r600_resource *buffer,
 					uint64_t va)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 
 	switch (query->b.type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER:
@@ -787,7 +787,7 @@ static void si_query_hw_do_emit_start(struct si_context *sctx,
 	default:
 		assert(0);
 	}
-	radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs, query->buffer.buf, RADEON_USAGE_WRITE,
+	radeon_add_to_buffer_list(sctx, sctx->gfx_cs, query->buffer.buf, RADEON_USAGE_WRITE,
 				  RADEON_PRIO_QUERY);
 }
 
@@ -820,7 +820,7 @@ static void si_query_hw_emit_start(struct si_context *sctx,
 
 	query->ops->emit_start(sctx, query, query->buffer.buf, va);
 
-	sctx->b.num_cs_dw_queries_suspend += query->num_cs_dw_end;
+	sctx->num_cs_dw_queries_suspend += query->num_cs_dw_end;
 }
 
 static void si_query_hw_do_emit_stop(struct si_context *sctx,
@@ -828,7 +828,7 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx,
 				       struct r600_resource *buffer,
 				       uint64_t va)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	uint64_t fence_va = 0;
 
 	switch (query->b.type) {
@@ -879,7 +879,7 @@ static void si_query_hw_do_emit_stop(struct si_context *sctx,
 	default:
 		assert(0);
 	}
-	radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs, query->buffer.buf, RADEON_USAGE_WRITE,
+	radeon_add_to_buffer_list(sctx, sctx->gfx_cs, query->buffer.buf, RADEON_USAGE_WRITE,
 				  RADEON_PRIO_QUERY);
 
 	if (fence_va)
@@ -909,7 +909,7 @@ static void si_query_hw_emit_stop(struct si_context *sctx,
 	query->buffer.results_end += query->result_size;
 
 	if (!(query->flags & SI_QUERY_HW_FLAG_NO_START))
-		sctx->b.num_cs_dw_queries_suspend -= query->num_cs_dw_end;
+		sctx->num_cs_dw_queries_suspend -= query->num_cs_dw_end;
 
 	si_update_occlusion_query_state(sctx, query->b.type, -1);
 	si_update_prims_generated_query_state(sctx, query->b.type, -1);
@@ -919,9 +919,9 @@ static void emit_set_predicate(struct si_context *ctx,
 			       struct r600_resource *buf, uint64_t va,
 			       uint32_t op)
 {
-	struct radeon_winsys_cs *cs = ctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = ctx->gfx_cs;
 
-	if (ctx->b.chip_class >= GFX9) {
+	if (ctx->chip_class >= GFX9) {
 		radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
 		radeon_emit(cs, op);
 		radeon_emit(cs, va);
@@ -931,14 +931,14 @@ static void emit_set_predicate(struct si_context *ctx,
 		radeon_emit(cs, va);
 		radeon_emit(cs, op | ((va >> 32) & 0xFF));
 	}
-	radeon_add_to_buffer_list(ctx, ctx->b.gfx_cs, buf, RADEON_USAGE_READ,
+	radeon_add_to_buffer_list(ctx, ctx->gfx_cs, buf, RADEON_USAGE_READ,
 				  RADEON_PRIO_QUERY);
 }
 
 static void si_emit_query_predication(struct si_context *ctx,
 				      struct r600_atom *atom)
 {
-	struct si_query_hw *query = (struct si_query_hw *)ctx->b.render_cond;
+	struct si_query_hw *query = (struct si_query_hw *)ctx->render_cond;
 	struct si_query_buffer *qbuf;
 	uint32_t op;
 	bool flag_wait, invert;
@@ -946,9 +946,9 @@ static void si_emit_query_predication(struct si_context *ctx,
 	if (!query)
 		return;
 
-	invert = ctx->b.render_cond_invert;
-	flag_wait = ctx->b.render_cond_mode == PIPE_RENDER_COND_WAIT ||
-		    ctx->b.render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;
+	invert = ctx->render_cond_invert;
+	flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT ||
+		    ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;
 
 	if (query->workaround_buf) {
 		op = PRED_OP(PREDICATION_OP_BOOL64);
@@ -1064,7 +1064,7 @@ void si_query_hw_reset_buffers(struct si_context *sctx,
 
 	/* Obtain a new buffer if the current one can't be mapped without a stall. */
 	if (si_rings_is_buffer_referenced(sctx, query->buffer.buf->buf, RADEON_USAGE_READWRITE) ||
-	    !sctx->b.ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
+	    !sctx->ws->buffer_wait(query->buffer.buf->buf, 0, RADEON_USAGE_READWRITE)) {
 		r600_resource_reference(&query->buffer.buf, NULL);
 		query->buffer.buf = si_new_query_buffer(sctx->screen, query);
 	} else {
@@ -1092,7 +1092,7 @@ bool si_query_hw_begin(struct si_context *sctx,
 	if (!query->buffer.buf)
 		return false;
 
-	LIST_ADDTAIL(&query->list, &sctx->b.active_queries);
+	LIST_ADDTAIL(&query->list, &sctx->active_queries);
 	return true;
 }
 
@@ -1368,7 +1368,7 @@ bool si_query_hw_get_result(struct si_context *sctx,
 		void *map;
 
 		if (rquery->b.flushed)
-			map = sctx->b.ws->buffer_map(qbuf->buf->buf, NULL, usage);
+			map = sctx->ws->buffer_map(qbuf->buf->buf, NULL, usage);
 		else
 			map = si_buffer_map_sync_with_rings(sctx, qbuf->buf, usage);
 
@@ -1603,18 +1603,18 @@ static void si_create_query_result_shader(struct si_context *sctx)
 	state.ir_type = PIPE_SHADER_IR_TGSI;
 	state.prog = tokens;
 
-	sctx->b.query_result_shader = sctx->b.b.create_compute_state(&sctx->b.b, &state);
+	sctx->query_result_shader = sctx->b.create_compute_state(&sctx->b, &state);
 }
 
 static void si_restore_qbo_state(struct si_context *sctx,
 				 struct si_qbo_state *st)
 {
-	sctx->b.b.bind_compute_state(&sctx->b.b, st->saved_compute);
+	sctx->b.bind_compute_state(&sctx->b, st->saved_compute);
 
-	sctx->b.b.set_constant_buffer(&sctx->b.b, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
+	sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
 	pipe_resource_reference(&st->saved_const0.buffer, NULL);
 
-	sctx->b.b.set_shader_buffers(&sctx->b.b, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
+	sctx->b.set_shader_buffers(&sctx->b, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
 	for (unsigned i = 0; i < 3; ++i)
 		pipe_resource_reference(&st->saved_ssbo[i].buffer, NULL);
 }
@@ -1647,14 +1647,14 @@ static void si_query_hw_get_result_resource(struct si_context *sctx,
 		uint32_t pair_count;
 	} consts;
 
-	if (!sctx->b.query_result_shader) {
+	if (!sctx->query_result_shader) {
 		si_create_query_result_shader(sctx);
-		if (!sctx->b.query_result_shader)
+		if (!sctx->query_result_shader)
 			return;
 	}
 
 	if (query->buffer.previous) {
-		u_suballocator_alloc(sctx->b.allocator_zeroed_memory, 16, 16,
+		u_suballocator_alloc(sctx->allocator_zeroed_memory, 16, 16,
 				     &tmp_buffer_offset, &tmp_buffer);
 		if (!tmp_buffer)
 			return;
@@ -1678,7 +1678,7 @@ static void si_query_hw_get_result_resource(struct si_context *sctx,
 
 	ssbo[2] = ssbo[1];
 
-	sctx->b.b.bind_compute_state(&sctx->b.b, sctx->b.query_result_shader);
+	sctx->b.bind_compute_state(&sctx->b, sctx->query_result_shader);
 
 	grid.block[0] = 1;
 	grid.block[1] = 1;
@@ -1712,7 +1712,7 @@ static void si_query_hw_get_result_resource(struct si_context *sctx,
 		break;
 	}
 
-	sctx->b.flags |= sctx->screen->barrier_flags.cp_to_L2;
+	sctx->flags |= sctx->screen->barrier_flags.cp_to_L2;
 
 	for (qbuf = &query->buffer; qbuf; qbuf = qbuf_prev) {
 		if (query->b.type != PIPE_QUERY_TIMESTAMP) {
@@ -1731,7 +1731,7 @@ static void si_query_hw_get_result_resource(struct si_context *sctx,
 			params.start_offset += qbuf->results_end - query->result_size;
 		}
 
-		sctx->b.b.set_constant_buffer(&sctx->b.b, PIPE_SHADER_COMPUTE, 0, &constant_buffer);
+		sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, &constant_buffer);
 
 		ssbo[0].buffer = &qbuf->buf->b.b;
 		ssbo[0].buffer_offset = params.start_offset;
@@ -1745,7 +1745,7 @@ static void si_query_hw_get_result_resource(struct si_context *sctx,
 			((struct r600_resource *)resource)->TC_L2_dirty = true;
 		}
 
-		sctx->b.b.set_shader_buffers(&sctx->b.b, PIPE_SHADER_COMPUTE, 0, 3, ssbo);
+		sctx->b.set_shader_buffers(&sctx->b, PIPE_SHADER_COMPUTE, 0, 3, ssbo);
 
 		if (wait && qbuf == &query->buffer) {
 			uint64_t va;
@@ -1760,8 +1760,8 @@ static void si_query_hw_get_result_resource(struct si_context *sctx,
 			si_gfx_wait_fence(sctx, va, 0x80000000, 0x80000000);
 		}
 
-		sctx->b.b.launch_grid(&sctx->b.b, &grid);
-		sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
+		sctx->b.launch_grid(&sctx->b, &grid);
+		sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
 	}
 
 	si_restore_qbo_state(sctx, &saved_state);
@@ -1775,7 +1775,7 @@ static void si_render_condition(struct pipe_context *ctx,
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_query_hw *rquery = (struct si_query_hw *)query;
-	struct r600_atom *atom = &sctx->b.render_cond_atom;
+	struct r600_atom *atom = &sctx->render_cond_atom;
 
 	if (query) {
 		bool needs_workaround = false;
@@ -1784,8 +1784,8 @@ static void si_render_condition(struct pipe_context *ctx,
 		 * SET_PREDICATION packets to give the wrong answer for
 		 * non-inverted stream overflow predication.
 		 */
-		if (((sctx->b.chip_class == VI && sctx->screen->info.pfp_fw_feature < 49) ||
-		     (sctx->b.chip_class == GFX9 && sctx->screen->info.pfp_fw_feature < 38)) &&
+		if (((sctx->chip_class == VI && sctx->screen->info.pfp_fw_feature < 49) ||
+		     (sctx->chip_class == GFX9 && sctx->screen->info.pfp_fw_feature < 38)) &&
 		    !condition &&
 		    (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
 		     (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE &&
@@ -1795,18 +1795,18 @@ static void si_render_condition(struct pipe_context *ctx,
 		}
 
 		if (needs_workaround && !rquery->workaround_buf) {
-			bool old_force_off = sctx->b.render_cond_force_off;
-			sctx->b.render_cond_force_off = true;
+			bool old_force_off = sctx->render_cond_force_off;
+			sctx->render_cond_force_off = true;
 
 			u_suballocator_alloc(
-				sctx->b.allocator_zeroed_memory, 8, 8,
+				sctx->allocator_zeroed_memory, 8, 8,
 				&rquery->workaround_offset,
 				(struct pipe_resource **)&rquery->workaround_buf);
 
 			/* Reset to NULL to avoid a redundant SET_PREDICATION
 			 * from launching the compute grid.
 			 */
-			sctx->b.render_cond = NULL;
+			sctx->render_cond = NULL;
 
 			ctx->get_query_result_resource(
 				ctx, query, true, PIPE_QUERY_TYPE_U64, 0,
@@ -1814,16 +1814,16 @@ static void si_render_condition(struct pipe_context *ctx,
 
 			/* Settings this in the render cond atom is too late,
 			 * so set it here. */
-			sctx->b.flags |= sctx->screen->barrier_flags.L2_to_cp |
+			sctx->flags |= sctx->screen->barrier_flags.L2_to_cp |
 				       SI_CONTEXT_FLUSH_FOR_RENDER_COND;
 
-			sctx->b.render_cond_force_off = old_force_off;
+			sctx->render_cond_force_off = old_force_off;
 		}
 	}
 
-	sctx->b.render_cond = query;
-	sctx->b.render_cond_invert = condition;
-	sctx->b.render_cond_mode = mode;
+	sctx->render_cond = query;
+	sctx->render_cond_invert = condition;
+	sctx->render_cond_mode = mode;
 
 	si_set_atom_dirty(sctx, atom, query != NULL);
 }
@@ -1832,22 +1832,22 @@ void si_suspend_queries(struct si_context *sctx)
 {
 	struct si_query_hw *query;
 
-	LIST_FOR_EACH_ENTRY(query, &sctx->b.active_queries, list) {
+	LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, list) {
 		si_query_hw_emit_stop(sctx, query);
 	}
-	assert(sctx->b.num_cs_dw_queries_suspend == 0);
+	assert(sctx->num_cs_dw_queries_suspend == 0);
 }
 
 void si_resume_queries(struct si_context *sctx)
 {
 	struct si_query_hw *query;
 
-	assert(sctx->b.num_cs_dw_queries_suspend == 0);
+	assert(sctx->num_cs_dw_queries_suspend == 0);
 
 	/* Check CS space here. Resuming must not be interrupted by flushes. */
 	si_need_gfx_cs_space(sctx);
 
-	LIST_FOR_EACH_ENTRY(query, &sctx->b.active_queries, list) {
+	LIST_FOR_EACH_ENTRY(query, &sctx->active_queries, list) {
 		si_query_hw_emit_start(sctx, query);
 	}
 }
@@ -2045,19 +2045,19 @@ static int si_get_driver_query_group_info(struct pipe_screen *screen,
 
 void si_init_query_functions(struct si_context *sctx)
 {
-	sctx->b.b.create_query = si_create_query;
-	sctx->b.b.create_batch_query = si_create_batch_query;
-	sctx->b.b.destroy_query = si_destroy_query;
-	sctx->b.b.begin_query = si_begin_query;
-	sctx->b.b.end_query = si_end_query;
-	sctx->b.b.get_query_result = si_get_query_result;
-	sctx->b.b.get_query_result_resource = si_get_query_result_resource;
-	sctx->b.render_cond_atom.emit = si_emit_query_predication;
-
-	if (((struct si_screen*)sctx->b.b.screen)->info.num_render_backends > 0)
-	    sctx->b.b.render_condition = si_render_condition;
-
-	LIST_INITHEAD(&sctx->b.active_queries);
+	sctx->b.create_query = si_create_query;
+	sctx->b.create_batch_query = si_create_batch_query;
+	sctx->b.destroy_query = si_destroy_query;
+	sctx->b.begin_query = si_begin_query;
+	sctx->b.end_query = si_end_query;
+	sctx->b.get_query_result = si_get_query_result;
+	sctx->b.get_query_result_resource = si_get_query_result_resource;
+	sctx->render_cond_atom.emit = si_emit_query_predication;
+
+	if (((struct si_screen*)sctx->b.screen)->info.num_render_backends > 0)
+	    sctx->b.render_condition = si_render_condition;
+
+	LIST_INITHEAD(&sctx->active_queries);
 }
 
 void si_init_screen_query_functions(struct si_screen *sscreen)
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 60eec54eb3c..a33414e0e8d 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -85,7 +85,7 @@ static unsigned si_pack_float_12p4(float x)
  */
 static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	struct si_state_blend *blend = sctx->queued.named.blend;
 	/* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers,
 	 * but you never know. */
@@ -119,13 +119,13 @@ static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *a
 		radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
 	}
 
-	if (sctx->b.chip_class >= VI) {
+	if (sctx->chip_class >= VI) {
 		/* DCC MSAA workaround for blending.
 		 * Alternatively, we can set CB_COLORi_DCC_CONTROL.OVERWRITE_-
 		 * COMBINER_DISABLE, but that would be more complicated.
 		 */
-		bool oc_disable = (sctx->b.chip_class == VI ||
-				   sctx->b.chip_class == GFX9) &&
+		bool oc_disable = (sctx->chip_class == VI ||
+				   sctx->chip_class == GFX9) &&
 				  blend &&
 				  blend->blend_enable_4bit & cb_target_mask &&
 				  sctx->framebuffer.nr_samples >= 2;
@@ -723,7 +723,7 @@ static void si_set_blend_color(struct pipe_context *ctx,
 
 static void si_emit_blend_color(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 
 	radeon_set_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4);
 	radeon_emit_array(cs, (uint32_t*)sctx->blend_color.state.color, 4);
@@ -757,7 +757,7 @@ static void si_set_clip_state(struct pipe_context *ctx,
 
 static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 
 	radeon_set_context_reg_seq(cs, R_0285BC_PA_CL_UCP_0_X, 6*4);
 	radeon_emit_array(cs, (uint32_t*)sctx->clip_state.state.ucp, 6*4);
@@ -765,7 +765,7 @@ static void si_emit_clip_state(struct si_context *sctx, struct r600_atom *atom)
 
 static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	struct si_shader *vs = si_get_vs_state(sctx);
 	struct si_shader_selector *vs_sel = vs->selector;
 	struct tgsi_shader_info *info = &vs_sel->info;
@@ -1079,7 +1079,7 @@ static void si_delete_rs_state(struct pipe_context *ctx, void *state)
  */
 static void si_emit_stencil_ref(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	struct pipe_stencil_ref *ref = &sctx->stencil_ref.state;
 	struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part;
 
@@ -1321,7 +1321,7 @@ static void *si_create_db_flush_dsa(struct si_context *sctx)
 {
 	struct pipe_depth_stencil_alpha_state dsa = {};
 
-	return sctx->b.b.create_depth_stencil_alpha_state(&sctx->b.b, &dsa);
+	return sctx->b.create_depth_stencil_alpha_state(&sctx->b, &dsa);
 }
 
 /* DB RENDER STATE */
@@ -1332,11 +1332,11 @@ static void si_set_active_query_state(struct pipe_context *ctx, boolean enable)
 
 	/* Pipeline stat & streamout queries. */
 	if (enable) {
-		sctx->b.flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;
-		sctx->b.flags |= SI_CONTEXT_START_PIPELINE_STATS;
+		sctx->flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS;
+		sctx->flags |= SI_CONTEXT_START_PIPELINE_STATS;
 	} else {
-		sctx->b.flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
-		sctx->b.flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
+		sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS;
+		sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS;
 	}
 
 	/* Occlusion queries. */
@@ -1351,7 +1351,7 @@ void si_set_occlusion_query_state(struct si_context *sctx,
 {
 	si_mark_atom_dirty(sctx, &sctx->db_render_state);
 
-	bool perfect_enable = sctx->b.num_perfect_occlusion_queries != 0;
+	bool perfect_enable = sctx->num_perfect_occlusion_queries != 0;
 
 	if (perfect_enable != old_perfect_enable)
 		si_mark_atom_dirty(sctx, &sctx->msaa_config);
@@ -1367,7 +1367,7 @@ void si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st)
 
 static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *state)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 	unsigned db_shader_control;
 
@@ -1392,11 +1392,11 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
 	}
 
 	/* DB_COUNT_CONTROL (occlusion queries) */
-	if (sctx->b.num_occlusion_queries > 0 &&
+	if (sctx->num_occlusion_queries > 0 &&
 	    !sctx->occlusion_queries_disabled) {
-		bool perfect = sctx->b.num_perfect_occlusion_queries > 0;
+		bool perfect = sctx->num_perfect_occlusion_queries > 0;
 
-		if (sctx->b.chip_class >= CIK) {
+		if (sctx->chip_class >= CIK) {
 			radeon_emit(cs,
 				    S_028004_PERFECT_ZPASS_COUNTS(perfect) |
 				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
@@ -1410,7 +1410,7 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
 		}
 	} else {
 		/* Disable occlusion queries. */
-		if (sctx->b.chip_class >= CIK) {
+		if (sctx->chip_class >= CIK) {
 			radeon_emit(cs, 0);
 		} else {
 			radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1));
@@ -1426,7 +1426,7 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
 	db_shader_control = sctx->ps_db_shader_control;
 
 	/* Bug workaround for smoothing (overrasterization) on SI. */
-	if (sctx->b.chip_class == SI && sctx->smoothing_enabled) {
+	if (sctx->chip_class == SI && sctx->smoothing_enabled) {
 		db_shader_control &= C_02880C_Z_ORDER;
 		db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
 	}
@@ -2437,14 +2437,14 @@ static void si_initialize_color_surface(struct si_context *sctx,
 			color_info |= S_028C70_COMPRESSION(1);
 			unsigned fmask_bankh = util_logbase2(rtex->fmask.bank_height);
 
-			if (sctx->b.chip_class == SI) {
+			if (sctx->chip_class == SI) {
 				/* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */
 				color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
 			}
 		}
 	}
 
-	if (sctx->b.chip_class >= VI) {
+	if (sctx->chip_class >= VI) {
 		unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
 		unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
 
@@ -2468,7 +2468,7 @@ static void si_initialize_color_surface(struct si_context *sctx,
 	}
 
 	/* This must be set for fast clear to work without FMASK. */
-	if (!rtex->fmask.size && sctx->b.chip_class == SI) {
+	if (!rtex->fmask.size && sctx->chip_class == SI) {
 		unsigned bankh = util_logbase2(rtex->surface.u.legacy.bankh);
 		color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
 	}
@@ -2476,7 +2476,7 @@ static void si_initialize_color_surface(struct si_context *sctx,
 	unsigned color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
 			      S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);
 
-	if (sctx->b.chip_class >= GFX9) {
+	if (sctx->chip_class >= GFX9) {
 		unsigned mip0_depth = util_max_layer(&rtex->resource.b.b, 0);
 
 		color_view |= S_028C6C_MIP_LEVEL(surf->base.u.tex.level);
@@ -2518,7 +2518,7 @@ static void si_init_depth_surface(struct si_context *sctx,
 	surf->db_htile_data_base = 0;
 	surf->db_htile_surface = 0;
 
-	if (sctx->b.chip_class >= GFX9) {
+	if (sctx->chip_class >= GFX9) {
 		assert(rtex->surface.u.gfx9.surf_offset == 0);
 		surf->db_depth_base = rtex->resource.gpu_address >> 8;
 		surf->db_stencil_base = (rtex->resource.gpu_address +
@@ -2583,7 +2583,7 @@ static void si_init_depth_surface(struct si_context *sctx,
 		s_info = S_028044_FORMAT(stencil_format);
 		surf->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile);
 
-		if (sctx->b.chip_class >= CIK) {
+		if (sctx->chip_class >= CIK) {
 			struct radeon_info *info = &sctx->screen->info;
 			unsigned index = rtex->surface.u.legacy.tiling_index[level];
 			unsigned stencil_index = rtex->surface.u.legacy.stencil_tiling_index[level];
@@ -2782,7 +2782,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 		si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
 					   sctx->framebuffer.CB_has_shader_readable_metadata);
 
-	sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
+	sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
 
 	/* u_blitter doesn't invoke depth decompression when it does multiple
 	 * blits in a row, but the only case when it matters for DB is when
@@ -2793,14 +2793,14 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 	if (sctx->generate_mipmap_for_depth) {
 		si_make_DB_shader_coherent(sctx, 1, false,
 					   sctx->framebuffer.DB_has_shader_readable_metadata);
-	} else if (sctx->b.chip_class == GFX9) {
+	} else if (sctx->chip_class == GFX9) {
 		/* It appears that DB metadata "leaks" in a sequence of:
 		 *  - depth clear
 		 *  - DCC decompress for shader image writes (with DB disabled)
 		 *  - render with DEPTH_BEFORE_SHADER=1
 		 * Flushing DB metadata works around the problem.
 		 */
-		sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META;
+		sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META;
 	}
 
 	/* Take the maximum of the old and new count. If the new count is lower,
@@ -2954,7 +2954,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 
 static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
 	unsigned i, nr_cbufs = state->nr_cbufs;
 	struct r600_texture *tex = NULL;
@@ -2977,20 +2977,20 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 		}
 
 		tex = (struct r600_texture *)cb->base.texture;
-		radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs,
+		radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 				      &tex->resource, RADEON_USAGE_READWRITE,
 				      tex->resource.b.b.nr_samples > 1 ?
 					      RADEON_PRIO_COLOR_BUFFER_MSAA :
 					      RADEON_PRIO_COLOR_BUFFER);
 
 		if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
-			radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs,
+			radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 				tex->cmask_buffer, RADEON_USAGE_READWRITE,
 				RADEON_PRIO_CMASK);
 		}
 
 		if (tex->dcc_separate_buffer)
-			radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs,
+			radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 						  tex->dcc_separate_buffer,
 						  RADEON_USAGE_READWRITE,
 						  RADEON_PRIO_DCC);
@@ -3022,7 +3022,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 			cb_dcc_base |= tex->surface.tile_swizzle;
 		}
 
-		if (sctx->b.chip_class >= GFX9) {
+		if (sctx->chip_class >= GFX9) {
 			struct gfx9_surf_meta_flags meta;
 
 			if (tex->dcc_offset)
@@ -3086,20 +3086,20 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 			cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
 
 			if (tex->fmask.size) {
-				if (sctx->b.chip_class >= CIK)
+				if (sctx->chip_class >= CIK)
 					cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->fmask.pitch_in_pixels / 8 - 1);
 				cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->fmask.tile_mode_index);
 				cb_color_fmask_slice = S_028C88_TILE_MAX(tex->fmask.slice_tile_max);
 			} else {
 				/* This must be set for fast clear to work without FMASK. */
-				if (sctx->b.chip_class >= CIK)
+				if (sctx->chip_class >= CIK)
 					cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
 				cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
 				cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
 			}
 
 			radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
-						   sctx->b.chip_class >= VI ? 14 : 13);
+						   sctx->chip_class >= VI ? 14 : 13);
 			radeon_emit(cs, cb_color_base);		/* CB_COLOR0_BASE */
 			radeon_emit(cs, cb_color_pitch);	/* CB_COLOR0_PITCH */
 			radeon_emit(cs, cb_color_slice);	/* CB_COLOR0_SLICE */
@@ -3114,7 +3114,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 			radeon_emit(cs, tex->color_clear_value[0]);	/* CB_COLOR0_CLEAR_WORD0 */
 			radeon_emit(cs, tex->color_clear_value[1]);	/* CB_COLOR0_CLEAR_WORD1 */
 
-			if (sctx->b.chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */
+			if (sctx->chip_class >= VI) /* R_028C94_CB_COLOR0_DCC_BASE */
 				radeon_emit(cs, cb_dcc_base);
 		}
 	}
@@ -3127,13 +3127,13 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
 		struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;
 
-		radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs,
+		radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 				      &rtex->resource, RADEON_USAGE_READWRITE,
 				      zb->base.texture->nr_samples > 1 ?
 					      RADEON_PRIO_DEPTH_BUFFER_MSAA :
 					      RADEON_PRIO_DEPTH_BUFFER);
 
-		if (sctx->b.chip_class >= GFX9) {
+		if (sctx->chip_class >= GFX9) {
 			radeon_set_context_reg_seq(cs, R_028014_DB_HTILE_DATA_BASE, 3);
 			radeon_emit(cs, zb->db_htile_data_base);	/* DB_HTILE_DATA_BASE */
 			radeon_emit(cs, S_028018_BASE_HI(zb->db_htile_data_base >> 32)); /* DB_HTILE_DATA_BASE_HI */
@@ -3178,7 +3178,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 		radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
 		radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
 	} else if (sctx->framebuffer.dirty_zsbuf) {
-		if (sctx->b.chip_class >= GFX9)
+		if (sctx->chip_class >= GFX9)
 			radeon_set_context_reg_seq(cs, R_028038_DB_Z_INFO, 2);
 		else
 			radeon_set_context_reg_seq(cs, R_028040_DB_Z_INFO, 2);
@@ -3204,7 +3204,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 static void si_emit_msaa_sample_locs(struct si_context *sctx,
 				     struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	unsigned nr_samples = sctx->framebuffer.nr_samples;
 	bool has_msaa_sample_loc_bug = sctx->screen->has_msaa_sample_loc_bug;
 
@@ -3225,12 +3225,12 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx,
 		si_emit_sample_locations(cs, nr_samples);
 	}
 
-	if (sctx->b.family >= CHIP_POLARIS10) {
+	if (sctx->family >= CHIP_POLARIS10) {
 		struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 		unsigned small_prim_filter_cntl =
 			S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
 			/* line bug */
-			S_028830_LINE_FILTER_DISABLE(sctx->b.family <= CHIP_POLARIS12);
+			S_028830_LINE_FILTER_DISABLE(sctx->family <= CHIP_POLARIS12);
 
 		/* The alternative of setting sample locations to 0 would
 		 * require a DB flush to avoid Z errors, see
@@ -3286,7 +3286,7 @@ static bool si_out_of_order_rasterization(struct si_context *sctx)
 		    !dsa_order_invariant.pass_set)
 			return false;
 
-		if (sctx->b.num_perfect_occlusion_queries != 0 &&
+		if (sctx->num_perfect_occlusion_queries != 0 &&
 		    !dsa_order_invariant.pass_set)
 			return false;
 	}
@@ -3315,7 +3315,7 @@ static bool si_out_of_order_rasterization(struct si_context *sctx)
 
 static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	unsigned num_tile_pipes = sctx->screen->info.num_tile_pipes;
 	/* 33% faster rendering to linear color buffers */
 	bool dst_is_linear = sctx->framebuffer.any_dst_linear;
@@ -3883,7 +3883,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
 	height = height0;
 	depth = texture->depth0;
 
-	if (sctx->b.chip_class <= VI && force_level) {
+	if (sctx->chip_class <= VI && force_level) {
 		assert(force_level == first_level &&
 		       force_level == last_level);
 		base_level = force_level;
@@ -4120,7 +4120,7 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
 			  S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
 			  S_008F30_ANISO_BIAS(max_aniso_ratio) |
 			  S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) |
-			  S_008F30_COMPAT_MODE(sctx->b.chip_class >= VI));
+			  S_008F30_COMPAT_MODE(sctx->chip_class >= VI));
 	rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
 			  S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) |
 			  S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
@@ -4129,9 +4129,9 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
 			  S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter, max_aniso)) |
 			  S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) |
 			  S_008F38_MIP_POINT_PRECLAMP(0) |
-			  S_008F38_DISABLE_LSB_CEIL(sctx->b.chip_class <= VI) |
+			  S_008F38_DISABLE_LSB_CEIL(sctx->chip_class <= VI) |
 			  S_008F38_FILTER_PREC_FIX(1) |
-			  S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI));
+			  S_008F38_ANISO_OVERRIDE(sctx->chip_class >= VI));
 	rstate->val[3] = si_translate_border_color(sctx, state, &state->border_color, false);
 
 	/* Create sampler resource for integer textures. */
@@ -4170,7 +4170,7 @@ static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
 
 static void si_emit_sample_mask(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	unsigned mask = sctx->sample_mask.sample_mask;
 
 	/* Needed for line and polygon smoothing as well as for the Polaris
@@ -4463,11 +4463,11 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
 
 	/* Subsequent commands must wait for all shader invocations to
 	 * complete. */
-	sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+	sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
 	                 SI_CONTEXT_CS_PARTIAL_FLUSH;
 
 	if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
-		sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
+		sctx->flags |= SI_CONTEXT_INV_SMEM_L1 |
 				 SI_CONTEXT_INV_VMEM_L1;
 
 	if (flags & (PIPE_BARRIER_VERTEX_BUFFER |
@@ -4479,7 +4479,7 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
 		/* As far as I can tell, L1 contents are written back to L2
 		 * automatically at end of shader, but the contents of other
 		 * L1 caches might still be stale. */
-		sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1;
+		sctx->flags |= SI_CONTEXT_INV_VMEM_L1;
 	}
 
 	if (flags & PIPE_BARRIER_INDEX_BUFFER) {
@@ -4487,7 +4487,7 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
 		 * L1 isn't used.
 		 */
 		if (sctx->screen->info.chip_class <= CIK)
-			sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+			sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
 	}
 
 	/* MSAA color, any depth and any stencil are flushed in
@@ -4495,16 +4495,16 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
 	 */
 	if (flags & PIPE_BARRIER_FRAMEBUFFER &&
 	    sctx->framebuffer.uncompressed_cb_mask) {
-		sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
+		sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
 
-		if (sctx->b.chip_class <= VI)
-			sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+		if (sctx->chip_class <= VI)
+			sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
 	}
 
 	/* Indirect buffers use TC L2 on GFX9, but not older hw. */
 	if (sctx->screen->info.chip_class <= VI &&
 	    flags & PIPE_BARRIER_INDIRECT_BUFFER)
-		sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+		sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
 }
 
 static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
@@ -4514,14 +4514,14 @@ static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
 	memset(&blend, 0, sizeof(blend));
 	blend.independent_blend_enable = true;
 	blend.rt[0].colormask = 0xf;
-	return si_create_blend_state_mode(&sctx->b.b, &blend, mode);
+	return si_create_blend_state_mode(&sctx->b, &blend, mode);
 }
 
 static void si_init_config(struct si_context *sctx);
 
 void si_init_state_functions(struct si_context *sctx)
 {
-	si_init_external_atom(sctx, &sctx->b.render_cond_atom, &sctx->atoms.s.render_cond);
+	si_init_external_atom(sctx, &sctx->render_cond_atom, &sctx->atoms.s.render_cond);
 	si_init_external_atom(sctx, &sctx->streamout.begin_atom, &sctx->atoms.s.streamout_begin);
 	si_init_external_atom(sctx, &sctx->streamout.enable_atom, &sctx->atoms.s.streamout_enable);
 	si_init_external_atom(sctx, &sctx->scissors.atom, &sctx->atoms.s.scissors);
@@ -4539,18 +4539,18 @@ void si_init_state_functions(struct si_context *sctx)
 	si_init_atom(sctx, &sctx->clip_state.atom, &sctx->atoms.s.clip_state, si_emit_clip_state);
 	si_init_atom(sctx, &sctx->stencil_ref.atom, &sctx->atoms.s.stencil_ref, si_emit_stencil_ref);
 
-	sctx->b.b.create_blend_state = si_create_blend_state;
-	sctx->b.b.bind_blend_state = si_bind_blend_state;
-	sctx->b.b.delete_blend_state = si_delete_blend_state;
-	sctx->b.b.set_blend_color = si_set_blend_color;
+	sctx->b.create_blend_state = si_create_blend_state;
+	sctx->b.bind_blend_state = si_bind_blend_state;
+	sctx->b.delete_blend_state = si_delete_blend_state;
+	sctx->b.set_blend_color = si_set_blend_color;
 
-	sctx->b.b.create_rasterizer_state = si_create_rs_state;
-	sctx->b.b.bind_rasterizer_state = si_bind_rs_state;
-	sctx->b.b.delete_rasterizer_state = si_delete_rs_state;
+	sctx->b.create_rasterizer_state = si_create_rs_state;
+	sctx->b.bind_rasterizer_state = si_bind_rs_state;
+	sctx->b.delete_rasterizer_state = si_delete_rs_state;
 
-	sctx->b.b.create_depth_stencil_alpha_state = si_create_dsa_state;
-	sctx->b.b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
-	sctx->b.b.delete_depth_stencil_alpha_state = si_delete_dsa_state;
+	sctx->b.create_depth_stencil_alpha_state = si_create_dsa_state;
+	sctx->b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
+	sctx->b.delete_depth_stencil_alpha_state = si_delete_dsa_state;
 
 	sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);
 	sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
@@ -4558,32 +4558,32 @@ void si_init_state_functions(struct si_context *sctx)
 	sctx->custom_blend_eliminate_fastclear = si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);
 	sctx->custom_blend_dcc_decompress = si_create_blend_custom(sctx, V_028808_CB_DCC_DECOMPRESS);
 
-	sctx->b.b.set_clip_state = si_set_clip_state;
-	sctx->b.b.set_stencil_ref = si_set_stencil_ref;
+	sctx->b.set_clip_state = si_set_clip_state;
+	sctx->b.set_stencil_ref = si_set_stencil_ref;
 
-	sctx->b.b.set_framebuffer_state = si_set_framebuffer_state;
+	sctx->b.set_framebuffer_state = si_set_framebuffer_state;
 
-	sctx->b.b.create_sampler_state = si_create_sampler_state;
-	sctx->b.b.delete_sampler_state = si_delete_sampler_state;
+	sctx->b.create_sampler_state = si_create_sampler_state;
+	sctx->b.delete_sampler_state = si_delete_sampler_state;
 
-	sctx->b.b.create_sampler_view = si_create_sampler_view;
-	sctx->b.b.sampler_view_destroy = si_sampler_view_destroy;
+	sctx->b.create_sampler_view = si_create_sampler_view;
+	sctx->b.sampler_view_destroy = si_sampler_view_destroy;
 
-	sctx->b.b.set_sample_mask = si_set_sample_mask;
+	sctx->b.set_sample_mask = si_set_sample_mask;
 
-	sctx->b.b.create_vertex_elements_state = si_create_vertex_elements;
-	sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements;
-	sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element;
-	sctx->b.b.set_vertex_buffers = si_set_vertex_buffers;
+	sctx->b.create_vertex_elements_state = si_create_vertex_elements;
+	sctx->b.bind_vertex_elements_state = si_bind_vertex_elements;
+	sctx->b.delete_vertex_elements_state = si_delete_vertex_element;
+	sctx->b.set_vertex_buffers = si_set_vertex_buffers;
 
-	sctx->b.b.texture_barrier = si_texture_barrier;
-	sctx->b.b.memory_barrier = si_memory_barrier;
-	sctx->b.b.set_min_samples = si_set_min_samples;
-	sctx->b.b.set_tess_state = si_set_tess_state;
+	sctx->b.texture_barrier = si_texture_barrier;
+	sctx->b.memory_barrier = si_memory_barrier;
+	sctx->b.set_min_samples = si_set_min_samples;
+	sctx->b.set_tess_state = si_set_tess_state;
 
-	sctx->b.b.set_active_query_state = si_set_active_query_state;
+	sctx->b.set_active_query_state = si_set_active_query_state;
 
-	sctx->b.b.draw_vbo = si_draw_vbo;
+	sctx->b.draw_vbo = si_draw_vbo;
 
 	si_init_config(sctx);
 }
@@ -4596,7 +4596,7 @@ void si_init_screen_state_functions(struct si_screen *sscreen)
 static void si_set_grbm_gfx_index(struct si_context *sctx,
 				  struct si_pm4_state *pm4,  unsigned value)
 {
-	unsigned reg = sctx->b.chip_class >= CIK ? R_030800_GRBM_GFX_INDEX :
+	unsigned reg = sctx->chip_class >= CIK ? R_030800_GRBM_GFX_INDEX :
 						   R_00802C_GRBM_GFX_INDEX;
 	si_pm4_set_reg(pm4, reg, value);
 }
@@ -4719,7 +4719,7 @@ si_write_harvested_raster_configs(struct si_context *sctx,
 	}
 	si_set_grbm_gfx_index(sctx, pm4, ~0);
 
-	if (sctx->b.chip_class >= CIK) {
+	if (sctx->chip_class >= CIK) {
 		if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
 		                     (!se_mask[2] && !se_mask[3]))) {
 			raster_config_1 &= C_028354_SE_PAIR_MAP;
@@ -4744,7 +4744,7 @@ static void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *p
 	unsigned rb_mask = sctx->screen->info.enabled_rb_mask;
 	unsigned raster_config, raster_config_1;
 
-	switch (sctx->b.family) {
+	switch (sctx->family) {
 	case CHIP_TAHITI:
 	case CHIP_PITCAIRN:
 		raster_config = 0x2a00126a;
@@ -4828,7 +4828,7 @@ static void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *p
 		 */
 		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG,
 			       raster_config);
-		if (sctx->b.chip_class >= CIK)
+		if (sctx->chip_class >= CIK)
 			si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1,
 				       raster_config_1);
 	} else {
@@ -4860,7 +4860,7 @@ static void si_init_config(struct si_context *sctx)
 		si_pm4_cmd_end(pm4, false);
 	}
 
-	if (sctx->b.chip_class <= VI)
+	if (sctx->chip_class <= VI)
 		si_set_raster_config(sctx, pm4);
 
 	si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
@@ -4868,7 +4868,7 @@ static void si_init_config(struct si_context *sctx)
 		si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
 
 	/* FIXME calculate these values somehow ??? */
-	if (sctx->b.chip_class <= VI) {
+	if (sctx->chip_class <= VI) {
 		si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
 		si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
 	}
@@ -4882,7 +4882,7 @@ static void si_init_config(struct si_context *sctx)
 	si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
 	if (!has_clear_state)
 		si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
-	if (sctx->b.chip_class < CIK)
+	if (sctx->chip_class < CIK)
 		si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
 			       S_008A14_CLIP_VTX_REORDER_ENA(1));
 
@@ -4895,7 +4895,7 @@ static void si_init_config(struct si_context *sctx)
 	/* CLEAR_STATE doesn't clear these correctly on certain generations.
 	 * I don't know why. Deduced by trial and error.
 	 */
-	if (sctx->b.chip_class <= CIK) {
+	if (sctx->chip_class <= CIK) {
 		si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
 		si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
 		si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
@@ -4926,7 +4926,7 @@ static void si_init_config(struct si_context *sctx)
 		si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0);
 	}
 
-	if (sctx->b.chip_class >= GFX9) {
+	if (sctx->chip_class >= GFX9) {
 		si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0);
 		si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0);
 		si_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0);
@@ -4940,8 +4940,8 @@ static void si_init_config(struct si_context *sctx)
 		si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
 	}
 
-	if (sctx->b.chip_class >= CIK) {
-		if (sctx->b.chip_class >= GFX9) {
+	if (sctx->chip_class >= CIK) {
+		if (sctx->chip_class >= GFX9) {
 			si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
 				       S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
 		} else {
@@ -4969,7 +4969,7 @@ static void si_init_config(struct si_context *sctx)
 					  sscreen->info.max_sh_per_se);
 		unsigned late_alloc_limit; /* The limit is per SH. */
 
-		if (sctx->b.family == CHIP_KABINI) {
+		if (sctx->family == CHIP_KABINI) {
 			late_alloc_limit = 0; /* Potential hang on Kabini. */
 		} else if (num_cu_per_sh <= 4) {
 			/* Too few available compute units per SH. Disallowing
@@ -5001,7 +5001,7 @@ static void si_init_config(struct si_context *sctx)
 			       S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F));
 	}
 
-	if (sctx->b.chip_class >= VI) {
+	if (sctx->chip_class >= VI) {
 		unsigned vgt_tess_distribution;
 
 		vgt_tess_distribution =
@@ -5013,8 +5013,8 @@ static void si_init_config(struct si_context *sctx)
 		/* Testing with Unigine Heaven extreme tesselation yielded best results
 		 * with TRAP_SPLIT = 3.
 		 */
-		if (sctx->b.family == CHIP_FIJI ||
-		    sctx->b.family >= CHIP_POLARIS10)
+		if (sctx->family == CHIP_FIJI ||
+		    sctx->family >= CHIP_POLARIS10)
 			vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
 
 		si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
@@ -5024,18 +5024,18 @@ static void si_init_config(struct si_context *sctx)
 	}
 
 	si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
-	if (sctx->b.chip_class >= CIK) {
+	if (sctx->chip_class >= CIK) {
 		si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI,
 			       S_028084_ADDRESS(border_color_va >> 40));
 	}
 	si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
 		      RADEON_PRIO_BORDER_COLORS);
 
-	if (sctx->b.chip_class >= GFX9) {
+	if (sctx->chip_class >= GFX9) {
 		unsigned num_se = sscreen->info.max_se;
 		unsigned pc_lines = 0;
 
-		switch (sctx->b.family) {
+		switch (sctx->family) {
 		case CHIP_VEGA10:
 		case CHIP_VEGA12:
 			pc_lines = 4096;
diff --git a/src/gallium/drivers/radeonsi/si_state_binning.c b/src/gallium/drivers/radeonsi/si_state_binning.c
index d001b24dfc5..87b89e8b492 100644
--- a/src/gallium/drivers/radeonsi/si_state_binning.c
+++ b/src/gallium/drivers/radeonsi/si_state_binning.c
@@ -325,7 +325,7 @@ static struct uvec2 si_get_depth_bin_size(struct si_context *sctx)
 
 static void si_emit_dpbb_disable(struct si_context *sctx)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 
 	radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0,
 			       S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
@@ -341,7 +341,7 @@ void si_emit_dpbb_state(struct si_context *sctx, struct r600_atom *state)
 	struct si_state_dsa *dsa = sctx->queued.named.dsa;
 	unsigned db_shader_control = sctx->ps_db_shader_control;
 
-	assert(sctx->b.chip_class >= GFX9);
+	assert(sctx->chip_class >= GFX9);
 
 	if (!sscreen->dpbb_allowed || !blend || !dsa) {
 		si_emit_dpbb_disable(sctx);
@@ -411,7 +411,7 @@ void si_emit_dpbb_state(struct si_context *sctx, struct r600_atom *state)
 	unsigned persistent_states_per_bin; /* allowed range: [0, 31] */
 	unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
 
-	switch (sctx->b.family) {
+	switch (sctx->family) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
 	case CHIP_RAVEN:
@@ -431,7 +431,7 @@ void si_emit_dpbb_state(struct si_context *sctx, struct r600_atom *state)
 	if (bin_size.y >= 32)
 		bin_size_extend.y = util_logbase2(bin_size.y) - 5;
 
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0,
 			       S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
 			       S_028C44_BIN_SIZE_X(bin_size.x == 16) |
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index ee9133ba017..668491fbe92 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -95,7 +95,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 				       const struct pipe_draw_info *info,
 				       unsigned *num_patches)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	struct si_shader *ls_current;
 	struct si_shader_selector *ls;
 	/* The TES pointer will only be used for sctx->last_tcs.
@@ -103,7 +103,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 	struct si_shader_selector *tcs =
 		sctx->tcs_shader.cso ? sctx->tcs_shader.cso : sctx->tes_shader.cso;
 	unsigned tess_uses_primid = sctx->ia_multi_vgt_param_key.u.tess_uses_prim_id;
-	bool has_primid_instancing_bug = sctx->b.chip_class == SI &&
+	bool has_primid_instancing_bug = sctx->chip_class == SI &&
 					 sctx->screen->info.max_se == 1;
 	unsigned tes_sh_base = sctx->shader_pointers.sh_base[PIPE_SHADER_TESS_EVAL];
 	unsigned num_tcs_input_cp = info->vertices_per_patch;
@@ -116,7 +116,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 	unsigned offchip_layout, hardware_lds_size, ls_hs_config;
 
 	/* Since GFX9 has merged LS-HS in the TCS state, set LS = TCS. */
-	if (sctx->b.chip_class >= GFX9) {
+	if (sctx->chip_class >= GFX9) {
 		if (sctx->tcs_shader.cso)
 			ls_current = sctx->tcs_shader.current;
 		else
@@ -194,7 +194,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 	 */
 	*num_patches = MIN2(*num_patches, 40);
 
-	if (sctx->b.chip_class == SI) {
+	if (sctx->chip_class == SI) {
 		/* SI bug workaround, related to power management. Limit LS-HS
 		 * threadgroups to only one wave.
 		 */
@@ -247,7 +247,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 	/* Compute the LDS size. */
 	lds_size = output_patch0_offset + output_patch_size * *num_patches;
 
-	if (sctx->b.chip_class >= CIK) {
+	if (sctx->chip_class >= CIK) {
 		assert(lds_size <= 65536);
 		lds_size = align(lds_size, 512) / 512;
 	} else {
@@ -260,7 +260,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 				  C_VS_STATE_LS_OUT_VERTEX_SIZE;
 	sctx->current_vs_state |= tcs_in_layout;
 
-	if (sctx->b.chip_class >= GFX9) {
+	if (sctx->chip_class >= GFX9) {
 		unsigned hs_rsrc2 = ls_current->config.rsrc2 |
 				    S_00B42C_LDS_SIZE(lds_size);
 
@@ -281,7 +281,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 
 		/* Due to a hw bug, RSRC2_LS must be written twice with another
 		 * LS register written in between. */
-		if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
+		if (sctx->chip_class == CIK && sctx->family != CHIP_HAWAII)
 			radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
 		radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
 		radeon_emit(cs, ls_current->config.rsrc1);
@@ -305,7 +305,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 		       S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) |
 		       S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp);
 
-	if (sctx->b.chip_class >= CIK)
+	if (sctx->chip_class >= CIK)
 		radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2,
 					   ls_hs_config);
 	else
@@ -511,7 +511,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
 
 	if (sctx->gs_shader.cso) {
 		/* GS requirement. */
-		if (sctx->b.chip_class <= VI &&
+		if (sctx->chip_class <= VI &&
 		    SI_GS_PER_ES / primgroup_size >= sctx->screen->gs_table_depth - 3)
 			ia_multi_vgt_param |= S_028AA8_PARTIAL_ES_WAVE_ON(1);
 
@@ -519,13 +519,13 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
 		 * The hw doc says all multi-SE chips are affected, but Vulkan
 		 * only applies it to Hawaii. Do what Vulkan does.
 		 */
-		if (sctx->b.family == CHIP_HAWAII &&
+		if (sctx->family == CHIP_HAWAII &&
 		    G_028AA8_SWITCH_ON_EOI(ia_multi_vgt_param) &&
 		    (info->indirect ||
 		     (info->instance_count > 1 &&
 		      (info->count_from_stream_output ||
 		       si_num_prims_for_vertices(info) <= 1))))
-			sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
+			sctx->flags |= SI_CONTEXT_VGT_FLUSH;
 	}
 
 	return ia_multi_vgt_param;
@@ -534,7 +534,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
 /* rast_prim is the primitive type after GS. */
 static void si_emit_rasterizer_prim_state(struct si_context *sctx)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	enum pipe_prim_type rast_prim = sctx->current_rast_prim;
 	struct si_state_rasterizer *rs = sctx->emitted.named.rasterizer;
 
@@ -574,7 +574,7 @@ static void si_emit_vs_state(struct si_context *sctx,
 	}
 
 	if (sctx->current_vs_state != sctx->last_vs_state) {
-		struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+		struct radeon_winsys_cs *cs = sctx->gfx_cs;
 
 		radeon_set_sh_reg(cs,
 			sctx->shader_pointers.sh_base[PIPE_SHADER_VERTEX] +
@@ -589,7 +589,7 @@ static void si_emit_draw_registers(struct si_context *sctx,
 				   const struct pipe_draw_info *info,
 				   unsigned num_patches)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	unsigned prim = si_conv_pipe_prim(info->mode);
 	unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->current_rast_prim);
 	unsigned ia_multi_vgt_param;
@@ -598,9 +598,9 @@ static void si_emit_draw_registers(struct si_context *sctx,
 
 	/* Draw state. */
 	if (ia_multi_vgt_param != sctx->last_multi_vgt_param) {
-		if (sctx->b.chip_class >= GFX9)
+		if (sctx->chip_class >= GFX9)
 			radeon_set_uconfig_reg_idx(cs, R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param);
-		else if (sctx->b.chip_class >= CIK)
+		else if (sctx->chip_class >= CIK)
 			radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
 		else
 			radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
@@ -608,7 +608,7 @@ static void si_emit_draw_registers(struct si_context *sctx,
 		sctx->last_multi_vgt_param = ia_multi_vgt_param;
 	}
 	if (prim != sctx->last_prim) {
-		if (sctx->b.chip_class >= CIK)
+		if (sctx->chip_class >= CIK)
 			radeon_set_uconfig_reg_idx(cs, R_030908_VGT_PRIMITIVE_TYPE, 1, prim);
 		else
 			radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim);
@@ -623,7 +623,7 @@ static void si_emit_draw_registers(struct si_context *sctx,
 
 	/* Primitive restart. */
 	if (info->primitive_restart != sctx->last_primitive_restart_en) {
-		if (sctx->b.chip_class >= GFX9)
+		if (sctx->chip_class >= GFX9)
 			radeon_set_uconfig_reg(cs, R_03092C_VGT_MULTI_PRIM_IB_RESET_EN,
 					       info->primitive_restart);
 		else
@@ -649,9 +649,9 @@ static void si_emit_draw_packets(struct si_context *sctx,
 				 unsigned index_offset)
 {
 	struct pipe_draw_indirect_info *indirect = info->indirect;
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	unsigned sh_base_reg = sctx->shader_pointers.sh_base[PIPE_SHADER_VERTEX];
-	bool render_cond_bit = sctx->b.render_cond && !sctx->b.render_cond_force_off;
+	bool render_cond_bit = sctx->render_cond && !sctx->render_cond_force_off;
 	uint32_t index_max_size = 0;
 	uint64_t index_va = 0;
 
@@ -673,7 +673,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 		radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
 		radeon_emit(cs, 0); /* unused */
 
-		radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs,
+		radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 				      t->buf_filled_size, RADEON_USAGE_READ,
 				      RADEON_PRIO_SO_FILLED_SIZE);
 	}
@@ -690,12 +690,12 @@ static void si_emit_draw_packets(struct si_context *sctx,
 				break;
 			case 2:
 				index_type = V_028A7C_VGT_INDEX_16 |
-					     (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ?
+					     (SI_BIG_ENDIAN && sctx->chip_class <= CIK ?
 						      V_028A7C_VGT_DMA_SWAP_16_BIT : 0);
 				break;
 			case 4:
 				index_type = V_028A7C_VGT_INDEX_32 |
-					     (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ?
+					     (SI_BIG_ENDIAN && sctx->chip_class <= CIK ?
 						      V_028A7C_VGT_DMA_SWAP_32_BIT : 0);
 				break;
 			default:
@@ -703,7 +703,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 				return;
 			}
 
-			if (sctx->b.chip_class >= GFX9) {
+			if (sctx->chip_class >= GFX9) {
 				radeon_set_uconfig_reg_idx(cs, R_03090C_VGT_INDEX_TYPE,
 							   2, index_type);
 			} else {
@@ -718,14 +718,14 @@ static void si_emit_draw_packets(struct si_context *sctx,
 				  index_size;
 		index_va = r600_resource(indexbuf)->gpu_address + index_offset;
 
-		radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs,
+		radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 				      (struct r600_resource *)indexbuf,
 				      RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER);
 	} else {
 		/* On CI and later, non-indexed draws overwrite VGT_INDEX_TYPE,
 		 * so the state must be re-emitted before the next indexed draw.
 		 */
-		if (sctx->b.chip_class >= CIK)
+		if (sctx->chip_class >= CIK)
 			sctx->last_index_size = -1;
 	}
 
@@ -741,7 +741,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 		radeon_emit(cs, indirect_va);
 		radeon_emit(cs, indirect_va >> 32);
 
-		radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs,
+		radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 				      (struct r600_resource *)indirect->buffer,
 				      RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
 
@@ -775,7 +775,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 					(struct r600_resource *)indirect->indirect_draw_count;
 
 				radeon_add_to_buffer_list(
-					sctx, sctx->b.gfx_cs, params_buf,
+					sctx, sctx->gfx_cs, params_buf,
 					RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT);
 
 				count_va = params_buf->gpu_address + indirect->indirect_draw_count_offset;
@@ -851,9 +851,9 @@ static void si_emit_draw_packets(struct si_context *sctx,
 static void si_emit_surface_sync(struct si_context *sctx,
 				 unsigned cp_coher_cntl)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 
-	if (sctx->b.chip_class >= GFX9) {
+	if (sctx->chip_class >= GFX9) {
 		/* Flush caches and wait for the caches to assert idle. */
 		radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
 		radeon_emit(cs, cp_coher_cntl);	/* CP_COHER_CNTL */
@@ -874,16 +874,16 @@ static void si_emit_surface_sync(struct si_context *sctx,
 
 void si_emit_cache_flush(struct si_context *sctx)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
-	uint32_t flags = sctx->b.flags;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
+	uint32_t flags = sctx->flags;
 	uint32_t cp_coher_cntl = 0;
 	uint32_t flush_cb_db = flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
 					SI_CONTEXT_FLUSH_AND_INV_DB);
 
 	if (flags & SI_CONTEXT_FLUSH_AND_INV_CB)
-		sctx->b.num_cb_cache_flushes++;
+		sctx->num_cb_cache_flushes++;
 	if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)
-		sctx->b.num_db_cache_flushes++;
+		sctx->num_db_cache_flushes++;
 
 	/* SI has a bug that it always flushes ICACHE and KCACHE if either
 	 * bit is set. An alternative way is to write SQC_CACHES, but that
@@ -898,7 +898,7 @@ void si_emit_cache_flush(struct si_context *sctx)
 	if (flags & SI_CONTEXT_INV_SMEM_L1)
 		cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
 
-	if (sctx->b.chip_class <= VI) {
+	if (sctx->chip_class <= VI) {
 		if (flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
 			cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
 					 S_0085F0_CB0_DEST_BASE_ENA(1) |
@@ -911,7 +911,7 @@ void si_emit_cache_flush(struct si_context *sctx)
 					 S_0085F0_CB7_DEST_BASE_ENA(1);
 
 			/* Necessary for DCC */
-			if (sctx->b.chip_class == VI)
+			if (sctx->chip_class == VI)
 				si_gfx_write_event_eop(sctx, V_028A90_FLUSH_AND_INV_CB_DATA_TS,
 						       0, EOP_DATA_SEL_DISCARD, NULL,
 						       0, 0, SI_NOT_QUERY);
@@ -944,12 +944,12 @@ void si_emit_cache_flush(struct si_context *sctx)
 			/* Only count explicit shader flushes, not implicit ones
 			 * done by SURFACE_SYNC.
 			 */
-			sctx->b.num_vs_flushes++;
-			sctx->b.num_ps_flushes++;
+			sctx->num_vs_flushes++;
+			sctx->num_ps_flushes++;
 		} else if (flags & SI_CONTEXT_VS_PARTIAL_FLUSH) {
 			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 			radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
-			sctx->b.num_vs_flushes++;
+			sctx->num_vs_flushes++;
 		}
 	}
 
@@ -957,7 +957,7 @@ void si_emit_cache_flush(struct si_context *sctx)
 	    sctx->compute_is_busy) {
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 		radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4)));
-		sctx->b.num_cs_flushes++;
+		sctx->num_cs_flushes++;
 		sctx->compute_is_busy = false;
 	}
 
@@ -974,7 +974,7 @@ void si_emit_cache_flush(struct si_context *sctx)
 	/* GFX9: Wait for idle if we're flushing CB or DB. ACQUIRE_MEM doesn't
 	 * wait for idle on GFX9. We have to use a TS event.
 	 */
-	if (sctx->b.chip_class >= GFX9 && flush_cb_db) {
+	if (sctx->chip_class >= GFX9 && flush_cb_db) {
 		uint64_t va;
 		unsigned tc_flags, cb_db_event;
 
@@ -1020,7 +1020,7 @@ void si_emit_cache_flush(struct si_context *sctx)
 			flags &= ~(SI_CONTEXT_INV_GLOBAL_L2 |
 				   SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
 				   SI_CONTEXT_INV_VMEM_L1);
-			sctx->b.num_L2_invalidates++;
+			sctx->num_L2_invalidates++;
 		}
 
 		/* Do the flush (enqueue the event and wait for it). */
@@ -1056,7 +1056,7 @@ void si_emit_cache_flush(struct si_context *sctx)
 	 * SI-CIK don't support L2 write-back.
 	 */
 	if (flags & SI_CONTEXT_INV_GLOBAL_L2 ||
-	    (sctx->b.chip_class <= CIK &&
+	    (sctx->chip_class <= CIK &&
 	     (flags & SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
 		/* Invalidate L1 & L2. (L1 is always invalidated on SI)
 		 * WB must be set on VI+ when TC_ACTION is set.
@@ -1064,9 +1064,9 @@ void si_emit_cache_flush(struct si_context *sctx)
 		si_emit_surface_sync(sctx, cp_coher_cntl |
 				     S_0085F0_TC_ACTION_ENA(1) |
 				     S_0085F0_TCL1_ACTION_ENA(1) |
-				     S_0301F0_TC_WB_ACTION_ENA(sctx->b.chip_class >= VI));
+				     S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= VI));
 		cp_coher_cntl = 0;
-		sctx->b.num_L2_invalidates++;
+		sctx->num_L2_invalidates++;
 	} else {
 		/* L1 invalidation and L2 writeback must be done separately,
 		 * because both operations can't be done together.
@@ -1082,7 +1082,7 @@ void si_emit_cache_flush(struct si_context *sctx)
 					     S_0301F0_TC_WB_ACTION_ENA(1) |
 					     S_0301F0_TC_NC_ACTION_ENA(1));
 			cp_coher_cntl = 0;
-			sctx->b.num_L2_writebacks++;
+			sctx->num_L2_writebacks++;
 		}
 		if (flags & SI_CONTEXT_INV_VMEM_L1) {
 			/* Invalidate per-CU VMEM L1. */
@@ -1106,7 +1106,7 @@ void si_emit_cache_flush(struct si_context *sctx)
 			        EVENT_INDEX(0));
 	}
 
-	sctx->b.flags = 0;
+	sctx->flags = 0;
 }
 
 static void si_get_draw_start_count(struct si_context *sctx,
@@ -1123,7 +1123,7 @@ static void si_get_draw_start_count(struct si_context *sctx,
 		unsigned *data;
 
 		if (indirect->indirect_draw_count) {
-			data = pipe_buffer_map_range(&sctx->b.b,
+			data = pipe_buffer_map_range(&sctx->b,
 					indirect->indirect_draw_count,
 					indirect->indirect_draw_count_offset,
 					sizeof(unsigned),
@@ -1131,7 +1131,7 @@ static void si_get_draw_start_count(struct si_context *sctx,
 
 			indirect_count = *data;
 
-			pipe_buffer_unmap(&sctx->b.b, transfer);
+			pipe_buffer_unmap(&sctx->b, transfer);
 		} else {
 			indirect_count = indirect->draw_count;
 		}
@@ -1142,7 +1142,7 @@ static void si_get_draw_start_count(struct si_context *sctx,
 		}
 
 		map_size = (indirect_count - 1) * indirect->stride + 3 * sizeof(unsigned);
-		data = pipe_buffer_map_range(&sctx->b.b, indirect->buffer,
+		data = pipe_buffer_map_range(&sctx->b, indirect->buffer,
 					     indirect->offset, map_size,
 					     PIPE_TRANSFER_READ, &transfer);
 
@@ -1161,7 +1161,7 @@ static void si_get_draw_start_count(struct si_context *sctx,
 			data += indirect->stride / sizeof(unsigned);
 		}
 
-		pipe_buffer_unmap(&sctx->b.b, transfer);
+		pipe_buffer_unmap(&sctx->b, transfer);
 
 		if (begin < end) {
 			*start = begin;
@@ -1250,8 +1250,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
 	/* Recompute and re-emit the texture resource states if needed. */
 	dirty_tex_counter = p_atomic_read(&sctx->screen->dirty_tex_counter);
-	if (unlikely(dirty_tex_counter != sctx->b.last_dirty_tex_counter)) {
-		sctx->b.last_dirty_tex_counter = dirty_tex_counter;
+	if (unlikely(dirty_tex_counter != sctx->last_dirty_tex_counter)) {
+		sctx->last_dirty_tex_counter = dirty_tex_counter;
 		sctx->framebuffer.dirty_cbufs |=
 			((1 << sctx->framebuffer.state.nr_cbufs) - 1);
 		sctx->framebuffer.dirty_zsbuf = true;
@@ -1333,7 +1333,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	if (index_size) {
 		/* Translate or upload, if needed. */
 		/* 8-bit indices are supported on VI. */
-		if (sctx->b.chip_class <= CIK && index_size == 1) {
+		if (sctx->chip_class <= CIK && index_size == 1) {
 			unsigned start, count, start_offset, size, offset;
 			void *ptr;
 
@@ -1349,7 +1349,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 			if (!indexbuf)
 				return;
 
-			util_shorten_ubyte_elts_to_userptr(&sctx->b.b, info, 0, 0,
+			util_shorten_ubyte_elts_to_userptr(&sctx->b, info, 0, 0,
 							   index_offset + start,
 							   count, ptr);
 
@@ -1373,11 +1373,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
 			/* info->start will be added by the drawing code */
 			index_offset -= start_offset;
-		} else if (sctx->b.chip_class <= CIK &&
+		} else if (sctx->chip_class <= CIK &&
 			   r600_resource(indexbuf)->TC_L2_dirty) {
 			/* VI reads index buffers through TC L2, so it doesn't
 			 * need this. */
-			sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+			sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
 			r600_resource(indexbuf)->TC_L2_dirty = false;
 		}
 	}
@@ -1389,15 +1389,15 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		si_context_add_resource_size(sctx, indirect->buffer);
 
 		/* Indirect buffers use TC L2 on GFX9, but not older hw. */
-		if (sctx->b.chip_class <= VI) {
+		if (sctx->chip_class <= VI) {
 			if (r600_resource(indirect->buffer)->TC_L2_dirty) {
-				sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+				sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
 				r600_resource(indirect->buffer)->TC_L2_dirty = false;
 			}
 
 			if (indirect->indirect_draw_count &&
 			    r600_resource(indirect->indirect_draw_count)->TC_L2_dirty) {
-				sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+				sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
 				r600_resource(indirect->indirect_draw_count)->TC_L2_dirty = false;
 			}
 		}
@@ -1416,14 +1416,14 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	 * scissor registers are changed. There is also a more efficient but
 	 * more involved alternative workaround.
 	 */
-	if ((sctx->b.family == CHIP_VEGA10 || sctx->b.family == CHIP_RAVEN) &&
+	if ((sctx->family == CHIP_VEGA10 || sctx->family == CHIP_RAVEN) &&
 	    si_is_atom_dirty(sctx, &sctx->scissors.atom)) {
-		sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
+		sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
 		si_emit_cache_flush(sctx);
 	}
 
 	/* Use optimal packet order based on whether we need to sync the pipeline. */
-	if (unlikely(sctx->b.flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
+	if (unlikely(sctx->flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
 				      SI_CONTEXT_FLUSH_AND_INV_DB |
 				      SI_CONTEXT_PS_PARTIAL_FLUSH |
 				      SI_CONTEXT_CS_PARTIAL_FLUSH))) {
@@ -1437,8 +1437,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		struct r600_atom *shader_pointers = &sctx->shader_pointers.atom;
 		unsigned masked_atoms = 1u << shader_pointers->id;
 
-		if (unlikely(sctx->b.flags & SI_CONTEXT_FLUSH_FOR_RENDER_COND))
-			masked_atoms |= 1u << sctx->b.render_cond_atom.id;
+		if (unlikely(sctx->flags & SI_CONTEXT_FLUSH_FOR_RENDER_COND))
+			masked_atoms |= 1u << sctx->render_cond_atom.id;
 
 		/* Emit all states except shader pointers and render condition. */
 		si_emit_all_states(sctx, info, masked_atoms);
@@ -1451,8 +1451,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		/* Set shader pointers after descriptors are uploaded. */
 		if (si_is_atom_dirty(sctx, shader_pointers))
 			shader_pointers->emit(sctx, NULL);
-		if (si_is_atom_dirty(sctx, &sctx->b.render_cond_atom))
-			sctx->b.render_cond_atom.emit(sctx, NULL);
+		if (si_is_atom_dirty(sctx, &sctx->render_cond_atom))
+			sctx->render_cond_atom.emit(sctx, NULL);
 		sctx->dirty_atoms = 0;
 
 		si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
@@ -1461,16 +1461,16 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		/* Start prefetches after the draw has been started. Both will run
 		 * in parallel, but starting the draw first is more important.
 		 */
-		if (sctx->b.chip_class >= CIK && sctx->prefetch_L2_mask)
+		if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
 			cik_emit_prefetch_L2(sctx);
 	} else {
 		/* If we don't wait for idle, start prefetches first, then set
 		 * states, and draw at the end.
 		 */
-		if (sctx->b.flags)
+		if (sctx->flags)
 			si_emit_cache_flush(sctx);
 
-		if (sctx->b.chip_class >= CIK && sctx->prefetch_L2_mask)
+		if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
 			cik_emit_prefetch_L2(sctx);
 
 		if (!si_upload_graphics_shader_descriptors(sctx))
@@ -1482,28 +1482,28 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
 	if (unlikely(sctx->current_saved_cs)) {
 		si_trace_emit(sctx);
-		si_log_draw_state(sctx, sctx->b.log);
+		si_log_draw_state(sctx, sctx->log);
 	}
 
 	/* Workaround for a VGT hang when streamout is enabled.
 	 * It must be done after drawing. */
-	if ((sctx->b.family == CHIP_HAWAII ||
-	     sctx->b.family == CHIP_TONGA ||
-	     sctx->b.family == CHIP_FIJI) &&
+	if ((sctx->family == CHIP_HAWAII ||
+	     sctx->family == CHIP_TONGA ||
+	     sctx->family == CHIP_FIJI) &&
 	    si_get_strmout_en(sctx)) {
-		sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
+		sctx->flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
 	}
 
 	if (unlikely(sctx->decompression_enabled)) {
-		sctx->b.num_decompress_calls++;
+		sctx->num_decompress_calls++;
 	} else {
-		sctx->b.num_draw_calls++;
+		sctx->num_draw_calls++;
 		if (sctx->framebuffer.state.nr_cbufs > 1)
-			sctx->b.num_mrt_draw_calls++;
+			sctx->num_mrt_draw_calls++;
 		if (info->primitive_restart)
-			sctx->b.num_prim_restart_calls++;
+			sctx->num_prim_restart_calls++;
 		if (G_0286E8_WAVESIZE(sctx->spi_tmpring_size))
-			sctx->b.num_spill_draw_calls++;
+			sctx->num_spill_draw_calls++;
 	}
 	if (index_size && indexbuf != info->index.resource)
 		pipe_resource_reference(&indexbuf, NULL);
@@ -1556,7 +1556,7 @@ void si_draw_rectangle(struct blitter_context *blitter,
 
 void si_trace_emit(struct si_context *sctx)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	uint64_t va = sctx->current_saved_cs->trace_buf->gpu_address;
 	uint32_t trace_id = ++sctx->current_saved_cs->trace_id;
 
@@ -1570,6 +1570,6 @@ void si_trace_emit(struct si_context *sctx)
 	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
 	radeon_emit(cs, AC_ENCODE_TRACE_POINT(trace_id));
 
-	if (sctx->b.log)
-		u_log_flush(sctx->b.log);
+	if (sctx->log)
+		u_log_flush(sctx->log);
 }
diff --git a/src/gallium/drivers/radeonsi/si_state_msaa.c b/src/gallium/drivers/radeonsi/si_state_msaa.c
index 890e0d479e3..19bed09df4b 100644
--- a/src/gallium/drivers/radeonsi/si_state_msaa.c
+++ b/src/gallium/drivers/radeonsi/si_state_msaa.c
@@ -190,16 +190,16 @@ void si_init_msaa_functions(struct si_context *sctx)
 {
 	int i;
 
-	sctx->b.b.get_sample_position = si_get_sample_position;
+	sctx->b.get_sample_position = si_get_sample_position;
 
-	si_get_sample_position(&sctx->b.b, 1, 0, sctx->sample_locations_1x[0]);
+	si_get_sample_position(&sctx->b, 1, 0, sctx->sample_locations_1x[0]);
 
 	for (i = 0; i < 2; i++)
-		si_get_sample_position(&sctx->b.b, 2, i, sctx->sample_locations_2x[i]);
+		si_get_sample_position(&sctx->b, 2, i, sctx->sample_locations_2x[i]);
 	for (i = 0; i < 4; i++)
-		si_get_sample_position(&sctx->b.b, 4, i, sctx->sample_locations_4x[i]);
+		si_get_sample_position(&sctx->b, 4, i, sctx->sample_locations_4x[i]);
 	for (i = 0; i < 8; i++)
-		si_get_sample_position(&sctx->b.b, 8, i, sctx->sample_locations_8x[i]);
+		si_get_sample_position(&sctx->b, 8, i, sctx->sample_locations_8x[i]);
 	for (i = 0; i < 16; i++)
-		si_get_sample_position(&sctx->b.b, 16, i, sctx->sample_locations_16x[i]);
+		si_get_sample_position(&sctx->b, 16, i, sctx->sample_locations_16x[i]);
 }
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 2c8ffd7f672..7e1660415f5 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1264,7 +1264,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 		}
 		break;
 	case PIPE_SHADER_TESS_CTRL:
-		if (sctx->b.chip_class >= GFX9) {
+		if (sctx->chip_class >= GFX9) {
 			si_shader_selector_key_vs(sctx, sctx->vs_shader.cso,
 						  key, &key->part.tcs.ls_prolog);
 			key->part.tcs.ls = sctx->vs_shader.cso;
@@ -1305,7 +1305,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 		}
 		break;
 	case PIPE_SHADER_GEOMETRY:
-		if (sctx->b.chip_class >= GFX9) {
+		if (sctx->chip_class >= GFX9) {
 			if (sctx->tes_shader.cso) {
 				key->part.gs.es = sctx->tes_shader.cso;
 			} else {
@@ -1379,7 +1379,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 		 * to the range supported by the type if a channel has less
 		 * than 16 bits and the export format is 16_ABGR.
 		 */
-		if (sctx->b.chip_class <= CIK && sctx->b.family != CHIP_HAWAII) {
+		if (sctx->chip_class <= CIK && sctx->family != CHIP_HAWAII) {
 			key->part.ps.epilog.color_is_int8 = sctx->framebuffer.color_is_int8;
 			key->part.ps.epilog.color_is_int10 = sctx->framebuffer.color_is_int10;
 		}
@@ -1463,7 +1463,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 
 			/* 1D textures are allocated and used as 2D on GFX9. */
 			key->mono.u.ps.fbfetch_msaa = sctx->framebuffer.nr_samples > 1;
-			key->mono.u.ps.fbfetch_is_1D = sctx->b.chip_class != GFX9 &&
+			key->mono.u.ps.fbfetch_is_1D = sctx->chip_class != GFX9 &&
 						       (tex->target == PIPE_TEXTURE_1D ||
 							tex->target == PIPE_TEXTURE_1D_ARRAY);
 			key->mono.u.ps.fbfetch_layered = tex->target == PIPE_TEXTURE_1D_ARRAY ||
@@ -2113,7 +2113,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 		/* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
 		 * conflicts, i.e. each vertex will start at a different bank.
 		 */
-		if (sctx->b.chip_class >= GFX9)
+		if (sctx->chip_class >= GFX9)
 			sel->esgs_itemsize += 4;
 		break;
 
@@ -2454,10 +2454,10 @@ static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
 		switch (shader->selector->type) {
 		case PIPE_SHADER_VERTEX:
 			if (shader->key.as_ls) {
-				assert(sctx->b.chip_class <= VI);
+				assert(sctx->chip_class <= VI);
 				si_pm4_delete_state(sctx, ls, shader->pm4);
 			} else if (shader->key.as_es) {
-				assert(sctx->b.chip_class <= VI);
+				assert(sctx->chip_class <= VI);
 				si_pm4_delete_state(sctx, es, shader->pm4);
 			} else {
 				si_pm4_delete_state(sctx, vs, shader->pm4);
@@ -2468,7 +2468,7 @@ static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
 			break;
 		case PIPE_SHADER_TESS_EVAL:
 			if (shader->key.as_es) {
-				assert(sctx->b.chip_class <= VI);
+				assert(sctx->chip_class <= VI);
 				si_pm4_delete_state(sctx, es, shader->pm4);
 			} else {
 				si_pm4_delete_state(sctx, vs, shader->pm4);
@@ -2600,7 +2600,7 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx,
 
 static void si_emit_spi_map(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	struct si_shader *ps = sctx->ps_shader.current;
 	struct si_shader *vs = si_get_vs_state(sctx);
 	struct tgsi_shader_info *psinfo = ps ? &ps->selector->info : NULL;
@@ -2679,7 +2679,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
 	/* On SI-CI, the value comes from VGT_GS_VERTEX_REUSE = 16.
 	 * On VI+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
 	 */
-	unsigned gs_vertex_reuse = (sctx->b.chip_class >= VI ? 32 : 16) * num_se;
+	unsigned gs_vertex_reuse = (sctx->chip_class >= VI ? 32 : 16) * num_se;
 	unsigned alignment = 256 * num_se;
 	/* The maximum size is 63.999 MB per SE. */
 	unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
@@ -2706,7 +2706,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
 	 *
 	 * GFX9 doesn't have the ESGS ring.
 	 */
-	bool update_esgs = sctx->b.chip_class <= VI &&
+	bool update_esgs = sctx->chip_class <= VI &&
 			   esgs_ring_size &&
 			   (!sctx->esgs_ring ||
 			    sctx->esgs_ring->width0 < esgs_ring_size);
@@ -2720,7 +2720,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
 	if (update_esgs) {
 		pipe_resource_reference(&sctx->esgs_ring, NULL);
 		sctx->esgs_ring =
-			si_aligned_buffer_create(sctx->b.b.screen,
+			si_aligned_buffer_create(sctx->b.screen,
 						   SI_RESOURCE_FLAG_UNMAPPABLE,
 						   PIPE_USAGE_DEFAULT,
 						   esgs_ring_size, alignment);
@@ -2731,7 +2731,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
 	if (update_gsvs) {
 		pipe_resource_reference(&sctx->gsvs_ring, NULL);
 		sctx->gsvs_ring =
-			si_aligned_buffer_create(sctx->b.b.screen,
+			si_aligned_buffer_create(sctx->b.screen,
 						   SI_RESOURCE_FLAG_UNMAPPABLE,
 						   PIPE_USAGE_DEFAULT,
 						   gsvs_ring_size, alignment);
@@ -2744,9 +2744,9 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
 	if (!pm4)
 		return false;
 
-	if (sctx->b.chip_class >= CIK) {
+	if (sctx->chip_class >= CIK) {
 		if (sctx->esgs_ring) {
-			assert(sctx->b.chip_class <= VI);
+			assert(sctx->chip_class <= VI);
 			si_pm4_set_reg(pm4, R_030900_VGT_ESGS_RING_SIZE,
 				       sctx->esgs_ring->width0 / 256);
 		}
@@ -2773,12 +2773,12 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
 	}
 
 	/* Flush the context to re-emit both init_config states. */
-	sctx->b.initial_gfx_cs_size = 0; /* force flush */
+	sctx->initial_gfx_cs_size = 0; /* force flush */
 	si_flush_gfx_cs(sctx, PIPE_FLUSH_ASYNC, NULL);
 
 	/* Set ring bindings. */
 	if (sctx->esgs_ring) {
-		assert(sctx->b.chip_class <= VI);
+		assert(sctx->chip_class <= VI);
 		si_set_ring_buffer(sctx, SI_ES_RING_ESGS,
 				   sctx->esgs_ring, 0, sctx->esgs_ring->width0,
 				   true, true, 4, 64, 0);
@@ -3009,7 +3009,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
 	/* The address must be aligned to 2^19, because the shader only
 	 * receives the high 13 bits.
 	 */
-	sctx->tess_rings = si_aligned_buffer_create(sctx->b.b.screen,
+	sctx->tess_rings = si_aligned_buffer_create(sctx->b.screen,
 						    SI_RESOURCE_FLAG_32BIT,
 						    PIPE_USAGE_DEFAULT,
 						    sctx->screen->tess_offchip_ring_size +
@@ -3027,12 +3027,12 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
 			     sctx->screen->tess_offchip_ring_size;
 
 	/* Append these registers to the init config state. */
-	if (sctx->b.chip_class >= CIK) {
+	if (sctx->chip_class >= CIK) {
 		si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE,
 			       S_030938_SIZE(sctx->screen->tess_factor_ring_size / 4));
 		si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE,
 			       factor_va >> 8);
-		if (sctx->b.chip_class >= GFX9)
+		if (sctx->chip_class >= GFX9)
 			si_pm4_set_reg(sctx->init_config, R_030944_VGT_TF_MEMORY_BASE_HI,
 				       S_030944_BASE_HI(factor_va >> 40));
 		si_pm4_set_reg(sctx->init_config, R_03093C_VGT_HS_OFFCHIP_PARAM,
@@ -3050,7 +3050,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
 	 * This is done only once in a lifetime of a context.
 	 */
 	si_pm4_upload_indirect_buffer(sctx, sctx->init_config);
-	sctx->b.initial_gfx_cs_size = 0; /* force flush */
+	sctx->initial_gfx_cs_size = 0; /* force flush */
 	si_flush_gfx_cs(sctx, PIPE_FLUSH_ASYNC, NULL);
 }
 
@@ -3083,7 +3083,7 @@ static void si_generate_fixed_func_tcs(struct si_context *sctx)
 	ureg_END(ureg);
 
 	sctx->fixed_func_tcs_shader.cso =
-		ureg_create_shader_and_destroy(ureg, &sctx->b.b);
+		ureg_create_shader_and_destroy(ureg, &sctx->b);
 }
 
 static void si_update_vgt_shader_config(struct si_context *sctx)
@@ -3114,7 +3114,7 @@ static void si_update_vgt_shader_config(struct si_context *sctx)
 			          S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
 		}
 
-		if (sctx->b.chip_class >= GFX9)
+		if (sctx->chip_class >= GFX9)
 			stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
 
 		si_pm4_set_reg(*pm4, R_028B54_VGT_SHADER_STAGES_EN, stages);
@@ -3147,7 +3147,7 @@ bool si_update_shaders(struct si_context *sctx)
 		}
 
 		/* VS as LS */
-		if (sctx->b.chip_class <= VI) {
+		if (sctx->chip_class <= VI) {
 			r = si_shader_select(ctx, &sctx->vs_shader,
 					     &compiler_state);
 			if (r)
@@ -3178,7 +3178,7 @@ bool si_update_shaders(struct si_context *sctx)
 
 		if (sctx->gs_shader.cso) {
 			/* TES as ES */
-			if (sctx->b.chip_class <= VI) {
+			if (sctx->chip_class <= VI) {
 				r = si_shader_select(ctx, &sctx->tes_shader,
 						     &compiler_state);
 				if (r)
@@ -3194,7 +3194,7 @@ bool si_update_shaders(struct si_context *sctx)
 			si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4);
 		}
 	} else if (sctx->gs_shader.cso) {
-		if (sctx->b.chip_class <= VI) {
+		if (sctx->chip_class <= VI) {
 			/* VS as ES */
 			r = si_shader_select(ctx, &sctx->vs_shader,
 					     &compiler_state);
@@ -3227,7 +3227,7 @@ bool si_update_shaders(struct si_context *sctx)
 			return false;
 	} else {
 		si_pm4_bind_state(sctx, gs, NULL);
-		if (sctx->b.chip_class <= VI)
+		if (sctx->chip_class <= VI)
 			si_pm4_bind_state(sctx, es, NULL);
 	}
 
@@ -3274,7 +3274,7 @@ bool si_update_shaders(struct si_context *sctx)
 			sctx->smoothing_enabled = sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing;
 			si_mark_atom_dirty(sctx, &sctx->msaa_config);
 
-			if (sctx->b.chip_class == SI)
+			if (sctx->chip_class == SI)
 				si_mark_atom_dirty(sctx, &sctx->db_render_state);
 
 			if (sctx->framebuffer.nr_samples <= 1)
@@ -3292,7 +3292,7 @@ bool si_update_shaders(struct si_context *sctx)
 			return false;
 	}
 
-	if (sctx->b.chip_class >= CIK) {
+	if (sctx->chip_class >= CIK) {
 		if (si_pm4_state_enabled_and_changed(sctx, ls))
 			sctx->prefetch_L2_mask |= SI_PREFETCH_LS;
 		else if (!sctx->queued.named.ls)
@@ -3331,13 +3331,13 @@ bool si_update_shaders(struct si_context *sctx)
 static void si_emit_scratch_state(struct si_context *sctx,
 				  struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 
 	radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
 			       sctx->spi_tmpring_size);
 
 	if (sctx->scratch_buffer) {
-		radeon_add_to_buffer_list(sctx, sctx->b.gfx_cs,
+		radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
 				      sctx->scratch_buffer, RADEON_USAGE_READWRITE,
 				      RADEON_PRIO_SCRATCH_BUFFER);
 	}
@@ -3403,7 +3403,7 @@ void *si_get_blit_vs(struct si_context *sctx, enum blitter_attrib_type type,
 	}
 	ureg_END(ureg);
 
-	*vs = ureg_create_shader_and_destroy(ureg, &sctx->b.b);
+	*vs = ureg_create_shader_and_destroy(ureg, &sctx->b);
 	return *vs;
 }
 
@@ -3413,21 +3413,21 @@ void si_init_shader_functions(struct si_context *sctx)
 	si_init_atom(sctx, &sctx->scratch_state, &sctx->atoms.s.scratch_state,
 		     si_emit_scratch_state);
 
-	sctx->b.b.create_vs_state = si_create_shader_selector;
-	sctx->b.b.create_tcs_state = si_create_shader_selector;
-	sctx->b.b.create_tes_state = si_create_shader_selector;
-	sctx->b.b.create_gs_state = si_create_shader_selector;
-	sctx->b.b.create_fs_state = si_create_shader_selector;
-
-	sctx->b.b.bind_vs_state = si_bind_vs_shader;
-	sctx->b.b.bind_tcs_state = si_bind_tcs_shader;
-	sctx->b.b.bind_tes_state = si_bind_tes_shader;
-	sctx->b.b.bind_gs_state = si_bind_gs_shader;
-	sctx->b.b.bind_fs_state = si_bind_ps_shader;
-
-	sctx->b.b.delete_vs_state = si_delete_shader_selector;
-	sctx->b.b.delete_tcs_state = si_delete_shader_selector;
-	sctx->b.b.delete_tes_state = si_delete_shader_selector;
-	sctx->b.b.delete_gs_state = si_delete_shader_selector;
-	sctx->b.b.delete_fs_state = si_delete_shader_selector;
+	sctx->b.create_vs_state = si_create_shader_selector;
+	sctx->b.create_tcs_state = si_create_shader_selector;
+	sctx->b.create_tes_state = si_create_shader_selector;
+	sctx->b.create_gs_state = si_create_shader_selector;
+	sctx->b.create_fs_state = si_create_shader_selector;
+
+	sctx->b.bind_vs_state = si_bind_vs_shader;
+	sctx->b.bind_tcs_state = si_bind_tcs_shader;
+	sctx->b.bind_tes_state = si_bind_tes_shader;
+	sctx->b.bind_gs_state = si_bind_gs_shader;
+	sctx->b.bind_fs_state = si_bind_ps_shader;
+
+	sctx->b.delete_vs_state = si_delete_shader_selector;
+	sctx->b.delete_tcs_state = si_delete_shader_selector;
+	sctx->b.delete_tes_state = si_delete_shader_selector;
+	sctx->b.delete_gs_state = si_delete_shader_selector;
+	sctx->b.delete_fs_state = si_delete_shader_selector;
 }
diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c
index e70f667f097..e77eafe8388 100644
--- a/src/gallium/drivers/radeonsi/si_state_streamout.c
+++ b/src/gallium/drivers/radeonsi/si_state_streamout.c
@@ -49,7 +49,7 @@ si_create_so_target(struct pipe_context *ctx,
 		return NULL;
 	}
 
-	u_suballocator_alloc(sctx->b.allocator_zeroed_memory, 4, 4,
+	u_suballocator_alloc(sctx->allocator_zeroed_memory, 4, 4,
 			     &t->buf_filled_size_offset,
 			     (struct pipe_resource**)&t->buf_filled_size);
 	if (!t->buf_filled_size) {
@@ -122,7 +122,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
 		 * VS_PARTIAL_FLUSH is required if the buffers are going to be
 		 * used as an input immediately.
 		 */
-		sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
+		sctx->flags |= SI_CONTEXT_INV_SMEM_L1 |
 				 SI_CONTEXT_INV_VMEM_L1 |
 				 SI_CONTEXT_VS_PARTIAL_FLUSH;
 	}
@@ -131,7 +131,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
 	 * start writing to the targets.
 	 */
 	if (num_targets)
-		sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+		sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
 		                 SI_CONTEXT_CS_PARTIAL_FLUSH;
 
 	/* Streamout buffers must be bound in 2 places:
@@ -229,11 +229,11 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
 
 static void si_flush_vgt_streamout(struct si_context *sctx)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	unsigned reg_strmout_cntl;
 
 	/* The register is at different places on different ASICs. */
-	if (sctx->b.chip_class >= CIK) {
+	if (sctx->chip_class >= CIK) {
 		reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
 		radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0);
 	} else {
@@ -255,7 +255,7 @@ static void si_flush_vgt_streamout(struct si_context *sctx)
 
 static void si_emit_streamout_begin(struct si_context *sctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	struct si_streamout_target **t = sctx->streamout.targets;
 	uint16_t *stride_in_dw = sctx->streamout.stride_in_dw;
 	unsigned i;
@@ -289,7 +289,7 @@ static void si_emit_streamout_begin(struct si_context *sctx, struct r600_atom *a
 			radeon_emit(cs, va); /* src address lo */
 			radeon_emit(cs, va >> 32); /* src address hi */
 
-			radeon_add_to_buffer_list(sctx,  sctx->b.gfx_cs,
+			radeon_add_to_buffer_list(sctx,  sctx->gfx_cs,
 						  t[i]->buf_filled_size,
 						  RADEON_USAGE_READ,
 						  RADEON_PRIO_SO_FILLED_SIZE);
@@ -310,7 +310,7 @@ static void si_emit_streamout_begin(struct si_context *sctx, struct r600_atom *a
 
 void si_emit_streamout_end(struct si_context *sctx)
 {
-	struct radeon_winsys_cs *cs = sctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	struct si_streamout_target **t = sctx->streamout.targets;
 	unsigned i;
 	uint64_t va;
@@ -331,7 +331,7 @@ void si_emit_streamout_end(struct si_context *sctx)
 		radeon_emit(cs, 0); /* unused */
 		radeon_emit(cs, 0); /* unused */
 
-		radeon_add_to_buffer_list(sctx,  sctx->b.gfx_cs,
+		radeon_add_to_buffer_list(sctx,  sctx->gfx_cs,
 					  t[i]->buf_filled_size,
 					  RADEON_USAGE_WRITE,
 					  RADEON_PRIO_SO_FILLED_SIZE);
@@ -358,14 +358,14 @@ void si_emit_streamout_end(struct si_context *sctx)
 static void si_emit_streamout_enable(struct si_context *sctx,
 				     struct r600_atom *atom)
 {
-	radeon_set_context_reg_seq(sctx->b.gfx_cs, R_028B94_VGT_STRMOUT_CONFIG, 2);
-	radeon_emit(sctx->b.gfx_cs,
+	radeon_set_context_reg_seq(sctx->gfx_cs, R_028B94_VGT_STRMOUT_CONFIG, 2);
+	radeon_emit(sctx->gfx_cs,
 		    S_028B94_STREAMOUT_0_EN(si_get_strmout_en(sctx)) |
 		    S_028B94_RAST_STREAM(0) |
 		    S_028B94_STREAMOUT_1_EN(si_get_strmout_en(sctx)) |
 		    S_028B94_STREAMOUT_2_EN(si_get_strmout_en(sctx)) |
 		    S_028B94_STREAMOUT_3_EN(si_get_strmout_en(sctx)));
-	radeon_emit(sctx->b.gfx_cs,
+	radeon_emit(sctx->gfx_cs,
 		    sctx->streamout.hw_enabled_mask &
 		    sctx->streamout.enabled_stream_buffers_mask);
 }
@@ -406,9 +406,9 @@ void si_update_prims_generated_query_state(struct si_context *sctx,
 
 void si_init_streamout_functions(struct si_context *sctx)
 {
-	sctx->b.b.create_stream_output_target = si_create_so_target;
-	sctx->b.b.stream_output_target_destroy = si_so_target_destroy;
-	sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
+	sctx->b.create_stream_output_target = si_create_so_target;
+	sctx->b.stream_output_target_destroy = si_so_target_destroy;
+	sctx->b.set_stream_output_targets = si_set_streamout_targets;
 	sctx->streamout.begin_atom.emit = si_emit_streamout_begin;
 	sctx->streamout.enable_atom.emit = si_emit_streamout_enable;
 }
diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c
index a3482a8f30b..f392c900f13 100644
--- a/src/gallium/drivers/radeonsi/si_state_viewport.c
+++ b/src/gallium/drivers/radeonsi/si_state_viewport.c
@@ -139,7 +139,7 @@ static void si_emit_one_scissor(struct si_context *ctx,
 static void si_emit_guardband(struct si_context *ctx,
 			      struct si_signed_scissor *vp_as_scissor)
 {
-	struct radeon_winsys_cs *cs = ctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = ctx->gfx_cs;
 	struct pipe_viewport_state vp;
 	float left, top, right, bottom, max_range, guardband_x, guardband_y;
 	float discard_x, discard_y;
@@ -212,7 +212,7 @@ static void si_emit_guardband(struct si_context *ctx,
 
 static void si_emit_scissors(struct si_context *ctx, struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = ctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = ctx->gfx_cs;
 	struct pipe_scissor_state *states = ctx->scissors.states;
 	unsigned mask = ctx->scissors.dirty_mask;
 	bool scissor_enabled = false;
@@ -286,7 +286,7 @@ static void si_set_viewport_states(struct pipe_context *pctx,
 static void si_emit_one_viewport(struct si_context *ctx,
 				 struct pipe_viewport_state *state)
 {
-	struct radeon_winsys_cs *cs = ctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = ctx->gfx_cs;
 
 	radeon_emit(cs, fui(state->scale[0]));
 	radeon_emit(cs, fui(state->translate[0]));
@@ -298,7 +298,7 @@ static void si_emit_one_viewport(struct si_context *ctx,
 
 static void si_emit_viewports(struct si_context *ctx)
 {
-	struct radeon_winsys_cs *cs = ctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = ctx->gfx_cs;
 	struct pipe_viewport_state *states = ctx->viewports.states;
 	unsigned mask = ctx->viewports.dirty_mask;
 
@@ -340,7 +340,7 @@ si_viewport_zmin_zmax(const struct pipe_viewport_state *vp, bool halfz,
 
 static void si_emit_depth_ranges(struct si_context *ctx)
 {
-	struct radeon_winsys_cs *cs = ctx->b.gfx_cs;
+	struct radeon_winsys_cs *cs = ctx->gfx_cs;
 	struct pipe_viewport_state *states = ctx->viewports.states;
 	unsigned mask = ctx->viewports.depth_range_dirty_mask;
 	bool clip_halfz = false;
@@ -437,6 +437,6 @@ void si_init_viewport_functions(struct si_context *ctx)
 	ctx->scissors.atom.emit = si_emit_scissors;
 	ctx->viewports.atom.emit = si_emit_viewport_states;
 
-	ctx->b.b.set_scissor_states = si_set_scissor_states;
-	ctx->b.b.set_viewport_states = si_set_viewport_states;
+	ctx->b.set_scissor_states = si_set_scissor_states;
+	ctx->b.set_viewport_states = si_set_viewport_states;
 }
diff --git a/src/gallium/drivers/radeonsi/si_test_dma.c b/src/gallium/drivers/radeonsi/si_test_dma.c
index 6cf3d3b5151..6c3cde49727 100644
--- a/src/gallium/drivers/radeonsi/si_test_dma.c
+++ b/src/gallium/drivers/radeonsi/si_test_dma.c
@@ -320,8 +320,8 @@ void si_test_dma(struct si_screen *sscreen)
 			int width, height, depth;
 			int srcx, srcy, srcz, dstx, dsty, dstz;
 			struct pipe_box box;
-			unsigned old_num_draw_calls = sctx->b.num_draw_calls;
-			unsigned old_num_dma_calls = sctx->b.num_dma_calls;
+			unsigned old_num_draw_calls = sctx->num_draw_calls;
+			unsigned old_num_dma_calls = sctx->num_dma_calls;
 
 			if (!do_partial_copies) {
 				/* copy whole src to dst */
@@ -376,11 +376,11 @@ void si_test_dma(struct si_screen *sscreen)
 
 			/* GPU copy */
 			u_box_3d(srcx, srcy, srcz, width, height, depth, &box);
-			sctx->b.dma_copy(ctx, dst, 0, dstx, dsty, dstz, src, 0, &box);
+			sctx->dma_copy(ctx, dst, 0, dstx, dsty, dstz, src, 0, &box);
 
 			/* See which engine was used. */
-			gfx_blits += sctx->b.num_draw_calls > old_num_draw_calls;
-			dma_blits += sctx->b.num_dma_calls > old_num_dma_calls;
+			gfx_blits += sctx->num_draw_calls > old_num_draw_calls;
+			dma_blits += sctx->num_dma_calls > old_num_dma_calls;
 
 			/* CPU copy */
 			util_copy_box(dst_cpu.ptr, tdst.format, dst_cpu.stride,
diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c
index 77117ea961c..6aa12405566 100644
--- a/src/gallium/drivers/radeonsi/si_texture.c
+++ b/src/gallium/drivers/radeonsi/si_texture.c
@@ -50,7 +50,7 @@ bool si_prepare_for_dma_blit(struct si_context *sctx,
 			     unsigned src_level,
 			     const struct pipe_box *src_box)
 {
-	if (!sctx->b.dma_cs)
+	if (!sctx->dma_cs)
 		return false;
 
 	if (rdst->surface.bpe != rsrc->surface.bpe)
@@ -94,7 +94,7 @@ bool si_prepare_for_dma_blit(struct si_context *sctx,
 
 	/* All requirements are met. Prepare textures for SDMA. */
 	if (rsrc->cmask.size && rsrc->dirty_level_mask & (1 << src_level))
-		sctx->b.b.flush_resource(&sctx->b.b, &rsrc->resource.b.b);
+		sctx->b.flush_resource(&sctx->b, &rsrc->resource.b.b);
 
 	assert(!(rsrc->dirty_level_mask & (1 << src_level)));
 	assert(!(rdst->dirty_level_mask & (1 << dst_level)));
@@ -150,7 +150,7 @@ static void si_copy_to_staging_texture(struct pipe_context *ctx, struct r600_tra
 		return;
 	}
 
-	sctx->b.dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level,
+	sctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level,
 		       &transfer->box);
 }
 
@@ -172,7 +172,7 @@ static void si_copy_from_staging_texture(struct pipe_context *ctx, struct r600_t
 		return;
 	}
 
-	sctx->b.dma_copy(ctx, dst, transfer->level,
+	sctx->dma_copy(ctx, dst, transfer->level,
 		       transfer->box.x, transfer->box.y, transfer->box.z,
 		       src, 0, &sbox);
 }
@@ -385,16 +385,16 @@ void si_eliminate_fast_color_clear(struct si_context *sctx,
 				   struct r600_texture *rtex)
 {
 	struct si_screen *sscreen = sctx->screen;
-	struct pipe_context *ctx = &sctx->b.b;
+	struct pipe_context *ctx = &sctx->b;
 
 	if (ctx == sscreen->aux_context)
 		mtx_lock(&sscreen->aux_context_lock);
 
-	unsigned n = sctx->b.num_decompress_calls;
+	unsigned n = sctx->num_decompress_calls;
 	ctx->flush_resource(ctx, &rtex->resource.b.b);
 
 	/* Flush only if any fast clear elimination took place. */
-	if (n != sctx->b.num_decompress_calls)
+	if (n != sctx->num_decompress_calls)
 		ctx->flush(ctx, NULL, 0);
 
 	if (ctx == sscreen->aux_context)
@@ -477,14 +477,14 @@ bool si_texture_disable_dcc(struct si_context *sctx,
 	if (!si_can_disable_dcc(rtex))
 		return false;
 
-	if (&sctx->b.b == sscreen->aux_context)
+	if (&sctx->b == sscreen->aux_context)
 		mtx_lock(&sscreen->aux_context_lock);
 
 	/* Decompress DCC. */
 	si_decompress_dcc(sctx, rtex);
-	sctx->b.b.flush(&sctx->b.b, NULL, 0);
+	sctx->b.flush(&sctx->b, NULL, 0);
 
-	if (&sctx->b.b == sscreen->aux_context)
+	if (&sctx->b == sscreen->aux_context)
 		mtx_unlock(&sscreen->aux_context_lock);
 
 	return si_texture_discard_dcc(sscreen, rtex);
@@ -495,7 +495,7 @@ static void si_reallocate_texture_inplace(struct si_context *sctx,
 					  unsigned new_bind_flag,
 					  bool invalidate_storage)
 {
-	struct pipe_screen *screen = sctx->b.b.screen;
+	struct pipe_screen *screen = sctx->b.screen;
 	struct r600_texture *new_tex;
 	struct pipe_resource templ = rtex->resource.b.b;
 	unsigned i;
@@ -528,7 +528,7 @@ static void si_reallocate_texture_inplace(struct si_context *sctx,
 				 u_minify(templ.width0, i), u_minify(templ.height0, i),
 				 util_num_layers(&templ, i), &box);
 
-			sctx->b.dma_copy(&sctx->b.b, &new_tex->resource.b.b, i, 0, 0, 0,
+			sctx->dma_copy(&sctx->b, &new_tex->resource.b.b, i, 0, 0, 0,
 				       &rtex->resource.b.b, i, &box);
 		}
 	}
@@ -780,11 +780,11 @@ static boolean si_texture_get_handle(struct pipe_screen* screen,
 			/* Copy the old buffer contents to the new one. */
 			struct pipe_box box;
 			u_box_1d(0, newb->width0, &box);
-			sctx->b.b.resource_copy_region(&sctx->b.b, newb, 0, 0, 0, 0,
+			sctx->b.resource_copy_region(&sctx->b, newb, 0, 0, 0, 0,
 						     &res->b.b, 0, &box);
 			flush = true;
 			/* Move the new buffer storage to the old pipe_resource. */
-			si_replace_buffer_storage(&sctx->b.b, &res->b.b, newb);
+			si_replace_buffer_storage(&sctx->b, &res->b.b, newb);
 			pipe_resource_reference(&newb, NULL);
 
 			assert(res->b.b.bind & PIPE_BIND_SHARED);
@@ -798,7 +798,7 @@ static boolean si_texture_get_handle(struct pipe_screen* screen,
 	}
 
 	if (flush)
-		sctx->b.b.flush(&sctx->b.b, NULL, 0);
+		sctx->b.flush(&sctx->b, NULL, 0);
 
 	if (res->b.is_shared) {
 		/* USAGE_EXPLICIT_FLUSH must be cleared if at least one user
@@ -1638,7 +1638,7 @@ static void si_texture_invalidate_storage(struct si_context *sctx,
 
 	p_atomic_inc(&sscreen->dirty_tex_counter);
 
-	sctx->b.num_alloc_tex_transfer_bytes += rtex->size;
+	sctx->num_alloc_tex_transfer_bytes += rtex->size;
 }
 
 static void *si_texture_transfer_map(struct pipe_context *ctx,
@@ -1696,7 +1696,7 @@ static void *si_texture_transfer_map(struct pipe_context *ctx,
 		/* Write & linear only: */
 		else if (si_rings_is_buffer_referenced(sctx, rtex->resource.buf,
 						       RADEON_USAGE_READWRITE) ||
-			 !sctx->b.ws->buffer_wait(rtex->resource.buf, 0,
+			 !sctx->ws->buffer_wait(rtex->resource.buf, 0,
 						RADEON_USAGE_READWRITE)) {
 			/* It's busy. */
 			if (si_can_invalidate_texture(sctx->screen, rtex,
@@ -1846,7 +1846,7 @@ static void si_texture_transfer_unmap(struct pipe_context *ctx,
 	}
 
 	if (rtransfer->staging) {
-		sctx->b.num_alloc_tex_transfer_bytes += rtransfer->staging->buf->size;
+		sctx->num_alloc_tex_transfer_bytes += rtransfer->staging->buf->size;
 		r600_resource_reference(&rtransfer->staging, NULL);
 	}
 
@@ -1863,9 +1863,9 @@ static void si_texture_transfer_unmap(struct pipe_context *ctx,
 	 *
 	 * The result is that the kernel memory manager is never a bottleneck.
 	 */
-	if (sctx->b.num_alloc_tex_transfer_bytes > sctx->screen->info.gart_size / 4) {
+	if (sctx->num_alloc_tex_transfer_bytes > sctx->screen->info.gart_size / 4) {
 		si_flush_gfx_cs(sctx, PIPE_FLUSH_ASYNC, NULL);
-		sctx->b.num_alloc_tex_transfer_bytes = 0;
+		sctx->num_alloc_tex_transfer_bytes = 0;
 	}
 
 	pipe_resource_reference(&transfer->resource, NULL);
@@ -2122,18 +2122,18 @@ static void vi_dcc_clean_up_context_slot(struct si_context *sctx,
 {
 	int i;
 
-	if (sctx->b.dcc_stats[slot].query_active)
+	if (sctx->dcc_stats[slot].query_active)
 		vi_separate_dcc_stop_query(sctx,
-					   sctx->b.dcc_stats[slot].tex);
+					   sctx->dcc_stats[slot].tex);
 
-	for (i = 0; i < ARRAY_SIZE(sctx->b.dcc_stats[slot].ps_stats); i++)
-		if (sctx->b.dcc_stats[slot].ps_stats[i]) {
-			sctx->b.b.destroy_query(&sctx->b.b,
-					      sctx->b.dcc_stats[slot].ps_stats[i]);
-			sctx->b.dcc_stats[slot].ps_stats[i] = NULL;
+	for (i = 0; i < ARRAY_SIZE(sctx->dcc_stats[slot].ps_stats); i++)
+		if (sctx->dcc_stats[slot].ps_stats[i]) {
+			sctx->b.destroy_query(&sctx->b,
+					      sctx->dcc_stats[slot].ps_stats[i]);
+			sctx->dcc_stats[slot].ps_stats[i] = NULL;
 		}
 
-	r600_texture_reference(&sctx->b.dcc_stats[slot].tex, NULL);
+	r600_texture_reference(&sctx->dcc_stats[slot].tex, NULL);
 }
 
 /**
@@ -2145,21 +2145,21 @@ static unsigned vi_get_context_dcc_stats_index(struct si_context *sctx,
 	int i, empty_slot = -1;
 
 	/* Remove zombie textures (textures kept alive by this array only). */
-	for (i = 0; i < ARRAY_SIZE(sctx->b.dcc_stats); i++)
-		if (sctx->b.dcc_stats[i].tex &&
-		    sctx->b.dcc_stats[i].tex->resource.b.b.reference.count == 1)
+	for (i = 0; i < ARRAY_SIZE(sctx->dcc_stats); i++)
+		if (sctx->dcc_stats[i].tex &&
+		    sctx->dcc_stats[i].tex->resource.b.b.reference.count == 1)
 			vi_dcc_clean_up_context_slot(sctx, i);
 
 	/* Find the texture. */
-	for (i = 0; i < ARRAY_SIZE(sctx->b.dcc_stats); i++) {
+	for (i = 0; i < ARRAY_SIZE(sctx->dcc_stats); i++) {
 		/* Return if found. */
-		if (sctx->b.dcc_stats[i].tex == tex) {
-			sctx->b.dcc_stats[i].last_use_timestamp = os_time_get();
+		if (sctx->dcc_stats[i].tex == tex) {
+			sctx->dcc_stats[i].last_use_timestamp = os_time_get();
 			return i;
 		}
 
 		/* Record the first seen empty slot. */
-		if (empty_slot == -1 && !sctx->b.dcc_stats[i].tex)
+		if (empty_slot == -1 && !sctx->dcc_stats[i].tex)
 			empty_slot = i;
 	}
 
@@ -2168,9 +2168,9 @@ static unsigned vi_get_context_dcc_stats_index(struct si_context *sctx,
 		int oldest_slot = 0;
 
 		/* Find the oldest slot. */
-		for (i = 1; i < ARRAY_SIZE(sctx->b.dcc_stats); i++)
-			if (sctx->b.dcc_stats[oldest_slot].last_use_timestamp >
-			    sctx->b.dcc_stats[i].last_use_timestamp)
+		for (i = 1; i < ARRAY_SIZE(sctx->dcc_stats); i++)
+			if (sctx->dcc_stats[oldest_slot].last_use_timestamp >
+			    sctx->dcc_stats[i].last_use_timestamp)
 				oldest_slot = i;
 
 		/* Clean up the oldest slot. */
@@ -2179,8 +2179,8 @@ static unsigned vi_get_context_dcc_stats_index(struct si_context *sctx,
 	}
 
 	/* Add the texture to the new slot. */
-	r600_texture_reference(&sctx->b.dcc_stats[empty_slot].tex, tex);
-	sctx->b.dcc_stats[empty_slot].last_use_timestamp = os_time_get();
+	r600_texture_reference(&sctx->dcc_stats[empty_slot].tex, tex);
+	sctx->dcc_stats[empty_slot].last_use_timestamp = os_time_get();
 	return empty_slot;
 }
 
@@ -2188,7 +2188,7 @@ static struct pipe_query *
 vi_create_resuming_pipestats_query(struct si_context *sctx)
 {
 	struct si_query_hw *query = (struct si_query_hw*)
-		sctx->b.b.create_query(&sctx->b.b, PIPE_QUERY_PIPELINE_STATISTICS, 0);
+		sctx->b.create_query(&sctx->b, PIPE_QUERY_PIPELINE_STATISTICS, 0);
 
 	query->flags |= SI_QUERY_HW_FLAG_BEGIN_RESUMES;
 	return (struct pipe_query*)query;
@@ -2202,14 +2202,14 @@ void vi_separate_dcc_start_query(struct si_context *sctx,
 {
 	unsigned i = vi_get_context_dcc_stats_index(sctx, tex);
 
-	assert(!sctx->b.dcc_stats[i].query_active);
+	assert(!sctx->dcc_stats[i].query_active);
 
-	if (!sctx->b.dcc_stats[i].ps_stats[0])
-		sctx->b.dcc_stats[i].ps_stats[0] = vi_create_resuming_pipestats_query(sctx);
+	if (!sctx->dcc_stats[i].ps_stats[0])
+		sctx->dcc_stats[i].ps_stats[0] = vi_create_resuming_pipestats_query(sctx);
 
 	/* begin or resume the query */
-	sctx->b.b.begin_query(&sctx->b.b, sctx->b.dcc_stats[i].ps_stats[0]);
-	sctx->b.dcc_stats[i].query_active = true;
+	sctx->b.begin_query(&sctx->b, sctx->dcc_stats[i].ps_stats[0]);
+	sctx->dcc_stats[i].query_active = true;
 }
 
 /**
@@ -2220,12 +2220,12 @@ void vi_separate_dcc_stop_query(struct si_context *sctx,
 {
 	unsigned i = vi_get_context_dcc_stats_index(sctx, tex);
 
-	assert(sctx->b.dcc_stats[i].query_active);
-	assert(sctx->b.dcc_stats[i].ps_stats[0]);
+	assert(sctx->dcc_stats[i].query_active);
+	assert(sctx->dcc_stats[i].ps_stats[0]);
 
 	/* pause or end the query */
-	sctx->b.b.end_query(&sctx->b.b, sctx->b.dcc_stats[i].ps_stats[0]);
-	sctx->b.dcc_stats[i].query_active = false;
+	sctx->b.end_query(&sctx->b, sctx->dcc_stats[i].ps_stats[0]);
+	sctx->dcc_stats[i].query_active = false;
 }
 
 static bool vi_should_enable_separate_dcc(struct r600_texture *tex)
@@ -2274,7 +2274,7 @@ void vi_separate_dcc_try_enable(struct si_context *sctx,
 		tex->last_dcc_separate_buffer = NULL;
 	} else {
 		tex->dcc_separate_buffer = (struct r600_resource*)
-			si_aligned_buffer_create(sctx->b.b.screen,
+			si_aligned_buffer_create(sctx->b.screen,
 						   SI_RESOURCE_FLAG_UNMAPPABLE,
 						   PIPE_USAGE_DEFAULT,
 						   tex->surface.dcc_size,
@@ -2301,24 +2301,24 @@ void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
 	struct si_context *sctx = (struct si_context*)ctx;
 	struct pipe_query *tmp;
 	unsigned i = vi_get_context_dcc_stats_index(sctx, tex);
-	bool query_active = sctx->b.dcc_stats[i].query_active;
+	bool query_active = sctx->dcc_stats[i].query_active;
 	bool disable = false;
 
-	if (sctx->b.dcc_stats[i].ps_stats[2]) {
+	if (sctx->dcc_stats[i].ps_stats[2]) {
 		union pipe_query_result result;
 
 		/* Read the results. */
-		ctx->get_query_result(ctx, sctx->b.dcc_stats[i].ps_stats[2],
+		ctx->get_query_result(ctx, sctx->dcc_stats[i].ps_stats[2],
 				      true, &result);
 		si_query_hw_reset_buffers(sctx,
 					  (struct si_query_hw*)
-					  sctx->b.dcc_stats[i].ps_stats[2]);
+					  sctx->dcc_stats[i].ps_stats[2]);
 
 		/* Compute the approximate number of fullscreen draws. */
 		tex->ps_draw_ratio =
 			result.pipeline_statistics.ps_invocations /
 			(tex->resource.b.b.width0 * tex->resource.b.b.height0);
-		sctx->b.last_tex_ps_draw_ratio = tex->ps_draw_ratio;
+		sctx->last_tex_ps_draw_ratio = tex->ps_draw_ratio;
 
 		disable = tex->dcc_separate_buffer &&
 			  !vi_should_enable_separate_dcc(tex);
@@ -2331,10 +2331,10 @@ void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
 		vi_separate_dcc_stop_query(sctx, tex);
 
 	/* Move the queries in the queue by one. */
-	tmp = sctx->b.dcc_stats[i].ps_stats[2];
-	sctx->b.dcc_stats[i].ps_stats[2] = sctx->b.dcc_stats[i].ps_stats[1];
-	sctx->b.dcc_stats[i].ps_stats[1] = sctx->b.dcc_stats[i].ps_stats[0];
-	sctx->b.dcc_stats[i].ps_stats[0] = tmp;
+	tmp = sctx->dcc_stats[i].ps_stats[2];
+	sctx->dcc_stats[i].ps_stats[2] = sctx->dcc_stats[i].ps_stats[1];
+	sctx->dcc_stats[i].ps_stats[1] = sctx->dcc_stats[i].ps_stats[0];
+	sctx->dcc_stats[i].ps_stats[0] = tmp;
 
 	/* create and start a new query as ps_stats[0] */
 	if (query_active)
@@ -2494,6 +2494,6 @@ void si_init_screen_texture_functions(struct si_screen *sscreen)
 
 void si_init_context_texture_functions(struct si_context *sctx)
 {
-	sctx->b.b.create_surface = si_create_surface;
-	sctx->b.b.surface_destroy = si_surface_destroy;
+	sctx->b.create_surface = si_create_surface;
+	sctx->b.surface_destroy = si_surface_destroy;
 }
diff --git a/src/gallium/drivers/radeonsi/si_uvd.c b/src/gallium/drivers/radeonsi/si_uvd.c
index cddca12182f..4165725b0e9 100644
--- a/src/gallium/drivers/radeonsi/si_uvd.c
+++ b/src/gallium/drivers/radeonsi/si_uvd.c
@@ -94,7 +94,7 @@ struct pipe_video_buffer *si_video_buffer_create(struct pipe_context *pipe,
 			continue;
 
 		/* reset the address */
-		resources[i]->resource.gpu_address = ctx->b.ws->buffer_get_virtual_address(
+		resources[i]->resource.gpu_address = ctx->ws->buffer_get_virtual_address(
 			resources[i]->resource.buf);
 	}
 
@@ -146,16 +146,16 @@ struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context *context,
 					       const struct pipe_video_codec *templ)
 {
 	struct si_context *ctx = (struct si_context *)context;
-	bool vcn = (ctx->b.family == CHIP_RAVEN) ? true : false;
+	bool vcn = (ctx->family == CHIP_RAVEN) ? true : false;
 
 	if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
 		if (vcn) {
-			return radeon_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer);
+			return radeon_create_encoder(context, templ, ctx->ws, si_vce_get_buffer);
 		} else {
 			if (u_reduce_video_profile(templ->profile) == PIPE_VIDEO_FORMAT_HEVC)
-				return radeon_uvd_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer);
+				return radeon_uvd_create_encoder(context, templ, ctx->ws, si_vce_get_buffer);
 			else
-				return si_vce_create_encoder(context, templ, ctx->b.ws, si_vce_get_buffer);
+				return si_vce_create_encoder(context, templ, ctx->ws, si_vce_get_buffer);
 		}
 	}