diff options
Diffstat (limited to 'src/gallium')
57 files changed, 2323 insertions, 1829 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index a30accaa6ff..e37cf21416a 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -44,7 +44,7 @@ C_SOURCES = \ draw/draw_vs_exec.c \ draw/draw_vs_ppc.c \ draw/draw_vs_sse.c \ - draw/draw_vs_varient.c \ + draw/draw_vs_variant.c \ indices/u_indices_gen.c \ indices/u_unfilled_gen.c \ os/os_misc.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index 6176f0ae2a8..58d78afe133 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -88,7 +88,7 @@ source = [ 'draw/draw_vs_exec.c', 'draw/draw_vs_ppc.c', 'draw/draw_vs_sse.c', - 'draw/draw_vs_varient.c', + 'draw/draw_vs_variant.c', #'indices/u_indices.c', #'indices/u_unfilled_indices.c', 'indices/u_indices_gen.c', diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index d99f94edc43..f8196bb476f 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -128,8 +128,8 @@ boolean draw_init(struct draw_context *draw) ASSIGN_4V( draw->plane[4], 0, 0, 1, 1 ); /* yes these are correct */ ASSIGN_4V( draw->plane[5], 0, 0, -1, 1 ); /* mesa's a bit wonky */ draw->nr_planes = 6; - draw->clip_xy = 1; - draw->clip_z = 1; + draw->clip_xy = TRUE; + draw->clip_z = TRUE; draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index a1b8fc38880..56c26f57cce 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -435,6 +435,7 @@ draw_llvm_create_variant(struct draw_llvm *llvm, return variant; } + static void generate_vs(struct draw_llvm *llvm, LLVMBuilderRef builder, @@ -464,8 +465,7 @@ generate_vs(struct draw_llvm *llvm, tgsi_dump(tokens, 0); } - if (llvm->draw->num_sampler_views && - llvm->draw->num_samplers) + if 
(llvm->draw->num_sampler_views && llvm->draw->num_samplers) sampler = draw_sampler; lp_build_tgsi_soa(llvm->gallivm, @@ -480,8 +480,7 @@ generate_vs(struct draw_llvm *llvm, sampler, &llvm->draw->vs.vertex_shader->info); - if(clamp_vertex_color) - { + if (clamp_vertex_color) { LLVMValueRef out; unsigned chan, attrib; struct lp_build_context bld; @@ -489,8 +488,8 @@ generate_vs(struct draw_llvm *llvm, lp_build_context_init(&bld, llvm->gallivm, vs_type); for (attrib = 0; attrib < info->num_outputs; ++attrib) { - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(outputs[attrib][chan]) { + for (chan = 0; chan < NUM_CHANNELS; ++chan) { + if (outputs[attrib][chan]) { switch (info->output_semantic_name[attrib]) { case TGSI_SEMANTIC_COLOR: case TGSI_SEMANTIC_BCOLOR: @@ -505,6 +504,7 @@ generate_vs(struct draw_llvm *llvm, } } + #if DEBUG_STORE static void print_vectorf(LLVMBuilderRef builder, LLVMValueRef vec) @@ -523,6 +523,7 @@ static void print_vectorf(LLVMBuilderRef builder, } #endif + static void generate_fetch(struct gallivm_state *gallivm, LLVMValueRef vbuffers_ptr, @@ -566,6 +567,7 @@ generate_fetch(struct gallivm_state *gallivm, *res = draw_llvm_translate_from(gallivm, vbuffer_ptr, velem->src_format); } + static LLVMValueRef aos_to_soa(struct gallivm_state *gallivm, LLVMValueRef val0, @@ -609,6 +611,7 @@ aos_to_soa(struct gallivm_state *gallivm, return res; } + static void soa_to_aos(struct gallivm_state *gallivm, LLVMValueRef soa[NUM_CHANNELS], @@ -645,6 +648,7 @@ soa_to_aos(struct gallivm_state *gallivm, } } + static void convert_to_soa(struct gallivm_state *gallivm, LLVMValueRef (*aos)[NUM_CHANNELS], @@ -672,6 +676,7 @@ convert_to_soa(struct gallivm_state *gallivm, } } + static void store_aos(struct gallivm_state *gallivm, LLVMValueRef io_ptr, @@ -748,6 +753,7 @@ store_aos(struct gallivm_state *gallivm, #endif } + static void store_aos_array(struct gallivm_state *gallivm, LLVMValueRef io_ptr, @@ -796,6 +802,7 @@ store_aos_array(struct gallivm_state *gallivm, 
store_aos(gallivm, io3_ptr, attr_index, aos[3], clipmask3); } + static void convert_to_aos(struct gallivm_state *gallivm, LLVMValueRef io, @@ -813,8 +820,8 @@ convert_to_aos(struct gallivm_state *gallivm, for (attrib = 0; attrib < num_outputs; ++attrib) { LLVMValueRef soa[4]; LLVMValueRef aos[4]; - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(outputs[attrib][chan]) { + for (chan = 0; chan < NUM_CHANNELS; ++chan) { + if (outputs[attrib][chan]) { LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]); /*lp_build_printf(builder, "output %d : %d ", @@ -822,8 +829,10 @@ convert_to_aos(struct gallivm_state *gallivm, LLVMConstInt(LLVMInt32Type(), chan, 0)); print_vectorf(builder, out);*/ soa[chan] = out; - } else + } + else { soa[chan] = 0; + } } soa_to_aos(gallivm, soa, aos); store_aos_array(gallivm, @@ -838,7 +847,8 @@ convert_to_aos(struct gallivm_state *gallivm, #endif } -/* + +/** * Stores original vertex positions in clip coordinates * There is probably a more efficient way to do this, 4 floats at once * rather than extracting each element one by one. 
@@ -880,7 +890,7 @@ store_clip(struct gallivm_state *gallivm, clip_ptr2 = draw_jit_header_clip(gallivm, io2_ptr); clip_ptr3 = draw_jit_header_clip(gallivm, io3_ptr); - for (i = 0; i<4; i++){ + for (i = 0; i<4; i++) { clip0_ptr = LLVMBuildGEP(builder, clip_ptr0, indices, 2, ""); /* x0 */ clip1_ptr = LLVMBuildGEP(builder, clip_ptr1, indices, 2, ""); /* x1 */ clip2_ptr = LLVMBuildGEP(builder, clip_ptr2, indices, 2, ""); /* x2 */ @@ -901,7 +911,9 @@ store_clip(struct gallivm_state *gallivm, } -/* Equivalent of _mm_set1_ps(a) + +/** + * Equivalent of _mm_set1_ps(a) */ static LLVMValueRef vec4f_from_scalar(struct gallivm_state *gallivm, @@ -912,7 +924,7 @@ vec4f_from_scalar(struct gallivm_state *gallivm, LLVMValueRef res = LLVMGetUndef(LLVMVectorType(float_type, 4)); int i; - for(i = 0; i < 4; ++i) { + for (i = 0; i < 4; ++i) { LLVMValueRef index = lp_build_const_int32(gallivm, i); res = LLVMBuildInsertElement(gallivm->builder, res, a, index, i == 3 ? name : ""); @@ -921,7 +933,8 @@ vec4f_from_scalar(struct gallivm_state *gallivm, return res; } -/* + +/** * Transforms the outputs for viewport mapping */ static void @@ -942,7 +955,7 @@ generate_viewport(struct draw_llvm *llvm, LLVMBuildStore(builder, out3, outputs[0][3]); /* Viewport Mapping */ - for (i=0; i<3; i++){ + for (i=0; i<3; i++) { LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 x2 x3*/ LLVMValueRef scale; LLVMValueRef trans; @@ -973,7 +986,7 @@ generate_viewport(struct draw_llvm *llvm, } -/* +/** * Returns clipmask as 4xi32 bitmask for the 4 vertices */ static LLVMValueRef @@ -992,9 +1005,7 @@ generate_clipmask(struct gallivm_state *gallivm, LLVMValueRef zero, shift; LLVMValueRef pos_x, pos_y, pos_z, pos_w; LLVMValueRef plane1, planes, plane_ptr, sum; - unsigned i; - struct lp_type f32_type = lp_type_float_vec(32); mask = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0); @@ -1009,7 +1020,7 @@ generate_clipmask(struct gallivm_state *gallivm, pos_w = LLVMBuildLoad(builder, 
outputs[0][3], ""); /*w0 w1 w2 w3*/ /* Cliptest, for hardwired planes */ - if (clip_xy){ + if (clip_xy) { /* plane 1 */ test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w); temp = shift; @@ -1037,15 +1048,15 @@ generate_clipmask(struct gallivm_state *gallivm, mask = LLVMBuildOr(builder, mask, test, ""); } - if (clip_z){ + if (clip_z) { temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 16); - if (clip_halfz){ + if (clip_halfz) { /* plane 5 */ test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z); test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); } - else{ + else { /* plane 5 */ test = LLVMBuildFAdd(builder, pos_z, pos_w, ""); test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test); @@ -1059,7 +1070,7 @@ generate_clipmask(struct gallivm_state *gallivm, mask = LLVMBuildOr(builder, mask, test, ""); } - if (clip_user){ + if (clip_user) { LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_ptr); LLVMValueRef indices[3]; temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 32); @@ -1105,7 +1116,8 @@ generate_clipmask(struct gallivm_state *gallivm, return mask; } -/* + +/** * Returns boolean if any clipping has occurred * Used zero/non-zero i32 value to represent boolean */ @@ -1119,7 +1131,7 @@ clipmask_bool(struct gallivm_state *gallivm, LLVMValueRef temp; int i; - for (i=0; i<4; i++){ + for (i=0; i<4; i++) { temp = LLVMBuildExtractElement(builder, clipmask, lp_build_const_int32(gallivm, i) , ""); ret = LLVMBuildOr(builder, ret, temp, ""); @@ -1128,6 +1140,7 @@ clipmask_bool(struct gallivm_state *gallivm, LLVMBuildStore(builder, ret, ret_ptr); } + static void draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) { @@ -1172,8 +1185,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) variant->function = LLVMAddFunction(gallivm->module, "draw_llvm_shader", func_type); 
LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); - for(i = 0; i < Elements(arg_types); ++i) - if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) + for (i = 0; i < Elements(arg_types); ++i) + if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute); context_ptr = LLVMGetParam(variant->function, 0); @@ -1271,7 +1284,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) store_clip(gallivm, io, outputs); /* do cliptest */ - if (enable_cliptest){ + if (enable_cliptest) { /* allocate clipmask, assign it integer type */ clipmask = generate_clipmask(gallivm, outputs, variant->key.clip_xy, @@ -1283,12 +1296,12 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) /* return clipping boolean value for function */ clipmask_bool(gallivm, clipmask, ret_ptr); } - else{ + else { clipmask = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0); } /* do viewport mapping */ - if (!bypass_viewport){ + if (!bypass_viewport) { generate_viewport(llvm, builder, outputs, context_ptr); } @@ -1308,7 +1321,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) * Translate the LLVM IR into machine code. 
*/ #ifdef DEBUG - if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { + if (LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { lp_debug_dump_value(variant->function); assert(0); } @@ -1375,8 +1388,8 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian variant->function_elts = LLVMAddFunction(gallivm->module, "draw_llvm_shader_elts", func_type); LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv); - for(i = 0; i < Elements(arg_types); ++i) - if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) + for (i = 0; i < Elements(arg_types); ++i) + if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), LLVMNoAliasAttribute); @@ -1483,7 +1496,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian store_clip(gallivm, io, outputs); /* do cliptest */ - if (enable_cliptest){ + if (enable_cliptest) { /* allocate clipmask, assign it integer type */ clipmask = generate_clipmask(gallivm, outputs, variant->key.clip_xy, @@ -1495,12 +1508,12 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian /* return clipping boolean value for function */ clipmask_bool(gallivm, clipmask, ret_ptr); } - else{ + else { clipmask = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0); } /* do viewport mapping */ - if (!bypass_viewport){ + if (!bypass_viewport) { generate_viewport(llvm, builder, outputs, context_ptr); } @@ -1523,7 +1536,7 @@ draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *varian * Translate the LLVM IR into machine code. 
*/ #ifdef DEBUG - if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) { + if (LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) { lp_debug_dump_value(variant->function_elts); assert(0); } @@ -1595,6 +1608,7 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store) return key; } + void draw_llvm_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, @@ -1609,7 +1623,6 @@ draw_llvm_set_mapped_texture(struct draw_context *draw, assert(sampler_idx < PIPE_MAX_VERTEX_SAMPLERS); - jit_tex = &draw->llvm->jit_context.textures[sampler_idx]; jit_tex->width = width; diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index a10d8e9edc0..b49502cec48 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -163,6 +163,7 @@ static void interp( const struct clip_stage *clip, */ static void emit_poly( struct draw_stage *stage, struct vertex_header **inlist, + const boolean *edgeflags, unsigned n, const struct prim_header *origPrim) { @@ -181,6 +182,9 @@ static void emit_poly( struct draw_stage *stage, edge_last = DRAW_PIPE_EDGE_FLAG_1; } + if (!edgeflags[0]) + edge_first = 0; + /* later stages may need the determinant, but only the sign matters */ header.det = origPrim->det; header.flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; @@ -199,7 +203,11 @@ static void emit_poly( struct draw_stage *stage, header.v[2] = inlist[0]; /* the provoking vertex */ } - if (i == n-1) + if (!edgeflags[i-1]) { + header.flags &= ~edge_middle; + } + + if (i == n - 1 && edgeflags[i]) header.flags |= edge_last; if (0) { @@ -248,15 +256,33 @@ do_clip_tri( struct draw_stage *stage, unsigned tmpnr = 0; unsigned n = 3; unsigned i; + boolean aEdges[MAX_CLIPPED_VERTICES]; + boolean bEdges[MAX_CLIPPED_VERTICES]; + boolean *inEdges = aEdges; + boolean *outEdges = bEdges; inlist[0] = header->v[0]; inlist[1] = header->v[1]; inlist[2] = 
header->v[2]; + /* + * Note: at this point we can't just use the per-vertex edge flags. + * We have to observe the edge flag bits set in header->flags which + * were set during primitive decomposition. Put those flags into + * an edge flags array which parallels the vertex array. + * Later, in the 'unfilled' pipeline stage we'll draw the edge if both + * the header.flags bit is set AND the per-vertex edgeflag field is set. + */ + inEdges[0] = !!(header->flags & DRAW_PIPE_EDGE_FLAG_0); + inEdges[1] = !!(header->flags & DRAW_PIPE_EDGE_FLAG_1); + inEdges[2] = !!(header->flags & DRAW_PIPE_EDGE_FLAG_2); + while (clipmask && n >= 3) { const unsigned plane_idx = ffs(clipmask)-1; + const boolean is_user_clip_plane = plane_idx >= 6; const float *plane = clipper->plane[plane_idx]; struct vertex_header *vert_prev = inlist[0]; + boolean *edge_prev = &inEdges[0]; float dp_prev = dot4( vert_prev->clip, plane ); unsigned outcount = 0; @@ -266,9 +292,11 @@ do_clip_tri( struct draw_stage *stage, if (n >= MAX_CLIPPED_VERTICES) return; inlist[n] = inlist[0]; /* prevent rotation of vertices */ + inEdges[n] = inEdges[0]; for (i = 1; i <= n; i++) { struct vertex_header *vert = inlist[i]; + boolean *edge = &inEdges[i]; float dp = dot4( vert->clip, plane ); @@ -276,11 +304,13 @@ do_clip_tri( struct draw_stage *stage, assert(outcount < MAX_CLIPPED_VERTICES); if (outcount >= MAX_CLIPPED_VERTICES) return; + outEdges[outcount] = *edge_prev; outlist[outcount++] = vert_prev; } if (DIFFERENT_SIGNS(dp, dp_prev)) { struct vertex_header *new_vert; + boolean *new_edge; assert(tmpnr < MAX_CLIPPED_VERTICES + 1); if (tmpnr >= MAX_CLIPPED_VERTICES + 1) @@ -290,6 +320,8 @@ do_clip_tri( struct draw_stage *stage, assert(outcount < MAX_CLIPPED_VERTICES); if (outcount >= MAX_CLIPPED_VERTICES) return; + + new_edge = &outEdges[outcount]; outlist[outcount++] = new_vert; if (IS_NEGATIVE(dp)) { @@ -299,10 +331,22 @@ do_clip_tri( struct draw_stage *stage, float t = dp / (dp - dp_prev); interp( clipper, new_vert, 
t, vert, vert_prev ); - /* Force edgeflag true in this case: + /* Whether or not to set edge flag for the new vert depends + * on whether it's a user-defined clipping plane. We're + * copying NVIDIA's behaviour here. */ - new_vert->edgeflag = 1; - } else { + if (is_user_clip_plane) { + /* we want to see an edge along the clip plane */ + *new_edge = TRUE; + new_vert->edgeflag = TRUE; + } + else { + /* we don't want to see an edge along the frustum clip plane */ + *new_edge = *edge_prev; + new_vert->edgeflag = FALSE; + } + } + else { /* Coming back in. */ float t = dp_prev / (dp_prev - dp); @@ -311,10 +355,12 @@ do_clip_tri( struct draw_stage *stage, /* Copy starting vert's edgeflag: */ new_vert->edgeflag = vert_prev->edgeflag; + *new_edge = *edge_prev; } } vert_prev = vert; + edge_prev = edge; dp_prev = dp; } @@ -325,6 +371,12 @@ do_clip_tri( struct draw_stage *stage, outlist = tmp; n = outcount; } + { + boolean *tmp = inEdges; + inEdges = outEdges; + outEdges = tmp; + } + } /* If flat-shading, copy provoking vertex color to polygon vertex[0] @@ -353,7 +405,7 @@ do_clip_tri( struct draw_stage *stage, /* Emit the polygon as triangles to the setup stage: */ - emit_poly( stage, inlist, n, header ); + emit_poly( stage, inlist, inEdges, n, header ); } } diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_variant.c index d8f030f61eb..d8f030f61eb 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_variant.c diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c index ad514463de0..5ad32d9182c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c @@ -105,7 +105,7 @@ analyse_tex(struct analysis_context *ctx, if (info->num_texs < Elements(info->tex)) { struct lp_tgsi_texture_info *tex_info = &info->tex[info->num_texs]; - bool indirect = FALSE; + boolean indirect = 
FALSE; unsigned readmask = 0; tex_info->target = inst->Texture.Texture; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h index 9d5553f0ea0..e3a4915d03c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h @@ -65,7 +65,7 @@ struct ureg_src /* Very similar to a tgsi_dst_register, removing unsupported fields * and adding a Saturate flag. It's easier to push saturate into the - * destination register than to try and create a _SAT varient of each + * destination register than to try and create a _SAT variant of each * instruction function. */ struct ureg_dst @@ -434,7 +434,7 @@ ureg_fixup_label(struct ureg_program *ureg, /* Generic instruction emitter. Use if you need to pass the opcode as - * a parameter, rather than using the emit_OP() varients below. + * a parameter, rather than using the emit_OP() variants below. */ void ureg_insn(struct ureg_program *ureg, diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c index a4c399052fd..528f344a0f7 100644 --- a/src/gallium/auxiliary/util/u_blitter.c +++ b/src/gallium/auxiliary/util/u_blitter.c @@ -770,7 +770,7 @@ void util_blitter_copy_region(struct blitter_context *blitter, /* Check if we can sample from and render to the surfaces. 
*/ /* (assuming copying a stencil buffer is not possible) */ - if ((!ignore_stencil && is_stencil) || + if ((!ignore_stencil && is_stencil) || !screen->is_format_supported(screen, dst->format, dst->target, dst->nr_samples, bind) || !screen->is_format_supported(screen, src->format, src->target, diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h index c47c13c64cf..b5ea4050633 100644 --- a/src/gallium/auxiliary/util/u_debug.h +++ b/src/gallium/auxiliary/util/u_debug.h @@ -192,7 +192,7 @@ void _debug_assert_fail(const char *expr, */ #ifdef DEBUG #define debug_checkpoint_full() \ - _debug_printf("%s:%u:%s", __FILE__, __LINE__, __FUNCTION__) + _debug_printf("%s:%u:%s\n", __FILE__, __LINE__, __FUNCTION__) #else #define debug_checkpoint_full() \ ((void)0) diff --git a/src/gallium/auxiliary/util/u_format_r11g11b10f.h b/src/gallium/auxiliary/util/u_format_r11g11b10f.h index c4181d0e34e..8e0572aa7ce 100644 --- a/src/gallium/auxiliary/util/u_format_r11g11b10f.h +++ b/src/gallium/auxiliary/util/u_format_r11g11b10f.h @@ -45,14 +45,18 @@ static INLINE unsigned f32_to_uf11(float val) { - uint32_t f32 = (*(uint32_t *) &val); + union { + float f; + uint32_t ui; + } f32 = {val}; + uint16_t uf11 = 0; /* Decode little-endian 32-bit floating-point value */ - int sign = (f32 >> 16) & 0x8000; + int sign = (f32.ui >> 16) & 0x8000; /* Map exponent to the range [-127,128] */ - int exponent = ((f32 >> 23) & 0xff) - 127; - int mantissa = f32 & 0x007fffff; + int exponent = ((f32.ui >> 23) & 0xff) - 127; + int mantissa = f32.ui & 0x007fffff; if (sign) return 0; @@ -111,14 +115,18 @@ static INLINE float uf11_to_f32(uint16_t val) static INLINE unsigned f32_to_uf10(float val) { - uint32_t f32 = (*(uint32_t *) &val); + union { + float f; + uint32_t ui; + } f32 = {val}; + uint16_t uf10 = 0; /* Decode little-endian 32-bit floating-point value */ - int sign = (f32 >> 16) & 0x8000; + int sign = (f32.ui >> 16) & 0x8000; /* Map exponent to the range [-127,128] */ - 
int exponent = ((f32 >> 23) & 0xff) - 127; - int mantissa = f32 & 0x007fffff; + int exponent = ((f32.ui >> 23) & 0xff) - 127; + int mantissa = f32.ui & 0x007fffff; if (sign) return 0; diff --git a/src/gallium/auxiliary/util/u_linkage.h b/src/gallium/auxiliary/util/u_linkage.h index 4720e0ee603..43ec917fc81 100644 --- a/src/gallium/auxiliary/util/u_linkage.h +++ b/src/gallium/auxiliary/util/u_linkage.h @@ -35,7 +35,7 @@ struct util_semantic_set unsigned long masks[256 / 8 / sizeof(unsigned long)]; }; -static INLINE bool +static INLINE boolean util_semantic_set_contains(struct util_semantic_set *set, unsigned char value) { return !!(set->masks[value / (sizeof(long) * 8)] & (1 << (value / (sizeof(long) * 8)))); diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 65a99fcb394..0b5284428eb 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -477,6 +477,9 @@ float_to_byte_tex(float f) static INLINE unsigned util_logbase2(unsigned n) { +#if defined(PIPE_CC_GCC) && (PIPE_CC_GCC_VERSION >= 304) + return ((sizeof(unsigned) * 8 - 1) - __builtin_clz(n | 1)); +#else unsigned pos = 0; if (n >= 1<<16) { n >>= 16; pos += 16; } if (n >= 1<< 8) { n >>= 8; pos += 8; } @@ -484,6 +487,7 @@ util_logbase2(unsigned n) if (n >= 1<< 2) { n >>= 2; pos += 2; } if (n >= 1<< 1) { pos += 1; } return pos; +#endif } @@ -493,17 +497,29 @@ util_logbase2(unsigned n) static INLINE unsigned util_next_power_of_two(unsigned x) { - unsigned i; - - if (x == 0) - return 1; +#if defined(PIPE_CC_GCC) && (PIPE_CC_GCC_VERSION >= 304) + if (x <= 1) + return 1; - --x; + return (1 << ((sizeof(unsigned) * 8) - __builtin_clz(x - 1))); +#else + unsigned val = x; - for (i = 1; i < sizeof(unsigned) * 8; i <<= 1) - x |= x >> i; + if (x <= 1) + return 1; - return x + 1; + if (util_is_power_of_two(x)) + return x; + + val--; + val = (val >> 1) | val; + val = (val >> 2) | val; + val = (val >> 4) | val; + val = (val >> 8) | val; + val = (val >> 
16) | val; + val++; + return val; +#endif } @@ -513,7 +529,7 @@ util_next_power_of_two(unsigned x) static INLINE unsigned util_bitcount(unsigned n) { -#if defined(PIPE_CC_GCC) +#if defined(PIPE_CC_GCC) && (PIPE_CC_GCC_VERSION >= 304) return __builtin_popcount(n); #else /* K&R classic bitcount. diff --git a/src/gallium/auxiliary/util/u_staging.c b/src/gallium/auxiliary/util/u_staging.c index b6bf241a22a..b5e37932e83 100644 --- a/src/gallium/auxiliary/util/u_staging.c +++ b/src/gallium/auxiliary/util/u_staging.c @@ -55,7 +55,7 @@ util_staging_transfer_init(struct pipe_context *pipe, unsigned level, unsigned usage, const struct pipe_box *box, - bool direct, struct util_staging_transfer *tx) + boolean direct, struct util_staging_transfer *tx) { struct pipe_screen *pscreen = pipe->screen; diff --git a/src/gallium/auxiliary/util/u_staging.h b/src/gallium/auxiliary/util/u_staging.h index 49839d25439..ddbb33443e4 100644 --- a/src/gallium/auxiliary/util/u_staging.h +++ b/src/gallium/auxiliary/util/u_staging.h @@ -55,7 +55,7 @@ util_staging_transfer_init(struct pipe_context *pipe, unsigned level, unsigned usage, const struct pipe_box *box, - bool direct, struct util_staging_transfer *tx); + boolean direct, struct util_staging_transfer *tx); void util_staging_transfer_destroy(struct pipe_context *pipe, struct pipe_transfer *ptx); diff --git a/src/gallium/drivers/i915/i915_batchbuffer.h b/src/gallium/drivers/i915/i915_batchbuffer.h index 78554034781..14ae749c828 100644 --- a/src/gallium/drivers/i915/i915_batchbuffer.h +++ b/src/gallium/drivers/i915/i915_batchbuffer.h @@ -95,7 +95,7 @@ static INLINE int i915_winsys_batchbuffer_reloc(struct i915_winsys_batchbuffer *batch, struct i915_winsys_buffer *buffer, enum i915_winsys_buffer_usage usage, - size_t offset, bool fenced) + size_t offset, boolean fenced) { return batch->iws->batchbuffer_reloc(batch, buffer, usage, offset, fenced); } diff --git a/src/gallium/drivers/i915/i915_context.h b/src/gallium/drivers/i915/i915_context.h 
index dacf50e870d..964948edc0e 100644 --- a/src/gallium/drivers/i915/i915_context.h +++ b/src/gallium/drivers/i915/i915_context.h @@ -124,6 +124,12 @@ struct i915_fragment_shader * Else, the bitmask indicates which components are occupied by immediates. */ ubyte constant_flags[I915_MAX_CONSTANT]; + + /** + * The mapping between generics and hw texture coords. + * We need to share this between the vertex and fragment stages. + **/ + int generic_mapping[I915_TEX_UNITS]; }; diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index b145b58be30..27f100843bf 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -133,7 +133,21 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...) p->error = 1; } - +static uint get_mapping(struct i915_fragment_shader* fs, int unit) +{ + int i; + for (i = 0; i < I915_TEX_UNITS; i++) + { + if (fs->generic_mapping[i] == -1) { + fs->generic_mapping[i] = unit; + return i; + } + if (fs->generic_mapping[i] == unit) + return i; + } + debug_printf("Exceeded max generics\n"); + return 0; +} /** * Construct a ureg for the given source register. Will emit @@ -141,7 +155,8 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...) 
*/ static uint src_vector(struct i915_fp_compile *p, - const struct tgsi_full_src_register *source) + const struct tgsi_full_src_register *source, + struct i915_fragment_shader* fs) { uint index = source->Register.Index; uint src = 0, sem_name, sem_ind; @@ -192,9 +207,11 @@ src_vector(struct i915_fp_compile *p, src = swizzle(src, W, W, W, W); break; case TGSI_SEMANTIC_GENERIC: - /* usually a texcoord */ - src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + sem_ind, D0_CHANNEL_ALL); - break; + { + int real_tex_unit = get_mapping(fs, sem_ind); + src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL); + break; + } default: i915_program_error(p, "Bad source->Index"); return 0; @@ -336,13 +353,14 @@ translate_tex_src_target(struct i915_fp_compile *p, uint tex) static void emit_tex(struct i915_fp_compile *p, const struct tgsi_full_instruction *inst, - uint opcode) + uint opcode, + struct i915_fragment_shader* fs) { uint texture = inst->Texture.Texture; uint unit = inst->Src[1].Register.Index; uint tex = translate_tex_src_target( p, texture ); uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); - uint coord = src_vector( p, &inst->Src[0]); + uint coord = src_vector( p, &inst->Src[0], fs); i915_emit_texld( p, get_result_vector( p, &inst->Dst[0] ), @@ -361,15 +379,16 @@ emit_tex(struct i915_fp_compile *p, static void emit_simple_arith(struct i915_fp_compile *p, const struct tgsi_full_instruction *inst, - uint opcode, uint numArgs) + uint opcode, uint numArgs, + struct i915_fragment_shader* fs) { uint arg1, arg2, arg3; assert(numArgs <= 3); - arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0] ); - arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1] ); - arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2] ); + arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0], fs ); + arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1], fs ); + arg3 = (numArgs < 3) ? 
0 : src_vector( p, &inst->Src[2], fs ); i915_emit_arith( p, opcode, @@ -385,7 +404,8 @@ emit_simple_arith(struct i915_fp_compile *p, static void emit_simple_arith_swap2(struct i915_fp_compile *p, const struct tgsi_full_instruction *inst, - uint opcode, uint numArgs) + uint opcode, uint numArgs, + struct i915_fragment_shader* fs) { struct tgsi_full_instruction inst2; @@ -396,7 +416,7 @@ emit_simple_arith_swap2(struct i915_fp_compile *p, inst2.Src[0] = inst->Src[1]; inst2.Src[1] = inst->Src[0]; - emit_simple_arith(p, &inst2, opcode, numArgs); + emit_simple_arith(p, &inst2, opcode, numArgs, fs); } @@ -415,7 +435,8 @@ emit_simple_arith_swap2(struct i915_fp_compile *p, */ static void i915_translate_instruction(struct i915_fp_compile *p, - const struct tgsi_full_instruction *inst) + const struct tgsi_full_instruction *inst, + struct i915_fragment_shader *fs) { uint writemask; uint src0, src1, src2, flags; @@ -423,7 +444,7 @@ i915_translate_instruction(struct i915_fp_compile *p, switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ABS: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); i915_emit_arith(p, A0_MAX, get_result_vector(p, &inst->Dst[0]), @@ -432,13 +453,13 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_ADD: - emit_simple_arith(p, inst, A0_ADD, 2); + emit_simple_arith(p, inst, A0_ADD, 2, fs); break; case TGSI_OPCODE_CMP: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); - src2 = src_vector(p, &inst->Src[2]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); + src2 = src_vector(p, &inst->Src[2], fs); i915_emit_arith(p, A0_CMP, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), @@ -446,7 +467,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_COS: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); tmp = i915_get_utemp(p); i915_emit_arith(p, @@ -495,17 +516,28 @@ 
i915_translate_instruction(struct i915_fp_compile *p, i915_emit_const4fv(p, cos_constants), 0); break; + case TGSI_OPCODE_DP2: + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); + + i915_emit_arith(p, + A0_DP3, + get_result_vector(p, &inst->Dst[0]), + get_result_flags(inst), 0, + swizzle(src0, X, Y, ZERO, ZERO), src1, 0); + break; + case TGSI_OPCODE_DP3: - emit_simple_arith(p, inst, A0_DP3, 2); + emit_simple_arith(p, inst, A0_DP3, 2, fs); break; case TGSI_OPCODE_DP4: - emit_simple_arith(p, inst, A0_DP4, 2); + emit_simple_arith(p, inst, A0_DP4, 2, fs); break; case TGSI_OPCODE_DPH: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); i915_emit_arith(p, A0_DP4, @@ -515,8 +547,8 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_DST: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); /* result[0] = 1 * 1; * result[1] = a[1] * b[1]; @@ -536,7 +568,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_EX2: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); i915_emit_arith(p, A0_EXP, @@ -546,16 +578,16 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_FLR: - emit_simple_arith(p, inst, A0_FLR, 1); + emit_simple_arith(p, inst, A0_FLR, 1, fs); break; case TGSI_OPCODE_FRC: - emit_simple_arith(p, inst, A0_FRC, 1); + emit_simple_arith(p, inst, A0_FRC, 1, fs); break; case TGSI_OPCODE_KIL: /* kill if src[0].x < 0 || src[0].y < 0 ... 
*/ - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); tmp = i915_get_utemp(p); i915_emit_texld(p, @@ -571,7 +603,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_LG2: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); i915_emit_arith(p, A0_LOG, @@ -581,7 +613,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_LIT: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); tmp = i915_get_utemp(p); /* tmp = max( a.xyzw, a.00zw ) @@ -614,9 +646,9 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_LRP: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); - src2 = src_vector(p, &inst->Src[2]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); + src2 = src_vector(p, &inst->Src[2], fs); flags = get_result_flags(inst); tmp = i915_get_utemp(p); @@ -636,16 +668,16 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_MAD: - emit_simple_arith(p, inst, A0_MAD, 3); + emit_simple_arith(p, inst, A0_MAD, 3, fs); break; case TGSI_OPCODE_MAX: - emit_simple_arith(p, inst, A0_MAX, 2); + emit_simple_arith(p, inst, A0_MAX, 2, fs); break; case TGSI_OPCODE_MIN: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); tmp = i915_get_utemp(p); flags = get_result_flags(inst); @@ -662,16 +694,16 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_MOV: - emit_simple_arith(p, inst, A0_MOV, 1); + emit_simple_arith(p, inst, A0_MOV, 1, fs); break; case TGSI_OPCODE_MUL: - emit_simple_arith(p, inst, A0_MUL, 2); + emit_simple_arith(p, inst, A0_MUL, 2, fs); break; case TGSI_OPCODE_POW: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); + src0 = src_vector(p, &inst->Src[0], fs); 
+ src1 = src_vector(p, &inst->Src[1], fs); tmp = i915_get_utemp(p); flags = get_result_flags(inst); @@ -695,17 +727,17 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_RCP: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); i915_emit_arith(p, A0_RCP, get_result_vector(p, &inst->Dst[0]), - get_result_flags(inst), 0, + get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0, 0); break; case TGSI_OPCODE_RSQ: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); i915_emit_arith(p, A0_RSQ, @@ -715,7 +747,7 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_SCS: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); tmp = i915_get_utemp(p); /* @@ -778,17 +810,40 @@ i915_translate_instruction(struct i915_fp_compile *p, } break; - case TGSI_OPCODE_SGE: - emit_simple_arith(p, inst, A0_SGE, 2); + case TGSI_OPCODE_SEQ: + /* if we're both >= and <= then we're == */ + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); + tmp = i915_get_utemp(p); + + i915_emit_arith(p, + A0_SGE, + tmp, A0_DEST_CHANNEL_ALL, 0, + src0, + src1, 0); + + i915_emit_arith(p, + A0_SGE, + get_result_vector(p, &inst->Dst[0]), + A0_DEST_CHANNEL_ALL, 0, + src1, + src0, 0); + + i915_emit_arith(p, + A0_MUL, + get_result_vector(p, &inst->Dst[0]), + A0_DEST_CHANNEL_ALL, 0, + get_result_vector(p, &inst->Dst[0]), + tmp, 0); + break; - case TGSI_OPCODE_SLE: - /* like SGE, but swap reg0, reg1 */ - emit_simple_arith_swap2(p, inst, A0_SGE, 2); + case TGSI_OPCODE_SGE: + emit_simple_arith(p, inst, A0_SGE, 2, fs); break; case TGSI_OPCODE_SIN: - src0 = src_vector(p, &inst->Src[0]); + src0 = src_vector(p, &inst->Src[0], fs); tmp = i915_get_utemp(p); i915_emit_arith(p, @@ -837,18 +892,78 @@ i915_translate_instruction(struct i915_fp_compile *p, i915_emit_const4fv(p, sin_constants), 0); break; + case TGSI_OPCODE_SLE: + /* like SGE, but swap reg0, 
reg1 */ + emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs); + break; + case TGSI_OPCODE_SLT: - emit_simple_arith(p, inst, A0_SLT, 2); + emit_simple_arith(p, inst, A0_SLT, 2, fs); break; case TGSI_OPCODE_SGT: /* like SLT, but swap reg0, reg1 */ - emit_simple_arith_swap2(p, inst, A0_SLT, 2); + emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs); + break; + + case TGSI_OPCODE_SNE: + /* if we're either < or > then we're != */ + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); + tmp = i915_get_utemp(p); + + i915_emit_arith(p, + A0_SLT, + tmp, + A0_DEST_CHANNEL_ALL, 0, + src0, + src1, 0); + + i915_emit_arith(p, + A0_SLT, + get_result_vector(p, &inst->Dst[0]), + A0_DEST_CHANNEL_ALL, 0, + src1, + src0, 0); + + i915_emit_arith(p, + A0_ADD, + get_result_vector(p, &inst->Dst[0]), + A0_DEST_CHANNEL_ALL, 0, + get_result_vector(p, &inst->Dst[0]), + tmp, 0); + break; + + case TGSI_OPCODE_SSG: + /* compute (src>0) - (src<0) */ + src0 = src_vector(p, &inst->Src[0], fs); + tmp = i915_get_utemp(p); + + i915_emit_arith(p, + A0_SLT, + tmp, + A0_DEST_CHANNEL_ALL, 0, + src0, + swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0); + + i915_emit_arith(p, + A0_SLT, + get_result_vector(p, &inst->Dst[0]), + A0_DEST_CHANNEL_ALL, 0, + swizzle(src0, ZERO, ZERO, ZERO, ZERO), + src0, 0); + + i915_emit_arith(p, + A0_ADD, + get_result_vector(p, &inst->Dst[0]), + A0_DEST_CHANNEL_ALL, 0, + get_result_vector(p, &inst->Dst[0]), + negate(tmp, 1, 1, 1, 1), 0); break; case TGSI_OPCODE_SUB: - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); i915_emit_arith(p, A0_ADD, @@ -858,15 +973,19 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_TEX: - emit_tex(p, inst, T0_TEXLD); + emit_tex(p, inst, T0_TEXLD, fs); + break; + + case TGSI_OPCODE_TRUNC: + emit_simple_arith(p, inst, A0_TRC, 1, fs); break; case TGSI_OPCODE_TXB: - emit_tex(p, inst, T0_TEXLDB); 
+ emit_tex(p, inst, T0_TEXLDB, fs); break; case TGSI_OPCODE_TXP: - emit_tex(p, inst, T0_TEXLDP); + emit_tex(p, inst, T0_TEXLDP, fs); break; case TGSI_OPCODE_XPD: @@ -876,8 +995,8 @@ i915_translate_instruction(struct i915_fp_compile *p, * result.z = src0.x * src1.y - src0.y * src1.x; * result.w = undef; */ - src0 = src_vector(p, &inst->Src[0]); - src1 = src_vector(p, &inst->Src[1]); + src0 = src_vector(p, &inst->Src[0], fs); + src1 = src_vector(p, &inst->Src[1], fs); tmp = i915_get_utemp(p); i915_emit_arith(p, @@ -912,7 +1031,8 @@ i915_translate_instruction(struct i915_fp_compile *p, */ static void i915_translate_instructions(struct i915_fp_compile *p, - const struct tgsi_token *tokens) + const struct tgsi_token *tokens, + struct i915_fragment_shader *fs) { struct i915_fragment_shader *ifs = p->shader; struct tgsi_parse_context parse; @@ -993,7 +1113,7 @@ i915_translate_instructions(struct i915_fp_compile *p, p->first_instruction = FALSE; } - i915_translate_instruction(p, &parse.FullToken.FullInstruction); + i915_translate_instruction(p, &parse.FullToken.FullInstruction, fs); break; default: @@ -1011,6 +1131,7 @@ i915_init_compile(struct i915_context *i915, struct i915_fragment_shader *ifs) { struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); + int i; p->shader = ifs; @@ -1023,6 +1144,9 @@ i915_init_compile(struct i915_context *i915, ifs->num_constants = 0; memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); + for (i = 0; i < I915_TEX_UNITS; i++) + ifs->generic_mapping[i] = -1; + p->first_instruction = TRUE; p->nr_tex_indirect = 1; /* correct? 
*/ @@ -1192,7 +1316,7 @@ i915_translate_fragment_program( struct i915_context *i915, p = i915_init_compile(i915, fs); i915_find_wpos_space(p); - i915_translate_instructions(p, tokens); + i915_translate_instructions(p, tokens, fs); i915_fixup_depth_write(p); i915_fini_compile(i915, p); diff --git a/src/gallium/drivers/i915/i915_resource_texture.c b/src/gallium/drivers/i915/i915_resource_texture.c index e05b059706d..b74b19d0fe4 100644 --- a/src/gallium/drivers/i915/i915_resource_texture.c +++ b/src/gallium/drivers/i915/i915_resource_texture.c @@ -700,7 +700,8 @@ i915_texture_destroy(struct pipe_screen *screen, struct i915_winsys *iws = i915_screen(screen)->iws; uint i; - iws->buffer_destroy(iws, tex->buffer); + if (tex->buffer) + iws->buffer_destroy(iws, tex->buffer); for (i = 0; i < Elements(tex->image_offset); i++) if (tex->image_offset[i]) diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c index da96b420f2c..c86baa58b28 100644 --- a/src/gallium/drivers/i915/i915_screen.c +++ b/src/gallium/drivers/i915/i915_screen.c @@ -134,6 +134,11 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap) case PIPE_CAP_TEXTURE_MIRROR_CLAMP: case PIPE_CAP_TEXTURE_SWIZZLE: case PIPE_CAP_TIMER_QUERY: + case PIPE_CAP_SM3: + case PIPE_CAP_SEAMLESS_CUBE_MAP: + case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: + case PIPE_CAP_FRAGMENT_COLOR_CLAMP_CONTROL: + case PIPE_CAP_MIXED_COLORBUFFER_FORMATS: return 0; /* Features we can lie about (boolean caps). 
*/ diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c index 59ac2f7292a..bf6b30a4530 100644 --- a/src/gallium/drivers/i915/i915_state_derived.c +++ b/src/gallium/drivers/i915/i915_state_derived.c @@ -35,6 +35,18 @@ #include "i915_debug.h" #include "i915_reg.h" +static uint find_mapping(struct i915_fragment_shader* fs, int unit) +{ + int i; + for (i = 0; i < I915_TEX_UNITS ; i++) + { + if (fs->generic_mapping[i] == unit) + return i; + } + debug_printf("Mapping not found\n"); + return 0; +} + /*********************************************************************** @@ -46,7 +58,7 @@ static void calculate_vertex_layout(struct i915_context *i915) const struct i915_fragment_shader *fs = i915->fs; const enum interp_mode colorInterp = i915->rasterizer->color_interp; struct vertex_info vinfo; - boolean texCoords[8], colors[2], fog, needW; + boolean texCoords[I915_TEX_UNITS], colors[2], fog, needW; uint i; int src; @@ -66,11 +78,12 @@ static void calculate_vertex_layout(struct i915_context *i915) colors[fs->info.input_semantic_index[i]] = TRUE; break; case TGSI_SEMANTIC_GENERIC: - /* usually a texcoord */ { - const uint unit = fs->info.input_semantic_index[i]; - assert(unit < 8); - texCoords[unit] = TRUE; + /* texcoords/varyings/other generic */ + /* XXX handle back/front face and point size */ + uint unit = fs->info.input_semantic_index[i]; + + texCoords[find_mapping(fs, unit)] = TRUE; needW = TRUE; } break; @@ -82,7 +95,7 @@ static void calculate_vertex_layout(struct i915_context *i915) } } - + /* pos */ src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_POSITION, 0); if (needW) { @@ -120,12 +133,12 @@ static void calculate_vertex_layout(struct i915_context *i915) vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM; } - /* texcoords */ - for (i = 0; i < 8; i++) { + /* texcoords/varyings */ + for (i = 0; i < I915_TEX_UNITS; i++) { uint hwtc; if (texCoords[i]) { hwtc = TEXCOORDFMT_4D; - src = draw_find_shader_output(i915->draw, 
TGSI_SEMANTIC_GENERIC, i); + src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, fs->generic_mapping[i]); draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); } else { diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile index 4068bed393c..ba9705bebee 100644 --- a/src/gallium/drivers/llvmpipe/Makefile +++ b/src/gallium/drivers/llvmpipe/Makefile @@ -71,7 +71,7 @@ lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxil LDFLAGS += $(LLVM_LDFLAGS) LIBS += -L../../auxiliary/ -lgallium libllvmpipe.a $(LLVM_LIBS) $(GL_LIB_DEPS) -LD=g++ +LD=$(CXX) $(PROGS): lp_test_main.o libllvmpipe.a diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index 8d75dd07c4d..fb125f3a8d8 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -508,7 +508,7 @@ nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso) { unsigned s, i; - for (s = 0; s < 5; ++s) + for (s = 0; s < 3; ++s) for (i = 0; i < nv50_context(pipe)->num_samplers[s]; ++i) if (nv50_context(pipe)->samplers[s][i] == hwcso) nv50_context(pipe)->samplers[s][i] = NULL; diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c index ced26494e15..339906e6a63 100644 --- a/src/gallium/drivers/nvfx/nvfx_surface.c +++ b/src/gallium/drivers/nvfx/nvfx_surface.c @@ -84,7 +84,7 @@ nvfx_region_set_format(struct nv04_region* rgn, enum pipe_format format) } static INLINE void -nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, unsigned x, unsigned y, bool for_write) +nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, unsigned x, unsigned y, boolean for_write) { rgn->x = x; rgn->y = y; @@ -120,7 +120,7 @@ nvfx_region_init_for_surface(struct nv04_region* rgn, struct nvfx_surface* surf, } static INLINE void -nvfx_region_init_for_subresource(struct nv04_region* rgn, struct 
pipe_resource* pt, unsigned level, unsigned x, unsigned y, unsigned z, bool for_write) +nvfx_region_init_for_subresource(struct nv04_region* rgn, struct pipe_resource* pt, unsigned level, unsigned x, unsigned y, unsigned z, boolean for_write) { if(pt->target != PIPE_BUFFER) { diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.c b/src/gallium/drivers/nvfx/nvfx_transfer.c index 2debcb6eb8f..cc4b51ec1f8 100644 --- a/src/gallium/drivers/nvfx/nvfx_transfer.c +++ b/src/gallium/drivers/nvfx/nvfx_transfer.c @@ -56,7 +56,7 @@ nvfx_transfer_new(struct pipe_context *pipe, else { struct nvfx_staging_transfer* tx; - bool direct = !nvfx_resource_on_gpu(pt) && pt->flags & NVFX_RESOURCE_FLAG_LINEAR; + boolean direct = !nvfx_resource_on_gpu(pt) && pt->flags & NVFX_RESOURCE_FLAG_LINEAR; tx = CALLOC_STRUCT(nvfx_staging_transfer); if(!tx) diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile index 37b0f01cfd3..dfedf353877 100644 --- a/src/gallium/drivers/r300/Makefile +++ b/src/gallium/drivers/r300/Makefile @@ -30,7 +30,8 @@ C_SOURCES = \ r300_transfer.c LIBRARY_INCLUDES = \ - -I$(TOP)/src/mesa/drivers/dri/r300/compiler + -I$(TOP)/src/mesa/drivers/dri/r300/compiler \ + -I$(TOP)/include COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c index 4949703120d..571986c3011 100644 --- a/src/gallium/drivers/r300/r300_chipset.c +++ b/src/gallium/drivers/r300/r300_chipset.c @@ -1,5 +1,6 @@ /* * Copyright 2008 Corbin Simpson <[email protected]> + * Copyright 2011 Marek Olšák <[email protected]> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -32,392 +33,109 @@ /* Parse a PCI ID and fill an r300_capabilities struct with information. 
*/ void r300_parse_chipset(struct r300_capabilities* caps) { - /* Reasonable defaults */ - caps->num_vert_fpus = 2; - caps->num_tex_units = 16; - caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE; - caps->hiz_ram = 0; - caps->is_r400 = FALSE; - caps->is_r500 = FALSE; - caps->high_second_pipe = FALSE; - - /* Note: These are not ordered by PCI ID. I leave that task to GCC, - * which will perform the ordering while collating jump tables. Instead, - * I've tried to group them according to capabilities and age. */ switch (caps->pci_id) { - case 0x4144: - caps->family = CHIP_FAMILY_R300; - caps->high_second_pipe = TRUE; - caps->num_vert_fpus = 4; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x4145: - case 0x4146: - case 0x4147: - case 0x4E44: - case 0x4E45: - case 0x4E46: - case 0x4E47: - caps->family = CHIP_FAMILY_R300; - caps->high_second_pipe = TRUE; - caps->num_vert_fpus = 4; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x4150: - case 0x4151: - case 0x4152: - case 0x4153: - case 0x4154: - case 0x4155: - case 0x4156: - case 0x4E50: - case 0x4E51: - case 0x4E52: - case 0x4E53: - case 0x4E54: - case 0x4E56: - caps->family = CHIP_FAMILY_RV350; - caps->high_second_pipe = TRUE; - caps->zmask_ram = RV3xx_ZMASK_SIZE; - break; - - case 0x4148: - case 0x4149: - case 0x414A: - case 0x414B: - case 0x4E48: - case 0x4E49: - case 0x4E4B: - caps->family = CHIP_FAMILY_R350; - caps->high_second_pipe = TRUE; - caps->num_vert_fpus = 4; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x4E4A: - caps->family = CHIP_FAMILY_R360; - caps->high_second_pipe = TRUE; - caps->num_vert_fpus = 4; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x5460: - case 0x5462: - case 0x5464: - case 0x5B60: - case 0x5B62: - case 0x5B63: - case 0x5B64: - case 0x5B65: - caps->family = CHIP_FAMILY_RV370; - 
caps->high_second_pipe = TRUE; - caps->zmask_ram = RV3xx_ZMASK_SIZE; - break; - - case 0x3150: - case 0x3152: - case 0x3154: - case 0x3155: - case 0x3E50: - case 0x3E54: - caps->family = CHIP_FAMILY_RV380; - caps->high_second_pipe = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = RV3xx_ZMASK_SIZE; - break; - - case 0x4A48: - case 0x4A49: - case 0x4A4A: - case 0x4A4B: - case 0x4A4C: - case 0x4A4D: - case 0x4A4E: - case 0x4A4F: - case 0x4A50: - case 0x4A54: - caps->family = CHIP_FAMILY_R420; - caps->num_vert_fpus = 6; - caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x5548: - case 0x5549: - case 0x554A: - case 0x554B: - case 0x5550: - case 0x5551: - case 0x5552: - case 0x5554: - case 0x5D57: - caps->family = CHIP_FAMILY_R423; - caps->num_vert_fpus = 6; - caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x554C: - case 0x554D: - case 0x554E: - case 0x554F: - case 0x5D48: - case 0x5D49: - case 0x5D4A: - caps->family = CHIP_FAMILY_R430; - caps->num_vert_fpus = 6; - caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x5D4C: - case 0x5D4D: - case 0x5D4E: - case 0x5D4F: - case 0x5D50: - case 0x5D52: - caps->family = CHIP_FAMILY_R480; - caps->num_vert_fpus = 6; - caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x4B48: - case 0x4B49: - case 0x4B4A: - case 0x4B4B: - case 0x4B4C: - caps->family = CHIP_FAMILY_R481; - caps->num_vert_fpus = 6; - caps->is_r400 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x5E4C: - case 0x5E4F: - case 0x564A: - case 0x564B: - case 0x564F: - case 0x5652: - case 0x5653: - case 0x5657: - case 0x5E48: - case 0x5E4A: - case 0x5E4B: - case 0x5E4D: - caps->family = CHIP_FAMILY_RV410; - caps->num_vert_fpus = 6; - caps->is_r400 = TRUE; - caps->hiz_ram 
= R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x5954: - case 0x5955: - caps->family = CHIP_FAMILY_RS480; - caps->has_tcl = FALSE; - caps->zmask_ram = RV3xx_ZMASK_SIZE; +#define CHIPSET(pci_id, name, chipfamily) \ + case pci_id: \ + caps->family = CHIP_FAMILY_##chipfamily; \ break; +#include "pci_ids/r300_pci_ids.h" +#undef CHIPSET - case 0x5974: - case 0x5975: - caps->family = CHIP_FAMILY_RS482; - caps->has_tcl = FALSE; - caps->zmask_ram = RV3xx_ZMASK_SIZE; - break; - - case 0x5A41: - case 0x5A42: - caps->family = CHIP_FAMILY_RS400; - caps->has_tcl = FALSE; - break; - - case 0x5A61: - case 0x5A62: - caps->family = CHIP_FAMILY_RC410; - caps->has_tcl = FALSE; - caps->zmask_ram = RV3xx_ZMASK_SIZE; - break; - - case 0x791E: - case 0x791F: - caps->family = CHIP_FAMILY_RS690; - caps->has_tcl = FALSE; - caps->is_r400 = TRUE; - break; - - case 0x793F: - case 0x7941: - case 0x7942: - caps->family = CHIP_FAMILY_RS600; - caps->has_tcl = FALSE; - caps->is_r400 = TRUE; - break; - - case 0x796C: - case 0x796D: - case 0x796E: - case 0x796F: - caps->family = CHIP_FAMILY_RS740; - caps->has_tcl = FALSE; - caps->is_r400 = TRUE; - break; - - case 0x7100: - case 0x7101: - case 0x7102: - case 0x7103: - case 0x7104: - case 0x7105: - case 0x7106: - case 0x7108: - case 0x7109: - case 0x710A: - case 0x710B: - case 0x710C: - case 0x710E: - case 0x710F: - caps->family = CHIP_FAMILY_R520; - caps->num_vert_fpus = 8; - caps->is_r500 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x7140: - case 0x7141: - case 0x7142: - case 0x7143: - case 0x7144: - case 0x7145: - case 0x7146: - case 0x7147: - case 0x7149: - case 0x714A: - case 0x714B: - case 0x714C: - case 0x714D: - case 0x714E: - case 0x714F: - case 0x7151: - case 0x7152: - case 0x7153: - case 0x715E: - case 0x715F: - case 0x7180: - case 0x7181: - case 0x7183: - case 0x7186: - case 0x7187: - case 0x7188: - case 0x718A: - case 0x718B: - case 0x718C: - case 0x718D: - case 
0x718F: - case 0x7193: - case 0x7196: - case 0x719B: - case 0x719F: - case 0x7200: - case 0x7210: - case 0x7211: - caps->family = CHIP_FAMILY_RV515; - caps->num_vert_fpus = 2; - caps->is_r500 = TRUE; - caps->hiz_ram = R300_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x71C0: - case 0x71C1: - case 0x71C2: - case 0x71C3: - case 0x71C4: - case 0x71C5: - case 0x71C6: - case 0x71C7: - case 0x71CD: - case 0x71CE: - case 0x71D2: - case 0x71D4: - case 0x71D5: - case 0x71D6: - case 0x71DA: - case 0x71DE: - caps->family = CHIP_FAMILY_RV530; - caps->num_vert_fpus = 5; - caps->is_r500 = TRUE; - caps->hiz_ram = RV530_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x7240: - case 0x7243: - case 0x7244: - case 0x7245: - case 0x7246: - case 0x7247: - case 0x7248: - case 0x7249: - case 0x724A: - case 0x724B: - case 0x724C: - case 0x724D: - case 0x724E: - case 0x724F: - case 0x7284: - caps->family = CHIP_FAMILY_R580; - caps->num_vert_fpus = 8; - caps->is_r500 = TRUE; - caps->hiz_ram = RV530_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x7280: - caps->family = CHIP_FAMILY_RV570; - caps->num_vert_fpus = 8; - caps->is_r500 = TRUE; - caps->hiz_ram = RV530_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; - - case 0x7281: - case 0x7283: - case 0x7287: - case 0x7288: - case 0x7289: - case 0x728B: - case 0x728C: - case 0x7290: - case 0x7291: - case 0x7293: - case 0x7297: - caps->family = CHIP_FAMILY_RV560; - caps->num_vert_fpus = 8; - caps->is_r500 = TRUE; - caps->hiz_ram = RV530_HIZ_LIMIT; - caps->zmask_ram = PIPE_ZMASK_SIZE; - break; + default: + fprintf(stderr, "r300: Warning: Unknown chipset 0x%x\nAborting...", + caps->pci_id); + abort(); + } - default: - fprintf(stderr, "r300: Warning: Unknown chipset 0x%x\n", - caps->pci_id); + /* Defaults. 
*/ + caps->high_second_pipe = FALSE; + caps->num_vert_fpus = 0; + caps->hiz_ram = 0; + caps->zmask_ram = 0; + + + switch (caps->family) { + case CHIP_FAMILY_R300: + case CHIP_FAMILY_R350: + caps->high_second_pipe = TRUE; + caps->num_vert_fpus = 4; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; + break; + + case CHIP_FAMILY_RV350: + case CHIP_FAMILY_RV370: + caps->high_second_pipe = TRUE; + caps->num_vert_fpus = 2; + caps->zmask_ram = RV3xx_ZMASK_SIZE; + break; + + case CHIP_FAMILY_RV380: + caps->high_second_pipe = TRUE; + caps->num_vert_fpus = 2; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = RV3xx_ZMASK_SIZE; + break; + + case CHIP_FAMILY_RS400: + case CHIP_FAMILY_RS600: + case CHIP_FAMILY_RS690: + case CHIP_FAMILY_RS740: + break; + + case CHIP_FAMILY_RC410: + case CHIP_FAMILY_RS480: + caps->zmask_ram = RV3xx_ZMASK_SIZE; + break; + + case CHIP_FAMILY_R420: + case CHIP_FAMILY_R423: + case CHIP_FAMILY_R430: + case CHIP_FAMILY_R480: + case CHIP_FAMILY_R481: + case CHIP_FAMILY_RV410: + caps->num_vert_fpus = 6; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; + break; + + case CHIP_FAMILY_R520: + caps->num_vert_fpus = 8; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; + break; + + case CHIP_FAMILY_RV515: + caps->num_vert_fpus = 2; + caps->hiz_ram = R300_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; + break; + + case CHIP_FAMILY_RV530: + caps->num_vert_fpus = 5; + caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; + break; + + case CHIP_FAMILY_R580: + case CHIP_FAMILY_RV560: + case CHIP_FAMILY_RV570: + caps->num_vert_fpus = 8; + caps->hiz_ram = RV530_HIZ_LIMIT; + caps->zmask_ram = PIPE_ZMASK_SIZE; + break; } + caps->num_tex_units = 16; + caps->is_r400 = caps->family >= CHIP_FAMILY_R420 && caps->family < CHIP_FAMILY_RV515; + caps->is_r500 = caps->family >= CHIP_FAMILY_RV515; caps->is_rv350 = caps->family >= CHIP_FAMILY_RV350; caps->z_compress = caps->is_rv350 ? 
R300_ZCOMP_8X8 : R300_ZCOMP_4X4; caps->dxtc_swizzle = caps->is_r400 || caps->is_r500; caps->has_us_format = caps->family == CHIP_FAMILY_R520; + caps->has_tcl = caps->num_vert_fpus > 0; + + if (caps->has_tcl) { + caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE; + } } diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h index d0050bed2e8..4df6b5b6292 100644 --- a/src/gallium/drivers/r300/r300_chipset.h +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -96,26 +96,24 @@ struct r300_capabilities { /* Enumerations for legibility and telling which card we're running on. */ enum { - CHIP_FAMILY_R300 = 0, + CHIP_FAMILY_R300 = 0, /* R3xx-based cores. */ CHIP_FAMILY_R350, - CHIP_FAMILY_R360, CHIP_FAMILY_RV350, CHIP_FAMILY_RV370, CHIP_FAMILY_RV380, - CHIP_FAMILY_R420, + CHIP_FAMILY_RS400, + CHIP_FAMILY_RC410, + CHIP_FAMILY_RS480, + CHIP_FAMILY_R420, /* R4xx-based cores. */ CHIP_FAMILY_R423, CHIP_FAMILY_R430, CHIP_FAMILY_R480, CHIP_FAMILY_R481, CHIP_FAMILY_RV410, - CHIP_FAMILY_RS400, - CHIP_FAMILY_RC410, - CHIP_FAMILY_RS480, - CHIP_FAMILY_RS482, CHIP_FAMILY_RS600, CHIP_FAMILY_RS690, CHIP_FAMILY_RS740, - CHIP_FAMILY_RV515, + CHIP_FAMILY_RV515, /* R5xx-based cores. 
*/ CHIP_FAMILY_R520, CHIP_FAMILY_RV530, CHIP_FAMILY_R580, diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 240b841ed2a..05af2148b38 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -49,20 +49,18 @@ static const char* r300_get_vendor(struct pipe_screen* pscreen) static const char* chip_families[] = { "ATI R300", "ATI R350", - "ATI R360", "ATI RV350", "ATI RV370", "ATI RV380", + "ATI RS400", + "ATI RC410", + "ATI RS480", "ATI R420", "ATI R423", "ATI R430", "ATI R480", "ATI R481", "ATI RV410", - "ATI RS400", - "ATI RC410", - "ATI RS480", - "ATI RS482", "ATI RS600", "ATI RS690", "ATI RS740", diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c index 04499c78cc6..121409b2260 100644 --- a/src/gallium/drivers/r300/r300_state_derived.c +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -438,7 +438,7 @@ static void r300_update_rs_block(struct r300_context *r300) /* Rasterize texture coordinates. 
*/ for (i = 0; i < ATTR_GENERIC_COUNT && tex_count < 8; i++) { - bool sprite_coord = false; + boolean sprite_coord = false; if (fs_inputs->generic[i] != ATTR_UNUSED) { sprite_coord = !!(r300->sprite_coord_enable & (1 << i)); diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 54f5410c324..9ebfe54c76d 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -380,9 +380,8 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte struct pipe_resource *texture, const struct pipe_sampler_view *state) { - struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view); - struct r600_pipe_state *rstate; + struct r600_pipe_resource_state *rstate; const struct util_format_description *desc; struct r600_resource_texture *tmp; struct r600_resource *rbuffer; @@ -438,35 +437,27 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte array_mode = tmp->array_mode[0]; tile_type = tmp->tile_type; - r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, - S_030000_DIM(r600_tex_dim(texture->target)) | - S_030000_PITCH((pitch / 8) - 1) | - S_030000_NON_DISP_TILING_ORDER(tile_type) | - S_030000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, - S_030004_TEX_HEIGHT(texture->height0 - 1) | - S_030004_TEX_DEPTH(texture->depth0 - 1) | - S_030004_ARRAY_MODE(array_mode), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, - (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); - r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, - (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]); - r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, - word4 | - S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | - 
S_030010_ENDIAN_SWAP(endian) | - S_030010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5, - S_030014_LAST_LEVEL(state->u.tex.last_level) | - S_030014_BASE_ARRAY(0) | - S_030014_LAST_ARRAY(0), 0xffffffff, NULL); - r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, - S_030018_MAX_ANISO(4 /* max 16 samples */), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, - S_03001C_DATA_FORMAT(format) | - S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE), 0xFFFFFFFF, NULL); + rstate->bo[0] = bo[0]; + rstate->bo[1] = bo[1]; + rstate->val[0] = (S_030000_DIM(r600_tex_dim(texture->target)) | + S_030000_PITCH((pitch / 8) - 1) | + S_030000_NON_DISP_TILING_ORDER(tile_type) | + S_030000_TEX_WIDTH(texture->width0 - 1)); + rstate->val[1] = (S_030004_TEX_HEIGHT(texture->height0 - 1) | + S_030004_TEX_DEPTH(texture->depth0 - 1) | + S_030004_ARRAY_MODE(array_mode)); + rstate->val[2] = (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8; + rstate->val[3] = (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8; + rstate->val[4] = (word4 | + S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | + S_030010_ENDIAN_SWAP(endian) | + S_030010_BASE_LEVEL(state->u.tex.first_level)); + rstate->val[5] = (S_030014_LAST_LEVEL(state->u.tex.last_level) | + S_030014_BASE_ARRAY(0) | + S_030014_LAST_ARRAY(0)); + rstate->val[6] = (S_030018_MAX_ANISO(4 /* max 16 samples */)); + rstate->val[7] = (S_03001C_DATA_FORMAT(format) | + S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE)); return &resource->base; } @@ -1032,7 +1023,10 @@ static void cayman_init_config(struct r600_pipe_context *rctx) tmp |= S_008C00_EXPORT_SRC_C(1); r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, (4 << 28), 0xFFFFFFFF, NULL); + /* always set the temp clauses */ + r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, 
S_008C04_NUM_CLAUSE_TEMP_GPRS(4), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL); r600_pipe_state_add_reg(rstate, R_028A48_PA_SC_MODE_CNTL_0, 0x0, 0xFFFFFFFF, NULL); @@ -1384,21 +1378,38 @@ void evergreen_init_config(struct r600_pipe_context *rctx) tmp |= S_008C00_ES_PRIO(es_prio); r600_pipe_state_add_reg(rstate, R_008C00_SQ_CONFIG, tmp, 0xFFFFFFFF, NULL); - tmp = 0; - tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); - tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); - tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); - r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); - - tmp = 0; - tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs); - tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs); - r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); - - tmp = 0; - tmp |= S_008C0C_NUM_HS_GPRS(num_hs_gprs); - tmp |= S_008C0C_NUM_LS_GPRS(num_ls_gprs); - r600_pipe_state_add_reg(rstate, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); + /* enable dynamic GPR resource management */ + if (r600_get_minor_version(rctx->radeon) >= 7) { + /* always set temp clauses */ + r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, + S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, (1 << 8), 0xFFFFFFFF, NULL); + r600_pipe_state_add_reg(rstate, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1, + S_028838_PS_GPRS(0x1e) | + S_028838_VS_GPRS(0x1e) | + S_028838_GS_GPRS(0x1e) | + S_028838_ES_GPRS(0x1e) | + 
S_028838_HS_GPRS(0x1e) | + S_028838_LS_GPRS(0x1e), 0xFFFFFFFF, NULL); /* workaround for hw issues with dyn gpr - must set all limits to 240 instead of 0, 0x1e == 240 / 8*/ + } else { + tmp = 0; + tmp |= S_008C04_NUM_PS_GPRS(num_ps_gprs); + tmp |= S_008C04_NUM_VS_GPRS(num_vs_gprs); + tmp |= S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs); + r600_pipe_state_add_reg(rstate, R_008C04_SQ_GPR_RESOURCE_MGMT_1, tmp, 0xFFFFFFFF, NULL); + + tmp = 0; + tmp |= S_008C08_NUM_GS_GPRS(num_gs_gprs); + tmp |= S_008C08_NUM_ES_GPRS(num_es_gprs); + r600_pipe_state_add_reg(rstate, R_008C08_SQ_GPR_RESOURCE_MGMT_2, tmp, 0xFFFFFFFF, NULL); + + tmp = 0; + tmp |= S_008C0C_NUM_HS_GPRS(num_hs_gprs); + tmp |= S_008C0C_NUM_LS_GPRS(num_ls_gprs); + r600_pipe_state_add_reg(rstate, R_008C0C_SQ_GPR_RESOURCE_MGMT_3, tmp, 0xFFFFFFFF, NULL); + } tmp = 0; tmp |= S_008C18_NUM_PS_THREADS(num_ps_threads); @@ -1769,45 +1780,32 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx) } void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx, - struct r600_pipe_state *rstate, - struct r600_resource *rbuffer, - unsigned offset, unsigned stride) + struct r600_pipe_resource_state *rstate) { rstate->id = R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; - r600_pipe_state_add_reg(rstate, R_030000_RESOURCE0_WORD0, - offset, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R_030004_RESOURCE0_WORD1, - rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030008_RESOURCE0_WORD2, - S_030008_ENDIAN_SWAP(r600_endian_swap(32)) | - S_030008_STRIDE(stride), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03000C_RESOURCE0_WORD3, - S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | - S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | - S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | - S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W), - 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, 
R_030014_RESOURCE0_WORD5, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_030018_RESOURCE0_WORD6, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03001C_RESOURCE0_WORD7, - 0xC0000000, 0xFFFFFFFF, NULL); + + rstate->val[0] = 0; + rstate->bo[0] = NULL; + rstate->val[1] = 0; + rstate->val[2] = S_030008_ENDIAN_SWAP(r600_endian_swap(32)); + rstate->val[3] = S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | + S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | + S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | + S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W); + rstate->val[4] = 0; + rstate->val[5] = 0; + rstate->val[6] = 0; + rstate->val[7] = 0xc0000000; } -void evergreen_pipe_mod_buffer_resource(struct r600_pipe_state *rstate, +void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, unsigned offset, unsigned stride) { - rstate->nregs = 0; - r600_pipe_state_mod_reg_bo(rstate, offset, rbuffer->bo); - r600_pipe_state_mod_reg(rstate, rbuffer->bo_size - offset - 1); - r600_pipe_state_mod_reg(rstate, S_030008_ENDIAN_SWAP(r600_endian_swap(32)) | - S_030008_STRIDE(stride)); - rstate->nregs = 8; - + rstate->bo[0] = rbuffer->bo; + rstate->val[0] = offset; + rstate->val[1] = rbuffer->bo_size - offset - 1; + rstate->val[2] = S_030008_ENDIAN_SWAP(r600_endian_swap(32)) | + S_030008_STRIDE(stride); } diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index ee0c7c9ed9b..d795f5757ed 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -171,6 +171,10 @@ #define S_008C0C_NUM_LS_GPRS(x) (((x) & 0xFF) << 16) #define G_008C0C_NUM_LS_GPRS(x) (((x) >> 16) & 0xFF) #define C_008C0C_NUM_LS_GPRS(x) 0xFF00FFFF + +#define R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 0x00008C10 +#define R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 0x00008C14 + #define R_008C18_SQ_THREAD_RESOURCE_MGMT_1 0x00008C18 #define S_008C18_NUM_PS_THREADS(x) (((x) & 0xFF) << 0) #define 
G_008C18_NUM_PS_THREADS(x) (((x) >> 0) & 0xFF) @@ -1637,6 +1641,12 @@ #define R_028818_PA_CL_VTE_CNTL 0x00028818 #define R_028820_PA_CL_NANINF_CNTL 0x00028820 #define R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1 0x00028838 +#define S_028838_PS_GPRS(x) (((x) & 0x1F) << 0) +#define S_028838_VS_GPRS(x) (((x) & 0x1F) << 5) +#define S_028838_GS_GPRS(x) (((x) & 0x1F) << 10) +#define S_028838_ES_GPRS(x) (((x) & 0x1F) << 15) +#define S_028838_HS_GPRS(x) (((x) & 0x1F) << 20) +#define S_028838_LS_GPRS(x) (((x) & 0x1F) << 25) #define R_028840_SQ_PGM_START_PS 0x00028840 #define R_02884C_SQ_PGM_EXPORTS_PS 0x0002884C #define S_02884C_EXPORT_COLORS(x) (((x) & 0xF) << 1) @@ -1948,6 +1958,4 @@ #define CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0 0x28c38 #define CM_R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1 0x28c3c -#define CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1 0x00008C10 -#define CM_R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2 0x00008C14 #endif diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 23e7181a86e..bf7138d9e4e 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -30,6 +30,7 @@ #include <stdint.h> #include <stdio.h> #include <util/u_double_list.h> +#include <util/u_inlines.h> #include <pipe/p_compiler.h> #define RADEON_CTX_MAX_PM4 (64 * 1024 / 4) @@ -47,33 +48,6 @@ struct winsys_handle; enum radeon_family { CHIP_UNKNOWN, - CHIP_R100, - CHIP_RV100, - CHIP_RS100, - CHIP_RV200, - CHIP_RS200, - CHIP_R200, - CHIP_RV250, - CHIP_RS300, - CHIP_RV280, - CHIP_R300, - CHIP_R350, - CHIP_RV350, - CHIP_RV380, - CHIP_R420, - CHIP_R423, - CHIP_RV410, - CHIP_RS400, - CHIP_RS480, - CHIP_RS600, - CHIP_RS690, - CHIP_RS740, - CHIP_RV515, - CHIP_R520, - CHIP_RV530, - CHIP_RV560, - CHIP_RV570, - CHIP_R580, CHIP_R600, CHIP_RV610, CHIP_RV630, @@ -130,14 +104,24 @@ struct r600_bo *r600_bo_handle(struct radeon *radeon, unsigned handle, unsigned *array_mode); void *r600_bo_map(struct radeon *radeon, struct r600_bo *bo, unsigned usage, void *ctx); void 
r600_bo_unmap(struct radeon *radeon, struct r600_bo *bo); -void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, - struct r600_bo *src); boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *pb_bo, unsigned stride, struct winsys_handle *whandle); static INLINE unsigned r600_bo_offset(struct r600_bo *bo) { return 0; } +void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo); + +/* this relies on the pipe_reference being the first member of r600_bo */ +static INLINE void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, struct r600_bo *src) +{ + struct r600_bo *old = *dst; + + if (pipe_reference((struct pipe_reference *)(*dst), (struct pipe_reference *)src)) { + r600_bo_destroy(radeon, old); + } + *dst = src; +} /* R600/R700 STATES */ @@ -170,8 +154,17 @@ struct r600_pipe_state { struct r600_pipe_reg regs[R600_BLOCK_MAX_REG]; }; +struct r600_pipe_resource_state { + unsigned id; + u32 val[8]; + struct r600_bo *bo[2]; +}; + #define R600_BLOCK_STATUS_ENABLED (1 << 0) #define R600_BLOCK_STATUS_DIRTY (1 << 1) +#define R600_BLOCK_STATUS_RESOURCE_DIRTY (1 << 2) + +#define R600_BLOCK_STATUS_RESOURCE_VERTEX (1 << 3) struct r600_block_reloc { struct r600_bo *bo; @@ -182,6 +175,7 @@ struct r600_block_reloc { struct r600_block { struct list_head list; + struct list_head enable_list; unsigned status; unsigned flags; unsigned start_offset; @@ -245,6 +239,8 @@ struct r600_context { unsigned nblocks; struct r600_block **blocks; struct list_head dirty; + struct list_head resource_dirty; + struct list_head enable_list; unsigned pm4_ndwords; unsigned pm4_cdwords; unsigned pm4_dirty_cdwords; @@ -261,6 +257,10 @@ struct r600_context { unsigned num_dest_buffers; unsigned flags; boolean predicate_drawing; + struct r600_range ps_resources; + struct r600_range vs_resources; + struct r600_range fs_resources; + int num_ps_resources, num_vs_resources, num_fs_resources; }; struct r600_draw { @@ -275,9 +275,9 @@ struct r600_draw { int 
r600_context_init(struct r600_context *ctx, struct radeon *radeon); void r600_context_fini(struct r600_context *ctx); void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state); -void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); -void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); -void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); +void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid); +void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid); +void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid); void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void r600_context_flush(struct r600_context *ctx); @@ -303,9 +303,9 @@ void r600_context_flush_dest_caches(struct r600_context *ctx); int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon); void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *draw); void evergreen_context_flush_dest_caches(struct r600_context *ctx); -void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); -void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); -void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid); +void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct 
r600_pipe_resource_state *state, unsigned rid); +void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid); +void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid); void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void evergreen_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 65e539eba35..3196d97dbbb 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -1088,7 +1088,7 @@ static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *al /* Collect required cache lines. */ for (i = 0; i < 3; ++i) { - bool found = false; + boolean found = false; unsigned int line; if (alu->src[i].sel < 512) @@ -1140,7 +1140,7 @@ static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *al /* Setup the kcache lines. 
*/ for (i = 0; i < count; ++i) { - bool found = false; + boolean found = false; for (j = 0; j < 2; ++j) { if (kcache[j].mode == V_SQ_CF_KCACHE_LOCK_2 && diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 8002d943abd..d92b74ebc4e 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -82,12 +82,12 @@ struct r600_screen { struct r600_pipe_sampler_view { struct pipe_sampler_view base; - struct r600_pipe_state state; + struct r600_pipe_resource_state state; }; struct r600_pipe_rasterizer { struct r600_pipe_state rstate; - bool flatshade; + boolean flatshade; unsigned sprite_coord_enable; float offset_units; float offset_scale; @@ -173,7 +173,7 @@ struct r600_pipe_context { struct r600_pipe_state *states[R600_PIPE_NSTATES]; struct r600_context ctx; struct r600_vertex_element *vertex_elements; - struct r600_pipe_state fs_resource[PIPE_MAX_ATTRIBS]; + struct r600_pipe_resource_state fs_resource[PIPE_MAX_ATTRIBS]; struct pipe_framebuffer_state framebuffer; struct pipe_index_buffer index_buffer; unsigned cb_target_mask; @@ -185,25 +185,25 @@ struct r600_pipe_context { struct r600_pipe_shader *ps_shader; struct r600_pipe_shader *vs_shader; struct r600_pipe_state vs_const_buffer; - struct r600_pipe_state vs_const_buffer_resource[R600_MAX_CONST_BUFFERS]; + struct r600_pipe_resource_state vs_const_buffer_resource[R600_MAX_CONST_BUFFERS]; struct r600_pipe_state ps_const_buffer; - struct r600_pipe_state ps_const_buffer_resource[R600_MAX_CONST_BUFFERS]; + struct r600_pipe_resource_state ps_const_buffer_resource[R600_MAX_CONST_BUFFERS]; struct r600_pipe_rasterizer *rasterizer; struct r600_pipe_state vgt; struct r600_pipe_state spi; /* shader information */ unsigned sprite_coord_enable; - bool flatshade; - bool export_16bpc; + boolean flatshade; + boolean export_16bpc; unsigned alpha_ref; - bool alpha_ref_dirty; + boolean alpha_ref_dirty; struct r600_textures_info ps_samplers; struct 
r600_pipe_fences fences; struct u_vbuf_mgr *vbuf_mgr; struct util_slab_mempool pool_transfers; - bool blit; + boolean blit; }; @@ -224,10 +224,8 @@ void evergreen_fetch_shader(struct pipe_context *ctx, struct r600_vertex_element void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx); void evergreen_polygon_offset_update(struct r600_pipe_context *rctx); void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx, - struct r600_pipe_state *rstate, - struct r600_resource *rbuffer, - unsigned offset, unsigned stride); -void evergreen_pipe_mod_buffer_resource(struct r600_pipe_state *rstate, + struct r600_pipe_resource_state *rstate); +void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, unsigned offset, unsigned stride); @@ -268,10 +266,8 @@ void r600_fetch_shader(struct pipe_context *ctx, struct r600_vertex_element *ve) void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx); void r600_polygon_offset_update(struct r600_pipe_context *rctx); void r600_pipe_init_buffer_resource(struct r600_pipe_context *rctx, - struct r600_pipe_state *rstate, - struct r600_resource *rbuffer, - unsigned offset, unsigned stride); -void r600_pipe_mod_buffer_resource(struct r600_pipe_state *rstate, + struct r600_pipe_resource_state *rstate); +void r600_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, unsigned offset, unsigned stride); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 46fdbfed34a..a6cfa704ca5 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -414,7 +414,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_pipe_sampler_view *resource = CALLOC_STRUCT(r600_pipe_sampler_view); - struct r600_pipe_state *rstate; + struct 
r600_pipe_resource_state *rstate; const struct util_format_description *desc; struct r600_resource_texture *tmp; struct r600_resource *rbuffer; @@ -477,33 +477,29 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c depth = texture->array_size; } - r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, - S_038000_DIM(r600_tex_dim(texture->target)) | - S_038000_TILE_MODE(array_mode) | - S_038000_TILE_TYPE(tile_type) | - S_038000_PITCH((pitch / 8) - 1) | - S_038000_TEX_WIDTH(texture->width0 - 1), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, - S_038004_TEX_HEIGHT(height - 1) | - S_038004_TEX_DEPTH(depth - 1) | - S_038004_DATA_FORMAT(format), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, - (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]); - r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, - (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]); - r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, - word4 | - S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | - S_038010_REQUEST_SIZE(1) | - S_038010_ENDIAN_SWAP(endian) | - S_038010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, - S_038014_LAST_LEVEL(state->u.tex.last_level) | - S_038014_BASE_ARRAY(state->u.tex.first_layer) | - S_038014_LAST_ARRAY(state->u.tex.last_layer), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, - S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE) | - S_038018_MAX_ANISO(4 /* max 16 samples */), 0xFFFFFFFF, NULL); + rstate->bo[0] = bo[0]; + rstate->bo[1] = bo[1]; + + rstate->val[0] = (S_038000_DIM(r600_tex_dim(texture->target)) | + S_038000_TILE_MODE(array_mode) | + S_038000_TILE_TYPE(tile_type) | + S_038000_PITCH((pitch / 8) - 1) | + S_038000_TEX_WIDTH(texture->width0 - 1)); + rstate->val[1] = (S_038004_TEX_HEIGHT(height - 1) | + 
S_038004_TEX_DEPTH(depth - 1) | + S_038004_DATA_FORMAT(format)); + rstate->val[2] = (tmp->offset[0] + r600_bo_offset(bo[0])) >> 8; + rstate->val[3] = (tmp->offset[1] + r600_bo_offset(bo[1])) >> 8; + rstate->val[4] = (word4 | + S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | + S_038010_REQUEST_SIZE(1) | + S_038010_ENDIAN_SWAP(endian) | + S_038010_BASE_LEVEL(state->u.tex.first_level)); + rstate->val[5] = (S_038014_LAST_LEVEL(state->u.tex.last_level) | + S_038014_BASE_ARRAY(state->u.tex.first_layer) | + S_038014_LAST_ARRAY(state->u.tex.last_layer)); + rstate->val[6] = (S_038018_TYPE(V_038010_SQ_TEX_VTX_VALID_TEXTURE) | + S_038018_MAX_ANISO(4 /* max 16 samples */)); return &resource->base; } @@ -1486,37 +1482,27 @@ void *r600_create_db_flush_dsa(struct r600_pipe_context *rctx) } void r600_pipe_init_buffer_resource(struct r600_pipe_context *rctx, - struct r600_pipe_state *rstate, - struct r600_resource *rbuffer, - unsigned offset, unsigned stride) + struct r600_pipe_resource_state *rstate) { rstate->id = R600_PIPE_STATE_RESOURCE; - rstate->nregs = 0; - r600_pipe_state_add_reg(rstate, R_038000_RESOURCE0_WORD0, - offset, 0xFFFFFFFF, rbuffer->bo); - r600_pipe_state_add_reg(rstate, R_038004_RESOURCE0_WORD1, - rbuffer->bo_size - offset - 1, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038008_RESOURCE0_WORD2, - S_038008_ENDIAN_SWAP(r600_endian_swap(32)) | - S_038008_STRIDE(stride), 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_03800C_RESOURCE0_WORD3, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038014_RESOURCE0_WORD5, - 0x00000000, 0xFFFFFFFF, NULL); - r600_pipe_state_add_reg(rstate, R_038018_RESOURCE0_WORD6, - 0xC0000000, 0xFFFFFFFF, NULL); + + rstate->bo[0] = NULL; + rstate->val[0] = 0; + rstate->val[1] = 0; + rstate->val[2] = 0; + rstate->val[3] = 0; + rstate->val[4] = 0; + rstate->val[5] = 0; + rstate->val[6] = 0xc0000000; 
} -void r600_pipe_mod_buffer_resource(struct r600_pipe_state *rstate, +void r600_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, unsigned offset, unsigned stride) { - rstate->nregs = 0; - r600_pipe_state_mod_reg_bo(rstate, offset, rbuffer->bo); - r600_pipe_state_mod_reg(rstate, rbuffer->bo_size - offset - 1); - r600_pipe_state_mod_reg(rstate, S_038008_ENDIAN_SWAP(r600_endian_swap(32)) | - S_038008_STRIDE(stride)); - rstate->nregs = 7; + rstate->val[0] = offset; + rstate->bo[0] = rbuffer->bo; + rstate->val[1] = rbuffer->bo_size - offset - 1; + rstate->val[2] = S_038008_ENDIAN_SWAP(r600_endian_swap(32)) | + S_038008_STRIDE(stride); } diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 48ab15f9323..a670ac02be2 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -318,12 +318,10 @@ void r600_delete_vs_shader(struct pipe_context *ctx, void *state) static void r600_update_alpha_ref(struct r600_pipe_context *rctx) { - unsigned alpha_ref = rctx->alpha_ref; + unsigned alpha_ref; struct r600_pipe_state rstate; - if (!rctx->alpha_ref_dirty) - return; - + alpha_ref = rctx->alpha_ref; rstate.nregs = 0; if (rctx->export_16bpc) alpha_ref &= ~0x1FFF; @@ -388,7 +386,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, { struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_resource_buffer *rbuffer = r600_buffer(buffer); - struct r600_pipe_state *rstate; + struct r600_pipe_resource_state *rstate; uint32_t offset; /* Note that the state tracker can unbind constant buffers by @@ -416,9 +414,9 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, rstate = &rctx->vs_const_buffer_resource[index]; if (!rstate->id) { if (rctx->family >= CHIP_CEDAR) { - evergreen_pipe_init_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + 
evergreen_pipe_init_buffer_resource(rctx, rstate); } else { - r600_pipe_init_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + r600_pipe_init_buffer_resource(rctx, rstate); } } @@ -444,9 +442,9 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, rstate = &rctx->ps_const_buffer_resource[index]; if (!rstate->id) { if (rctx->family >= CHIP_CEDAR) { - evergreen_pipe_init_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + evergreen_pipe_init_buffer_resource(rctx, rstate); } else { - r600_pipe_init_buffer_resource(rctx, rstate, &rbuffer->r, offset, 16); + r600_pipe_init_buffer_resource(rctx, rstate); } } if (rctx->family >= CHIP_CEDAR) { @@ -468,7 +466,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) { - struct r600_pipe_state *rstate; + struct r600_pipe_resource_state *rstate; struct r600_resource *rbuffer; struct pipe_vertex_buffer *vertex_buffer; unsigned i, count, offset; @@ -503,9 +501,9 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) if (!rstate->id) { if (rctx->family >= CHIP_CEDAR) { - evergreen_pipe_init_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride); + evergreen_pipe_init_buffer_resource(rctx, rstate); } else { - r600_pipe_init_buffer_resource(rctx, rstate, rbuffer, offset, vertex_buffer->stride); + r600_pipe_init_buffer_resource(rctx, rstate); } } @@ -595,7 +593,8 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) return; } - r600_update_alpha_ref(rctx); + if (rctx->alpha_ref_dirty) + r600_update_alpha_ref(rctx); mask = 0; for (int i = 0; i < rctx->framebuffer.nr_cbufs; i++) { diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index bb72cf63bc5..ac2e65b988e 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -83,7 +83,7 @@ 
softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_SM3: return 1; case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; + return 1; case PIPE_CAP_POINT_SPRITE: return 1; case PIPE_CAP_MAX_RENDER_TARGETS: @@ -163,7 +163,7 @@ softpipe_get_paramf(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_MAX_POINT_WIDTH_AA: return 255.0; /* arbitrary */ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: - return 16.0; /* not actually signficant at this time */ + return 16.0; case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: return 16.0; /* arbitrary */ default: diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 1446aee2aa4..90766f4119c 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -1709,6 +1709,317 @@ mip_filter_none(struct tgsi_sampler *tgsi_sampler, } +/* For anisotropic filtering */ +#define WEIGHT_LUT_SIZE 1024 + +static float *weightLut = NULL; + +/** + * Creates the look-up table used to speed-up EWA sampling + */ +static void +create_filter_table(void) +{ + unsigned i; + if (!weightLut) { + weightLut = (float *) malloc(WEIGHT_LUT_SIZE * sizeof(float)); + + for (i = 0; i < WEIGHT_LUT_SIZE; ++i) { + float alpha = 2; + float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1); + float weight = (float) exp(-alpha * r2); + weightLut[i] = weight; + } + } +} + + +/** + * Elliptical weighted average (EWA) filter for producing high quality + * anisotropic filtered results. + * Based on the Higher Quality Elliptical Weighted Average Filter + * published by Paul S. 
Heckbert in his Master's Thesis + * "Fundamentals of Texture Mapping and Image Warping" (1989) + */ +static void +img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, + const float dudx, const float dvdx, + const float dudy, const float dvdy, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; + + unsigned level0 = samp->level > 0 ? samp->level : 0; + float scaling = 1.0 / (1 << level0); + int width = u_minify(texture->width0, level0); + int height = u_minify(texture->height0, level0); + + float ux = dudx * scaling; + float vx = dvdx * scaling; + float uy = dudy * scaling; + float vy = dvdy * scaling; + + /* compute ellipse coefficients to bound the region: + * A*x*x + B*x*y + C*y*y = F. + */ + float A = vx*vx+vy*vy+1; + float B = -2*(ux*vx+uy*vy); + float C = ux*ux+uy*uy+1; + float F = A*C-B*B/4.0; + + /* check if it is an ellipse */ + /* ASSERT(F > 0.0); */ + + /* Compute the ellipse's (u,v) bounding box in texture space */ + float d = -B*B+4.0*C*A; + float box_u = 2.0 / d * sqrt(d*C*F); /* box_u -> half of bbox width */ + float box_v = 2.0 / d * sqrt(A*d*F); /* box_v -> half of bbox height */ + + float rgba_temp[NUM_CHANNELS][QUAD_SIZE]; + float s_buffer[QUAD_SIZE]; + float t_buffer[QUAD_SIZE]; + float weight_buffer[QUAD_SIZE]; + unsigned buffer_next; + int j; + float den;// = 0.0F; + float ddq; + float U;// = u0 - tex_u; + int v; + + /* Scale ellipse formula to directly index the Filter Lookup Table. + * i.e. 
scale so that F = WEIGHT_LUT_SIZE-1 + */ + double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F; + A *= formScale; + B *= formScale; + C *= formScale; + /* F *= formScale; */ /* no need to scale F as we don't use it below here */ + + /* For each quad, the du and dx values are the same and so the ellipse is + * also the same. Note that texel/image access can only be performed using + * a quad, i.e. it is not possible to get the pixel value for a single + * tex coord. In order to have a better performance, the access is buffered + * using the s_buffer/t_buffer and weight_buffer. Only when the buffer is full, + * then the pixel values are read from the image. + */ + ddq = 2 * A; + + for (j = 0; j < QUAD_SIZE; j++) { + /* Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse + * and incrementally update the value of Ax^2+Bxy+Cy^2; when this + * value, q, is less than F, we're inside the ellipse + */ + float tex_u=-0.5 + s[j] * texture->width0 * scaling; + float tex_v=-0.5 + t[j] * texture->height0 * scaling; + + int u0 = floor(tex_u - box_u); + int u1 = ceil (tex_u + box_u); + int v0 = floor(tex_v - box_v); + int v1 = ceil (tex_v + box_v); + + float num[4] = {0.0F, 0.0F, 0.0F, 0.0F}; + buffer_next = 0; + den = 0; + U = u0 - tex_u; + for (v = v0; v <= v1; ++v) { + float V = v - tex_v; + float dq = A * (2 * U + 1) + B * V; + float q = (C * V + B * U) * V + A * U * U; + + int u; + for (u = u0; u <= u1; ++u) { + /* Note that the ellipse has been pre-scaled so F = WEIGHT_LUT_SIZE - 1 */ + if (q < WEIGHT_LUT_SIZE) { + /* as a LUT is used, q must never be negative; + * should not happen, though + */ + const int qClamped = q >= 0.0F ? 
q : 0; + float weight = weightLut[qClamped]; + + weight_buffer[buffer_next] = weight; + s_buffer[buffer_next] = u / ((float) width); + t_buffer[buffer_next] = v / ((float) height); + + buffer_next++; + if (buffer_next == QUAD_SIZE) { + /* 4 texel coords are in the buffer -> read it now */ + int jj; + /* it is assumed that samp->min_img_filter is set to + * img_filter_2d_nearest or one of the + * accelerated img_filter_2d_nearest_XXX functions. + */ + samp->min_img_filter(tgsi_sampler, s_buffer, t_buffer, p, NULL, + tgsi_sampler_lod_bias, rgba_temp); + for (jj = 0; jj < buffer_next; jj++) { + num[0] += weight_buffer[jj] * rgba_temp[0][jj]; + num[1] += weight_buffer[jj] * rgba_temp[1][jj]; + num[2] += weight_buffer[jj] * rgba_temp[2][jj]; + num[3] += weight_buffer[jj] * rgba_temp[3][jj]; + } + + buffer_next = 0; + } + + den += weight; + } + q += dq; + dq += ddq; + } + } + + /* if the tex coord buffer contains unread values, we will read them now. + * Note that in most cases we have to read more pixel values than required, + * however, as the img_filter_2d_nearest function(s) does not have a count + * parameter, we need to read the whole quad and ignore the unused values + */ + if (buffer_next > 0) { + int jj; + /* it is assumed that samp->min_img_filter is set to + * img_filter_2d_nearest or one of the + * accelerated img_filter_2d_nearest_XXX functions. + */ + samp->min_img_filter(tgsi_sampler, s_buffer, t_buffer, p, NULL, + tgsi_sampler_lod_bias, rgba_temp); + for (jj = 0; jj < buffer_next; jj++) { + num[0] += weight_buffer[jj] * rgba_temp[0][jj]; + num[1] += weight_buffer[jj] * rgba_temp[1][jj]; + num[2] += weight_buffer[jj] * rgba_temp[2][jj]; + num[3] += weight_buffer[jj] * rgba_temp[3][jj]; + } + } + + if (den <= 0.0F) { + /* Reaching this place would mean + * that no pixels intersected the ellipse. + * This should never happen because + * the filter we use always + * intersects at least one pixel. 
+ */ + + /*rgba[0]=0; + rgba[1]=0; + rgba[2]=0; + rgba[3]=0;*/ + /* not enough pixels in resampling, resort to direct interpolation */ + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba_temp); + den = 1; + num[0] = rgba_temp[0][j]; + num[1] = rgba_temp[1][j]; + num[2] = rgba_temp[2][j]; + num[3] = rgba_temp[3][j]; + } + + rgba[0][j] = num[0] / den; + rgba[1][j] = num[1] / den; + rgba[2][j] = num[2] / den; + rgba[3][j] = num[3] / den; + } +} + + +/** + * Sample 2D texture using an anisotropic filter. + */ +static void +mip_filter_linear_aniso(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + const float c0[QUAD_SIZE], + enum tgsi_sampler_control control, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); + const struct pipe_resource *texture = samp->view->texture; + int level0; + float lambda; + float lod[QUAD_SIZE]; + + float s_to_u = u_minify(texture->width0, samp->view->u.tex.first_level); + float t_to_v = u_minify(texture->height0, samp->view->u.tex.first_level); + float dudx = (s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]) * s_to_u; + float dudy = (s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]) * s_to_u; + float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v; + float dvdy = (t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]) * t_to_v; + + if (control == tgsi_sampler_lod_bias) { + /* note: instead of working with Px and Py, we will use the + * squared length instead, to avoid sqrt. 
+ */ + float Px2 = dudx * dudx + dvdx * dvdx; + float Py2 = dudy * dudy + dvdy * dvdy; + + float Pmax2; + float Pmin2; + float e; + const float maxEccentricity = samp->sampler->max_anisotropy * samp->sampler->max_anisotropy; + + if (Px2 < Py2) { + Pmax2 = Py2; + Pmin2 = Px2; + } + else { + Pmax2 = Px2; + Pmin2 = Py2; + } + + /* if the eccentricity of the ellipse is too big, scale up the shorter + * of the two vectors to limit the maximum amount of work per pixel + */ + e = Pmax2 / Pmin2; + if (e > maxEccentricity) { + /* float s=e / maxEccentricity; + minor[0] *= s; + minor[1] *= s; + Pmin2 *= s; */ + Pmin2 = Pmax2 / maxEccentricity; + } + + /* note: we need to have Pmin=sqrt(Pmin2) here, but we can avoid + * this since 0.5*log(x) = log(sqrt(x)) + */ + lambda = 0.5 * util_fast_log2(Pmin2) + samp->sampler->lod_bias; + compute_lod(samp->sampler, lambda, c0, lod); + } + else { + assert(control == tgsi_sampler_lod_explicit); + + memcpy(lod, c0, sizeof(lod)); + } + + /* XXX: Take into account all lod values. + */ + lambda = lod[0]; + level0 = samp->view->u.tex.first_level + (int)lambda; + + /* If the ellipse covers the whole image, we can + * simply return the average of the whole image. + */ + if (level0 >= texture->last_level) { + samp->level = texture->last_level; + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba); + } + else { + /* don't bother interpolating between multiple LODs; it doesn't + * seem to be worth the extra running time. 
+ */ + samp->level = level0; + img_filter_2d_ewa(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, + dudx, dvdx, dudy, dvdy, rgba); + } + + if (DEBUG_TEX) { + print_sample(__FUNCTION__, rgba); + } +} + + /** * Specialized version of mip_filter_linear with hard-wired calls to @@ -2316,14 +2627,33 @@ sp_create_sampler_variant( const struct pipe_sampler_state *sampler, sampler->normalized_coords && sampler->wrap_s == PIPE_TEX_WRAP_REPEAT && sampler->wrap_t == PIPE_TEX_WRAP_REPEAT && - sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR) - { + sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR) { samp->mip_filter = mip_filter_linear_2d_linear_repeat_POT; } - else - { + else { samp->mip_filter = mip_filter_linear; } + + /* Anisotropic filtering extension. */ + if (sampler->max_anisotropy > 1) { + samp->mip_filter = mip_filter_linear_aniso; + + /* Override min_img_filter: + * min_img_filter needs to be set to NEAREST since we need to access + * each texture pixel as it is and weight it later; using linear + * filters will have incorrect results. + * By setting the filter to NEAREST here, we can avoid calling the + * generic img_filter_2d_nearest in the anisotropic filter function, + * making it possible to use one of the accelerated implementations + */ + samp->min_img_filter = get_img_filter(key, PIPE_TEX_FILTER_NEAREST, sampler); + + /* on first access create the lookup table containing the filter weights. */ + if (!weightLut) { + create_filter_table(); + } + } + break; } diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c index 1ed1d5d25bb..ebcd4bcaf10 100644 --- a/src/gallium/drivers/svga/svga_cmd.c +++ b/src/gallium/drivers/svga/svga_cmd.c @@ -46,10 +46,10 @@ * to have allocated the fifo space before converting. * * Results: - * id is filld out. + * id is filled out. * * Side effects: - * One surface relocation is preformed for texture handle. + * One surface relocation is performed for texture handle. 
* *---------------------------------------------------------------------- */ @@ -224,7 +224,7 @@ SVGA3D_DestroyContext(struct svga_winsys_context *swc) // IN * containers for host VRAM objects like textures, vertex * buffers, and depth/stencil buffers. * - * Surfaces are hierarchial: + * Surfaces are hierarchical: * * - Surface may have multiple faces (for cube maps) * @@ -376,11 +376,9 @@ SVGA3D_DestroySurface(struct svga_winsys_context *swc, /* *---------------------------------------------------------------------- * - * SVGA3D_BeginSurfaceDMA-- + * SVGA3D_SurfaceDMA-- * - * Begin a SURFACE_DMA command. This reserves space for it in - * the FIFO, and returns a pointer to the command's box array. - * This function must be paired with SVGA_FIFOCommitAll(). + * Emit a SURFACE_DMA command. * * When the SVGA3D device asynchronously processes this FIFO * command, a DMA operation is performed between host VRAM and diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 86ef255cd2e..d442c15c02a 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -372,19 +372,21 @@ struct pipe_resource unsigned flags; /**< bitmask of PIPE_RESOURCE_FLAG_x */ }; + +/** + * Stream output for vertex transform feedback. 
+ */ struct pipe_stream_output_state { - /**< number of the output buffer to insert each element into */ + /** number of the output buffer to insert each element into */ int output_buffer[PIPE_MAX_SHADER_OUTPUTS]; - /**< which register to grab each output from */ + /** which register to grab each output from */ int register_index[PIPE_MAX_SHADER_OUTPUTS]; - /**< TGSI_WRITEMASK signifying which components to output */ + /** TGSI_WRITEMASK signifying which components to output */ ubyte register_mask[PIPE_MAX_SHADER_OUTPUTS]; - /**< number of outputs */ + /** number of outputs */ int num_outputs; - - /**< stride for an entire vertex, only used if all output_buffers - * are 0 */ + /** stride for an entire vertex, only used if all output_buffers are 0 */ unsigned stride; }; diff --git a/src/gallium/state_trackers/egl/common/egl_g3d.c b/src/gallium/state_trackers/egl/common/egl_g3d.c index 4bd865638a3..29dbbefbf48 100644 --- a/src/gallium/state_trackers/egl/common/egl_g3d.c +++ b/src/gallium/state_trackers/egl/common/egl_g3d.c @@ -259,6 +259,10 @@ init_config_attributes(_EGLConfig *conf, const struct native_config *nconf, conf->DepthSize = depth_stencil[0]; conf->StencilSize = depth_stencil[1]; + /* st/vega will allocate the mask on demand */ + if (api_mask & EGL_OPENVG_BIT) + conf->AlphaMaskSize = 8; + conf->SurfaceType = surface_type; conf->NativeRenderable = EGL_TRUE; diff --git a/src/gallium/state_trackers/egl/drm/native_drm.c b/src/gallium/state_trackers/egl/drm/native_drm.c index c89a6d4767e..725fe28e4e2 100644 --- a/src/gallium/state_trackers/egl/drm/native_drm.c +++ b/src/gallium/state_trackers/egl/drm/native_drm.c @@ -33,10 +33,6 @@ #include "native_drm.h" -/* see get_drm_screen_name */ -#include <radeon_drm.h> -#include "radeon/drm/radeon_drm_public.h" - #ifdef HAVE_LIBUDEV #include <libudev.h> #endif @@ -140,27 +136,6 @@ drm_display_destroy(struct native_display *ndpy) FREE(drmdpy); } -static const char * -get_drm_screen_name(int fd, drmVersionPtr version) -{ 
- const char *name = version->name; - - if (name && !strcmp(name, "radeon")) { - int chip_id; - struct drm_radeon_info info; - - memset(&info, 0, sizeof(info)); - info.request = RADEON_INFO_DEVICE_ID; - info.value = pointer_to_intptr(&chip_id); - if (drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)) != 0) - return NULL; - - name = is_r3xx(chip_id) ? "r300" : "r600"; - } - - return name; -} - /** * Initialize KMS and pipe screen. */ @@ -169,7 +144,6 @@ drm_display_init_screen(struct native_display *ndpy) { struct drm_display *drmdpy = drm_display(ndpy); drmVersionPtr version; - const char *name; version = drmGetVersion(drmdpy->fd); if (!version) { @@ -177,11 +151,8 @@ drm_display_init_screen(struct native_display *ndpy) return FALSE; } - name = get_drm_screen_name(drmdpy->fd, version); - if (name) { - drmdpy->base.screen = - drmdpy->event_handler->new_drm_screen(&drmdpy->base, name, drmdpy->fd); - } + drmdpy->base.screen = + drmdpy->event_handler->new_drm_screen(&drmdpy->base, NULL, drmdpy->fd); drmFreeVersion(version); if (!drmdpy->base.screen) { diff --git a/src/gallium/state_trackers/egl/wayland/native_drm.c b/src/gallium/state_trackers/egl/wayland/native_drm.c index f643c7cbbba..15383e89301 100644 --- a/src/gallium/state_trackers/egl/wayland/native_drm.c +++ b/src/gallium/state_trackers/egl/wayland/native_drm.c @@ -37,10 +37,6 @@ #include "native_wayland.h" -/* see get_drm_screen_name */ -#include <radeon_drm.h> -#include "radeon/drm/radeon_drm_public.h" - #include <wayland-client.h> #include "wayland-drm-client-protocol.h" #include "wayland-egl-priv.h" @@ -143,27 +139,6 @@ wayland_create_drm_buffer(struct wayland_display *display, width, height, wsh.stride, visual); } -static const char * -get_drm_screen_name(int fd, drmVersionPtr version) -{ - const char *name = version->name; - - if (name && !strcmp(name, "radeon")) { - int chip_id; - struct drm_radeon_info info; - - memset(&info, 0, sizeof(info)); - info.request = RADEON_INFO_DEVICE_ID; - 
info.value = pointer_to_intptr(&chip_id); - if (drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)) != 0) - return NULL; - - name = is_r3xx(chip_id) ? "r300" : "r600"; - } - - return name; -} - static void drm_handle_device(void *data, struct wl_drm *drm, const char *device) { @@ -202,8 +177,6 @@ static boolean wayland_drm_display_init_screen(struct native_display *ndpy) { struct wayland_drm_display *drmdpy = wayland_drm_display(ndpy); - drmVersionPtr version; - const char *driver_name; uint32_t id; id = wl_display_get_global(drmdpy->base.dpy, "wl_drm", 1); @@ -226,20 +199,9 @@ wayland_drm_display_init_screen(struct native_display *ndpy) if (!drmdpy->authenticated) return FALSE; - version = drmGetVersion(drmdpy->fd); - if (!version) { - _eglLog(_EGL_WARNING, "invalid fd %d", drmdpy->fd); - return FALSE; - } - - /* FIXME: share this with native_drm or egl_dri2 */ - driver_name = get_drm_screen_name(drmdpy->fd, version); - drmdpy->base.base.screen = drmdpy->event_handler->new_drm_screen(&drmdpy->base.base, - driver_name, drmdpy->fd); - drmFreeVersion(version); - + NULL, drmdpy->fd); if (!drmdpy->base.base.screen) { _eglLog(_EGL_WARNING, "failed to create DRM screen"); return FALSE; diff --git a/src/gallium/state_trackers/glx/xlib/glx_getproc.c b/src/gallium/state_trackers/glx/xlib/glx_getproc.c index 26fcae78ece..bc29c31ffa7 100644 --- a/src/gallium/state_trackers/glx/xlib/glx_getproc.c +++ b/src/gallium/state_trackers/glx/xlib/glx_getproc.c @@ -168,6 +168,9 @@ static struct name_address_pair GLX_functions[] = { /*** GLX_ARB_get_proc_address ***/ { "glXGetProcAddressARB", (__GLXextFuncPtr) glXGetProcAddressARB }, + /*** GLX_ARB_create_context ***/ + { "glXCreateContextAttribsARB", (__GLXextFuncPtr) glXCreateContextAttribsARB }, + /*** GLX_EXT_texture_from_pixmap ***/ { "glXBindTexImageEXT", (__GLXextFuncPtr) glXBindTexImageEXT }, { "glXReleaseTexImageEXT", (__GLXextFuncPtr) glXReleaseTexImageEXT }, diff --git a/src/gallium/targets/egl/egl.c 
b/src/gallium/targets/egl/egl.c index 61fe5069e91..3467aea9991 100644 --- a/src/gallium/targets/egl/egl.c +++ b/src/gallium/targets/egl/egl.c @@ -37,6 +37,12 @@ #include "state_tracker/drm_driver.h" #include "common/egl_g3d_loader.h" +#ifdef HAVE_LIBUDEV +#include <libudev.h> +#define DRIVER_MAP_GALLIUM_ONLY +#include "pci_ids/pci_id_driver_map.h" +#endif + #include "egl.h" struct egl_g3d_loader egl_g3d_loader; @@ -306,10 +312,83 @@ get_pipe_module(const char *name) return pmod; } +static char * +drm_fd_get_screen_name(int fd) +{ + char *driver = NULL; +#ifdef HAVE_LIBUDEV + struct udev *udev; + struct udev_device *device, *parent; + struct stat buf; + const char *pci_id; + int vendor_id, chip_id, i, j; + + udev = udev_new(); + if (fstat(fd, &buf) < 0) { + _eglLog(_EGL_WARNING, "failed to stat fd %d", fd); + return NULL; + } + + device = udev_device_new_from_devnum(udev, 'c', buf.st_rdev); + if (device == NULL) { + _eglLog(_EGL_WARNING, + "could not create udev device for fd %d", fd); + return NULL; + } + + parent = udev_device_get_parent(device); + if (parent == NULL) { + _eglLog(_EGL_WARNING, "could not get parent device"); + goto out; + } + + pci_id = udev_device_get_property_value(parent, "PCI_ID"); + if (pci_id == NULL || + sscanf(pci_id, "%x:%x", &vendor_id, &chip_id) != 2) { + _eglLog(_EGL_WARNING, "malformed or no PCI ID"); + goto out; + } + + for (i = 0; driver_map[i].driver; i++) { + if (vendor_id != driver_map[i].vendor_id) + continue; + if (driver_map[i].num_chips_ids == -1) { + driver = strdup(driver_map[i].driver); + _eglLog(_EGL_WARNING, + "pci id for %d: %04x:%04x, driver %s", + fd, vendor_id, chip_id, driver); + goto out; + } + + for (j = 0; j < driver_map[i].num_chips_ids; j++) + if (driver_map[i].chip_ids[j] == chip_id) { + driver = strdup(driver_map[i].driver); + _eglLog(_EGL_WARNING, + "pci id for %d: %04x:%04x, driver %s", + fd, vendor_id, chip_id, driver); + goto out; + } + } + +out: + udev_device_unref(device); + udev_unref(udev); + +#endif 
+ return driver; +} + static struct pipe_screen * create_drm_screen(const char *name, int fd) { - struct pipe_module *pmod = get_pipe_module(name); + struct pipe_module *pmod; + const char *screen_name = name; + + if (screen_name == NULL) + if ((screen_name = drm_fd_get_screen_name(fd)) == NULL) + return NULL; + pmod = get_pipe_module(screen_name); + return (pmod && pmod->drmdd && pmod->drmdd->create_screen) ? pmod->drmdd->create_screen(fd) : NULL; } diff --git a/src/gallium/tests/unit/u_format_test.c b/src/gallium/tests/unit/u_format_test.c new file mode 100644 index 00000000000..f831e5dd754 --- /dev/null +++ b/src/gallium/tests/unit/u_format_test.c @@ -0,0 +1,716 @@ +/************************************************************************** + * + * Copyright 2009-2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include <stdlib.h> +#include <stdio.h> +#include <float.h> + +#include "util/u_half.h" +#include "util/u_format.h" +#include "util/u_format_tests.h" +#include "util/u_format_s3tc.h" + + +static boolean +compare_float(float x, float y) +{ + float error = y - x; + + if (error < 0.0f) + error = -error; + + if (error > FLT_EPSILON) { + return FALSE; + } + + return TRUE; +} + + +static void +print_packed(const struct util_format_description *format_desc, + const char *prefix, + const uint8_t *packed, + const char *suffix) +{ + unsigned i; + const char *sep = ""; + + printf("%s", prefix); + for (i = 0; i < format_desc->block.bits/8; ++i) { + printf("%s%02x", sep, packed[i]); + sep = " "; + } + printf("%s", suffix); + fflush(stdout); +} + + +static void +print_unpacked_rgba_doubl(const struct util_format_description *format_desc, + const char *prefix, + const double unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4], + const char *suffix) +{ + unsigned i, j; + const char *sep = ""; + + printf("%s", prefix); + for (i = 0; i < format_desc->block.height; ++i) { + for (j = 0; j < format_desc->block.width; ++j) { + printf("%s{%f, %f, %f, %f}", sep, unpacked[i][j][0], unpacked[i][j][1], unpacked[i][j][2], unpacked[i][j][3]); + sep = ", "; + } + sep = ",\n"; + } + printf("%s", suffix); + fflush(stdout); +} + + +static void +print_unpacked_rgba_float(const struct util_format_description *format_desc, + const char *prefix, + float unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4], + const char *suffix) +{ + unsigned i, j; + const char *sep = ""; + + printf("%s", prefix); + for (i = 0; i < format_desc->block.height; ++i) { + for (j = 0; j < format_desc->block.width; ++j) { + printf("%s{%f, %f, %f, %f}", sep, unpacked[i][j][0], unpacked[i][j][1], unpacked[i][j][2], unpacked[i][j][3]); + sep = ", "; + } + sep = ",\n"; + } + printf("%s", suffix); 
+ fflush(stdout); +} + + +static void +print_unpacked_rgba_8unorm(const struct util_format_description *format_desc, + const char *prefix, + uint8_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4], + const char *suffix) +{ + unsigned i, j; + const char *sep = ""; + + printf("%s", prefix); + for (i = 0; i < format_desc->block.height; ++i) { + for (j = 0; j < format_desc->block.width; ++j) { + printf("%s{0x%02x, 0x%02x, 0x%02x, 0x%02x}", sep, unpacked[i][j][0], unpacked[i][j][1], unpacked[i][j][2], unpacked[i][j][3]); + sep = ", "; + } + } + printf("%s", suffix); + fflush(stdout); +} + + +static void +print_unpacked_z_float(const struct util_format_description *format_desc, + const char *prefix, + float unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH], + const char *suffix) +{ + unsigned i, j; + const char *sep = ""; + + printf("%s", prefix); + for (i = 0; i < format_desc->block.height; ++i) { + for (j = 0; j < format_desc->block.width; ++j) { + printf("%s%f", sep, unpacked[i][j]); + sep = ", "; + } + sep = ",\n"; + } + printf("%s", suffix); + fflush(stdout); +} + + +static void +print_unpacked_z_32unorm(const struct util_format_description *format_desc, + const char *prefix, + uint32_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH], + const char *suffix) +{ + unsigned i, j; + const char *sep = ""; + + printf("%s", prefix); + for (i = 0; i < format_desc->block.height; ++i) { + for (j = 0; j < format_desc->block.width; ++j) { + printf("%s0x%08x", sep, unpacked[i][j]); + sep = ", "; + } + } + printf("%s", suffix); + fflush(stdout); +} + + +static void +print_unpacked_s_8uscaled(const struct util_format_description *format_desc, + const char *prefix, + uint8_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH], + const char *suffix) +{ + unsigned i, j; + const char *sep = ""; + + printf("%s", prefix); + for (i = 0; i < format_desc->block.height; ++i) { + for (j = 0; j 
< format_desc->block.width; ++j) { + printf("%s0x%02x", sep, unpacked[i][j]); + sep = ", "; + } + } + printf("%s", suffix); + fflush(stdout); +} + + +static boolean +test_format_fetch_rgba_float(const struct util_format_description *format_desc, + const struct util_format_test_case *test) +{ + float unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4] = { { { 0 } } }; + unsigned i, j, k; + boolean success; + + success = TRUE; + for (i = 0; i < format_desc->block.height; ++i) { + for (j = 0; j < format_desc->block.width; ++j) { + format_desc->fetch_rgba_float(unpacked[i][j], test->packed, j, i); + for (k = 0; k < 4; ++k) { + if (!compare_float(test->unpacked[i][j][k], unpacked[i][j][k])) { + success = FALSE; + } + } + } + } + + if (!success) { + print_unpacked_rgba_float(format_desc, "FAILED: ", unpacked, " obtained\n"); + print_unpacked_rgba_doubl(format_desc, " ", test->unpacked, " expected\n"); + } + + return success; +} + + +static boolean +test_format_unpack_rgba_float(const struct util_format_description *format_desc, + const struct util_format_test_case *test) +{ + float unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4] = { { { 0 } } }; + unsigned i, j, k; + boolean success; + + format_desc->unpack_rgba_float(&unpacked[0][0][0], sizeof unpacked[0], + test->packed, 0, + format_desc->block.width, format_desc->block.height); + + success = TRUE; + for (i = 0; i < format_desc->block.height; ++i) { + for (j = 0; j < format_desc->block.width; ++j) { + for (k = 0; k < 4; ++k) { + if (!compare_float(test->unpacked[i][j][k], unpacked[i][j][k])) { + success = FALSE; + } + } + } + } + + if (!success) { + print_unpacked_rgba_float(format_desc, "FAILED: ", unpacked, " obtained\n"); + print_unpacked_rgba_doubl(format_desc, " ", test->unpacked, " expected\n"); + } + + return success; +} + + +static boolean +test_format_pack_rgba_float(const struct util_format_description *format_desc, + const struct util_format_test_case 
*test) +{ + float unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4]; + uint8_t packed[UTIL_FORMAT_MAX_PACKED_BYTES]; + unsigned i, j, k; + boolean success; + + if (test->format == PIPE_FORMAT_DXT1_RGBA) { + /* + * Skip S3TC as packed representation is not canonical. + * + * TODO: Do a round trip conversion. + */ + return TRUE; + } + + memset(packed, 0, sizeof packed); + for (i = 0; i < format_desc->block.height; ++i) { + for (j = 0; j < format_desc->block.width; ++j) { + for (k = 0; k < 4; ++k) { + unpacked[i][j][k] = (float) test->unpacked[i][j][k]; + } + } + } + + format_desc->pack_rgba_float(packed, 0, + &unpacked[0][0][0], sizeof unpacked[0], + format_desc->block.width, format_desc->block.height); + + success = TRUE; + for (i = 0; i < format_desc->block.bits/8; ++i) + if ((test->packed[i] & test->mask[i]) != (packed[i] & test->mask[i])) + success = FALSE; + + if (!success) { + print_packed(format_desc, "FAILED: ", packed, " obtained\n"); + print_packed(format_desc, " ", test->packed, " expected\n"); + } + + return success; +} + + +static boolean +convert_float_to_8unorm(uint8_t *dst, const double *src) +{ + unsigned i; + boolean accurate = TRUE; + + for (i = 0; i < UTIL_FORMAT_MAX_UNPACKED_HEIGHT*UTIL_FORMAT_MAX_UNPACKED_WIDTH*4; ++i) { + if (src[i] < 0.0) { + accurate = FALSE; + dst[i] = 0; + } + else if (src[i] > 1.0) { + accurate = FALSE; + dst[i] = 255; + } + else { + dst[i] = src[i] * 255.0; + } + } + + return accurate; +} + + +static boolean +test_format_unpack_rgba_8unorm(const struct util_format_description *format_desc, + const struct util_format_test_case *test) +{ + uint8_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4] = { { { 0 } } }; + uint8_t expected[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4] = { { { 0 } } }; + unsigned i, j, k; + boolean success; + + format_desc->unpack_rgba_8unorm(&unpacked[0][0][0], sizeof unpacked[0], + test->packed, 0, + 
format_desc->block.width, format_desc->block.height); + + convert_float_to_8unorm(&expected[0][0][0], &test->unpacked[0][0][0]); + + success = TRUE; + for (i = 0; i < format_desc->block.height; ++i) { + for (j = 0; j < format_desc->block.width; ++j) { + for (k = 0; k < 4; ++k) { + if (expected[i][j][k] != unpacked[i][j][k]) { + success = FALSE; + } + } + } + } + + if (!success) { + print_unpacked_rgba_8unorm(format_desc, "FAILED: ", unpacked, " obtained\n"); + print_unpacked_rgba_8unorm(format_desc, " ", expected, " expected\n"); + } + + return success; +} + + +static boolean +test_format_pack_rgba_8unorm(const struct util_format_description *format_desc, + const struct util_format_test_case *test) +{ + uint8_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4]; + uint8_t packed[UTIL_FORMAT_MAX_PACKED_BYTES]; + unsigned i; + boolean success; + + if (test->format == PIPE_FORMAT_DXT1_RGBA) { + /* + * Skip S3TC as packed representation is not canonical. + * + * TODO: Do a round trip conversion. + */ + return TRUE; + } + + if (!convert_float_to_8unorm(&unpacked[0][0][0], &test->unpacked[0][0][0])) { + /* + * Skip test cases which cannot be represented by four unorm bytes. 
+ */ + return TRUE; + } + + memset(packed, 0, sizeof packed); + + format_desc->pack_rgba_8unorm(packed, 0, + &unpacked[0][0][0], sizeof unpacked[0], + format_desc->block.width, format_desc->block.height); + + success = TRUE; + for (i = 0; i < format_desc->block.bits/8; ++i) + if ((test->packed[i] & test->mask[i]) != (packed[i] & test->mask[i])) + success = FALSE; + + if (!success) { + print_packed(format_desc, "FAILED: ", packed, " obtained\n"); + print_packed(format_desc, " ", test->packed, " expected\n"); + } + + return success; +} + + +static boolean +test_format_unpack_z_float(const struct util_format_description *format_desc, + const struct util_format_test_case *test) +{ + float unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH] = { { 0 } }; + unsigned i, j; + boolean success; + + format_desc->unpack_z_float(&unpacked[0][0], sizeof unpacked[0], + test->packed, 0, + format_desc->block.width, format_desc->block.height); + + success = TRUE; + for (i = 0; i < format_desc->block.height; ++i) { + for (j = 0; j < format_desc->block.width; ++j) { + if (!compare_float(test->unpacked[i][j][0], unpacked[i][j])) { + success = FALSE; + } + } + } + + if (!success) { + print_unpacked_z_float(format_desc, "FAILED: ", unpacked, " obtained\n"); + print_unpacked_rgba_doubl(format_desc, " ", test->unpacked, " expected\n"); + } + + return success; +} + + +static boolean +test_format_pack_z_float(const struct util_format_description *format_desc, + const struct util_format_test_case *test) +{ + float unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH]; + uint8_t packed[UTIL_FORMAT_MAX_PACKED_BYTES]; + unsigned i, j; + boolean success; + + memset(packed, 0, sizeof packed); + for (i = 0; i < format_desc->block.height; ++i) { + for (j = 0; j < format_desc->block.width; ++j) { + unpacked[i][j] = (float) test->unpacked[i][j][0]; + if (test->unpacked[i][j][1]) { + return TRUE; + } + } + } + + format_desc->pack_z_float(packed, 0, + 
&unpacked[0][0], sizeof unpacked[0], + format_desc->block.width, format_desc->block.height); + + success = TRUE; + for (i = 0; i < format_desc->block.bits/8; ++i) + if ((test->packed[i] & test->mask[i]) != (packed[i] & test->mask[i])) + success = FALSE; + + if (!success) { + print_packed(format_desc, "FAILED: ", packed, " obtained\n"); + print_packed(format_desc, " ", test->packed, " expected\n"); + } + + return success; +} + + +static boolean +test_format_unpack_z_32unorm(const struct util_format_description *format_desc, + const struct util_format_test_case *test) +{ + uint32_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH] = { { 0 } }; + uint32_t expected[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH] = { { 0 } }; + unsigned i, j; + boolean success; + + format_desc->unpack_z_32unorm(&unpacked[0][0], sizeof unpacked[0], + test->packed, 0, + format_desc->block.width, format_desc->block.height); + + for (i = 0; i < format_desc->block.height; ++i) { + for (j = 0; j < format_desc->block.width; ++j) { + expected[i][j] = test->unpacked[i][j][0] * 0xffffffff; + } + } + + success = TRUE; + for (i = 0; i < format_desc->block.height; ++i) { + for (j = 0; j < format_desc->block.width; ++j) { + if (expected[i][j] != unpacked[i][j]) { + success = FALSE; + } + } + } + + if (!success) { + print_unpacked_z_32unorm(format_desc, "FAILED: ", unpacked, " obtained\n"); + print_unpacked_z_32unorm(format_desc, " ", expected, " expected\n"); + } + + return success; +} + + +static boolean +test_format_pack_z_32unorm(const struct util_format_description *format_desc, + const struct util_format_test_case *test) +{ + uint32_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH]; + uint8_t packed[UTIL_FORMAT_MAX_PACKED_BYTES]; + unsigned i, j; + boolean success; + + for (i = 0; i < format_desc->block.height; ++i) { + for (j = 0; j < format_desc->block.width; ++j) { + unpacked[i][j] = test->unpacked[i][j][0] * 0xffffffff; + if 
(test->unpacked[i][j][1]) { + return TRUE; + } + } + } + + memset(packed, 0, sizeof packed); + + format_desc->pack_z_32unorm(packed, 0, + &unpacked[0][0], sizeof unpacked[0], + format_desc->block.width, format_desc->block.height); + + success = TRUE; + for (i = 0; i < format_desc->block.bits/8; ++i) + if ((test->packed[i] & test->mask[i]) != (packed[i] & test->mask[i])) + success = FALSE; + + if (!success) { + print_packed(format_desc, "FAILED: ", packed, " obtained\n"); + print_packed(format_desc, " ", test->packed, " expected\n"); + } + + return success; +} + + +static boolean +test_format_unpack_s_8uscaled(const struct util_format_description *format_desc, + const struct util_format_test_case *test) +{ + uint8_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH] = { { 0 } }; + uint8_t expected[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH] = { { 0 } }; + unsigned i, j; + boolean success; + + format_desc->unpack_s_8uscaled(&unpacked[0][0], sizeof unpacked[0], + test->packed, 0, + format_desc->block.width, format_desc->block.height); + + for (i = 0; i < format_desc->block.height; ++i) { + for (j = 0; j < format_desc->block.width; ++j) { + expected[i][j] = test->unpacked[i][j][1]; + } + } + + success = TRUE; + for (i = 0; i < format_desc->block.height; ++i) { + for (j = 0; j < format_desc->block.width; ++j) { + if (expected[i][j] != unpacked[i][j]) { + success = FALSE; + } + } + } + + if (!success) { + print_unpacked_s_8uscaled(format_desc, "FAILED: ", unpacked, " obtained\n"); + print_unpacked_s_8uscaled(format_desc, " ", expected, " expected\n"); + } + + return success; +} + + +static boolean +test_format_pack_s_8uscaled(const struct util_format_description *format_desc, + const struct util_format_test_case *test) +{ + uint8_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH]; + uint8_t packed[UTIL_FORMAT_MAX_PACKED_BYTES]; + unsigned i, j; + boolean success; + + for (i = 0; i < 
format_desc->block.height; ++i) { + for (j = 0; j < format_desc->block.width; ++j) { + unpacked[i][j] = test->unpacked[i][j][1]; + if (test->unpacked[i][j][0]) { + return TRUE; + } + } + } + + memset(packed, 0, sizeof packed); + + format_desc->pack_s_8uscaled(packed, 0, + &unpacked[0][0], sizeof unpacked[0], + format_desc->block.width, format_desc->block.height); + + success = TRUE; + for (i = 0; i < format_desc->block.bits/8; ++i) + if ((test->packed[i] & test->mask[i]) != (packed[i] & test->mask[i])) + success = FALSE; + + if (!success) { + print_packed(format_desc, "FAILED: ", packed, " obtained\n"); + print_packed(format_desc, " ", test->packed, " expected\n"); + } + + return success; +} + + +typedef boolean +(*test_func_t)(const struct util_format_description *format_desc, + const struct util_format_test_case *test); + + +static boolean +test_one_func(const struct util_format_description *format_desc, + test_func_t func, + const char *suffix) +{ + unsigned i; + boolean success = TRUE; + + printf("Testing util_format_%s_%s ...\n", + format_desc->short_name, suffix); + fflush(stdout); + + for (i = 0; i < util_format_nr_test_cases; ++i) { + const struct util_format_test_case *test = &util_format_test_cases[i]; + + if (test->format == format_desc->format) { + if (!func(format_desc, &util_format_test_cases[i])) { + success = FALSE; + } + } + } + + return success; +} + + +static boolean +test_all(void) +{ + enum pipe_format format; + boolean success = TRUE; + + for (format = 1; format < PIPE_FORMAT_COUNT; ++format) { + const struct util_format_description *format_desc; + + format_desc = util_format_description(format); + if (!format_desc) { + continue; + } + + if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC && + !util_format_s3tc_enabled) { + continue; + } + +# define TEST_ONE_FUNC(name) \ + if (format_desc->name) { \ + if (!test_one_func(format_desc, &test_format_##name, #name)) { \ + success = FALSE; \ + } \ + } + + TEST_ONE_FUNC(fetch_rgba_float); + 
TEST_ONE_FUNC(pack_rgba_float); + TEST_ONE_FUNC(unpack_rgba_float); + TEST_ONE_FUNC(pack_rgba_8unorm); + TEST_ONE_FUNC(unpack_rgba_8unorm); + + TEST_ONE_FUNC(unpack_z_32unorm); + TEST_ONE_FUNC(pack_z_32unorm); + TEST_ONE_FUNC(unpack_z_float); + TEST_ONE_FUNC(pack_z_float); + TEST_ONE_FUNC(unpack_s_8uscaled); + TEST_ONE_FUNC(pack_s_8uscaled); + +# undef TEST_ONE_FUNC + } + + return success; +} + + +int main(int argc, char **argv) +{ + boolean success; + + util_format_s3tc_init(); + + success = test_all(); + + return success ? 0 : 1; +} diff --git a/src/gallium/winsys/r600/drm/Makefile b/src/gallium/winsys/r600/drm/Makefile index 7310734f051..fb7b09b3a0d 100644 --- a/src/gallium/winsys/r600/drm/Makefile +++ b/src/gallium/winsys/r600/drm/Makefile @@ -15,6 +15,7 @@ C_SOURCES = \ r600_bomgr.c LIBRARY_INCLUDES = -I$(TOP)/src/gallium/drivers/r600 \ + -I$(TOP)/include \ $(shell pkg-config libdrm --cflags-only-I) include ../../../Makefile.template diff --git a/src/gallium/winsys/r600/drm/evergreen_hw_context.c b/src/gallium/winsys/r600/drm/evergreen_hw_context.c index cf8ae5185b4..e4ab690c560 100644 --- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c +++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c @@ -43,31 +43,33 @@ static const struct r600_reg evergreen_config_reg_list[] = { {R_008958_VGT_PRIMITIVE_TYPE, 0, 0, 0}, {R_008A14_PA_CL_ENHANCE, 0, 0, 0}, - {R_008C00_SQ_CONFIG, 0, 0, 0}, - {R_008C04_SQ_GPR_RESOURCE_MGMT_1, 0, 0, 0}, - {R_008C08_SQ_GPR_RESOURCE_MGMT_2, 0, 0, 0}, - {R_008C0C_SQ_THREAD_RESOURCE_MGMT, 0, 0, 0}, - {R_008C18_SQ_THREAD_RESOURCE_MGMT_1, 0, 0, 0}, - {R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, 0, 0, 0}, - {R_008C20_SQ_STACK_RESOURCE_MGMT_1, 0, 0, 0}, - {R_008C24_SQ_STACK_RESOURCE_MGMT_2, 0, 0, 0}, - {R_008C28_SQ_STACK_RESOURCE_MGMT_3, 0, 0, 0}, - {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0, 0, 0}, - {R_009100_SPI_CONFIG_CNTL, 0, 0, 0}, - {R_00913C_SPI_CONFIG_CNTL_1, 0, 0, 0}, + {R_008C00_SQ_CONFIG, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + 
{R_008C04_SQ_GPR_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C08_SQ_GPR_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C0C_SQ_THREAD_RESOURCE_MGMT, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C18_SQ_THREAD_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C1C_SQ_THREAD_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C20_SQ_STACK_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C24_SQ_STACK_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C28_SQ_STACK_RESOURCE_MGMT_3, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_009100_SPI_CONFIG_CNTL, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_00913C_SPI_CONFIG_CNTL_1, REG_FLAG_ENABLE_ALWAYS, 0, 0}, }; static const struct r600_reg cayman_config_reg_list[] = { {R_008958_VGT_PRIMITIVE_TYPE, 0, 0, 0}, {R_008A14_PA_CL_ENHANCE, 0, 0, 0}, - {R_008C00_SQ_CONFIG, 0, 0, 0}, - {R_008C04_SQ_GPR_RESOURCE_MGMT_1, 0, 0, 0}, - {CM_R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, 0, 0, 0}, - {CM_R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, 0, 0, 0}, - {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0, 0, 0}, - {R_009100_SPI_CONFIG_CNTL, 0, 0, 0}, - {R_00913C_SPI_CONFIG_CNTL_1, 0, 0, 0}, + {R_008C00_SQ_CONFIG, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C04_SQ_GPR_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_009100_SPI_CONFIG_CNTL, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_00913C_SPI_CONFIG_CNTL_1, REG_FLAG_ENABLE_ALWAYS, 0, 0}, }; static const struct r600_reg evergreen_ctl_const_list[] = { @@ -125,6 +127,8 @@ static const struct r600_reg evergreen_context_reg_list[] = { {R_028244_PA_SC_GENERIC_SCISSOR_BR, 0, 0, 
0}, {R_028250_PA_SC_VPORT_SCISSOR_0_TL, 0, 0, 0}, {R_028254_PA_SC_VPORT_SCISSOR_0_BR, 0, 0, 0}, + {R_0282D0_PA_SC_VPORT_ZMIN_0, 0, 0, 0}, + {R_0282D4_PA_SC_VPORT_ZMAX_0, 0, 0, 0}, {R_028350_SX_MISC, 0, 0, 0}, {R_028380_SQ_VTX_SEMANTIC_0, 0, 0, 0}, {R_028384_SQ_VTX_SEMANTIC_1, 0, 0, 0}, @@ -158,12 +162,12 @@ static const struct r600_reg evergreen_context_reg_list[] = { {R_0283F4_SQ_VTX_SEMANTIC_29, 0, 0, 0}, {R_0283F8_SQ_VTX_SEMANTIC_30, 0, 0, 0}, {R_0283FC_SQ_VTX_SEMANTIC_31, 0, 0, 0}, - {R_0282D0_PA_SC_VPORT_ZMIN_0, 0, 0, 0}, - {R_0282D4_PA_SC_VPORT_ZMAX_0, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, {R_028400_VGT_MAX_VTX_INDX, 0, 0, 0}, {R_028404_VGT_MIN_VTX_INDX, 0, 0, 0}, {R_028408_VGT_INDX_OFFSET, 0, 0, 0}, {R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, {R_028410_SX_ALPHA_TEST_CONTROL, 0, 0, 0}, {R_028414_CB_BLEND_RED, 0, 0, 0}, {R_028418_CB_BLEND_GREEN, 0, 0, 0}, @@ -487,6 +491,8 @@ static const struct r600_reg cayman_context_reg_list[] = { {R_028244_PA_SC_GENERIC_SCISSOR_BR, 0, 0, 0}, {R_028250_PA_SC_VPORT_SCISSOR_0_TL, 0, 0, 0}, {R_028254_PA_SC_VPORT_SCISSOR_0_BR, 0, 0, 0}, + {R_0282D0_PA_SC_VPORT_ZMIN_0, 0, 0, 0}, + {R_0282D4_PA_SC_VPORT_ZMAX_0, 0, 0, 0}, {R_028350_SX_MISC, 0, 0, 0}, {R_028380_SQ_VTX_SEMANTIC_0, 0, 0, 0}, {R_028384_SQ_VTX_SEMANTIC_1, 0, 0, 0}, @@ -520,12 +526,12 @@ static const struct r600_reg cayman_context_reg_list[] = { {R_0283F4_SQ_VTX_SEMANTIC_29, 0, 0, 0}, {R_0283F8_SQ_VTX_SEMANTIC_30, 0, 0, 0}, {R_0283FC_SQ_VTX_SEMANTIC_31, 0, 0, 0}, - {R_0282D0_PA_SC_VPORT_ZMIN_0, 0, 0, 0}, - {R_0282D4_PA_SC_VPORT_ZMAX_0, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, {R_028400_VGT_MAX_VTX_INDX, 0, 0, 0}, {R_028404_VGT_MIN_VTX_INDX, 0, 0, 0}, {R_028408_VGT_INDX_OFFSET, 0, 0, 0}, {R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, 0, 0, 0}, + {GROUP_FORCE_NEW_BLOCK, 0, 0, 0}, {R_028410_SX_ALPHA_TEST_CONTROL, 0, 0, 0}, {R_028414_CB_BLEND_RED, 0, 0, 0}, {R_028418_CB_BLEND_GREEN, 0, 0, 0}, @@ -817,13 +823,13 @@ static const 
struct r600_reg cayman_context_reg_list[] = { }; /* SHADER RESOURCE R600/R700 */ -static int evergreen_state_resource_init(struct r600_context *ctx, u32 offset) +static int r600_resource_range_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride) { struct r600_reg r600_shader_resource[] = { - {R_030000_RESOURCE0_WORD0, 0, 0, 0}, - {R_030004_RESOURCE0_WORD1, 0, 0, 0}, - {R_030008_RESOURCE0_WORD2, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, - {R_03000C_RESOURCE0_WORD3, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, + {R_030000_RESOURCE0_WORD0, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, + {R_030004_RESOURCE0_WORD1, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, + {R_030008_RESOURCE0_WORD2, 0, 0, 0}, + {R_03000C_RESOURCE0_WORD3, 0, 0, 0}, {R_030010_RESOURCE0_WORD4, 0, 0, 0}, {R_030014_RESOURCE0_WORD5, 0, 0, 0}, {R_030018_RESOURCE0_WORD6, 0, 0, 0}, @@ -831,10 +837,7 @@ static int evergreen_state_resource_init(struct r600_context *ctx, u32 offset) }; unsigned nreg = Elements(r600_shader_resource); - for (int i = 0; i < nreg; i++) { - r600_shader_resource[i].offset += offset; - } - return r600_context_add_block(ctx, r600_shader_resource, nreg, PKT3_SET_RESOURCE, EVERGREEN_RESOURCE_OFFSET); + return r600_resource_init(ctx, range, offset, nblocks, stride, r600_shader_resource, nreg, EVERGREEN_RESOURCE_OFFSET); } /* SHADER SAMPLER R600/R700 */ @@ -907,6 +910,11 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) ctx->radeon = radeon; LIST_INITHEAD(&ctx->query_list); + /* init dirty list */ + LIST_INITHEAD(&ctx->dirty); + LIST_INITHEAD(&ctx->resource_dirty); + LIST_INITHEAD(&ctx->enable_list); + ctx->range = calloc(NUM_RANGES, sizeof(struct r600_range)); if (!ctx->range) { r = -ENOMEM; @@ -960,24 +968,19 @@ int 
evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) if (r) goto out_err; } - /* PS RESOURCE */ - for (int j = 0, offset = 0; j < 176; j++, offset += 0x20) { - r = evergreen_state_resource_init(ctx, offset); - if (r) - goto out_err; - } - /* VS RESOURCE */ - for (int j = 0, offset = 0x1600; j < 160; j++, offset += 0x20) { - r = evergreen_state_resource_init(ctx, offset); - if (r) - goto out_err; - } - /* FS RESOURCE */ - for (int j = 0, offset = 0x7C00; j < 16; j++, offset += 0x20) { - r = evergreen_state_resource_init(ctx, offset); - if (r) - goto out_err; - } + + ctx->num_ps_resources = 176; + ctx->num_vs_resources = 160; + ctx->num_fs_resources = 16; + r = r600_resource_range_init(ctx, &ctx->ps_resources, 0, 176, 0x20); + if (r) + goto out_err; + r = r600_resource_range_init(ctx, &ctx->vs_resources, 0x1600, 160, 0x20); + if (r) + goto out_err; + r = r600_resource_range_init(ctx, &ctx->fs_resources, 0x7C00, 16, 0x20); + if (r) + goto out_err; /* PS loop const */ evergreen_loop_const_init(ctx, 0); @@ -1015,33 +1018,31 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon) LIST_INITHEAD(&ctx->fenced_bo); - /* init dirty list */ - LIST_INITHEAD(&ctx->dirty); return 0; out_err: r600_context_fini(ctx); return r; } -void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +void evergreen_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) { - unsigned offset = R_030000_SQ_TEX_RESOURCE_WORD0_0 + 0x20 * rid; + struct r600_block *block = ctx->ps_resources.blocks[rid]; - r600_context_pipe_state_set_resource(ctx, state, offset); + r600_context_pipe_state_set_resource(ctx, state, block); } -void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +void evergreen_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct 
r600_pipe_resource_state *state, unsigned rid) { - unsigned offset = R_030000_SQ_TEX_RESOURCE_WORD0_0 + 0x1600 + 0x20 * rid; + struct r600_block *block = ctx->vs_resources.blocks[rid]; - r600_context_pipe_state_set_resource(ctx, state, offset); + r600_context_pipe_state_set_resource(ctx, state, block); } -void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +void evergreen_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) { - unsigned offset = R_030000_SQ_TEX_RESOURCE_WORD0_0 + 0x7C00 + 0x20 * rid; + struct r600_block *block = ctx->fs_resources.blocks[rid]; - r600_context_pipe_state_set_resource(ctx, state, offset); + r600_context_pipe_state_set_resource(ctx, state, block); } static inline void evergreen_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) @@ -1056,6 +1057,7 @@ static inline void evergreen_context_pipe_state_set_sampler(struct r600_context if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); LIST_DELINIT(&block->list); + LIST_DELINIT(&block->enable_list); return; } dirty = block->status & R600_BLOCK_STATUS_DIRTY; @@ -1066,8 +1068,8 @@ static inline void evergreen_context_pipe_state_set_sampler(struct r600_context block->reg[i] = state->regs[i].value; } } - - r600_context_dirty_block(ctx, block, dirty, 2); + if (dirty) + r600_context_dirty_block(ctx, block, dirty, 2); } static inline void evergreen_context_ps_partial_flush(struct r600_context *ctx) @@ -1094,6 +1096,7 @@ static inline void evergreen_context_pipe_state_set_sampler_border(struct r600_c if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); LIST_DELINIT(&block->list); + LIST_DELINIT(&block->enable_list); return; } if (state->nregs <= 3) { @@ -1119,7 +1122,8 @@ static inline void 
evergreen_context_pipe_state_set_sampler_border(struct r600_c if (dirty & R600_BLOCK_STATUS_DIRTY) evergreen_context_ps_partial_flush(ctx); - r600_context_dirty_block(ctx, block, dirty, 4); + if (dirty) + r600_context_dirty_block(ctx, block, dirty, 4); } void evergreen_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id) @@ -1146,6 +1150,7 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr unsigned ndwords = 7; struct r600_block *dirty_block = NULL; struct r600_block *next_block; + uint32_t *pm4; if (draw->indices) { ndwords = 11; @@ -1186,25 +1191,31 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr r600_context_block_emit_dirty(ctx, dirty_block); } + LIST_FOR_EACH_ENTRY_SAFE(dirty_block, next_block, &ctx->resource_dirty,list) { + r600_context_block_resource_emit_dirty(ctx, dirty_block); + } + /* draw packet */ - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances; + pm4 = &ctx->pm4[ctx->pm4_cdwords]; + pm4[0] = PKT3(PKT3_INDEX_TYPE, 0, ctx->predicate_drawing); + pm4[1] = draw->vgt_index_type; + pm4[2] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing); + pm4[3] = draw->vgt_num_instances; if (draw->indices) { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset + r600_bo_offset(draw->indices); - ctx->pm4[ctx->pm4_cdwords++] = 0; - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], draw->indices); + pm4[4] 
= PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); + pm4[5] = draw->indices_bo_offset + r600_bo_offset(draw->indices); + pm4[6] = 0; + pm4[7] = draw->vgt_num_indices; + pm4[8] = draw->vgt_draw_initiator; + pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); + pm4[10] = 0; + r600_context_bo_reloc(ctx, &pm4[10], draw->indices); } else { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; + pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); + pm4[5] = draw->vgt_num_indices; + pm4[6] = draw->vgt_draw_initiator; } + ctx->pm4_cdwords += ndwords; ctx->flags |= (R600_CONTEXT_DRAW_PENDING | R600_CONTEXT_DST_CACHES_DIRTY); diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c index 63d5f0bf9bb..d7e27e07e3b 100644 --- a/src/gallium/winsys/r600/drm/r600_bo.c +++ b/src/gallium/winsys/r600/drm/r600_bo.c @@ -193,16 +193,6 @@ void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo) free(bo); } -void r600_bo_reference(struct radeon *radeon, struct r600_bo **dst, struct r600_bo *src) -{ - struct r600_bo *old = *dst; - - if (pipe_reference(&(*dst)->reference, &src->reference)) { - r600_bo_destroy(radeon, old); - } - *dst = src; -} - boolean r600_bo_get_winsys_handle(struct radeon *radeon, struct r600_bo *bo, unsigned stride, struct winsys_handle *whandle) { diff --git a/src/gallium/winsys/r600/drm/r600_bomgr.c b/src/gallium/winsys/r600/drm/r600_bomgr.c index 446ef0f9cfc..4918d5eb0b1 100644 --- a/src/gallium/winsys/r600/drm/r600_bomgr.c +++ b/src/gallium/winsys/r600/drm/r600_bomgr.c @@ -116,7 +116,7 @@ void r600_bomgr_bo_init(struct r600_bomgr *mgr, struct r600_bo *bo) bo->manager_id = 1; } -bool r600_bomgr_bo_destroy(struct r600_bomgr *mgr, struct r600_bo *bo) +boolean r600_bomgr_bo_destroy(struct r600_bomgr *mgr, struct r600_bo *bo) { bo->start = os_time_get(); bo->end = 
bo->start + mgr->usecs; diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c index af80aa67a44..711ce18c6ca 100644 --- a/src/gallium/winsys/r600/drm/r600_hw_context.c +++ b/src/gallium/winsys/r600/drm/r600_hw_context.c @@ -79,6 +79,74 @@ static void INLINE r600_context_fence_wraparound(struct r600_context *ctx, unsig } } +static void r600_init_block(struct r600_context *ctx, + struct r600_block *block, + const struct r600_reg *reg, int index, int nreg, + unsigned opcode, unsigned offset_base) +{ + int i = index; + int j, n = nreg; + + /* initialize block */ + if (opcode == PKT3_SET_RESOURCE) { + block->flags = BLOCK_FLAG_RESOURCE; + block->status |= R600_BLOCK_STATUS_RESOURCE_DIRTY; /* dirty all blocks at start */ + } else { + block->flags = 0; + block->status |= R600_BLOCK_STATUS_DIRTY; /* dirty all blocks at start */ + } + block->start_offset = reg[i].offset; + block->pm4[block->pm4_ndwords++] = PKT3(opcode, n, 0); + block->pm4[block->pm4_ndwords++] = (block->start_offset - offset_base) >> 2; + block->reg = &block->pm4[block->pm4_ndwords]; + block->pm4_ndwords += n; + block->nreg = n; + block->nreg_dirty = n; + LIST_INITHEAD(&block->list); + LIST_INITHEAD(&block->enable_list); + + for (j = 0; j < n; j++) { + if (reg[i+j].flags & REG_FLAG_DIRTY_ALWAYS) { + block->flags |= REG_FLAG_DIRTY_ALWAYS; + } + if (reg[i+j].flags & REG_FLAG_ENABLE_ALWAYS) { + if (!(block->status & R600_BLOCK_STATUS_ENABLED)) { + block->status |= R600_BLOCK_STATUS_ENABLED; + LIST_ADDTAIL(&block->enable_list, &ctx->enable_list); + LIST_ADDTAIL(&block->list,&ctx->dirty); + } + } + + if (reg[i+j].flags & REG_FLAG_NEED_BO) { + block->nbo++; + assert(block->nbo < R600_BLOCK_MAX_BO); + block->pm4_bo_index[j] = block->nbo; + block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0, 0); + block->pm4[block->pm4_ndwords++] = 0x00000000; + if (reg[i+j].flags & REG_FLAG_RV6XX_SBU) { + block->reloc[block->nbo].flush_flags = 0; + 
block->reloc[block->nbo].flush_mask = 0; + } else { + block->reloc[block->nbo].flush_flags = reg[i+j].flush_flags; + block->reloc[block->nbo].flush_mask = reg[i+j].flush_mask; + } + block->reloc[block->nbo].bo_pm4_index = block->pm4_ndwords - 1; + } + if ((ctx->radeon->family > CHIP_R600) && + (ctx->radeon->family < CHIP_RV770) && reg[i+j].flags & REG_FLAG_RV6XX_SBU) { + block->pm4[block->pm4_ndwords++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0); + block->pm4[block->pm4_ndwords++] = reg[i+j].flush_flags; + } + } + for (j = 0; j < n; j++) { + if (reg[i+j].flush_flags) { + block->pm4_flush_ndwords += 7; + } + } + /* check that we stay in limit */ + assert(block->pm4_ndwords < R600_BLOCK_MAX_REG); +} + int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg, unsigned opcode, unsigned offset_base) { @@ -87,8 +155,6 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, int offset; for (unsigned i = 0, n = 0; i < nreg; i += n) { - u32 j; - /* ignore new block balise */ if (reg[i].offset == GROUP_FORCE_NEW_BLOCK) { n = 1; @@ -131,50 +197,8 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, range->blocks[CTX_BLOCK_ID(reg[i + j].offset)] = block; } - /* initialize block */ - block->status |= R600_BLOCK_STATUS_DIRTY; /* dirty all blocks at start */ - block->start_offset = reg[i].offset; - block->pm4[block->pm4_ndwords++] = PKT3(opcode, n, 0); - block->pm4[block->pm4_ndwords++] = (block->start_offset - offset_base) >> 2; - block->reg = &block->pm4[block->pm4_ndwords]; - block->pm4_ndwords += n; - block->nreg = n; - block->nreg_dirty = n; - block->flags = 0; - LIST_INITHEAD(&block->list); + r600_init_block(ctx, block, reg, i, n, opcode, offset_base); - for (j = 0; j < n; j++) { - if (reg[i+j].flags & REG_FLAG_DIRTY_ALWAYS) { - block->flags |= REG_FLAG_DIRTY_ALWAYS; - } - if (reg[i+j].flags & REG_FLAG_NEED_BO) { - block->nbo++; - assert(block->nbo < R600_BLOCK_MAX_BO); - 
block->pm4_bo_index[j] = block->nbo; - block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0, 0); - block->pm4[block->pm4_ndwords++] = 0x00000000; - if (reg[i+j].flags & REG_FLAG_RV6XX_SBU) { - block->reloc[block->nbo].flush_flags = 0; - block->reloc[block->nbo].flush_mask = 0; - } else { - block->reloc[block->nbo].flush_flags = reg[i+j].flush_flags; - block->reloc[block->nbo].flush_mask = reg[i+j].flush_mask; - } - block->reloc[block->nbo].bo_pm4_index = block->pm4_ndwords - 1; - } - if ((ctx->radeon->family > CHIP_R600) && - (ctx->radeon->family < CHIP_RV770) && reg[i+j].flags & REG_FLAG_RV6XX_SBU) { - block->pm4[block->pm4_ndwords++] = PKT3(PKT3_SURFACE_BASE_UPDATE, 0, 0); - block->pm4[block->pm4_ndwords++] = reg[i+j].flush_flags; - } - } - for (j = 0; j < n; j++) { - if (reg[i+j].flush_flags) { - block->pm4_flush_ndwords += 7; - } - } - /* check that we stay in limit */ - assert(block->pm4_ndwords < R600_BLOCK_MAX_REG); } return 0; } @@ -182,17 +206,17 @@ int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, /* R600/R700 configuration */ static const struct r600_reg r600_config_reg_list[] = { {R_008958_VGT_PRIMITIVE_TYPE, 0, 0, 0}, - {R_008C00_SQ_CONFIG, 0, 0, 0}, - {R_008C04_SQ_GPR_RESOURCE_MGMT_1, 0, 0, 0}, - {R_008C08_SQ_GPR_RESOURCE_MGMT_2, 0, 0, 0}, - {R_008C0C_SQ_THREAD_RESOURCE_MGMT, 0, 0, 0}, - {R_008C10_SQ_STACK_RESOURCE_MGMT_1, 0, 0, 0}, - {R_008C14_SQ_STACK_RESOURCE_MGMT_2, 0, 0, 0}, - {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0, 0, 0}, - {R_009508_TA_CNTL_AUX, 0, 0, 0}, - {R_009714_VC_ENHANCE, 0, 0, 0}, - {R_009830_DB_DEBUG, 0, 0, 0}, - {R_009838_DB_WATERMARKS, 0, 0, 0}, + {R_008C00_SQ_CONFIG, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C04_SQ_GPR_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C08_SQ_GPR_RESOURCE_MGMT_2, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C0C_SQ_THREAD_RESOURCE_MGMT, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C10_SQ_STACK_RESOURCE_MGMT_1, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008C14_SQ_STACK_RESOURCE_MGMT_2, 
REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_009508_TA_CNTL_AUX, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_009714_VC_ENHANCE, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_009830_DB_DEBUG, REG_FLAG_ENABLE_ALWAYS, 0, 0}, + {R_009838_DB_WATERMARKS, REG_FLAG_ENABLE_ALWAYS, 0, 0}, }; static const struct r600_reg r600_ctl_const_list[] = { @@ -552,23 +576,44 @@ static const struct r600_reg r600_context_reg_list[] = { }; /* SHADER RESOURCE R600/R700 */ -static int r600_state_resource_init(struct r600_context *ctx, u32 offset) +int r600_resource_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride, struct r600_reg *reg, int nreg, unsigned offset_base) +{ + int i; + struct r600_block *block; + range->blocks = calloc(nblocks, sizeof(struct r600_block *)); + if (range->blocks == NULL) + return -ENOMEM; + + reg[0].offset += offset; + for (i = 0; i < nblocks; i++) { + block = calloc(1, sizeof(struct r600_block)); + if (block == NULL) { + return -ENOMEM; + } + ctx->nblocks++; + range->blocks[i] = block; + r600_init_block(ctx, block, reg, 0, nreg, PKT3_SET_RESOURCE, offset_base); + + reg[0].offset += stride; + } + return 0; +} + + +static int r600_resource_range_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride) { struct r600_reg r600_shader_resource[] = { - {R_038000_RESOURCE0_WORD0, 0, 0, 0}, - {R_038004_RESOURCE0_WORD1, 0, 0, 0}, - {R_038008_RESOURCE0_WORD2, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, - {R_03800C_RESOURCE0_WORD3, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, + {R_038000_RESOURCE0_WORD0, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, + {R_038004_RESOURCE0_WORD1, REG_FLAG_NEED_BO, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), 0xFFFFFFFF}, + {R_038008_RESOURCE0_WORD2, 0, 0, 0}, 
+ {R_03800C_RESOURCE0_WORD3, 0, 0, 0}, {R_038010_RESOURCE0_WORD4, 0, 0, 0}, {R_038014_RESOURCE0_WORD5, 0, 0, 0}, {R_038018_RESOURCE0_WORD6, 0, 0, 0}, }; unsigned nreg = Elements(r600_shader_resource); - for (int i = 0; i < nreg; i++) { - r600_shader_resource[i].offset += offset; - } - return r600_context_add_block(ctx, r600_shader_resource, nreg, PKT3_SET_RESOURCE, R600_RESOURCE_OFFSET); + return r600_resource_init(ctx, range, offset, nblocks, stride, r600_shader_resource, nreg, R600_RESOURCE_OFFSET); } /* SHADER SAMPLER R600/R700 */ @@ -630,6 +675,22 @@ static void r600_context_clear_fenced_bo(struct r600_context *ctx) } } +static void r600_free_resource_range(struct r600_context *ctx, struct r600_range *range, int nblocks) +{ + struct r600_block *block; + int i; + for (i = 0; i < nblocks; i++) { + block = range->blocks[i]; + if (block) { + for (int k = 1; k <= block->nbo; k++) + r600_bo_reference(ctx->radeon, &block->reloc[k].bo, NULL); + free(block); + } + } + free(range->blocks); + +} + /* initialize */ void r600_context_fini(struct r600_context *ctx) { @@ -654,6 +715,9 @@ void r600_context_fini(struct r600_context *ctx) } free(ctx->range[i].blocks); } + r600_free_resource_range(ctx, &ctx->ps_resources, ctx->num_ps_resources); + r600_free_resource_range(ctx, &ctx->vs_resources, ctx->num_vs_resources); + r600_free_resource_range(ctx, &ctx->fs_resources, ctx->num_fs_resources); free(ctx->range); free(ctx->blocks); free(ctx->reloc); @@ -664,13 +728,26 @@ void r600_context_fini(struct r600_context *ctx) memset(ctx, 0, sizeof(struct r600_context)); } +static void r600_add_resource_block(struct r600_context *ctx, struct r600_range *range, int num_blocks, int *index) +{ + int c = *index; + for (int j = 0; j < num_blocks; j++) { + if (!range->blocks[j]) + continue; + + ctx->blocks[c++] = range->blocks[j]; + } + *index = c; +} + int r600_setup_block_table(struct r600_context *ctx) { /* setup block table */ + int c = 0; ctx->blocks = calloc(ctx->nblocks, sizeof(void*)); 
if (!ctx->blocks) return -ENOMEM; - for (int i = 0, c = 0; i < NUM_RANGES; i++) { + for (int i = 0; i < NUM_RANGES; i++) { if (!ctx->range[i].blocks) continue; for (int j = 0, add; j < (1 << HASH_SHIFT); j++) { @@ -691,6 +768,10 @@ int r600_setup_block_table(struct r600_context *ctx) } } } + + r600_add_resource_block(ctx, &ctx->ps_resources, ctx->num_ps_resources, &c); + r600_add_resource_block(ctx, &ctx->vs_resources, ctx->num_vs_resources, &c); + r600_add_resource_block(ctx, &ctx->fs_resources, ctx->num_fs_resources, &c); return 0; } @@ -702,6 +783,11 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) ctx->radeon = radeon; LIST_INITHEAD(&ctx->query_list); + /* init dirty list */ + LIST_INITHEAD(&ctx->dirty); + LIST_INITHEAD(&ctx->resource_dirty); + LIST_INITHEAD(&ctx->enable_list); + ctx->range = calloc(NUM_RANGES, sizeof(struct r600_range)); if (!ctx->range) { r = -ENOMEM; @@ -747,24 +833,19 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon) if (r) goto out_err; } - /* PS RESOURCE */ - for (int j = 0, offset = 0; j < 160; j++, offset += 0x1C) { - r = r600_state_resource_init(ctx, offset); - if (r) - goto out_err; - } - /* VS RESOURCE */ - for (int j = 0, offset = 0x1180; j < 160; j++, offset += 0x1C) { - r = r600_state_resource_init(ctx, offset); - if (r) - goto out_err; - } - /* FS RESOURCE */ - for (int j = 0, offset = 0x2300; j < 16; j++, offset += 0x1C) { - r = r600_state_resource_init(ctx, offset); - if (r) - goto out_err; - } + + ctx->num_ps_resources = 160; + ctx->num_vs_resources = 160; + ctx->num_fs_resources = 16; + r = r600_resource_range_init(ctx, &ctx->ps_resources, 0, 160, 0x1c); + if (r) + goto out_err; + r = r600_resource_range_init(ctx, &ctx->vs_resources, 0x1180, 160, 0x1c); + if (r) + goto out_err; + r = r600_resource_range_init(ctx, &ctx->fs_resources, 0x2300, 16, 0x1c); + if (r) + goto out_err; /* PS loop const */ r600_loop_const_init(ctx, 0); @@ -800,9 +881,6 @@ int r600_context_init(struct 
r600_context *ctx, struct radeon *radeon) LIST_INITHEAD(&ctx->fenced_bo); - /* init dirty list */ - LIST_INITHEAD(&ctx->dirty); - ctx->max_db = 4; return 0; @@ -874,16 +952,9 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, bo->last_flush = (bo->last_flush | flush_flags) & flush_mask; } -void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct r600_bo *rbo) +void r600_context_get_reloc(struct r600_context *ctx, struct r600_bo *rbo) { - struct radeon_bo *bo; - - bo = rbo->bo; - assert(bo != NULL); - if (bo->reloc) { - *pm4 = bo->reloc_id; - return; - } + struct radeon_bo *bo = rbo->bo; bo->reloc = &ctx->reloc[ctx->creloc]; bo->reloc_id = ctx->creloc * sizeof(struct r600_reloc) / 4; ctx->reloc[ctx->creloc].handle = bo->handle; @@ -893,8 +964,6 @@ void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct r600_bo *r radeon_bo_reference(ctx->radeon, &ctx->bo[ctx->creloc], bo); rbo->fence = ctx->radeon->fence; ctx->creloc++; - /* set PKT3 to point to proper reloc */ - *pm4 = bo->reloc_id; } void r600_context_reg(struct r600_context *ctx, @@ -920,20 +989,24 @@ void r600_context_reg(struct r600_context *ctx, dirty |= R600_BLOCK_STATUS_DIRTY; block->reg[id] = new_val; } - r600_context_dirty_block(ctx, block, dirty, id); + if (dirty) + r600_context_dirty_block(ctx, block, dirty, id); } -void r600_context_dirty_block(struct r600_context *ctx, struct r600_block *block, +void r600_context_dirty_block(struct r600_context *ctx, + struct r600_block *block, int dirty, int index) { - if (dirty && (index + 1) > block->nreg_dirty) + if ((index + 1) > block->nreg_dirty) block->nreg_dirty = index + 1; if ((dirty != (block->status & R600_BLOCK_STATUS_DIRTY)) || !(block->status & R600_BLOCK_STATUS_ENABLED)) { - - block->status |= R600_BLOCK_STATUS_ENABLED; block->status |= R600_BLOCK_STATUS_DIRTY; ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords; + if (!(block->status & R600_BLOCK_STATUS_ENABLED)) { + 
block->status |= R600_BLOCK_STATUS_ENABLED; + LIST_ADDTAIL(&block->enable_list, &ctx->enable_list); + } LIST_ADDTAIL(&block->list,&ctx->dirty); } } @@ -970,103 +1043,121 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat dirty |= R600_BLOCK_STATUS_DIRTY; } - r600_context_dirty_block(ctx, block, dirty, id); + if (dirty) + r600_context_dirty_block(ctx, block, dirty, id); } } -void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) +static void r600_context_dirty_resource_block(struct r600_context *ctx, + struct r600_block *block, + int dirty, int index) +{ + block->nreg_dirty = index + 1; + + if ((dirty != (block->status & R600_BLOCK_STATUS_RESOURCE_DIRTY)) || !(block->status & R600_BLOCK_STATUS_ENABLED)) { + block->status |= R600_BLOCK_STATUS_RESOURCE_DIRTY; + ctx->pm4_dirty_cdwords += block->pm4_ndwords + block->pm4_flush_ndwords; + if (!(block->status & R600_BLOCK_STATUS_ENABLED)) { + block->status |= R600_BLOCK_STATUS_ENABLED; + LIST_ADDTAIL(&block->enable_list, &ctx->enable_list); + } + LIST_ADDTAIL(&block->list,&ctx->resource_dirty); + } +} + +void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, struct r600_block *block) { - struct r600_range *range; - struct r600_block *block; - int i; int dirty; int num_regs = ctx->radeon->chip_class >= EVERGREEN ? 
8 : 7; + boolean is_vertex; - range = &ctx->range[CTX_RANGE_ID(offset)]; - block = range->blocks[CTX_BLOCK_ID(offset)]; if (state == NULL) { - block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); + block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_RESOURCE_DIRTY); if (block->reloc[1].bo) block->reloc[1].bo->bo->binding &= ~BO_BOUND_TEXTURE; r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL); - r600_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL); + r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL); LIST_DELINIT(&block->list); + LIST_DELINIT(&block->enable_list); return; } - dirty = block->status & R600_BLOCK_STATUS_DIRTY; + is_vertex = ((state->val[num_regs-1] & 0xc0000000) == 0xc0000000); + dirty = block->status & R600_BLOCK_STATUS_RESOURCE_DIRTY; - for (i = 0; i < num_regs; i++) { - if (block->reg[i] != state->regs[i].value) { - dirty |= R600_BLOCK_STATUS_DIRTY; - block->reg[i] = state->regs[i].value; - } + if (memcmp(block->reg, state->val, num_regs*4)) { + memcpy(block->reg, state->val, num_regs * 4); + dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; } /* if no BOs on block, force dirty */ if (!block->reloc[1].bo || !block->reloc[2].bo) - dirty |= R600_BLOCK_STATUS_DIRTY; + dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; if (!dirty) { - if (state->regs[0].bo) { - if ((block->reloc[1].bo->bo->handle != state->regs[0].bo->bo->handle) || - (block->reloc[2].bo->bo->handle != state->regs[0].bo->bo->handle)) - dirty |= R600_BLOCK_STATUS_DIRTY; + if (is_vertex) { + if (block->reloc[1].bo->bo->handle != state->bo[0]->bo->handle) + dirty |= R600_BLOCK_STATUS_RESOURCE_DIRTY; } else { - if ((block->reloc[1].bo->bo->handle != state->regs[2].bo->bo->handle) || - (block->reloc[2].bo->bo->handle != state->regs[3].bo->bo->handle)) - dirty |= R600_BLOCK_STATUS_DIRTY; + if ((block->reloc[1].bo->bo->handle != state->bo[0]->bo->handle) || + (block->reloc[2].bo->bo->handle != state->bo[1]->bo->handle)) + dirty |= 
R600_BLOCK_STATUS_RESOURCE_DIRTY; } } if (!dirty) { - if (state->regs[0].bo) - state->regs[0].bo->fence = ctx->radeon->fence; + if (is_vertex) + state->bo[0]->fence = ctx->radeon->fence; else { - state->regs[2].bo->fence = ctx->radeon->fence; - state->regs[3].bo->fence = ctx->radeon->fence; + state->bo[0]->fence = ctx->radeon->fence; + state->bo[1]->fence = ctx->radeon->fence; } } else { - r600_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL); - r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL); - if (state->regs[0].bo) { + if (is_vertex) { /* VERTEX RESOURCE, we preted there is 2 bo to relocate so * we have single case btw VERTEX & TEXTURE resource */ - r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[0].bo); - r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[0].bo); - state->regs[0].bo->fence = ctx->radeon->fence; + r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->bo[0]); + r600_bo_reference(ctx->radeon, &block->reloc[2].bo, NULL); + state->bo[0]->fence = ctx->radeon->fence; } else { /* TEXTURE RESOURCE */ - r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo); - r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo); - state->regs[2].bo->fence = ctx->radeon->fence; - state->regs[3].bo->fence = ctx->radeon->fence; - state->regs[2].bo->bo->binding |= BO_BOUND_TEXTURE; + r600_bo_reference(ctx->radeon, &block->reloc[1].bo, state->bo[0]); + r600_bo_reference(ctx->radeon, &block->reloc[2].bo, state->bo[1]); + state->bo[0]->fence = ctx->radeon->fence; + state->bo[1]->fence = ctx->radeon->fence; + state->bo[0]->bo->binding |= BO_BOUND_TEXTURE; } } - r600_context_dirty_block(ctx, block, dirty, num_regs - 1); + if (dirty) { + if (is_vertex) + block->status |= R600_BLOCK_STATUS_RESOURCE_VERTEX; + else + block->status &= ~R600_BLOCK_STATUS_RESOURCE_VERTEX; + + r600_context_dirty_resource_block(ctx, block, dirty, num_regs - 1); + } } -void r600_context_pipe_state_set_ps_resource(struct 
r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) { - unsigned offset = R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x1C * rid; + struct r600_block *block = ctx->ps_resources.blocks[rid]; - r600_context_pipe_state_set_resource(ctx, state, offset); + r600_context_pipe_state_set_resource(ctx, state, block); } -void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) { - unsigned offset = R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x1180 + 0x1C * rid; + struct r600_block *block = ctx->vs_resources.blocks[rid]; - r600_context_pipe_state_set_resource(ctx, state, offset); + r600_context_pipe_state_set_resource(ctx, state, block); } -void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) { - unsigned offset = R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x2300 + 0x1C * rid; + struct r600_block *block = ctx->fs_resources.blocks[rid]; - r600_context_pipe_state_set_resource(ctx, state, offset); + r600_context_pipe_state_set_resource(ctx, state, block); } static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) @@ -1081,6 +1172,7 @@ static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx, if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); LIST_DELINIT(&block->list); + LIST_DELINIT(&block->enable_list); return; } dirty = block->status & R600_BLOCK_STATUS_DIRTY; @@ -1091,7 +1183,8 @@ static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx, } 
} - r600_context_dirty_block(ctx, block, dirty, 2); + if (dirty) + r600_context_dirty_block(ctx, block, dirty, 2); } static inline void r600_context_ps_partial_flush(struct r600_context *ctx) @@ -1117,6 +1210,7 @@ static inline void r600_context_pipe_state_set_sampler_border(struct r600_contex if (state == NULL) { block->status &= ~(R600_BLOCK_STATUS_ENABLED | R600_BLOCK_STATUS_DIRTY); LIST_DELINIT(&block->list); + LIST_DELINIT(&block->enable_list); return; } if (state->nregs <= 3) { @@ -1135,8 +1229,8 @@ static inline void r600_context_pipe_state_set_sampler_border(struct r600_contex * will end up using the new border color. */ if (dirty & R600_BLOCK_STATUS_DIRTY) r600_context_ps_partial_flush(ctx); - - r600_context_dirty_block(ctx, block, dirty, 3); + if (dirty) + r600_context_dirty_block(ctx, block, dirty, 3); } void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id) @@ -1179,33 +1273,39 @@ void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block * { int id; int optional = block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS); - int cp_dwords = block->pm4_ndwords, start_dword; - int new_dwords; + int cp_dwords = block->pm4_ndwords, start_dword = 0; + int new_dwords = 0; + int nbo = block->nbo; if (block->nreg_dirty == 0 && optional) { goto out; } - optional &= (block->nreg_dirty != block->nreg); - - ctx->flags |= R600_CONTEXT_CHECK_EVENT_FLUSH; - for (int j = 0; j < block->nreg; j++) { - if (block->pm4_bo_index[j]) { - /* find relocation */ - id = block->pm4_bo_index[j]; - if (block->reloc[id].bo) { - r600_context_bo_reloc(ctx, - &block->pm4[block->reloc[id].bo_pm4_index], - block->reloc[id].bo); - r600_context_bo_flush(ctx, - block->reloc[id].flush_flags, - block->reloc[id].flush_mask, - block->reloc[id].bo); + if (nbo) { + ctx->flags |= R600_CONTEXT_CHECK_EVENT_FLUSH; + + for (int j = 0; j < block->nreg; j++) { + if (block->pm4_bo_index[j]) { + /* find relocation */ + id = 
block->pm4_bo_index[j]; + if (block->reloc[id].bo) { + r600_context_bo_reloc(ctx, + &block->pm4[block->reloc[id].bo_pm4_index], + block->reloc[id].bo); + r600_context_bo_flush(ctx, + block->reloc[id].flush_flags, + block->reloc[id].flush_mask, + block->reloc[id].bo); + } + nbo--; + if (nbo == 0) + break; } } + ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; } - ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; + optional &= (block->nreg_dirty != block->nreg); if (optional) { new_dwords = block->nreg_dirty; start_dword = ctx->pm4_cdwords; @@ -1228,6 +1328,42 @@ out: LIST_DELINIT(&block->list); } +void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r600_block *block) +{ + int id; + int cp_dwords = block->pm4_ndwords; + int nbo = block->nbo; + + ctx->flags |= R600_CONTEXT_CHECK_EVENT_FLUSH; + + if (block->status & R600_BLOCK_STATUS_RESOURCE_VERTEX) { + nbo = 1; + cp_dwords -= 2; /* don't copy the second NOP */ + } + + for (int j = 0; j < nbo; j++) { + if (block->pm4_bo_index[j]) { + /* find relocation */ + id = block->pm4_bo_index[j]; + r600_context_bo_reloc(ctx, + &block->pm4[block->reloc[id].bo_pm4_index], + block->reloc[id].bo); + r600_context_bo_flush(ctx, + block->reloc[id].flush_flags, + block->reloc[id].flush_mask, + block->reloc[id].bo); + } + } + ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; + + memcpy(&ctx->pm4[ctx->pm4_cdwords], block->pm4, cp_dwords * 4); + ctx->pm4_cdwords += cp_dwords; + + block->status ^= R600_BLOCK_STATUS_RESOURCE_DIRTY; + block->nreg_dirty = 0; + LIST_DELINIT(&block->list); +} + void r600_context_flush_dest_caches(struct r600_context *ctx) { struct r600_bo *cb[8]; @@ -1270,6 +1406,7 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) unsigned ndwords = 7; struct r600_block *dirty_block = NULL; struct r600_block *next_block; + uint32_t *pm4; if (draw->indices) { ndwords = 11; @@ -1311,25 +1448,32 @@ void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) 
r600_context_block_emit_dirty(ctx, dirty_block); } + LIST_FOR_EACH_ENTRY_SAFE(dirty_block, next_block, &ctx->resource_dirty, list) { + r600_context_block_resource_emit_dirty(ctx, dirty_block); + } + /* draw packet */ - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances; + pm4 = &ctx->pm4[ctx->pm4_cdwords]; + + pm4[0] = PKT3(PKT3_INDEX_TYPE, 0, ctx->predicate_drawing); + pm4[1] = draw->vgt_index_type; + pm4[2] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing); + pm4[3] = draw->vgt_num_instances; if (draw->indices) { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset + r600_bo_offset(draw->indices); - ctx->pm4[ctx->pm4_cdwords++] = 0; - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = 0; - r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], draw->indices); + pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); + pm4[5] = draw->indices_bo_offset + r600_bo_offset(draw->indices); + pm4[6] = 0; + pm4[7] = draw->vgt_num_indices; + pm4[8] = draw->vgt_draw_initiator; + pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); + pm4[10] = 0; + r600_context_bo_reloc(ctx, &pm4[10], draw->indices); } else { - ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; - ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; + pm4[4] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, ctx->predicate_drawing); + pm4[5] = draw->vgt_num_indices; + pm4[6] = draw->vgt_draw_initiator; } + ctx->pm4_cdwords += ndwords; ctx->flags 
|= (R600_CONTEXT_DST_CACHES_DIRTY | R600_CONTEXT_DRAW_PENDING); @@ -1344,6 +1488,7 @@ void r600_context_flush(struct r600_context *ctx) uint64_t chunk_array[2]; unsigned fence; int r; + struct r600_block *enable_block = NULL; if (!ctx->pm4_cdwords) return; @@ -1417,15 +1562,21 @@ void r600_context_flush(struct r600_context *ctx) /* set all valid group as dirty so they get reemited on * next draw command */ - for (int i = 0; i < ctx->nblocks; i++) { - if (ctx->blocks[i]->status & R600_BLOCK_STATUS_ENABLED) { - if(!(ctx->blocks[i]->status & R600_BLOCK_STATUS_DIRTY)) { - LIST_ADDTAIL(&ctx->blocks[i]->list,&ctx->dirty); + LIST_FOR_EACH_ENTRY(enable_block, &ctx->enable_list, enable_list) { + if (!(enable_block->flags & BLOCK_FLAG_RESOURCE)) { + if(!(enable_block->status & R600_BLOCK_STATUS_DIRTY)) { + LIST_ADDTAIL(&enable_block->list,&ctx->dirty); + enable_block->status |= R600_BLOCK_STATUS_DIRTY; + } + } else { + if(!(enable_block->status & R600_BLOCK_STATUS_RESOURCE_DIRTY)) { + LIST_ADDTAIL(&enable_block->list,&ctx->resource_dirty); + enable_block->status |= R600_BLOCK_STATUS_RESOURCE_DIRTY; } - ctx->pm4_dirty_cdwords += ctx->blocks[i]->pm4_ndwords + ctx->blocks[i]->pm4_flush_ndwords; - ctx->blocks[i]->status |= R600_BLOCK_STATUS_DIRTY; - ctx->blocks[i]->nreg_dirty = ctx->blocks[i]->nreg; } + ctx->pm4_dirty_cdwords += enable_block->pm4_ndwords + + enable_block->pm4_flush_ndwords; + enable_block->nreg_dirty = enable_block->nreg; } } @@ -1548,7 +1699,8 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu if (!results) return FALSE; - size = query->num_results * (query->type == PIPE_QUERY_OCCLUSION_COUNTER ? 
ctx->max_db : 1); + /* query->num_results contains how many dwords were used for the query */ + size = query->num_results; for (i = 0; i < size; i += 4) { start = (u64)results[i] | (u64)results[i + 1] << 32; end = (u64)results[i + 2] | (u64)results[i + 3] << 32; @@ -1586,7 +1738,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) } if (query->type == PIPE_QUERY_OCCLUSION_COUNTER && - num_backends > 0 && num_backends < ctx->max_db) { + num_backends > 0) { /* as per info on ZPASS the driver must set the unusued DB top bits */ u32 *results; int i; @@ -1594,7 +1746,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) results = r600_bo_map(ctx->radeon, query->buffer, PB_USAGE_DONTBLOCK | PB_USAGE_CPU_WRITE, NULL); if (results) { memset(results + (query->num_results * 4), 0, ctx->max_db * 4 * 4); - + for (i = num_backends; i < ctx->max_db; i++) { results[(i * 4)+1] = 0x80000000; results[(i * 4)+3] = 0x80000000; @@ -1602,7 +1754,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) r600_bo_unmap(ctx->radeon, query->buffer); } } - + /* emit begin query */ if (query->type == PIPE_QUERY_TIME_ELAPSED) { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h index 9be5c358f85..45bc64fcf9a 100644 --- a/src/gallium/winsys/r600/drm/r600_priv.h +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -62,10 +62,13 @@ struct radeon { pipe_mutex bo_handles_mutex; }; +/* these flags are used in register flags and added into block flags */ #define REG_FLAG_NEED_BO 1 #define REG_FLAG_DIRTY_ALWAYS 2 #define REG_FLAG_RV6XX_SBU 4 #define REG_FLAG_NOT_R600 8 +#define REG_FLAG_ENABLE_ALWAYS 16 +#define BLOCK_FLAG_RESOURCE 32 struct r600_reg { unsigned offset; @@ -94,7 +97,8 @@ struct radeon_bo { }; struct r600_bo { - struct pipe_reference reference; + struct pipe_reference reference; /* this must be the first member 
for the r600_bo_reference inline to work */ + /* DO NOT MOVE THIS ^ */ unsigned size; unsigned tiling_flags; unsigned kernel_pitch; @@ -152,14 +156,15 @@ int radeon_bo_fixed_map(struct radeon *radeon, struct radeon_bo *bo); * r600_hw_context.c */ int r600_context_init_fence(struct r600_context *ctx); -void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct r600_bo *rbo); +void r600_context_get_reloc(struct r600_context *ctx, struct r600_bo *rbo); void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, unsigned flush_mask, struct r600_bo *rbo); struct r600_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned offset); int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg, unsigned opcode, unsigned offset_base); -void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset); +void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, struct r600_block *block); void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block); +void r600_context_block_resource_emit_dirty(struct r600_context *ctx, struct r600_block *block); void r600_context_dirty_block(struct r600_context *ctx, struct r600_block *block, int dirty, int index); int r600_setup_block_table(struct r600_context *ctx); @@ -167,6 +172,21 @@ void r600_context_reg(struct r600_context *ctx, unsigned offset, unsigned value, unsigned mask); void r600_init_cs(struct r600_context *ctx); +int r600_resource_init(struct r600_context *ctx, struct r600_range *range, unsigned offset, unsigned nblocks, unsigned stride, struct r600_reg *reg, int nreg, unsigned offset_base); + +static INLINE void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct r600_bo *rbo) +{ + struct radeon_bo *bo = rbo->bo; + + assert(bo != NULL); + + if (!bo->reloc) + r600_context_get_reloc(ctx, rbo); + + /* set PKT3 to point to proper 
reloc */ + *pm4 = bo->reloc_id; +} + /* * r600_bo.c */ @@ -177,7 +197,7 @@ void r600_bo_destroy(struct radeon *radeon, struct r600_bo *bo); */ struct r600_bomgr *r600_bomgr_create(struct radeon *radeon, unsigned usecs); void r600_bomgr_destroy(struct r600_bomgr *mgr); -bool r600_bomgr_bo_destroy(struct r600_bomgr *mgr, struct r600_bo *bo); +boolean r600_bomgr_bo_destroy(struct r600_bomgr *mgr, struct r600_bo *bo); void r600_bomgr_bo_init(struct r600_bomgr *mgr, struct r600_bo *bo); struct r600_bo *r600_bomgr_bo_create(struct r600_bomgr *mgr, unsigned size, @@ -210,7 +230,7 @@ static inline void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo) /* * fence */ -static inline bool fence_is_after(unsigned fence, unsigned ofence) +static inline boolean fence_is_after(unsigned fence, unsigned ofence) { /* handle wrap around */ if (fence < 0x80000000 && ofence > 0x80000000) diff --git a/src/gallium/winsys/r600/drm/radeon_pciid.c b/src/gallium/winsys/r600/drm/radeon_pciid.c index 5c41a10bdba..f54a7c8fe72 100644 --- a/src/gallium/winsys/r600/drm/radeon_pciid.c +++ b/src/gallium/winsys/r600/drm/radeon_pciid.c @@ -33,487 +33,8 @@ struct pci_id { }; static const struct pci_id radeon_pci_id[] = { - {0x1002, 0x3150, CHIP_RV380}, - {0x1002, 0x3152, CHIP_RV380}, - {0x1002, 0x3154, CHIP_RV380}, - {0x1002, 0x3E50, CHIP_RV380}, - {0x1002, 0x3E54, CHIP_RV380}, - {0x1002, 0x4136, CHIP_RS100}, - {0x1002, 0x4137, CHIP_RS200}, - {0x1002, 0x4144, CHIP_R300}, - {0x1002, 0x4145, CHIP_R300}, - {0x1002, 0x4146, CHIP_R300}, - {0x1002, 0x4147, CHIP_R300}, - {0x1002, 0x4148, CHIP_R350}, - {0x1002, 0x4149, CHIP_R350}, - {0x1002, 0x414A, CHIP_R350}, - {0x1002, 0x414B, CHIP_R350}, - {0x1002, 0x4150, CHIP_RV350}, - {0x1002, 0x4151, CHIP_RV350}, - {0x1002, 0x4152, CHIP_RV350}, - {0x1002, 0x4153, CHIP_RV350}, - {0x1002, 0x4154, CHIP_RV350}, - {0x1002, 0x4155, CHIP_RV350}, - {0x1002, 0x4156, CHIP_RV350}, - {0x1002, 0x4237, CHIP_RS200}, - {0x1002, 0x4242, CHIP_R200}, - {0x1002, 0x4243, 
CHIP_R200}, - {0x1002, 0x4336, CHIP_RS100}, - {0x1002, 0x4337, CHIP_RS200}, - {0x1002, 0x4437, CHIP_RS200}, - {0x1002, 0x4966, CHIP_RV250}, - {0x1002, 0x4967, CHIP_RV250}, - {0x1002, 0x4A48, CHIP_R420}, - {0x1002, 0x4A49, CHIP_R420}, - {0x1002, 0x4A4A, CHIP_R420}, - {0x1002, 0x4A4B, CHIP_R420}, - {0x1002, 0x4A4C, CHIP_R420}, - {0x1002, 0x4A4D, CHIP_R420}, - {0x1002, 0x4A4E, CHIP_R420}, - {0x1002, 0x4A4F, CHIP_R420}, - {0x1002, 0x4A50, CHIP_R420}, - {0x1002, 0x4A54, CHIP_R420}, - {0x1002, 0x4B48, CHIP_R420}, - {0x1002, 0x4B49, CHIP_R420}, - {0x1002, 0x4B4A, CHIP_R420}, - {0x1002, 0x4B4B, CHIP_R420}, - {0x1002, 0x4B4C, CHIP_R420}, - {0x1002, 0x4C57, CHIP_RV200}, - {0x1002, 0x4C58, CHIP_RV200}, - {0x1002, 0x4C59, CHIP_RV100}, - {0x1002, 0x4C5A, CHIP_RV100}, - {0x1002, 0x4C64, CHIP_RV250}, - {0x1002, 0x4C66, CHIP_RV250}, - {0x1002, 0x4C67, CHIP_RV250}, - {0x1002, 0x4E44, CHIP_R300}, - {0x1002, 0x4E45, CHIP_R300}, - {0x1002, 0x4E46, CHIP_R300}, - {0x1002, 0x4E47, CHIP_R300}, - {0x1002, 0x4E48, CHIP_R350}, - {0x1002, 0x4E49, CHIP_R350}, - {0x1002, 0x4E4A, CHIP_R350}, - {0x1002, 0x4E4B, CHIP_R350}, - {0x1002, 0x4E50, CHIP_RV350}, - {0x1002, 0x4E51, CHIP_RV350}, - {0x1002, 0x4E52, CHIP_RV350}, - {0x1002, 0x4E53, CHIP_RV350}, - {0x1002, 0x4E54, CHIP_RV350}, - {0x1002, 0x4E56, CHIP_RV350}, - {0x1002, 0x5144, CHIP_R100}, - {0x1002, 0x5145, CHIP_R100}, - {0x1002, 0x5146, CHIP_R100}, - {0x1002, 0x5147, CHIP_R100}, - {0x1002, 0x5148, CHIP_R200}, - {0x1002, 0x514C, CHIP_R200}, - {0x1002, 0x514D, CHIP_R200}, - {0x1002, 0x5157, CHIP_RV200}, - {0x1002, 0x5158, CHIP_RV200}, - {0x1002, 0x5159, CHIP_RV100}, - {0x1002, 0x515A, CHIP_RV100}, - {0x1002, 0x515E, CHIP_RV100}, - {0x1002, 0x5460, CHIP_RV380}, - {0x1002, 0x5462, CHIP_RV380}, - {0x1002, 0x5464, CHIP_RV380}, - {0x1002, 0x5657, CHIP_RV380}, - {0x1002, 0x5548, CHIP_R423}, - {0x1002, 0x5549, CHIP_R423}, - {0x1002, 0x554A, CHIP_R423}, - {0x1002, 0x554B, CHIP_R423}, - {0x1002, 0x554C, CHIP_R423}, - {0x1002, 0x554D, CHIP_R423}, - 
{0x1002, 0x554E, CHIP_R423}, - {0x1002, 0x554F, CHIP_R423}, - {0x1002, 0x5550, CHIP_R423}, - {0x1002, 0x5551, CHIP_R423}, - {0x1002, 0x5552, CHIP_R423}, - {0x1002, 0x5554, CHIP_R423}, - {0x1002, 0x564A, CHIP_RV410}, - {0x1002, 0x564B, CHIP_RV410}, - {0x1002, 0x564F, CHIP_RV410}, - {0x1002, 0x5652, CHIP_RV410}, - {0x1002, 0x5653, CHIP_RV410}, - {0x1002, 0x5834, CHIP_RS300}, - {0x1002, 0x5835, CHIP_RS300}, - {0x1002, 0x5954, CHIP_RS480}, - {0x1002, 0x5955, CHIP_RS480}, - {0x1002, 0x5974, CHIP_RS480}, - {0x1002, 0x5975, CHIP_RS480}, - {0x1002, 0x5960, CHIP_RV280}, - {0x1002, 0x5961, CHIP_RV280}, - {0x1002, 0x5962, CHIP_RV280}, - {0x1002, 0x5964, CHIP_RV280}, - {0x1002, 0x5965, CHIP_RV280}, - {0x1002, 0x5969, CHIP_RV100}, - {0x1002, 0x5a41, CHIP_RS400}, - {0x1002, 0x5a42, CHIP_RS400}, - {0x1002, 0x5a61, CHIP_RS400}, - {0x1002, 0x5a62, CHIP_RS400}, - {0x1002, 0x5b60, CHIP_RV380}, - {0x1002, 0x5b62, CHIP_RV380}, - {0x1002, 0x5b63, CHIP_RV380}, - {0x1002, 0x5b64, CHIP_RV380}, - {0x1002, 0x5b65, CHIP_RV380}, - {0x1002, 0x5c61, CHIP_RV280}, - {0x1002, 0x5c63, CHIP_RV280}, - {0x1002, 0x5d48, CHIP_R423}, - {0x1002, 0x5d49, CHIP_R423}, - {0x1002, 0x5d4a, CHIP_R423}, - {0x1002, 0x5d4c, CHIP_R423}, - {0x1002, 0x5d4d, CHIP_R423}, - {0x1002, 0x5d4e, CHIP_R423}, - {0x1002, 0x5d4f, CHIP_R423}, - {0x1002, 0x5d50, CHIP_R423}, - {0x1002, 0x5d52, CHIP_R423}, - {0x1002, 0x5d57, CHIP_R423}, - {0x1002, 0x5e48, CHIP_RV410}, - {0x1002, 0x5e4a, CHIP_RV410}, - {0x1002, 0x5e4b, CHIP_RV410}, - {0x1002, 0x5e4c, CHIP_RV410}, - {0x1002, 0x5e4d, CHIP_RV410}, - {0x1002, 0x5e4f, CHIP_RV410}, - {0x1002, 0x6880, CHIP_CYPRESS}, - {0x1002, 0x6888, CHIP_CYPRESS}, - {0x1002, 0x6889, CHIP_CYPRESS}, - {0x1002, 0x688A, CHIP_CYPRESS}, - {0x1002, 0x6898, CHIP_CYPRESS}, - {0x1002, 0x6899, CHIP_CYPRESS}, - {0x1002, 0x689b, CHIP_CYPRESS}, - {0x1002, 0x689c, CHIP_HEMLOCK}, - {0x1002, 0x689d, CHIP_HEMLOCK}, - {0x1002, 0x689e, CHIP_CYPRESS}, - {0x1002, 0x68a0, CHIP_JUNIPER}, - {0x1002, 0x68a1, CHIP_JUNIPER}, - 
{0x1002, 0x68a8, CHIP_JUNIPER}, - {0x1002, 0x68a9, CHIP_JUNIPER}, - {0x1002, 0x68b0, CHIP_JUNIPER}, - {0x1002, 0x68b8, CHIP_JUNIPER}, - {0x1002, 0x68b9, CHIP_JUNIPER}, - {0x1002, 0x68ba, CHIP_JUNIPER}, - {0x1002, 0x68be, CHIP_JUNIPER}, - {0x1002, 0x68bf, CHIP_JUNIPER}, - {0x1002, 0x68c0, CHIP_REDWOOD}, - {0x1002, 0x68c1, CHIP_REDWOOD}, - {0x1002, 0x68c8, CHIP_REDWOOD}, - {0x1002, 0x68c9, CHIP_REDWOOD}, - {0x1002, 0x68d8, CHIP_REDWOOD}, - {0x1002, 0x68d9, CHIP_REDWOOD}, - {0x1002, 0x68da, CHIP_REDWOOD}, - {0x1002, 0x68de, CHIP_REDWOOD}, - {0x1002, 0x68e0, CHIP_CEDAR}, - {0x1002, 0x68e1, CHIP_CEDAR}, - {0x1002, 0x68e4, CHIP_CEDAR}, - {0x1002, 0x68e5, CHIP_CEDAR}, - {0x1002, 0x68e8, CHIP_CEDAR}, - {0x1002, 0x68e9, CHIP_CEDAR}, - {0x1002, 0x68f1, CHIP_CEDAR}, - {0x1002, 0x68f2, CHIP_CEDAR}, - {0x1002, 0x68f8, CHIP_CEDAR}, - {0x1002, 0x68f9, CHIP_CEDAR}, - {0x1002, 0x68fe, CHIP_CEDAR}, - {0x1002, 0x7100, CHIP_R520}, - {0x1002, 0x7101, CHIP_R520}, - {0x1002, 0x7102, CHIP_R520}, - {0x1002, 0x7103, CHIP_R520}, - {0x1002, 0x7104, CHIP_R520}, - {0x1002, 0x7105, CHIP_R520}, - {0x1002, 0x7106, CHIP_R520}, - {0x1002, 0x7108, CHIP_R520}, - {0x1002, 0x7109, CHIP_R520}, - {0x1002, 0x710A, CHIP_R520}, - {0x1002, 0x710B, CHIP_R520}, - {0x1002, 0x710C, CHIP_R520}, - {0x1002, 0x710E, CHIP_R520}, - {0x1002, 0x710F, CHIP_R520}, - {0x1002, 0x7140, CHIP_RV515}, - {0x1002, 0x7141, CHIP_RV515}, - {0x1002, 0x7142, CHIP_RV515}, - {0x1002, 0x7143, CHIP_RV515}, - {0x1002, 0x7144, CHIP_RV515}, - {0x1002, 0x7145, CHIP_RV515}, - {0x1002, 0x7146, CHIP_RV515}, - {0x1002, 0x7147, CHIP_RV515}, - {0x1002, 0x7149, CHIP_RV515}, - {0x1002, 0x714A, CHIP_RV515}, - {0x1002, 0x714B, CHIP_RV515}, - {0x1002, 0x714C, CHIP_RV515}, - {0x1002, 0x714D, CHIP_RV515}, - {0x1002, 0x714E, CHIP_RV515}, - {0x1002, 0x714F, CHIP_RV515}, - {0x1002, 0x7151, CHIP_RV515}, - {0x1002, 0x7152, CHIP_RV515}, - {0x1002, 0x7153, CHIP_RV515}, - {0x1002, 0x715E, CHIP_RV515}, - {0x1002, 0x715F, CHIP_RV515}, - {0x1002, 0x7180, CHIP_RV515}, 
- {0x1002, 0x7181, CHIP_RV515}, - {0x1002, 0x7183, CHIP_RV515}, - {0x1002, 0x7186, CHIP_RV515}, - {0x1002, 0x7187, CHIP_RV515}, - {0x1002, 0x7188, CHIP_RV515}, - {0x1002, 0x718A, CHIP_RV515}, - {0x1002, 0x718B, CHIP_RV515}, - {0x1002, 0x718C, CHIP_RV515}, - {0x1002, 0x718D, CHIP_RV515}, - {0x1002, 0x718F, CHIP_RV515}, - {0x1002, 0x7193, CHIP_RV515}, - {0x1002, 0x7196, CHIP_RV515}, - {0x1002, 0x719B, CHIP_RV515}, - {0x1002, 0x719F, CHIP_RV515}, - {0x1002, 0x71C0, CHIP_RV530}, - {0x1002, 0x71C1, CHIP_RV530}, - {0x1002, 0x71C2, CHIP_RV530}, - {0x1002, 0x71C3, CHIP_RV530}, - {0x1002, 0x71C4, CHIP_RV530}, - {0x1002, 0x71C5, CHIP_RV530}, - {0x1002, 0x71C6, CHIP_RV530}, - {0x1002, 0x71C7, CHIP_RV530}, - {0x1002, 0x71CD, CHIP_RV530}, - {0x1002, 0x71CE, CHIP_RV530}, - {0x1002, 0x71D2, CHIP_RV530}, - {0x1002, 0x71D4, CHIP_RV530}, - {0x1002, 0x71D5, CHIP_RV530}, - {0x1002, 0x71D6, CHIP_RV530}, - {0x1002, 0x71DA, CHIP_RV530}, - {0x1002, 0x71DE, CHIP_RV530}, - {0x1002, 0x7200, CHIP_RV515}, - {0x1002, 0x7210, CHIP_RV515}, - {0x1002, 0x7211, CHIP_RV515}, - {0x1002, 0x7240, CHIP_R580}, - {0x1002, 0x7243, CHIP_R580}, - {0x1002, 0x7244, CHIP_R580}, - {0x1002, 0x7245, CHIP_R580}, - {0x1002, 0x7246, CHIP_R580}, - {0x1002, 0x7247, CHIP_R580}, - {0x1002, 0x7248, CHIP_R580}, - {0x1002, 0x7249, CHIP_R580}, - {0x1002, 0x724A, CHIP_R580}, - {0x1002, 0x724B, CHIP_R580}, - {0x1002, 0x724C, CHIP_R580}, - {0x1002, 0x724D, CHIP_R580}, - {0x1002, 0x724E, CHIP_R580}, - {0x1002, 0x724F, CHIP_R580}, - {0x1002, 0x7280, CHIP_RV570}, - {0x1002, 0x7281, CHIP_RV560}, - {0x1002, 0x7283, CHIP_RV560}, - {0x1002, 0x7284, CHIP_R580}, - {0x1002, 0x7287, CHIP_RV560}, - {0x1002, 0x7288, CHIP_RV570}, - {0x1002, 0x7289, CHIP_RV570}, - {0x1002, 0x728B, CHIP_RV570}, - {0x1002, 0x728C, CHIP_RV570}, - {0x1002, 0x7290, CHIP_RV560}, - {0x1002, 0x7291, CHIP_RV560}, - {0x1002, 0x7293, CHIP_RV560}, - {0x1002, 0x7297, CHIP_RV560}, - {0x1002, 0x7834, CHIP_RS300}, - {0x1002, 0x7835, CHIP_RS300}, - {0x1002, 0x791e, 
CHIP_RS690}, - {0x1002, 0x791f, CHIP_RS690}, - {0x1002, 0x793f, CHIP_RS600}, - {0x1002, 0x7941, CHIP_RS600}, - {0x1002, 0x7942, CHIP_RS600}, - {0x1002, 0x796c, CHIP_RS740}, - {0x1002, 0x796d, CHIP_RS740}, - {0x1002, 0x796e, CHIP_RS740}, - {0x1002, 0x796f, CHIP_RS740}, - {0x1002, 0x9400, CHIP_R600}, - {0x1002, 0x9401, CHIP_R600}, - {0x1002, 0x9402, CHIP_R600}, - {0x1002, 0x9403, CHIP_R600}, - {0x1002, 0x9405, CHIP_R600}, - {0x1002, 0x940A, CHIP_R600}, - {0x1002, 0x940B, CHIP_R600}, - {0x1002, 0x940F, CHIP_R600}, - {0x1002, 0x94A0, CHIP_RV740}, - {0x1002, 0x94A1, CHIP_RV740}, - {0x1002, 0x94A3, CHIP_RV740}, - {0x1002, 0x94B1, CHIP_RV740}, - {0x1002, 0x94B3, CHIP_RV740}, - {0x1002, 0x94B4, CHIP_RV740}, - {0x1002, 0x94B5, CHIP_RV740}, - {0x1002, 0x94B9, CHIP_RV740}, - {0x1002, 0x9440, CHIP_RV770}, - {0x1002, 0x9441, CHIP_RV770}, - {0x1002, 0x9442, CHIP_RV770}, - {0x1002, 0x9443, CHIP_RV770}, - {0x1002, 0x9444, CHIP_RV770}, - {0x1002, 0x9446, CHIP_RV770}, - {0x1002, 0x944A, CHIP_RV770}, - {0x1002, 0x944B, CHIP_RV770}, - {0x1002, 0x944C, CHIP_RV770}, - {0x1002, 0x944E, CHIP_RV770}, - {0x1002, 0x9450, CHIP_RV770}, - {0x1002, 0x9452, CHIP_RV770}, - {0x1002, 0x9456, CHIP_RV770}, - {0x1002, 0x945A, CHIP_RV770}, - {0x1002, 0x945B, CHIP_RV770}, - {0x1002, 0x9460, CHIP_RV770}, - {0x1002, 0x9462, CHIP_RV770}, - {0x1002, 0x946A, CHIP_RV770}, - {0x1002, 0x946B, CHIP_RV770}, - {0x1002, 0x947A, CHIP_RV770}, - {0x1002, 0x947B, CHIP_RV770}, - {0x1002, 0x9480, CHIP_RV730}, - {0x1002, 0x9487, CHIP_RV730}, - {0x1002, 0x9488, CHIP_RV730}, - {0x1002, 0x9489, CHIP_RV730}, - {0x1002, 0x948F, CHIP_RV730}, - {0x1002, 0x9490, CHIP_RV730}, - {0x1002, 0x9491, CHIP_RV730}, - {0x1002, 0x9495, CHIP_RV730}, - {0x1002, 0x9498, CHIP_RV730}, - {0x1002, 0x949C, CHIP_RV730}, - {0x1002, 0x949E, CHIP_RV730}, - {0x1002, 0x949F, CHIP_RV730}, - {0x1002, 0x94C0, CHIP_RV610}, - {0x1002, 0x94C1, CHIP_RV610}, - {0x1002, 0x94C3, CHIP_RV610}, - {0x1002, 0x94C4, CHIP_RV610}, - {0x1002, 0x94C5, CHIP_RV610}, - {0x1002, 
0x94C6, CHIP_RV610}, - {0x1002, 0x94C7, CHIP_RV610}, - {0x1002, 0x94C8, CHIP_RV610}, - {0x1002, 0x94C9, CHIP_RV610}, - {0x1002, 0x94CB, CHIP_RV610}, - {0x1002, 0x94CC, CHIP_RV610}, - {0x1002, 0x94CD, CHIP_RV610}, - {0x1002, 0x9500, CHIP_RV670}, - {0x1002, 0x9501, CHIP_RV670}, - {0x1002, 0x9504, CHIP_RV670}, - {0x1002, 0x9505, CHIP_RV670}, - {0x1002, 0x9506, CHIP_RV670}, - {0x1002, 0x9507, CHIP_RV670}, - {0x1002, 0x9508, CHIP_RV670}, - {0x1002, 0x9509, CHIP_RV670}, - {0x1002, 0x950F, CHIP_RV670}, - {0x1002, 0x9511, CHIP_RV670}, - {0x1002, 0x9515, CHIP_RV670}, - {0x1002, 0x9517, CHIP_RV670}, - {0x1002, 0x9519, CHIP_RV670}, - {0x1002, 0x9540, CHIP_RV710}, - {0x1002, 0x9541, CHIP_RV710}, - {0x1002, 0x9542, CHIP_RV710}, - {0x1002, 0x954E, CHIP_RV710}, - {0x1002, 0x954F, CHIP_RV710}, - {0x1002, 0x9552, CHIP_RV710}, - {0x1002, 0x9553, CHIP_RV710}, - {0x1002, 0x9555, CHIP_RV710}, - {0x1002, 0x9557, CHIP_RV710}, - {0x1002, 0x9580, CHIP_RV630}, - {0x1002, 0x9581, CHIP_RV630}, - {0x1002, 0x9583, CHIP_RV630}, - {0x1002, 0x9586, CHIP_RV630}, - {0x1002, 0x9587, CHIP_RV630}, - {0x1002, 0x9588, CHIP_RV630}, - {0x1002, 0x9589, CHIP_RV630}, - {0x1002, 0x958A, CHIP_RV630}, - {0x1002, 0x958B, CHIP_RV630}, - {0x1002, 0x958C, CHIP_RV630}, - {0x1002, 0x958D, CHIP_RV630}, - {0x1002, 0x958E, CHIP_RV630}, - {0x1002, 0x958F, CHIP_RV630}, - {0x1002, 0x9590, CHIP_RV635}, - {0x1002, 0x9591, CHIP_RV635}, - {0x1002, 0x9593, CHIP_RV635}, - {0x1002, 0x9595, CHIP_RV635}, - {0x1002, 0x9596, CHIP_RV635}, - {0x1002, 0x9597, CHIP_RV635}, - {0x1002, 0x9598, CHIP_RV635}, - {0x1002, 0x9599, CHIP_RV635}, - {0x1002, 0x959B, CHIP_RV635}, - {0x1002, 0x95C0, CHIP_RV620}, - {0x1002, 0x95C2, CHIP_RV620}, - {0x1002, 0x95C4, CHIP_RV620}, - {0x1002, 0x95C5, CHIP_RV620}, - {0x1002, 0x95C6, CHIP_RV620}, - {0x1002, 0x95C7, CHIP_RV620}, - {0x1002, 0x95C9, CHIP_RV620}, - {0x1002, 0x95CC, CHIP_RV620}, - {0x1002, 0x95CD, CHIP_RV620}, - {0x1002, 0x95CE, CHIP_RV620}, - {0x1002, 0x95CF, CHIP_RV620}, - {0x1002, 0x9610, 
CHIP_RS780}, - {0x1002, 0x9611, CHIP_RS780}, - {0x1002, 0x9612, CHIP_RS780}, - {0x1002, 0x9613, CHIP_RS780}, - {0x1002, 0x9614, CHIP_RS780}, - {0x1002, 0x9615, CHIP_RS780}, - {0x1002, 0x9616, CHIP_RS780}, - {0x1002, 0x9640, CHIP_SUMO}, - {0x1002, 0x9641, CHIP_SUMO}, - {0x1002, 0x9642, CHIP_SUMO2}, - {0x1002, 0x9643, CHIP_SUMO2}, - {0x1002, 0x9644, CHIP_SUMO2}, - {0x1002, 0x9645, CHIP_SUMO2}, - {0x1002, 0x9647, CHIP_SUMO}, - {0x1002, 0x9648, CHIP_SUMO}, - {0x1002, 0x964a, CHIP_SUMO}, - {0x1002, 0x964e, CHIP_SUMO}, - {0x1002, 0x964f, CHIP_SUMO}, - {0x1002, 0x9710, CHIP_RS880}, - {0x1002, 0x9711, CHIP_RS880}, - {0x1002, 0x9712, CHIP_RS880}, - {0x1002, 0x9713, CHIP_RS880}, - {0x1002, 0x9714, CHIP_RS880}, - {0x1002, 0x9715, CHIP_RS880}, - {0x1002, 0x9802, CHIP_PALM}, - {0x1002, 0x9803, CHIP_PALM}, - {0x1002, 0x9804, CHIP_PALM}, - {0x1002, 0x9805, CHIP_PALM}, - {0x1002, 0x9806, CHIP_PALM}, - {0x1002, 0x9807, CHIP_PALM}, - {0x1002, 0x6700, CHIP_CAYMAN}, - {0x1002, 0x6701, CHIP_CAYMAN}, - {0x1002, 0x6702, CHIP_CAYMAN}, - {0x1002, 0x6703, CHIP_CAYMAN}, - {0x1002, 0x6704, CHIP_CAYMAN}, - {0x1002, 0x6705, CHIP_CAYMAN}, - {0x1002, 0x6706, CHIP_CAYMAN}, - {0x1002, 0x6707, CHIP_CAYMAN}, - {0x1002, 0x6708, CHIP_CAYMAN}, - {0x1002, 0x6709, CHIP_CAYMAN}, - {0x1002, 0x6718, CHIP_CAYMAN}, - {0x1002, 0x6719, CHIP_CAYMAN}, - {0x1002, 0x671C, CHIP_CAYMAN}, - {0x1002, 0x671D, CHIP_CAYMAN}, - {0x1002, 0x671F, CHIP_CAYMAN}, - {0x1002, 0x6720, CHIP_BARTS}, - {0x1002, 0x6721, CHIP_BARTS}, - {0x1002, 0x6722, CHIP_BARTS}, - {0x1002, 0x6723, CHIP_BARTS}, - {0x1002, 0x6724, CHIP_BARTS}, - {0x1002, 0x6725, CHIP_BARTS}, - {0x1002, 0x6726, CHIP_BARTS}, - {0x1002, 0x6727, CHIP_BARTS}, - {0x1002, 0x6728, CHIP_BARTS}, - {0x1002, 0x6729, CHIP_BARTS}, - {0x1002, 0x6738, CHIP_BARTS}, - {0x1002, 0x6739, CHIP_BARTS}, - {0x1002, 0x673e, CHIP_BARTS}, - {0x1002, 0x6740, CHIP_TURKS}, - {0x1002, 0x6741, CHIP_TURKS}, - {0x1002, 0x6742, CHIP_TURKS}, - {0x1002, 0x6743, CHIP_TURKS}, - {0x1002, 0x6744, CHIP_TURKS}, 
- {0x1002, 0x6745, CHIP_TURKS}, - {0x1002, 0x6746, CHIP_TURKS}, - {0x1002, 0x6747, CHIP_TURKS}, - {0x1002, 0x6748, CHIP_TURKS}, - {0x1002, 0x6749, CHIP_TURKS}, - {0x1002, 0x6750, CHIP_TURKS}, - {0x1002, 0x6758, CHIP_TURKS}, - {0x1002, 0x6759, CHIP_TURKS}, - {0x1002, 0x6760, CHIP_CAICOS}, - {0x1002, 0x6761, CHIP_CAICOS}, - {0x1002, 0x6762, CHIP_CAICOS}, - {0x1002, 0x6763, CHIP_CAICOS}, - {0x1002, 0x6764, CHIP_CAICOS}, - {0x1002, 0x6765, CHIP_CAICOS}, - {0x1002, 0x6766, CHIP_CAICOS}, - {0x1002, 0x6767, CHIP_CAICOS}, - {0x1002, 0x6768, CHIP_CAICOS}, - {0x1002, 0x6770, CHIP_CAICOS}, - {0x1002, 0x6779, CHIP_CAICOS}, +#define CHIPSET(chip, name, family) { 0x1002, chip, CHIP_##family }, +#include "pci_ids/r600_pci_ids.h" {0, 0}, }; diff --git a/src/gallium/winsys/radeon/drm/Makefile b/src/gallium/winsys/radeon/drm/Makefile index d44b7c14250..913e6ad186a 100644 --- a/src/gallium/winsys/radeon/drm/Makefile +++ b/src/gallium/winsys/radeon/drm/Makefile @@ -9,7 +9,8 @@ C_SOURCES = \ radeon_drm_cs.c \ radeon_drm_winsys.c -LIBRARY_INCLUDES = $(shell pkg-config libdrm --cflags-only-I) +LIBRARY_INCLUDES = -I$(TOP)/include \ + $(shell pkg-config libdrm --cflags-only-I) include ../../../Makefile.template diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_public.h b/src/gallium/winsys/radeon/drm/radeon_drm_public.h index 76d9dda422d..4fc62f1a400 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_public.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_public.h @@ -7,216 +7,4 @@ struct radeon_winsys; struct radeon_winsys *radeon_drm_winsys_create(int fd); -static INLINE boolean is_r3xx(int pciid) -{ - switch (pciid) { - case 0x4144: /* PCI_CHIP_R300_AD */ - case 0x4145: /* PCI_CHIP_R300_AE */ - case 0x4146: /* PCI_CHIP_R300_AF */ - case 0x4147: /* PCI_CHIP_R300_AG */ - case 0x4E44: /* PCI_CHIP_R300_ND */ - case 0x4E45: /* PCI_CHIP_R300_NE */ - case 0x4E46: /* PCI_CHIP_R300_NF */ - case 0x4E47: /* PCI_CHIP_R300_NG */ - case 0x4E48: /* PCI_CHIP_R350_NH */ - case 0x4E49: /* 
PCI_CHIP_R350_NI */ - case 0x4E4B: /* PCI_CHIP_R350_NK */ - case 0x4148: /* PCI_CHIP_R350_AH */ - case 0x4149: /* PCI_CHIP_R350_AI */ - case 0x414A: /* PCI_CHIP_R350_AJ */ - case 0x414B: /* PCI_CHIP_R350_AK */ - case 0x4E4A: /* PCI_CHIP_R360_NJ */ - case 0x4150: /* PCI_CHIP_RV350_AP */ - case 0x4151: /* PCI_CHIP_RV350_AQ */ - case 0x4152: /* PCI_CHIP_RV350_AR */ - case 0x4153: /* PCI_CHIP_RV350_AS */ - case 0x4154: /* PCI_CHIP_RV350_AT */ - case 0x4155: /* PCI_CHIP_RV350_AU */ - case 0x4156: /* PCI_CHIP_RV350_AV */ - case 0x4E50: /* PCI_CHIP_RV350_NP */ - case 0x4E51: /* PCI_CHIP_RV350_NQ */ - case 0x4E52: /* PCI_CHIP_RV350_NR */ - case 0x4E53: /* PCI_CHIP_RV350_NS */ - case 0x4E54: /* PCI_CHIP_RV350_NT */ - case 0x4E56: /* PCI_CHIP_RV350_NV */ - case 0x5460: /* PCI_CHIP_RV370_5460 */ - case 0x5462: /* PCI_CHIP_RV370_5462 */ - case 0x5464: /* PCI_CHIP_RV370_5464 */ - case 0x5B60: /* PCI_CHIP_RV370_5B60 */ - case 0x5B62: /* PCI_CHIP_RV370_5B62 */ - case 0x5B63: /* PCI_CHIP_RV370_5B63 */ - case 0x5B64: /* PCI_CHIP_RV370_5B64 */ - case 0x5B65: /* PCI_CHIP_RV370_5B65 */ - case 0x3150: /* PCI_CHIP_RV380_3150 */ - case 0x3152: /* PCI_CHIP_RV380_3152 */ - case 0x3154: /* PCI_CHIP_RV380_3154 */ - case 0x3155: /* PCI_CHIP_RV380_3155 */ - case 0x3E50: /* PCI_CHIP_RV380_3E50 */ - case 0x3E54: /* PCI_CHIP_RV380_3E54 */ - case 0x4A48: /* PCI_CHIP_R420_JH */ - case 0x4A49: /* PCI_CHIP_R420_JI */ - case 0x4A4A: /* PCI_CHIP_R420_JJ */ - case 0x4A4B: /* PCI_CHIP_R420_JK */ - case 0x4A4C: /* PCI_CHIP_R420_JL */ - case 0x4A4D: /* PCI_CHIP_R420_JM */ - case 0x4A4E: /* PCI_CHIP_R420_JN */ - case 0x4A4F: /* PCI_CHIP_R420_JO */ - case 0x4A50: /* PCI_CHIP_R420_JP */ - case 0x4A54: /* PCI_CHIP_R420_JT */ - case 0x5548: /* PCI_CHIP_R423_UH */ - case 0x5549: /* PCI_CHIP_R423_UI */ - case 0x554A: /* PCI_CHIP_R423_UJ */ - case 0x554B: /* PCI_CHIP_R423_UK */ - case 0x5550: /* PCI_CHIP_R423_5550 */ - case 0x5551: /* PCI_CHIP_R423_UQ */ - case 0x5552: /* PCI_CHIP_R423_UR */ - case 0x5554: /* 
PCI_CHIP_R423_UT */ - case 0x5D57: /* PCI_CHIP_R423_5D57 */ - case 0x554C: /* PCI_CHIP_R430_554C */ - case 0x554D: /* PCI_CHIP_R430_554D */ - case 0x554E: /* PCI_CHIP_R430_554E */ - case 0x554F: /* PCI_CHIP_R430_554F */ - case 0x5D48: /* PCI_CHIP_R430_5D48 */ - case 0x5D49: /* PCI_CHIP_R430_5D49 */ - case 0x5D4A: /* PCI_CHIP_R430_5D4A */ - case 0x5D4C: /* PCI_CHIP_R480_5D4C */ - case 0x5D4D: /* PCI_CHIP_R480_5D4D */ - case 0x5D4E: /* PCI_CHIP_R480_5D4E */ - case 0x5D4F: /* PCI_CHIP_R480_5D4F */ - case 0x5D50: /* PCI_CHIP_R480_5D50 */ - case 0x5D52: /* PCI_CHIP_R480_5D52 */ - case 0x4B49: /* PCI_CHIP_R481_4B49 */ - case 0x4B4A: /* PCI_CHIP_R481_4B4A */ - case 0x4B4B: /* PCI_CHIP_R481_4B4B */ - case 0x4B4C: /* PCI_CHIP_R481_4B4C */ - case 0x564A: /* PCI_CHIP_RV410_564A */ - case 0x564B: /* PCI_CHIP_RV410_564B */ - case 0x564F: /* PCI_CHIP_RV410_564F */ - case 0x5652: /* PCI_CHIP_RV410_5652 */ - case 0x5653: /* PCI_CHIP_RV410_5653 */ - case 0x5657: /* PCI_CHIP_RV410_5657 */ - case 0x5E48: /* PCI_CHIP_RV410_5E48 */ - case 0x5E4A: /* PCI_CHIP_RV410_5E4A */ - case 0x5E4B: /* PCI_CHIP_RV410_5E4B */ - case 0x5E4C: /* PCI_CHIP_RV410_5E4C */ - case 0x5E4D: /* PCI_CHIP_RV410_5E4D */ - case 0x5E4F: /* PCI_CHIP_RV410_5E4F */ - case 0x5A41: /* PCI_CHIP_RS400_5A41 */ - case 0x5A42: /* PCI_CHIP_RS400_5A42 */ - case 0x5A61: /* PCI_CHIP_RC410_5A61 */ - case 0x5A62: /* PCI_CHIP_RC410_5A62 */ - case 0x5954: /* PCI_CHIP_RS480_5954 */ - case 0x5955: /* PCI_CHIP_RS480_5955 */ - case 0x5974: /* PCI_CHIP_RS482_5974 */ - case 0x5975: /* PCI_CHIP_RS482_5975 */ - case 0x7100: /* PCI_CHIP_R520_7100 */ - case 0x7101: /* PCI_CHIP_R520_7101 */ - case 0x7102: /* PCI_CHIP_R520_7102 */ - case 0x7103: /* PCI_CHIP_R520_7103 */ - case 0x7104: /* PCI_CHIP_R520_7104 */ - case 0x7105: /* PCI_CHIP_R520_7105 */ - case 0x7106: /* PCI_CHIP_R520_7106 */ - case 0x7108: /* PCI_CHIP_R520_7108 */ - case 0x7109: /* PCI_CHIP_R520_7109 */ - case 0x710A: /* PCI_CHIP_R520_710A */ - case 0x710B: /* PCI_CHIP_R520_710B */ 
- case 0x710C: /* PCI_CHIP_R520_710C */ - case 0x710E: /* PCI_CHIP_R520_710E */ - case 0x710F: /* PCI_CHIP_R520_710F */ - case 0x7140: /* PCI_CHIP_RV515_7140 */ - case 0x7141: /* PCI_CHIP_RV515_7141 */ - case 0x7142: /* PCI_CHIP_RV515_7142 */ - case 0x7143: /* PCI_CHIP_RV515_7143 */ - case 0x7144: /* PCI_CHIP_RV515_7144 */ - case 0x7145: /* PCI_CHIP_RV515_7145 */ - case 0x7146: /* PCI_CHIP_RV515_7146 */ - case 0x7147: /* PCI_CHIP_RV515_7147 */ - case 0x7149: /* PCI_CHIP_RV515_7149 */ - case 0x714A: /* PCI_CHIP_RV515_714A */ - case 0x714B: /* PCI_CHIP_RV515_714B */ - case 0x714C: /* PCI_CHIP_RV515_714C */ - case 0x714D: /* PCI_CHIP_RV515_714D */ - case 0x714E: /* PCI_CHIP_RV515_714E */ - case 0x714F: /* PCI_CHIP_RV515_714F */ - case 0x7151: /* PCI_CHIP_RV515_7151 */ - case 0x7152: /* PCI_CHIP_RV515_7152 */ - case 0x7153: /* PCI_CHIP_RV515_7153 */ - case 0x715E: /* PCI_CHIP_RV515_715E */ - case 0x715F: /* PCI_CHIP_RV515_715F */ - case 0x7180: /* PCI_CHIP_RV515_7180 */ - case 0x7181: /* PCI_CHIP_RV515_7181 */ - case 0x7183: /* PCI_CHIP_RV515_7183 */ - case 0x7186: /* PCI_CHIP_RV515_7186 */ - case 0x7187: /* PCI_CHIP_RV515_7187 */ - case 0x7188: /* PCI_CHIP_RV515_7188 */ - case 0x718A: /* PCI_CHIP_RV515_718A */ - case 0x718B: /* PCI_CHIP_RV515_718B */ - case 0x718C: /* PCI_CHIP_RV515_718C */ - case 0x718D: /* PCI_CHIP_RV515_718D */ - case 0x718F: /* PCI_CHIP_RV515_718F */ - case 0x7193: /* PCI_CHIP_RV515_7193 */ - case 0x7196: /* PCI_CHIP_RV515_7196 */ - case 0x719B: /* PCI_CHIP_RV515_719B */ - case 0x719F: /* PCI_CHIP_RV515_719F */ - case 0x7200: /* PCI_CHIP_RV515_7200 */ - case 0x7210: /* PCI_CHIP_RV515_7210 */ - case 0x7211: /* PCI_CHIP_RV515_7211 */ - case 0x71C0: /* PCI_CHIP_RV530_71C0 */ - case 0x71C1: /* PCI_CHIP_RV530_71C1 */ - case 0x71C2: /* PCI_CHIP_RV530_71C2 */ - case 0x71C3: /* PCI_CHIP_RV530_71C3 */ - case 0x71C4: /* PCI_CHIP_RV530_71C4 */ - case 0x71C5: /* PCI_CHIP_RV530_71C5 */ - case 0x71C6: /* PCI_CHIP_RV530_71C6 */ - case 0x71C7: /* 
PCI_CHIP_RV530_71C7 */ - case 0x71CD: /* PCI_CHIP_RV530_71CD */ - case 0x71CE: /* PCI_CHIP_RV530_71CE */ - case 0x71D2: /* PCI_CHIP_RV530_71D2 */ - case 0x71D4: /* PCI_CHIP_RV530_71D4 */ - case 0x71D5: /* PCI_CHIP_RV530_71D5 */ - case 0x71D6: /* PCI_CHIP_RV530_71D6 */ - case 0x71DA: /* PCI_CHIP_RV530_71DA */ - case 0x71DE: /* PCI_CHIP_RV530_71DE */ - case 0x7281: /* PCI_CHIP_RV560_7281 */ - case 0x7283: /* PCI_CHIP_RV560_7283 */ - case 0x7287: /* PCI_CHIP_RV560_7287 */ - case 0x7290: /* PCI_CHIP_RV560_7290 */ - case 0x7291: /* PCI_CHIP_RV560_7291 */ - case 0x7293: /* PCI_CHIP_RV560_7293 */ - case 0x7297: /* PCI_CHIP_RV560_7297 */ - case 0x7280: /* PCI_CHIP_RV570_7280 */ - case 0x7288: /* PCI_CHIP_RV570_7288 */ - case 0x7289: /* PCI_CHIP_RV570_7289 */ - case 0x728B: /* PCI_CHIP_RV570_728B */ - case 0x728C: /* PCI_CHIP_RV570_728C */ - case 0x7240: /* PCI_CHIP_R580_7240 */ - case 0x7243: /* PCI_CHIP_R580_7243 */ - case 0x7244: /* PCI_CHIP_R580_7244 */ - case 0x7245: /* PCI_CHIP_R580_7245 */ - case 0x7246: /* PCI_CHIP_R580_7246 */ - case 0x7247: /* PCI_CHIP_R580_7247 */ - case 0x7248: /* PCI_CHIP_R580_7248 */ - case 0x7249: /* PCI_CHIP_R580_7249 */ - case 0x724A: /* PCI_CHIP_R580_724A */ - case 0x724B: /* PCI_CHIP_R580_724B */ - case 0x724C: /* PCI_CHIP_R580_724C */ - case 0x724D: /* PCI_CHIP_R580_724D */ - case 0x724E: /* PCI_CHIP_R580_724E */ - case 0x724F: /* PCI_CHIP_R580_724F */ - case 0x7284: /* PCI_CHIP_R580_7284 */ - case 0x793F: /* PCI_CHIP_RS600_793F */ - case 0x7941: /* PCI_CHIP_RS600_7941 */ - case 0x7942: /* PCI_CHIP_RS600_7942 */ - case 0x791E: /* PCI_CHIP_RS690_791E */ - case 0x791F: /* PCI_CHIP_RS690_791F */ - case 0x796C: /* PCI_CHIP_RS740_796C */ - case 0x796D: /* PCI_CHIP_RS740_796D */ - case 0x796E: /* PCI_CHIP_RS740_796E */ - case 0x796F: /* PCI_CHIP_RS740_796F */ - return TRUE; - default: - return FALSE; - } -} - #endif diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 
3ac57d25b5e..0474b381ade 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -270,8 +270,13 @@ struct radeon_winsys *radeon_drm_winsys_create(int fd) ws->fd = fd; do_ioctls(ws); - if (!is_r3xx(ws->pci_id)) { - goto fail; + switch (ws->pci_id) { +#define CHIPSET(pci_id, name, family) case pci_id: +#include "pci_ids/r300_pci_ids.h" +#undef CHIPSET + break; + default: + goto fail; } /* Create managers. */ |