diff options
Diffstat (limited to 'src')
132 files changed, 1399 insertions, 1845 deletions
diff --git a/src/compiler/Android.gen.mk b/src/compiler/Android.gen.mk index fcd5f94d459..96fc750ec64 100644 --- a/src/compiler/Android.gen.mk +++ b/src/compiler/Android.gen.mk @@ -42,6 +42,10 @@ LOCAL_EXPORT_C_INCLUDE_DIRS += \ LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \ $(NIR_GENERATED_FILES)) +# Modules using libmesa_nir must set LOCAL_GENERATED_SOURCES to this +MESA_GEN_NIR_H := $(addprefix $(call local-generated-sources-dir)/, \ + nir/nir_opcodes.h \ + nir/nir_builder_opcodes.h) nir_builder_opcodes_gen := $(LOCAL_PATH)/nir/nir_builder_opcodes_h.py nir_builder_opcodes_deps := \ diff --git a/src/compiler/glsl/Android.gen.mk b/src/compiler/glsl/Android.gen.mk index c5741b40bc5..de5cd0f474c 100644 --- a/src/compiler/glsl/Android.gen.mk +++ b/src/compiler/glsl/Android.gen.mk @@ -33,7 +33,7 @@ LOCAL_SRC_FILES := $(LOCAL_SRC_FILES) LOCAL_C_INCLUDES += \ $(intermediates)/glcpp \ - $(MESA_TOP)/src/glsl/glcpp \ + $(LOCAL_PATH)/glcpp \ LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \ $(LIBGLCPP_GENERATED_FILES) \ @@ -51,6 +51,8 @@ define glsl_local-y-to-c-and-h $(hide) $(YACC) -o $@ -p "glcpp_parser_" $< endef +YACC_HEADER_SUFFIX := .hpp + define local-yy-to-cpp-and-h @mkdir -p $(dir $@) @echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<" diff --git a/src/compiler/glsl/Android.mk b/src/compiler/glsl/Android.mk index 9cbb9a339a1..f5d96b300f0 100644 --- a/src/compiler/glsl/Android.mk +++ b/src/compiler/glsl/Android.mk @@ -36,7 +36,6 @@ include $(CLEAR_VARS) LOCAL_SRC_FILES := \ $(LIBGLCPP_FILES) \ $(LIBGLSL_FILES) \ - $(NIR_FILES) LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/mapi \ @@ -67,7 +66,7 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/gallium/include \ $(MESA_TOP)/src/gallium/auxiliary -LOCAL_STATIC_LIBRARIES := libmesa_glsl libmesa_glsl_utils libmesa_util +LOCAL_STATIC_LIBRARIES := libmesa_glsl libmesa_glsl_utils libmesa_util libmesa_compiler LOCAL_MODULE_TAGS := eng LOCAL_MODULE := glsl_compiler diff --git a/src/compiler/glsl/ast_array_index.cpp 
b/src/compiler/glsl/ast_array_index.cpp index f5baeb9ea32..69322cf111f 100644 --- a/src/compiler/glsl/ast_array_index.cpp +++ b/src/compiler/glsl/ast_array_index.cpp @@ -236,14 +236,23 @@ _mesa_ast_array_index_to_hir(void *mem_ctx, _mesa_glsl_error(&loc, state, "unsized array index must be constant"); } } else if (array->type->without_array()->is_interface() - && (array->variable_referenced()->data.mode == ir_var_uniform || - array->variable_referenced()->data.mode == ir_var_shader_storage) - && !state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) { - /* Page 50 in section 4.3.9 of the OpenGL ES 3.10 spec says: - * - * "All indices used to index a uniform or shader storage block - * array must be constant integral expressions." - */ + && ((array->variable_referenced()->data.mode == ir_var_uniform + && !state->is_version(400, 320) + && !state->ARB_gpu_shader5_enable + && !state->EXT_gpu_shader5_enable + && !state->OES_gpu_shader5_enable) || + (array->variable_referenced()->data.mode == ir_var_shader_storage + && !state->is_version(400, 0) + && !state->ARB_gpu_shader5_enable))) { + /* Page 50 in section 4.3.9 of the OpenGL ES 3.10 spec says: + * + * "All indices used to index a uniform or shader storage block + * array must be constant integral expressions." + * + * But OES_gpu_shader5 (and ESSL 3.20) relax this to allow indexing + * on uniform blocks but not shader storage blocks. + * + */ _mesa_glsl_error(&loc, state, "%s block array index must be constant", array->variable_referenced()->data.mode == ir_var_uniform ? "uniform" : "shader storage"); @@ -279,7 +288,10 @@ _mesa_ast_array_index_to_hir(void *mem_ctx, * dynamically uniform expression is undefined. 
*/ if (array->type->without_array()->is_sampler()) { - if (!state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) { + if (!state->is_version(400, 320) && + !state->ARB_gpu_shader5_enable && + !state->EXT_gpu_shader5_enable && + !state->OES_gpu_shader5_enable) { if (state->is_version(130, 300)) _mesa_glsl_error(&loc, state, "sampler arrays indexed with non-constant " diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp index 75abef6a8f9..db5ec9a4ad9 100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -231,15 +231,14 @@ _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state) static ir_expression_operation -get_conversion_operation(const glsl_type *to, const glsl_type *from, - struct _mesa_glsl_parse_state *state) +get_implicit_conversion_operation(const glsl_type *to, const glsl_type *from, + struct _mesa_glsl_parse_state *state) { switch (to->base_type) { case GLSL_TYPE_FLOAT: switch (from->base_type) { case GLSL_TYPE_INT: return ir_unop_i2f; case GLSL_TYPE_UINT: return ir_unop_u2f; - case GLSL_TYPE_DOUBLE: return ir_unop_d2f; default: return (ir_expression_operation)0; } @@ -311,7 +310,7 @@ apply_implicit_conversion(const glsl_type *to, ir_rvalue * &from, to = glsl_type::get_instance(to->base_type, from->type->vector_elements, from->type->matrix_columns); - ir_expression_operation op = get_conversion_operation(to, from->type, state); + ir_expression_operation op = get_implicit_conversion_operation(to, from->type, state); if (op) { from = new(ctx) ir_expression(op, to, from, NULL); return true; diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp index bbb237a102c..0a0dcc68a05 100644 --- a/src/compiler/glsl/builtin_functions.cpp +++ b/src/compiler/glsl/builtin_functions.cpp @@ -149,12 +149,6 @@ v400_fs_only(const _mesa_glsl_parse_state *state) } static bool -es31(const _mesa_glsl_parse_state *state) -{ - return state->is_version(0, 
310); -} - -static bool texture_rectangle(const _mesa_glsl_parse_state *state) { return state->ARB_texture_rectangle_enable; @@ -239,6 +233,21 @@ gpu_shader5(const _mesa_glsl_parse_state *state) } static bool +gpu_shader5_es(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 320) || + state->ARB_gpu_shader5_enable || + state->EXT_gpu_shader5_enable || + state->OES_gpu_shader5_enable; +} + +static bool +es31_not_gs5(const _mesa_glsl_parse_state *state) +{ + return state->is_version(0, 310) && !gpu_shader5_es(state); +} + +static bool gpu_shader5_or_es31(const _mesa_glsl_parse_state *state) { return state->is_version(400, 310) || state->ARB_gpu_shader5_enable; @@ -360,8 +369,10 @@ texture_gather_or_es31(const _mesa_glsl_parse_state *state) static bool texture_gather_only_or_es31(const _mesa_glsl_parse_state *state) { - return !state->is_version(400, 0) && + return !state->is_version(400, 320) && !state->ARB_gpu_shader5_enable && + !state->EXT_gpu_shader5_enable && + !state->OES_gpu_shader5_enable && (state->ARB_texture_gather_enable || state->is_version(0, 310)); } @@ -1070,10 +1081,10 @@ builtin_builder::create_builtins() #define FDGS5(NAME) \ add_function(#NAME, \ - _##NAME(gpu_shader5, glsl_type::float_type), \ - _##NAME(gpu_shader5, glsl_type::vec2_type), \ - _##NAME(gpu_shader5, glsl_type::vec3_type), \ - _##NAME(gpu_shader5, glsl_type::vec4_type), \ + _##NAME(gpu_shader5_es, glsl_type::float_type), \ + _##NAME(gpu_shader5_es, glsl_type::vec2_type), \ + _##NAME(gpu_shader5_es, glsl_type::vec3_type), \ + _##NAME(gpu_shader5_es, glsl_type::vec4_type), \ _##NAME(fp64, glsl_type::double_type), \ _##NAME(fp64, glsl_type::dvec2_type), \ _##NAME(fp64, glsl_type::dvec3_type), \ @@ -2534,62 +2545,62 @@ builtin_builder::create_builtins() _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::uvec4_type, 
glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), - _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT), - _texture(ir_tg4, es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT), - _texture(ir_tg4, es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31_not_gs5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31_not_gs5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31_not_gs5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT), - _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT), - _texture(ir_tg4, es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT), - _texture(ir_tg4, es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31_not_gs5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31_not_gs5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31_not_gs5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), - _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), - _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, 
glsl_type::vec2_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5_es, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5_es, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5_es, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), - _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), - _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5_es, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5_es, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5_es, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, 
glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_es, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_es, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_es, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_es, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_es, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_es, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), - _texture(ir_tg4, gpu_shader5, 
glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5_es, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5_es, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), - _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, TEX_OFFSET), - _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tg4, es31_not_gs5, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tg4, es31_not_gs5, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET), NULL); add_function("textureGatherOffsets", - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), - _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), - _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5_es, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5_es, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5_es, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, 
glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_es, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_es, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_es, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY), - _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY), - _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5_es, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5_es, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5_es, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_es, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY | 
TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_es, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_es, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), @@ -2599,8 +2610,8 @@ builtin_builder::create_builtins() _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), - _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5_es, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5_es, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY), _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), NULL); diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y index b03e1910758..5c38f86d333 100644 --- a/src/compiler/glsl/glcpp/glcpp-parse.y +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -2398,6 +2398,10 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio add_builtin_define(parser, "GL_OES_geometry_point_size", 1); add_builtin_define(parser, 
"GL_OES_geometry_shader", 1); } + if (extensions->ARB_gpu_shader5) { + add_builtin_define(parser, "GL_EXT_gpu_shader5", 1); + add_builtin_define(parser, "GL_OES_gpu_shader5", 1); + } } } } else { diff --git a/src/compiler/glsl/glsl_lexer.ll b/src/compiler/glsl/glsl_lexer.ll index 9704fc7ac4f..1f122654340 100644 --- a/src/compiler/glsl/glsl_lexer.ll +++ b/src/compiler/glsl/glsl_lexer.ll @@ -361,7 +361,7 @@ samplerExternalOES { } /* keywords available with ARB_gpu_shader5 */ -precise KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_gpu_shader5_enable, PRECISE); +precise KEYWORD_WITH_ALT(400, 310, 400, 320, yyextra->ARB_gpu_shader5_enable || yyextra->EXT_gpu_shader5_enable || yyextra->OES_gpu_shader5_enable, PRECISE); /* keywords available with ARB_shader_image_load_store */ image1D KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE1D); diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp index 8ccbefc3f71..fe8b3bb2e79 100644 --- a/src/compiler/glsl/glsl_parser_extras.cpp +++ b/src/compiler/glsl/glsl_parser_extras.cpp @@ -608,6 +608,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(OES_EGL_image_external, false, true, OES_EGL_image_external), EXT(OES_geometry_point_size, false, true, OES_geometry_shader), EXT(OES_geometry_shader, false, true, OES_geometry_shader), + EXT(OES_gpu_shader5, false, true, ARB_gpu_shader5), EXT(OES_shader_image_atomic, false, true, ARB_shader_image_load_store), EXT(OES_standard_derivatives, false, true, OES_standard_derivatives), EXT(OES_texture_3D, false, true, dummy_true), @@ -622,6 +623,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(AMD_vertex_shader_viewport_index, true, false, AMD_vertex_shader_viewport_index), EXT(EXT_blend_func_extended, false, true, ARB_blend_func_extended), EXT(EXT_draw_buffers, false, true, dummy_true), + EXT(EXT_gpu_shader5, false, true, ARB_gpu_shader5), 
EXT(EXT_separate_shader_objects, false, true, dummy_true), EXT(EXT_shader_integer_mix, true, true, EXT_shader_integer_mix), EXT(EXT_shader_samples_identical, true, true, EXT_shader_samples_identical), diff --git a/src/compiler/glsl/glsl_parser_extras.h b/src/compiler/glsl/glsl_parser_extras.h index 86ec057f288..74825a0bd35 100644 --- a/src/compiler/glsl/glsl_parser_extras.h +++ b/src/compiler/glsl/glsl_parser_extras.h @@ -593,6 +593,8 @@ struct _mesa_glsl_parse_state { bool OES_geometry_point_size_warn; bool OES_geometry_shader_enable; bool OES_geometry_shader_warn; + bool OES_gpu_shader5_enable; + bool OES_gpu_shader5_warn; bool OES_shader_image_atomic_enable; bool OES_shader_image_atomic_warn; bool OES_standard_derivatives_enable; @@ -618,6 +620,8 @@ struct _mesa_glsl_parse_state { bool EXT_blend_func_extended_warn; bool EXT_draw_buffers_enable; bool EXT_draw_buffers_warn; + bool EXT_gpu_shader5_enable; + bool EXT_gpu_shader5_warn; bool EXT_separate_shader_objects_enable; bool EXT_separate_shader_objects_warn; bool EXT_shader_integer_mix_enable; diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h index 93c893d36fe..f6ed16de0c3 100644 --- a/src/compiler/glsl/ir.h +++ b/src/compiler/glsl/ir.h @@ -1686,6 +1686,7 @@ public: operation == ir_binop_dot || operation == ir_binop_vector_extract || operation == ir_triop_vector_insert || + operation == ir_binop_ubo_load || operation == ir_quadop_vector; } diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp index 5326bfd4d68..3039232162a 100644 --- a/src/compiler/glsl/linker.cpp +++ b/src/compiler/glsl/linker.cpp @@ -3180,7 +3180,6 @@ check_explicit_uniform_locations(struct gl_context *ctx, } } - exec_list_make_empty(&prog->EmptyUniformLocations); struct empty_uniform_block *current_block = NULL; for (unsigned i = 0; i < prog->NumUniformRemapTable; i++) { diff --git a/src/compiler/glsl/loop_unroll.cpp b/src/compiler/glsl/loop_unroll.cpp index aea2743cdb1..bc377dff3b9 100644 --- 
a/src/compiler/glsl/loop_unroll.cpp +++ b/src/compiler/glsl/loop_unroll.cpp @@ -315,11 +315,33 @@ loop_unroll_visitor::visit_leave(ir_loop *ir) return visit_continue; } - /* Don't try to unroll loops where the number of iterations is not known - * at compile-time. - */ - if (ls->limiting_terminator == NULL) + if (ls->limiting_terminator == NULL) { + ir_instruction *last_ir = + (ir_instruction *) ir->body_instructions.get_tail(); + + /* If a loop has no induction variable and the last instruction is + * a break, unroll the loop with a count of 1. This is the classic + * + * do { + * // ... + * } while (false) + * + * that is used to wrap multi-line macros. + * + * If num_loop_jumps is not zero, last_ir cannot be NULL... there has to + * be at least num_loop_jumps instructions in the loop. + */ + if (ls->num_loop_jumps == 1 && is_break(last_ir)) { + last_ir->remove(); + + simple_unroll(ir, 1); + } + + /* Don't try to unroll loops where the number of iterations is not known + * at compile-time. 
+ */ return visit_continue; + } iterations = ls->limiting_terminator->iterations; diff --git a/src/egl/Android.mk b/src/egl/Android.mk index cf7125145ca..51a9202d9cb 100644 --- a/src/egl/Android.mk +++ b/src/egl/Android.mk @@ -44,10 +44,10 @@ LOCAL_CFLAGS := \ -DHAVE_ANDROID_PLATFORM ifeq ($(MESA_LOLLIPOP_BUILD),true) -LOCAL_CFLAGS_32 := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\" -LOCAL_CFLAGS_64 := -DDEFAULT_DRIVER_DIR=\"/system/lib64/dri\" +LOCAL_CFLAGS_32 := -DDEFAULT_DRIVER_DIR=\"/system/lib/$(MESA_DRI_MODULE_REL_PATH)\" +LOCAL_CFLAGS_64 := -DDEFAULT_DRIVER_DIR=\"/system/lib64/$(MESA_DRI_MODULE_REL_PATH)\" else -LOCAL_CFLAGS += -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\" +LOCAL_CFLAGS += -DDEFAULT_DRIVER_DIR=\"/system/lib/$(MESA_DRI_MODULE_REL_PATH)\" endif LOCAL_C_INCLUDES := \ diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c index c2438f7509b..341acb7ed1b 100644 --- a/src/egl/drivers/dri2/platform_wayland.c +++ b/src/egl/drivers/dri2/platform_wayland.c @@ -653,6 +653,37 @@ create_wl_buffer(struct dri2_egl_surface *dri2_surf) &wl_buffer_listener, dri2_surf); } +static EGLBoolean +try_damage_buffer(struct dri2_egl_surface *dri2_surf, + const EGLint *rects, + EGLint n_rects) +{ +/* The WL_SURFACE_DAMAGE_BUFFER_SINCE_VERSION macro and + * wl_proxy_get_version() were both introduced in wayland 1.10. + * Instead of bumping our wayland dependency we just make this + * function conditional on the required 1.10 features, falling + * back to old (correct but suboptimal) behaviour for older + * wayland. 
+ */ +#ifdef WL_SURFACE_DAMAGE_BUFFER_SINCE_VERSION + int i; + + if (wl_proxy_get_version((struct wl_proxy *) dri2_surf->wl_win->surface) + < WL_SURFACE_DAMAGE_BUFFER_SINCE_VERSION) + return EGL_FALSE; + + for (i = 0; i < n_rects; i++) { + const int *rect = &rects[i * 4]; + + wl_surface_damage_buffer(dri2_surf->wl_win->surface, + rect[0], + dri2_surf->base.Height - rect[1] - rect[3], + rect[2], rect[3]); + } + return EGL_TRUE; +#endif + return EGL_FALSE; +} /** * Called via eglSwapBuffers(), drv->API.SwapBuffers(). */ @@ -703,10 +734,12 @@ dri2_wl_swap_buffers_with_damage(_EGLDriver *drv, dri2_surf->dx = 0; dri2_surf->dy = 0; - /* We deliberately ignore the damage region and post maximum damage, due to + /* If the compositor doesn't support damage_buffer, we deliberately + * ignore the damage region and post maximum damage, due to * https://bugs.freedesktop.org/78190 */ - wl_surface_damage(dri2_surf->wl_win->surface, - 0, 0, INT32_MAX, INT32_MAX); + if (!n_rects || !try_damage_buffer(dri2_surf, rects, n_rects)) + wl_surface_damage(dri2_surf->wl_win->surface, + 0, 0, INT32_MAX, INT32_MAX); if (dri2_dpy->is_different_gpu) { _EGLContext *ctx = _eglGetCurrentContext(); diff --git a/src/gallium/auxiliary/Android.mk b/src/gallium/auxiliary/Android.mk index 86430eb6a21..f5b5a0cccce 100644 --- a/src/gallium/auxiliary/Android.mk +++ b/src/gallium/auxiliary/Android.mk @@ -44,9 +44,9 @@ LOCAL_SRC_FILES += \ LOCAL_CPPFLAGS := -std=c++11 endif -# We need libmesa_glsl to get NIR's generated include directories. +# We need libmesa_nir to get NIR's generated include directories. 
LOCAL_MODULE := libmesa_gallium -LOCAL_STATIC_LIBRARIES += libmesa_glsl +LOCAL_STATIC_LIBRARIES += libmesa_nir # generate sources LOCAL_MODULE_CLASS := STATIC_LIBRARIES @@ -64,5 +64,7 @@ $(intermediates)/util/u_format_srgb.c: $(intermediates)/%.c: $(LOCAL_PATH)/%.py $(intermediates)/util/u_format_table.c: $(intermediates)/%.c: $(LOCAL_PATH)/%.py $(LOCAL_PATH)/util/u_format.csv $(transform-generated-source) +LOCAL_GENERATED_SOURCES += $(MESA_GEN_NIR_H) + include $(GALLIUM_COMMON_MK) include $(BUILD_STATIC_LIBRARY) diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 4f85d2fda67..8e24cc626bd 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -43,6 +43,15 @@ #include "tgsi/tgsi_scan.h" +static bool +is_memory_file(unsigned file) +{ + return file == TGSI_FILE_SAMPLER || + file == TGSI_FILE_SAMPLER_VIEW || + file == TGSI_FILE_IMAGE || + file == TGSI_FILE_BUFFER; +} + static void scan_instruction(struct tgsi_shader_info *info, @@ -50,6 +59,7 @@ scan_instruction(struct tgsi_shader_info *info, unsigned *current_depth) { unsigned i; + bool is_mem_inst = false; assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST); info->opcode_count[fullinst->Instruction.Opcode]++; @@ -181,6 +191,9 @@ scan_instruction(struct tgsi_shader_info *info, info->is_msaa_sampler[src->Register.Index] = TRUE; } } + + if (is_memory_file(src->Register.File)) + is_mem_inst = true; } /* check for indirect register writes */ @@ -190,8 +203,14 @@ scan_instruction(struct tgsi_shader_info *info, info->indirect_files |= (1 << dst->Register.File); info->indirect_files_written |= (1 << dst->Register.File); } + + if (is_memory_file(dst->Register.File)) + is_mem_inst = true; } + if (is_mem_inst) + info->num_memory_instructions++; + info->num_instructions++; } diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index 0541255764c..d65dec71888 100644 --- 
a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -74,6 +74,7 @@ struct tgsi_shader_info uint immediate_count; /**< number of immediates declared */ uint num_instructions; + uint num_memory_instructions; /**< sampler, buffer, and image instructions */ uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */ diff --git a/src/gallium/drivers/freedreno/Android.mk b/src/gallium/drivers/freedreno/Android.mk index ed51835e1fb..d25adb467e5 100644 --- a/src/gallium/drivers/freedreno/Android.mk +++ b/src/gallium/drivers/freedreno/Android.mk @@ -32,14 +32,16 @@ LOCAL_SRC_FILES := \ $(a4xx_SOURCES) \ $(ir3_SOURCES) -LOCAL_CFLAGS := \ - -Wno-packed-bitfield-compat +#LOCAL_CFLAGS := \ +# -Wno-packed-bitfield-compat LOCAL_C_INCLUDES := \ $(LOCAL_PATH)/ir3 +LOCAL_GENERATED_SOURCES := $(MESA_GEN_NIR_H) + LOCAL_SHARED_LIBRARIES := libdrm libdrm_freedreno -LOCAL_STATIC_LIBRARIES := libmesa_glsl +LOCAL_STATIC_LIBRARIES := libmesa_glsl libmesa_nir LOCAL_MODULE := libmesa_pipe_freedreno include $(GALLIUM_COMMON_MK) diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources index 43ffce63a25..65f08c7d8d8 100644 --- a/src/gallium/drivers/nouveau/Makefile.sources +++ b/src/gallium/drivers/nouveau/Makefile.sources @@ -150,7 +150,6 @@ NVC0_C_SOURCES := \ nvc0/gm107_texture.xml.h \ nvc0/nvc0_3d.xml.h \ nvc0/nvc0_compute.c \ - nvc0/nvc0_compute.h \ nvc0/nvc0_compute.xml.h \ nvc0/nvc0_context.c \ nvc0/nvc0_context.h \ diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index 97ebed455b6..7b0eb2f95b8 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -161,6 +161,7 @@ enum operation OP_VSEL, OP_CCTL, // cache control OP_SHFL, // warp shuffle + OP_VOTE, OP_LAST }; @@ -244,6 +245,9 @@ enum operation #define NV50_IR_SUBOP_V2(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x4000) #define 
NV50_IR_SUBOP_V4(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x8000) #define NV50_IR_SUBOP_Vn(n) ((n) >> 14) +#define NV50_IR_SUBOP_VOTE_ALL 0 +#define NV50_IR_SUBOP_VOTE_ANY 1 +#define NV50_IR_SUBOP_VOTE_UNI 2 enum DataType { diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp index a78b3f954a4..b6b3ec7b948 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp @@ -128,6 +128,8 @@ private: void emitFlow(const Instruction *); + void emitVOTE(const Instruction *); + inline void defId(const ValueDef&, const int pos); inline void srcId(const ValueRef&, const int pos); inline void srcId(const ValueRef *, const int pos); @@ -1371,6 +1373,24 @@ CodeEmitterGK110::emitFlow(const Instruction *i) } void +CodeEmitterGK110::emitVOTE(const Instruction *i) +{ + assert(i->src(0).getFile() == FILE_PREDICATE && + i->def(1).getFile() == FILE_PREDICATE); + + code[0] = 0x00000002; + code[1] = 0x86c00000 | (i->subOp << 19); + + emitPredicate(i); + + defId(i->def(0), 2); + defId(i->def(1), 48); + if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) + code[0] |= 1 << 45; + srcId(i->src(0), 42); +} + +void CodeEmitterGK110::emitAFETCH(const Instruction *i) { uint32_t offset = i->src(0).get()->reg.data.offset & 0x7ff; @@ -1597,7 +1617,13 @@ CodeEmitterGK110::emitSTORE(const Instruction *i) switch (i->src(0).getFile()) { case FILE_MEMORY_GLOBAL: code[1] = 0xe0000000; code[0] = 0x00000000; break; case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; break; - case FILE_MEMORY_SHARED: code[1] = 0x7ac00000; code[0] = 0x00000002; break; + case FILE_MEMORY_SHARED: + code[0] = 0x00000002; + if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) + code[1] = 0x78400000; + else + code[1] = 0x7ac00000; + break; default: assert(!"invalid memory file"); break; @@ -1617,6 +1643,13 @@ CodeEmitterGK110::emitSTORE(const Instruction *i) code[0] |= 
offset << 23; code[1] |= offset >> 9; + // Unlocked store on shared memory can fail. + if (i->src(0).getFile() == FILE_MEMORY_SHARED && + i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) { + assert(i->defExists(0)); + defId(i->def(0), 32 + 16); + } + emitPredicate(i); srcId(i->src(1), 2); @@ -1635,7 +1668,13 @@ CodeEmitterGK110::emitLOAD(const Instruction *i) switch (i->src(0).getFile()) { case FILE_MEMORY_GLOBAL: code[1] = 0xc0000000; code[0] = 0x00000000; break; case FILE_MEMORY_LOCAL: code[1] = 0x7a000000; code[0] = 0x00000002; break; - case FILE_MEMORY_SHARED: code[1] = 0x7a400000; code[0] = 0x00000002; break; + case FILE_MEMORY_SHARED: + code[0] = 0x00000002; + if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) + code[1] = 0x77400000; + else + code[1] = 0x7a400000; + break; case FILE_MEMORY_CONST: if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) { emitMOV(i); @@ -1663,6 +1702,13 @@ CodeEmitterGK110::emitLOAD(const Instruction *i) code[0] |= offset << 23; code[1] |= offset >> 9; + // Locked load on shared memory can fail. 
+ if (i->src(0).getFile() == FILE_MEMORY_SHARED && + i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) { + assert(i->defExists(1)); + defId(i->def(1), 32 + 16); + } + emitPredicate(i); defId(i->def(0), 2); @@ -2054,6 +2100,9 @@ CodeEmitterGK110::emitInstruction(Instruction *insn) case OP_CCTL: emitCCTL(insn); break; + case OP_VOTE: + emitVOTE(insn); + break; case OP_PHI: case OP_UNION: case OP_CONSTRAINT: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp index 93c40d15e46..a383c53fcd3 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp @@ -195,6 +195,8 @@ private: void emitOUT(); void emitMEMBAR(); + + void emitVOTE(); }; /******************************************************************************* @@ -2653,6 +2655,30 @@ CodeEmitterGM107::emitMEMBAR() emitField(0x08, 2, insn->subOp >> 2); } +void +CodeEmitterGM107::emitVOTE() +{ + int subOp; + + assert(insn->src(0).getFile() == FILE_PREDICATE && + insn->def(1).getFile() == FILE_PREDICATE); + + switch (insn->subOp) { + case NV50_IR_SUBOP_VOTE_ANY: subOp = 1; break; + default: + assert(insn->subOp == NV50_IR_SUBOP_VOTE_ALL); + subOp = 0; + break; + } + + emitInsn (0x50d80000); + emitField(0x30, 2, subOp); + emitGPR (0x00, insn->def(0)); + emitPRED (0x2d, insn->def(1)); + emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT)); + emitPRED (0x27, insn->src(0)); +} + /******************************************************************************* * assembler front-end ******************************************************************************/ @@ -2955,6 +2981,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i) case OP_MEMBAR: emitMEMBAR(); break; + case OP_VOTE: + emitVOTE(); + break; default: assert(!"invalid opcode"); emitNOP(); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp index 0068da5cbb7..7bd7c732c49 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp @@ -142,6 +142,8 @@ private: void emitPIXLD(const Instruction *); + void emitVOTE(const Instruction *); + inline void defId(const ValueDef&, const int pos); inline void defId(const Instruction *, int d, const int pos); inline void srcId(const ValueRef&, const int pos); @@ -2334,6 +2336,24 @@ CodeEmitterNVC0::emitPIXLD(const Instruction *i) code[1] |= 0x00e00000; } +void +CodeEmitterNVC0::emitVOTE(const Instruction *i) +{ + assert(i->src(0).getFile() == FILE_PREDICATE && + i->def(1).getFile() == FILE_PREDICATE); + + code[0] = 0x00000004 | (i->subOp << 5); + code[1] = 0x48000000; + + emitPredicate(i); + + defId(i->def(0), 14); + defId(i->def(1), 32 + 22); + if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) + code[0] |= 1 << 23; + srcId(i->src(0), 20); +} + bool CodeEmitterNVC0::emitInstruction(Instruction *insn) { @@ -2604,6 +2624,9 @@ CodeEmitterNVC0::emitInstruction(Instruction *insn) case OP_PIXLD: emitPIXLD(insn); break; + case OP_VOTE: + emitVOTE(insn); + break; case OP_PHI: case OP_UNION: case OP_CONSTRAINT: diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index 85f77047c5c..cfa85ec123c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -190,6 +190,7 @@ const char *operationStr[OP_LAST + 1] = "vsel", "cctl", "shfl", + "vote", "(invalid)" }; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp index 89d3a08937f..160e36fd7b1 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp @@ -55,6 +55,7 @@ const uint8_t Target::operationSrcNr[] = 2, 2, 2, 2, 
3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET, 2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL 3, // SHFL + 1, // VOTE 0 }; @@ -129,6 +130,8 @@ const OpClass Target::operationClass[] = OPCLASS_VECTOR, OPCLASS_CONTROL, // SHFL OPCLASS_OTHER, + // VOTE + OPCLASS_OTHER, OPCLASS_PSEUDO // LAST }; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 8d11dd7bf21..0bd5de91d1f 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -121,7 +121,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 256; case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: - return 1; /* 256 for binding as RT, but that's not possible in GL */ + return 16; /* 256 for binding as RT, but that's not possible in GL */ case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: return NOUVEAU_MIN_BUFFER_MAP_ALIGN; case PIPE_CAP_MAX_VIEWPORTS: diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c index 0f1265f5db5..060f59d0c73 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c @@ -23,7 +23,8 @@ */ #include "nvc0/nvc0_context.h" -#include "nvc0/nvc0_compute.h" + +#include "nvc0/nvc0_compute.xml.h" int nvc0_screen_compute_setup(struct nvc0_screen *screen, @@ -120,34 +121,6 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen, return 0; } -bool -nvc0_compute_validate_program(struct nvc0_context *nvc0) -{ - struct nvc0_program *prog = nvc0->compprog; - - if (prog->mem) - return true; - - if (!prog->translated) { - prog->translated = nvc0_program_translate( - prog, nvc0->screen->base.device->chipset, &nvc0->base.debug); - if (!prog->translated) - return false; - } - if (unlikely(!prog->code_size)) - return false; - - if (likely(prog->code_size)) { - if (nvc0_program_upload_code(nvc0, prog)) { - struct nouveau_pushbuf *push 
= nvc0->base.pushbuf; - BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); - PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE); - return true; - } - } - return false; -} - static void nvc0_compute_validate_samplers(struct nvc0_context *nvc0) { @@ -274,11 +247,25 @@ nvc0_compute_validate_buffers(struct nvc0_context *nvc0) } } +void +nvc0_compute_validate_globals(struct nvc0_context *nvc0) +{ + unsigned i; + + for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *); + ++i) { + struct pipe_resource *res = *util_dynarray_element( + &nvc0->global_residents, struct pipe_resource *, i); + if (res) + nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL, + nv04_resource(res), NOUVEAU_BO_RDWR); + } +} + static bool nvc0_compute_state_validate(struct nvc0_context *nvc0) { - if (!nvc0_compute_validate_program(nvc0)) - return false; + nvc0_compprog_validate(nvc0); if (nvc0->dirty_cp & NVC0_NEW_CP_CONSTBUF) nvc0_compute_validate_constbufs(nvc0); if (nvc0->dirty_cp & NVC0_NEW_CP_DRIVERCONST) @@ -289,8 +276,10 @@ nvc0_compute_state_validate(struct nvc0_context *nvc0) nvc0_compute_validate_textures(nvc0); if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS) nvc0_compute_validate_samplers(nvc0); + if (nvc0->dirty_cp & NVC0_NEW_CP_GLOBALS) + nvc0_compute_validate_globals(nvc0); - /* TODO: surfaces, global memory buffers */ + /* TODO: surfaces */ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, false); diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h deleted file mode 100644 index a23f7f39dda..00000000000 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef NVC0_COMPUTE_H -#define NVC0_COMPUTE_H - -#include "nvc0/nvc0_compute.xml.h" - -bool -nvc0_compute_validate_program(struct nvc0_context *nvc0); - -#endif /* NVC0_COMPUTE_H */ diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h index d3e3a818910..0f1ebb0a6e2 100644 --- 
a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h @@ -254,6 +254,7 @@ void nvc0_tctlprog_validate(struct nvc0_context *); void nvc0_tevlprog_validate(struct nvc0_context *); void nvc0_gmtyprog_validate(struct nvc0_context *); void nvc0_fragprog_validate(struct nvc0_context *); +void nvc0_compprog_validate(struct nvc0_context *); void nvc0_tfb_validate(struct nvc0_context *); @@ -261,8 +262,6 @@ void nvc0_tfb_validate(struct nvc0_context *); extern void nvc0_init_state_functions(struct nvc0_context *); /* nvc0_state_validate.c */ -void nvc0_validate_global_residents(struct nvc0_context *, - struct nouveau_bufctx *, int bin); bool nvc0_state_validate(struct nvc0_context *, uint32_t state_mask); /* nvc0_surface.c */ @@ -342,5 +341,6 @@ void nve4_launch_grid(struct pipe_context *, const struct pipe_grid_info *); /* nvc0_compute.c */ void nvc0_launch_grid(struct pipe_context *, const struct pipe_grid_info *); +void nvc0_compute_validate_globals(struct nvc0_context *); #endif diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c index 998e9ea47ef..37620ea8ba6 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c @@ -112,7 +112,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return 256; case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT: - return 1; /* 256 for binding as RT, but that's not possible in GL */ + return 16; /* 256 for binding as RT, but that's not possible in GL */ case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: return 16; case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT: diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c index 2f46c436a4c..6b02ed5680a 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +++ 
b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c @@ -28,6 +28,8 @@ #include "nvc0/nvc0_context.h" #include "nvc0/nvc0_query_hw.h" +#include "nvc0/nvc0_compute.xml.h" + static inline void nvc0_program_update_context_state(struct nvc0_context *nvc0, struct nvc0_program *prog, int stage) @@ -257,6 +259,19 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0) } void +nvc0_compprog_validate(struct nvc0_context *nvc0) +{ + struct nouveau_pushbuf *push = nvc0->base.pushbuf; + struct nvc0_program *cp = nvc0->compprog; + + if (cp && !nvc0_program_validate(nvc0, cp)) + return; + + BEGIN_NVC0(push, NVC0_CP(FLUSH), 1); + PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE); +} + +void nvc0_tfb_validate(struct nvc0_context *nvc0) { struct nouveau_pushbuf *push = nvc0->base.pushbuf; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c index 18e79e36b85..fbf45ceca2d 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c @@ -559,21 +559,6 @@ nvc0_validate_driverconst(struct nvc0_context *nvc0) nvc0->dirty_cp |= NVC0_NEW_CP_DRIVERCONST; } -void -nvc0_validate_global_residents(struct nvc0_context *nvc0, - struct nouveau_bufctx *bctx, int bin) -{ - unsigned i; - - for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *); - ++i) { - struct pipe_resource *res = *util_dynarray_element( - &nvc0->global_residents, struct pipe_resource *, i); - if (res) - nvc0_add_resident(bctx, bin, nv04_resource(res), NOUVEAU_BO_RDWR); - } -} - static void nvc0_validate_derived_1(struct nvc0_context *nvc0) { diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c index 652bc6d83d6..4a4e8367d28 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -23,7 +23,6 @@ */ #include "nvc0/nvc0_context.h" -#include "nvc0/nvc0_compute.h" 
#include "nvc0/nve4_compute.h" #include "codegen/nv50_ir_driver.h" @@ -306,8 +305,7 @@ nve4_compute_set_tex_handles(struct nvc0_context *nvc0) static bool nve4_compute_state_validate(struct nvc0_context *nvc0) { - if (!nvc0_compute_validate_program(nvc0)) - return false; + nvc0_compprog_validate(nvc0); if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES) nve4_compute_validate_textures(nvc0); if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS) @@ -317,8 +315,7 @@ nve4_compute_state_validate(struct nvc0_context *nvc0) if (nvc0->dirty_cp & NVC0_NEW_CP_SURFACES) nve4_compute_validate_surfaces(nvc0); if (nvc0->dirty_cp & NVC0_NEW_CP_GLOBALS) - nvc0_validate_global_residents(nvc0, - nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL); + nvc0_compute_validate_globals(nvc0); nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, false); diff --git a/src/gallium/drivers/r300/Android.mk b/src/gallium/drivers/r300/Android.mk index 7ff4f86bf0d..bb31519f000 100644 --- a/src/gallium/drivers/r300/Android.mk +++ b/src/gallium/drivers/r300/Android.mk @@ -32,7 +32,6 @@ LOCAL_SRC_FILES := $(C_SOURCES) LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/mapi \ - $(MESA_TOP)/src/glsl \ $(MESA_TOP)/src/mesa diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 82ba0435118..6fa892089ec 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -156,7 +156,6 @@ static boolean r300_setup_atoms(struct r300_context* r300) boolean is_rv350 = r300->screen->caps.is_rv350; boolean is_r500 = r300->screen->caps.is_r500; boolean has_tcl = r300->screen->caps.has_tcl; - boolean drm_2_6_0 = r300->screen->info.drm_minor >= 6; /* Create the actual atom list. * @@ -175,11 +174,11 @@ static boolean r300_setup_atoms(struct r300_context* r300) R300_INIT_ATOM(gpu_flush, 9); R300_INIT_ATOM(aa_state, 4); R300_INIT_ATOM(fb_state, 0); - R300_INIT_ATOM(hyperz_state, is_r500 || (is_rv350 && drm_2_6_0) ? 10 : 8); + R300_INIT_ATOM(hyperz_state, is_r500 || is_rv350 ? 
10 : 8); /* ZB (unpipelined), SC. */ R300_INIT_ATOM(ztop_state, 2); /* ZB, FG. */ - R300_INIT_ATOM(dsa_state, is_r500 ? (drm_2_6_0 ? 10 : 8) : 6); + R300_INIT_ATOM(dsa_state, is_r500 ? 10 : 6); /* RB3D. */ R300_INIT_ATOM(blend_state, 8); R300_INIT_ATOM(blend_color_state, is_r500 ? 3 : 2); @@ -353,9 +352,7 @@ static void r300_init_states(struct pipe_context *pipe) OUT_CB_REG(R300_ZB_DEPTHCLEARVALUE, 0); OUT_CB_REG(R300_SC_HYPERZ, R300_SC_HYPERZ_ADJ_2); - if (r300->screen->caps.is_r500 || - (r300->screen->caps.is_rv350 && - r300->screen->info.drm_minor >= 6)) { + if (r300->screen->caps.is_r500 || r300->screen->caps.is_rv350) { OUT_CB_REG(R300_GB_Z_PEQ_CONFIG, 0); } END_CB; diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 9eb9c1755c2..95971de0e6c 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -1434,8 +1434,7 @@ unsigned r300_get_num_cs_end_dwords(struct r300_context *r300) dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ if (r300->screen->caps.is_r500) dwords += 2; /* emit_index_bias */ - if (r300->screen->info.drm_minor >= 6) - dwords += 3; /* MSPOS */ + dwords += 3; /* MSPOS */ return dwords; } diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 46b23667a8d..7a75b43a53e 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -45,7 +45,7 @@ static void r300_flush_and_cleanup(struct r300_context *r300, unsigned flags, r500_emit_index_bias(r300, 0); /* The DDX doesn't set these regs. 
*/ - if (r300->screen->info.drm_minor >= 6) { + { CS_LOCALS(r300); OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); OUT_CS(0x66666666); @@ -78,9 +78,7 @@ void r300_flush(struct pipe_context *pipe, { struct r300_context *r300 = r300_context(pipe); - if (r300->screen->info.drm_minor >= 12) { - flags |= RADEON_FLUSH_KEEP_TILING_FLAGS; - } + flags |= RADEON_FLUSH_KEEP_TILING_FLAGS; if (r300->dirty_hw) { r300_flush_and_cleanup(r300, flags, fence); diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index 15a94d90721..7fad7ad6a43 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -529,7 +529,6 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, unsigned usage) { uint32_t retval = 0; - boolean drm_2_8_0 = r300_screen(screen)->info.drm_minor >= 8; boolean is_r500 = r300_screen(screen)->caps.is_r500; boolean is_r400 = r300_screen(screen)->caps.is_r400; boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM || @@ -545,13 +544,6 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, format == PIPE_FORMAT_RGTC2_SNORM || format == PIPE_FORMAT_LATC2_UNORM || format == PIPE_FORMAT_LATC2_SNORM; - boolean is_x16f_xy16f = format == PIPE_FORMAT_R16_FLOAT || - format == PIPE_FORMAT_R16G16_FLOAT || - format == PIPE_FORMAT_A16_FLOAT || - format == PIPE_FORMAT_L16_FLOAT || - format == PIPE_FORMAT_L16A16_FLOAT || - format == PIPE_FORMAT_R16A16_FLOAT || - format == PIPE_FORMAT_I16_FLOAT; boolean is_half_float = format == PIPE_FORMAT_R16_FLOAT || format == PIPE_FORMAT_R16G16_FLOAT || format == PIPE_FORMAT_R16G16B16_FLOAT || @@ -570,10 +562,6 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, case 2: case 4: case 6: - /* We need DRM 2.8.0. */ - if (!drm_2_8_0) { - return FALSE; - } /* No texturing and scanout. 
*/ if (usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_DISPLAY_TARGET | @@ -613,8 +601,6 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, (is_r500 || !is_ati1n) && /* ATI2N is supported on r4xx-r5xx. */ (is_r400 || is_r500 || !is_ati2n) && - /* R16F and RG16F texture support was added in as late as DRM 2.8.0 */ - (drm_2_8_0 || !is_x16f_xy16f) && r300_is_sampler_format_supported(format)) { retval |= PIPE_BIND_SAMPLER_VIEW; } @@ -626,7 +612,7 @@ static boolean r300_is_format_supported(struct pipe_screen* screen, PIPE_BIND_SHARED | PIPE_BIND_BLENDABLE)) && /* 2101010 cannot be rendered to on non-r5xx. */ - (!is_color2101010 || (is_r500 && drm_2_8_0)) && + (!is_color2101010 || is_r500) && r300_is_colorbuffer_format_supported(format)) { retval |= usage & (PIPE_BIND_RENDER_TARGET | @@ -723,9 +709,6 @@ struct pipe_screen* r300_screen_create(struct radeon_winsys *rws) if (SCREEN_DBG_ON(r300screen, DBG_NO_HIZ)) r300screen->caps.hiz_ram = 0; - if (r300screen->info.drm_minor < 8) - r300screen->caps.has_us_format = FALSE; - r300screen->rws = rws; r300screen->screen.destroy = r300_destroy_screen; r300screen->screen.get_name = r300_get_name; diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 1d78134de6d..1063cce7f03 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -834,45 +834,6 @@ static void r300_set_stencil_ref(struct pipe_context* pipe, r300_mark_atom_dirty(r300, &r300->dsa_state); } -static void r300_tex_set_tiling_flags(struct r300_context *r300, - struct r300_resource *tex, - unsigned level) -{ - /* Check if the macrotile flag needs to be changed. - * Skip changing the flags otherwise. 
*/ - if (tex->tex.macrotile[tex->surface_level] != - tex->tex.macrotile[level]) { - r300->rws->buffer_set_tiling(tex->buf, r300->cs, - tex->tex.microtile, tex->tex.macrotile[level], - 0, 0, 0, 0, 0, 0, 0, - tex->tex.stride_in_bytes[0], false); - - tex->surface_level = level; - } -} - -/* This switcheroo is needed just because of goddamned MACRO_SWITCH. */ -static void r300_fb_set_tiling_flags(struct r300_context *r300, - const struct pipe_framebuffer_state *state) -{ - unsigned i; - - /* Set tiling flags for new surfaces. */ - for (i = 0; i < state->nr_cbufs; i++) { - if (!state->cbufs[i]) - continue; - - r300_tex_set_tiling_flags(r300, - r300_resource(state->cbufs[i]->texture), - state->cbufs[i]->u.tex.level); - } - if (state->zsbuf) { - r300_tex_set_tiling_flags(r300, - r300_resource(state->zsbuf->texture), - state->zsbuf->u.tex.level); - } -} - static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index, const char *binding) { @@ -1017,13 +978,6 @@ r300_set_framebuffer_state(struct pipe_context* pipe, /* Re-swizzle the blend color. */ r300_set_blend_color(pipe, &((struct r300_blend_color_state*)r300->blend_color_state.state)->state); - if (r300->screen->info.drm_minor < 12) { - /* The tiling flags are dependent on the surface miplevel, unfortunately. - * This workarounds a bad design decision in old kernels which were - * rewriting tile fields in registers. 
*/ - r300_fb_set_tiling_flags(r300, state); - } - if (unlock_zbuffer) { pipe_surface_reference(&r300->locked_zbuffer, NULL); } diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 997e5f0e383..2a1b2519ec7 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -447,24 +447,15 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C60_CB_COLOR0_BASE */ radeon_emit(cs, reloc); - if (!ctx->keep_tiling_flags) { - radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C70_CB_COLOR0_INFO */ - radeon_emit(cs, reloc); - } - radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C74_CB_COLOR0_ATTRIB */ radeon_emit(cs, reloc); } - if (ctx->keep_tiling_flags) { - for (; i < 8 ; i++) { - radeon_compute_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, - S_028C70_FORMAT(V_028C70_COLOR_INVALID)); - } - for (; i < 12; i++) { - radeon_compute_set_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C, - S_028C70_FORMAT(V_028C70_COLOR_INVALID)); - } - } + for (; i < 8 ; i++) + radeon_compute_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, + S_028C70_FORMAT(V_028C70_COLOR_INVALID)); + for (; i < 12; i++) + radeon_compute_set_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C, + S_028C70_FORMAT(V_028C70_COLOR_INVALID)); /* Set CB_TARGET_MASK XXX: Use cb_misc_state */ radeon_compute_set_context_reg(cs, R_028238_CB_TARGET_MASK, diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 61d32c06671..a12638a5bdb 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1466,15 +1466,13 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx, /* Colorbuffers. 
*/ rctx->framebuffer.atom.num_dw += state->nr_cbufs * 23; - if (rctx->keep_tiling_flags) - rctx->framebuffer.atom.num_dw += state->nr_cbufs * 2; + rctx->framebuffer.atom.num_dw += state->nr_cbufs * 2; rctx->framebuffer.atom.num_dw += (12 - state->nr_cbufs) * 3; /* ZS buffer. */ if (state->zsbuf) { rctx->framebuffer.atom.num_dw += 24; - if (rctx->keep_tiling_flags) - rctx->framebuffer.atom.num_dw += 2; + rctx->framebuffer.atom.num_dw += 2; } else if (rctx->screen->b.info.drm_minor >= 18) { rctx->framebuffer.atom.num_dw += 4; } @@ -1656,11 +1654,6 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C60_CB_COLOR0_BASE */ radeon_emit(cs, reloc); - if (!rctx->keep_tiling_flags) { - radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C70_CB_COLOR0_INFO */ - radeon_emit(cs, reloc); - } - radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C74_CB_COLOR0_ATTRIB */ radeon_emit(cs, reloc); @@ -1674,27 +1667,12 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r if (i == 1 && state->cbufs[0]) { radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C, cb->cb_color_info | tex->cb_color_info); - - if (!rctx->keep_tiling_flags) { - unsigned reloc = radeon_add_to_buffer_list(&rctx->b, - &rctx->b.gfx, - (struct r600_resource*)state->cbufs[0]->texture, - RADEON_USAGE_READWRITE, - RADEON_PRIO_COLOR_BUFFER); - - radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C70_CB_COLOR0_INFO */ - radeon_emit(cs, reloc); - } i++; } - if (rctx->keep_tiling_flags) { - for (; i < 8 ; i++) { - radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); - } - for (; i < 12; i++) { - radeon_set_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C, 0); - } - } + for (; i < 8 ; i++) + radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); + for (; i < 12; i++) + radeon_set_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C, 0); /* ZS buffer. 
*/ if (state->zsbuf) { @@ -1721,11 +1699,6 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r radeon_emit(cs, zb->db_depth_size); /* R_028058_DB_DEPTH_SIZE */ radeon_emit(cs, zb->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */ - if (!rctx->keep_tiling_flags) { - radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028040_DB_Z_INFO */ - radeon_emit(cs, reloc); - } - radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028048_DB_Z_READ_BASE */ radeon_emit(cs, reloc); @@ -3680,8 +3653,7 @@ void evergreen_init_state_functions(struct r600_context *rctx) */ if (rctx->b.chip_class == EVERGREEN) { r600_init_atom(rctx, &rctx->config_state.atom, id++, evergreen_emit_config_state, 11); - if (rctx->screen->b.info.drm_minor >= 7) - rctx->config_state.dyn_gpr_enabled = true; + rctx->config_state.dyn_gpr_enabled = true; } r600_init_atom(rctx, &rctx->framebuffer.atom, id++, evergreen_emit_framebuffer_state, 0); /* shader const */ @@ -3933,7 +3905,7 @@ bool evergreen_adjust_gprs(struct r600_context *rctx) max_gprs += def_num_clause_temp_gprs * 2; /* if we have no TESS and dyn gpr is enabled then do nothing. */ - if (!rctx->hw_shader_stages[EG_HW_STAGE_HS].shader || rctx->screen->b.info.drm_minor < 7) { + if (!rctx->hw_shader_stages[EG_HW_STAGE_HS].shader) { if (rctx->config_state.dyn_gpr_enabled) return true; diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 90b99e8a275..4951297df42 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -270,9 +270,7 @@ void r600_context_gfx_flush(void *context, unsigned flags, } /* force to keep tiling flags */ - if (ctx->keep_tiling_flags) { - flags |= RADEON_FLUSH_KEEP_TILING_FLAGS; - } + flags |= RADEON_FLUSH_KEEP_TILING_FLAGS; /* Flush the CS. 
*/ ctx->b.ws->cs_flush(cs, flags, fence, ctx->screen->b.cs_count++); diff --git a/src/gallium/drivers/r600/r600_isa.h b/src/gallium/drivers/r600/r600_isa.h index 27fc1e8e8be..b3f49bda88e 100644 --- a/src/gallium/drivers/r600/r600_isa.h +++ b/src/gallium/drivers/r600/r600_isa.h @@ -102,13 +102,13 @@ enum alu_op_flags /* condition codes - 3 bits */ AF_CC_SHIFT = 29, - AF_CC_MASK = (7 << AF_CC_SHIFT), - AF_CC_E = (0 << AF_CC_SHIFT), - AF_CC_GT = (1 << AF_CC_SHIFT), - AF_CC_GE = (2 << AF_CC_SHIFT), - AF_CC_NE = (3 << AF_CC_SHIFT), - AF_CC_LT = (4 << AF_CC_SHIFT), - AF_CC_LE = (5 << AF_CC_SHIFT), + AF_CC_MASK = (7U << AF_CC_SHIFT), + AF_CC_E = (0U << AF_CC_SHIFT), + AF_CC_GT = (1U << AF_CC_SHIFT), + AF_CC_GE = (2U << AF_CC_SHIFT), + AF_CC_NE = (3U << AF_CC_SHIFT), + AF_CC_LT = (4U << AF_CC_SHIFT), + AF_CC_LE = (5U << AF_CC_SHIFT), }; /* flags for FETCH instructions (TEX/VTX) */ @@ -165,7 +165,7 @@ struct alu_op_info * (0 if instruction doesn't exist for chip class) */ int slots[4]; /* flags (mostly autogenerated from instruction name) */ - int flags; + unsigned int flags; }; /* FETCH instruction info */ diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index 7fb4108a188..5a6ce71414c 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -136,7 +136,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, goto fail; rctx->screen = rscreen; - rctx->keep_tiling_flags = rscreen->b.info.drm_minor >= 12; r600_init_blit_functions(rctx); @@ -409,7 +408,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) return 12; case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: /* textures support 8192, but layered rendering supports 2048 */ - return rscreen->b.info.drm_minor >= 9 ? 2048 : 0; + return 2048; /* Render targets. 
*/ case PIPE_CAP_MAX_RENDER_TARGETS: diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 0e4dd16525b..f8a20398355 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -442,7 +442,6 @@ struct r600_context { /* Hardware info. */ boolean has_vertex_cache; - boolean keep_tiling_flags; unsigned default_gprs[EG_NUM_HW_STAGES]; unsigned current_gprs[EG_NUM_HW_STAGES]; unsigned r6xx_num_clause_temp_gprs; diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index c3346f29811..aa3a085c6d2 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -2235,7 +2235,6 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen, uint32_t result = 0, word4 = 0, yuv_format = 0; const struct util_format_description *desc; boolean uniform = TRUE; - bool enable_s3tc = rscreen->b.info.drm_minor >= 9; bool is_srgb_valid = FALSE; const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; @@ -2330,9 +2329,6 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen, } if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { - if (!enable_s3tc) - goto out_unknown; - switch (format) { case PIPE_FORMAT_RGTC1_SNORM: case PIPE_FORMAT_LATC1_SNORM: @@ -2354,10 +2350,6 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen, } if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { - - if (!enable_s3tc) - goto out_unknown; - if (!util_format_s3tc_enabled) { goto out_unknown; } @@ -2386,9 +2378,6 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen, } if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { - if (!enable_s3tc) - goto out_unknown; - if (rscreen->b.chip_class < EVERGREEN) goto out_unknown; @@ -2721,6 +2710,13 @@ uint32_t r600_colorformat_endian_swap(uint32_t colorformat) /* 32-bit buffers. 
*/ case V_0280A0_COLOR_8_8_8_8: + /* + * No need to do endian swaps on four 8-bit components, + * as the mesa<-->pipe format conversion takes into account + * the endianness + */ + return ENDIAN_NONE; + case V_0280A0_COLOR_2_10_10_10: case V_0280A0_COLOR_8_24: case V_0280A0_COLOR_24_8: diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 7df617737a7..b77b1321d73 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -659,7 +659,7 @@ r600_get_sampler_view_priority(struct r600_resource *res) } while (0); #define R600_ERR(fmt, args...) \ - fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args) + fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##args) /* For MSAA sample positions. */ #define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \ diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index 1df0c300e85..0b31d0a1f01 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -1293,25 +1293,14 @@ unsigned r600_translate_colorswap(enum pipe_format format) break; case 4: /* check the middle channels, the 1st and 4th channel can be NONE */ -#ifdef PIPE_ARCH_LITTLE_ENDIAN if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z)) return V_0280A0_SWAP_STD; /* XYZW */ else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y)) return V_0280A0_SWAP_STD_REV; /* WZYX */ else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X)) return V_0280A0_SWAP_ALT; /* ZYXW */ - else if (HAS_SWIZZLE(1,X) && HAS_SWIZZLE(2,Y)) - return V_0280A0_SWAP_ALT_REV; /* WXYZ */ -#else - if (HAS_SWIZZLE(1,W) && HAS_SWIZZLE(2,X)) - return V_0280A0_SWAP_STD; /* ZWXY */ - else if (HAS_SWIZZLE(1,X) && HAS_SWIZZLE(2,W)) - return V_0280A0_SWAP_STD_REV; /* YXWZ */ - else if (HAS_SWIZZLE(1,W) && HAS_SWIZZLE(2,Z)) - return V_0280A0_SWAP_ALT; /* XWZY */ else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W)) 
return V_0280A0_SWAP_ALT_REV; /* YZWX */ -#endif break; } return ~0U; @@ -1419,6 +1408,11 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, { int i; + /* This function is broken in BE, so just disable this path for now */ +#ifdef PIPE_ARCH_BIG_ENDIAN + return; +#endif + if (rctx->render_cond) return; diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 7329ceedf04..1160d235062 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -26,24 +26,10 @@ /* The public winsys interface header for the radeon driver. */ -/* R300 features in DRM. - * - * 2.6.0: - * - Hyper-Z - * - GB_Z_PEQ_CONFIG on rv350->r4xx - * - R500 FG_ALPHA_VALUE - * - * 2.8.0: - * - R500 US_FORMAT regs - * - R500 ARGB2101010 colorbuffer - * - CMask and AA regs - * - R16F/RG16F - */ - #include "pipebuffer/pb_buffer.h" #define RADEON_FLUSH_ASYNC (1 << 0) -#define RADEON_FLUSH_KEEP_TILING_FLAGS (1 << 1) /* needs DRM 2.12.0 */ +#define RADEON_FLUSH_KEEP_TILING_FLAGS (1 << 1) #define RADEON_FLUSH_END_OF_FRAME (1 << 2) /* Tiling flags. 
*/ diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 9f5f4c682bc..1ec695ea0df 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -115,7 +115,7 @@ static void *si_create_compute_state( si_shader_binary_read_config(&program->shader.binary, &program->shader.config, 0); si_shader_dump(sctx->screen, &program->shader, &sctx->b.debug, - TGSI_PROCESSOR_COMPUTE); + TGSI_PROCESSOR_COMPUTE, stderr); si_shader_binary_upload(sctx->screen, &program->shader); program->input_buffer = si_resource_create_custom(sctx->b.b.screen, diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index e16ebbdef3e..eb0cabb9f2f 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -34,15 +34,15 @@ DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL) -static void si_dump_shader(struct si_shader_ctx_state *state, const char *name, - FILE *f) +static void si_dump_shader(struct si_screen *sscreen, + struct si_shader_ctx_state *state, FILE *f) { if (!state->cso || !state->current) return; - fprintf(f, "%s shader disassembly:\n", name); si_dump_shader_key(state->cso->type, &state->current->key, f); - fprintf(f, "%s\n\n", state->current->binary.disasm_string); + si_shader_dump(sscreen, state->current, NULL, + state->cso->info.processor, f); } /** @@ -670,11 +670,11 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, si_dump_debug_registers(sctx, f); si_dump_framebuffer(sctx, f); - si_dump_shader(&sctx->vs_shader, "Vertex", f); - si_dump_shader(&sctx->tcs_shader, "Tessellation control", f); - si_dump_shader(&sctx->tes_shader, "Tessellation evaluation", f); - si_dump_shader(&sctx->gs_shader, "Geometry", f); - si_dump_shader(&sctx->ps_shader, "Fragment", f); + si_dump_shader(sctx->screen, &sctx->vs_shader, f); + si_dump_shader(sctx->screen, &sctx->tcs_shader, f); + 
si_dump_shader(sctx->screen, &sctx->tes_shader, f); + si_dump_shader(sctx->screen, &sctx->gs_shader, f); + si_dump_shader(sctx->screen, &sctx->ps_shader, f); si_dump_last_bo_list(sctx, f); si_dump_last_ib(sctx, f); @@ -781,8 +781,7 @@ void si_check_vm_faults(struct si_context *sctx) fprintf(f, "Device name: %s\n\n", screen->get_name(screen)); fprintf(f, "Failing VM page: 0x%08x\n\n", addr); - si_dump_last_bo_list(sctx, f); - si_dump_last_ib(sctx, f); + si_dump_debug_state(&sctx->b.b, f, 0); fclose(f); fprintf(stderr, "Detected a VM fault, exiting...\n"); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 57458ae1381..8c1151aa493 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -4406,14 +4406,14 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader) static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary, struct pipe_debug_callback *debug, - const char *name) + const char *name, FILE *file) { char *line, *p; unsigned i, count; if (binary->disasm_string) { - fprintf(stderr, "Shader %s disassembly:\n", name); - fprintf(stderr, "%s", binary->disasm_string); + fprintf(file, "Shader %s disassembly:\n", name); + fprintf(file, "%s", binary->disasm_string); if (debug && debug->debug_message) { /* Very long debug messages are cut off, so send the @@ -4443,9 +4443,9 @@ static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary "Shader Disassembly End"); } } else { - fprintf(stderr, "Shader %s binary:\n", name); + fprintf(file, "Shader %s binary:\n", name); for (i = 0; i < binary->code_size; i += 4) { - fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, + fprintf(file, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3], binary->code[i + 2], binary->code[i + 1], binary->code[i]); } @@ -4457,7 +4457,8 @@ static void si_shader_dump_stats(struct si_screen *sscreen, unsigned num_inputs, unsigned 
code_size, struct pipe_debug_callback *debug, - unsigned processor) + unsigned processor, + FILE *file) { unsigned lds_increment = sscreen->b.chip_class >= CIK ? 512 : 256; unsigned lds_per_wave = 0; @@ -4493,15 +4494,16 @@ static void si_shader_dump_stats(struct si_screen *sscreen, if (lds_per_wave) max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave); - if (r600_can_dump_shader(&sscreen->b, processor)) { + if (file != stderr || + r600_can_dump_shader(&sscreen->b, processor)) { if (processor == TGSI_PROCESSOR_FRAGMENT) { - fprintf(stderr, "*** SHADER CONFIG ***\n" + fprintf(file, "*** SHADER CONFIG ***\n" "SPI_PS_INPUT_ADDR = 0x%04x\n" "SPI_PS_INPUT_ENA = 0x%04x\n", conf->spi_ps_input_addr, conf->spi_ps_input_ena); } - fprintf(stderr, "*** SHADER STATS ***\n" + fprintf(file, "*** SHADER STATS ***\n" "SGPRS: %d\n" "VGPRS: %d\n" "Code Size: %d bytes\n" @@ -4555,27 +4557,30 @@ static const char *si_get_shader_name(struct si_shader *shader, } void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, - struct pipe_debug_callback *debug, unsigned processor) + struct pipe_debug_callback *debug, unsigned processor, + FILE *file) { - if (r600_can_dump_shader(&sscreen->b, processor) && - !(sscreen->b.debug_flags & DBG_NO_ASM)) { - fprintf(stderr, "\n%s:\n", si_get_shader_name(shader, processor)); + if (file != stderr || + (r600_can_dump_shader(&sscreen->b, processor) && + !(sscreen->b.debug_flags & DBG_NO_ASM))) { + fprintf(file, "\n%s:\n", si_get_shader_name(shader, processor)); if (shader->prolog) si_shader_dump_disassembly(&shader->prolog->binary, - debug, "prolog"); + debug, "prolog", file); - si_shader_dump_disassembly(&shader->binary, debug, "main"); + si_shader_dump_disassembly(&shader->binary, debug, "main", file); if (shader->epilog) si_shader_dump_disassembly(&shader->epilog->binary, - debug, "epilog"); - fprintf(stderr, "\n"); + debug, "epilog", file); + fprintf(file, "\n"); } si_shader_dump_stats(sscreen, &shader->config, shader->selector ? 
shader->selector->info.num_inputs : 0, - si_get_shader_binary_size(shader), debug, processor); + si_get_shader_binary_size(shader), debug, processor, + file); } int si_compile_llvm(struct si_screen *sscreen, @@ -4723,7 +4728,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen, if (r600_can_dump_shader(&sscreen->b, TGSI_PROCESSOR_GEOMETRY)) fprintf(stderr, "GS Copy Shader:\n"); si_shader_dump(sscreen, ctx->shader, debug, - TGSI_PROCESSOR_GEOMETRY); + TGSI_PROCESSOR_GEOMETRY, stderr); r = si_shader_binary_upload(sscreen, ctx->shader); } @@ -5971,7 +5976,8 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, } } - si_shader_dump(sscreen, shader, debug, shader->selector->info.processor); + si_shader_dump(sscreen, shader, debug, shader->selector->info.processor, + stderr); /* Upload. */ r = si_shader_binary_upload(sscreen, shader); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index ff5c24d8918..de23e642fe4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -365,6 +365,7 @@ struct si_shader { struct r600_resource *scratch_bo; union si_shader_key key; bool is_binary_shared; + unsigned z_order; /* The following data is all that's needed for binary shaders. 
*/ struct radeon_shader_binary binary; @@ -433,7 +434,8 @@ void si_shader_destroy(struct si_shader *shader); unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index); int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader); void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, - struct pipe_debug_callback *debug, unsigned processor); + struct pipe_debug_callback *debug, unsigned processor, + FILE *f); void si_shader_apply_scratch_relocs(struct si_context *sctx, struct si_shader *shader, uint64_t scratch_va); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 2dfdbeb8d8f..b23b17ad77b 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1339,10 +1339,10 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s sctx->ps_db_shader_control; /* Bug workaround for smoothing (overrasterization) on SI. */ - if (sctx->b.chip_class == SI && sctx->smoothing_enabled) + if (sctx->b.chip_class == SI && sctx->smoothing_enabled) { + db_shader_control &= C_02880C_Z_ORDER; db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); - else - db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); + } /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */ if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable)) diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index a6753a7a528..321b87d80a6 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -789,6 +789,17 @@ static void si_shader_ps(struct si_shader *shader) S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) | S_00B02C_USER_SGPR(num_user_sgprs) | S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); + + /* Prefer RE_Z if the shader is complex enough. 
The requirement is either: + * - the shader uses at least 2 VMEM instructions, or + * - the code size is at least 50 2-dword instructions or 100 1-dword + * instructions. + */ + if (info->num_memory_instructions >= 2 || + shader->binary.code_size > 100*4) + shader->z_order = V_02880C_EARLY_Z_THEN_RE_Z; + else + shader->z_order = V_02880C_EARLY_Z_THEN_LATE_Z; } static void si_shader_init_pm4_state(struct si_shader *shader) @@ -1985,15 +1996,18 @@ bool si_update_shaders(struct si_context *sctx) si_update_vgt_shader_config(sctx); if (sctx->ps_shader.cso) { - unsigned db_shader_control = - sctx->ps_shader.cso->db_shader_control | - S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS); + unsigned db_shader_control; r = si_shader_select(ctx, &sctx->ps_shader); if (r) return false; si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4); + db_shader_control = + sctx->ps_shader.cso->db_shader_control | + S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS) | + S_02880C_Z_ORDER(sctx->ps_shader.current->z_order); + if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) || sctx->sprite_coord_enable != rs->sprite_coord_enable || sctx->flatshade != rs->flatshade) { diff --git a/src/gallium/drivers/vc4/Android.mk b/src/gallium/drivers/vc4/Android.mk index f42a152aa8c..2efb4722636 100644 --- a/src/gallium/drivers/vc4/Android.mk +++ b/src/gallium/drivers/vc4/Android.mk @@ -28,9 +28,11 @@ include $(CLEAR_VARS) LOCAL_SRC_FILES := \ $(C_SOURCES) +LOCAL_GENERATED_SOURCES := $(MESA_GEN_NIR_H) + LOCAL_SHARED_LIBRARIES := libdrm -# We need libmesa_glsl to get NIR's generated include directories. -LOCAL_STATIC_LIBRARIES := libmesa_glsl +# We need libmesa_nir to get NIR's generated include directories. 
+LOCAL_STATIC_LIBRARIES := libmesa_nir LOCAL_MODULE := libmesa_pipe_vc4 include $(GALLIUM_COMMON_MK) diff --git a/src/gallium/drivers/virgl/virgl_encode.c b/src/gallium/drivers/virgl/virgl_encode.c index 22fb5292819..1a1c40bac78 100644 --- a/src/gallium/drivers/virgl/virgl_encode.c +++ b/src/gallium/drivers/virgl/virgl_encode.c @@ -741,7 +741,9 @@ int virgl_encode_blit(struct virgl_context *ctx, virgl_encoder_write_cmd_dword(ctx, VIRGL_CMD0(VIRGL_CCMD_BLIT, 0, VIRGL_CMD_BLIT_SIZE)); tmp = VIRGL_CMD_BLIT_S0_MASK(blit->mask) | VIRGL_CMD_BLIT_S0_FILTER(blit->filter) | - VIRGL_CMD_BLIT_S0_SCISSOR_ENABLE(blit->scissor_enable); + VIRGL_CMD_BLIT_S0_SCISSOR_ENABLE(blit->scissor_enable) | + VIRGL_CMD_BLIT_S0_RENDER_CONDITION_ENABLE(blit->render_condition_enable) | + VIRGL_CMD_BLIT_S0_ALPHA_BLEND(blit->alpha_blend); virgl_encoder_write_dword(ctx->cbuf, tmp); virgl_encoder_write_dword(ctx->cbuf, (blit->scissor.minx | blit->scissor.miny << 16)); virgl_encoder_write_dword(ctx->cbuf, (blit->scissor.maxx | blit->scissor.maxy << 16)); diff --git a/src/gallium/drivers/virgl/virgl_protocol.h b/src/gallium/drivers/virgl/virgl_protocol.h index ca3142f5f72..a2f1e818309 100644 --- a/src/gallium/drivers/virgl/virgl_protocol.h +++ b/src/gallium/drivers/virgl/virgl_protocol.h @@ -388,6 +388,8 @@ enum virgl_context_cmd { #define VIRGL_CMD_BLIT_S0_MASK(x) (((x) & 0xff) << 0) #define VIRGL_CMD_BLIT_S0_FILTER(x) (((x) & 0x3) << 8) #define VIRGL_CMD_BLIT_S0_SCISSOR_ENABLE(x) (((x) & 0x1) << 10) +#define VIRGL_CMD_BLIT_S0_RENDER_CONDITION_ENABLE(x) (((x) & 0x1) << 11) +#define VIRGL_CMD_BLIT_S0_ALPHA_BLEND(x) (((x) & 0x1) << 12) #define VIRGL_CMD_BLIT_SCISSOR_MINX_MINY 2 #define VIRGL_CMD_BLIT_SCISSOR_MAXX_MAXY 3 #define VIRGL_CMD_BLIT_DST_RES_HANDLE 4 diff --git a/src/gallium/drivers/virgl/virgl_screen.c b/src/gallium/drivers/virgl/virgl_screen.c index 18263e91e6a..d3f4e259cad 100644 --- a/src/gallium/drivers/virgl/virgl_screen.c +++ b/src/gallium/drivers/virgl/virgl_screen.c @@ -232,6 +232,9 
@@ virgl_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_GENERATE_MIPMAP: case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: case PIPE_CAP_QUERY_BUFFER_OBJECT: + case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: + case PIPE_CAP_STRING_MARKER: + case PIPE_CAP_QUERY_MEMORY_INFO: return 0; case PIPE_CAP_VENDOR_ID: return 0x1af4; diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index d9c9f9b5cc2..ab18523a80c 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -432,7 +432,6 @@ enum pipe_format { #elif defined(PIPE_ARCH_BIG_ENDIAN) #define PIPE_FORMAT_ABGR8888_UNORM PIPE_FORMAT_R8G8B8A8_UNORM #define PIPE_FORMAT_XBGR8888_UNORM PIPE_FORMAT_R8G8B8X8_UNORM -#define PIPE_FORMAT_XRGB8888_UNORM PIPE_FORMAT_B8G8R8X8_UNORM #define PIPE_FORMAT_ARGB8888_UNORM PIPE_FORMAT_B8G8R8A8_UNORM #define PIPE_FORMAT_XRGB8888_UNORM PIPE_FORMAT_B8G8R8X8_UNORM #define PIPE_FORMAT_BGRA8888_UNORM PIPE_FORMAT_A8R8G8B8_UNORM diff --git a/src/gallium/state_trackers/clover/Makefile.am b/src/gallium/state_trackers/clover/Makefile.am index 3c9421692fc..4c9d7d9b7be 100644 --- a/src/gallium/state_trackers/clover/Makefile.am +++ b/src/gallium/state_trackers/clover/Makefile.am @@ -41,6 +41,7 @@ libclllvm_la_CXXFLAGS = \ $(VISIBILITY_CXXFLAGS) \ $(LLVM_CXXFLAGS) \ $(DEFINES) \ + $(LIBELF_CFLAGS) \ -DLIBCLC_INCLUDEDIR=\"$(LIBCLC_INCLUDEDIR)/\" \ -DLIBCLC_LIBEXECDIR=\"$(LIBCLC_LIBEXECDIR)/\" \ -DCLANG_RESOURCE_DIR=\"$(CLANG_RESOURCE_DIR)\" diff --git a/src/gallium/state_trackers/nine/Makefile.sources b/src/gallium/state_trackers/nine/Makefile.sources index 05eb1655376..af80532ce37 100644 --- a/src/gallium/state_trackers/nine/Makefile.sources +++ b/src/gallium/state_trackers/nine/Makefile.sources @@ -35,6 +35,7 @@ C_SOURCES := \ nine_flags.h \ nine_helpers.c \ nine_helpers.h \ + nine_limits.h \ nine_lock.c \ nine_lock.h \ nine_pdata.h \ diff --git a/src/gallium/state_trackers/nine/buffer9.c 
b/src/gallium/state_trackers/nine/buffer9.c index e066fc59f45..1103741f7ee 100644 --- a/src/gallium/state_trackers/nine/buffer9.c +++ b/src/gallium/state_trackers/nine/buffer9.c @@ -174,13 +174,18 @@ NineBuffer9_Lock( struct NineBuffer9 *This, u_box_1d(OffsetToLock, SizeToLock, &box); if (This->base.pool == D3DPOOL_MANAGED) { - if (!This->managed.dirty) { - assert(LIST_IS_EMPTY(&This->managed.list)); - list_add(&This->managed.list, &This->base.base.device->update_buffers); - This->managed.dirty = TRUE; - This->managed.dirty_box = box; - } else { - u_box_union_2d(&This->managed.dirty_box, &This->managed.dirty_box, &box); + /* READONLY doesn't dirty the buffer */ + if (!(Flags & D3DLOCK_READONLY)) { + if (!This->managed.dirty) { + assert(LIST_IS_EMPTY(&This->managed.list)); + This->managed.dirty = TRUE; + This->managed.dirty_box = box; + } else { + u_box_union_2d(&This->managed.dirty_box, &This->managed.dirty_box, &box); + /* Do not upload while we are locking, we'll add it back later */ + if (!LIST_IS_EMPTY(&This->managed.list)) + list_delinit(&This->managed.list); + } } *ppbData = (char *)This->managed.data + OffsetToLock; DBG("returning pointer %p\n", *ppbData); @@ -229,8 +234,13 @@ NineBuffer9_Unlock( struct NineBuffer9 *This ) user_assert(This->nmaps > 0, D3DERR_INVALIDCALL); if (This->base.pool != D3DPOOL_MANAGED) This->pipe->transfer_unmap(This->pipe, This->maps[--(This->nmaps)]); - else + else { This->nmaps--; + /* TODO: Fix this to upload at the first draw call needing the data, + * instead of at the next draw call */ + if (!This->nmaps && This->managed.dirty && LIST_IS_EMPTY(&This->managed.list)) + list_add(&This->managed.list, &This->base.base.device->update_buffers); + } return D3D_OK; } diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index a7a7da27903..eb6479bb4d9 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -830,6 +830,18 @@ 
nine_ureg_dst_register(unsigned file, int index) return ureg_dst(ureg_src_register(file, index)); } +static inline struct ureg_src +nine_get_position_input(struct shader_translator *tx) +{ + struct ureg_program *ureg = tx->ureg; + + if (tx->wpos_is_sysval) + return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0); + else + return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, + 0, TGSI_INTERPOLATE_LINEAR); +} + static struct ureg_src tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param) { @@ -955,16 +967,8 @@ tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param) case D3DSPR_MISCTYPE: switch (param->idx) { case D3DSMO_POSITION: - if (ureg_src_is_undef(tx->regs.vPos)) { - if (tx->wpos_is_sysval) { - tx->regs.vPos = - ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0); - } else { - tx->regs.vPos = - ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0, - TGSI_INTERPOLATE_LINEAR); - } - } + if (ureg_src_is_undef(tx->regs.vPos)) + tx->regs.vPos = nine_get_position_input(tx); if (tx->shift_wpos) { /* TODO: do this only once */ struct ureg_dst wpos = tx_scratch(tx); @@ -2048,9 +2052,16 @@ DECL_SPECIAL(DCL) unsigned interp_location = 0; /* SM3 only, SM2 input semantic determined by file */ assert(sem.reg.idx < Elements(tx->regs.v)); + + if (tgsi.Name == TGSI_SEMANTIC_POSITION) { + tx->regs.v[sem.reg.idx] = nine_get_position_input(tx); + return D3D_OK; + } + if (sem.reg.mod & NINED3DSPDM_CENTROID || (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid)) interp_location = TGSI_INTERPOLATE_LOC_CENTROID; + tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid( ureg, tgsi.Name, tgsi.Index, nine_tgsi_to_interp_mode(&tgsi), @@ -3269,12 +3280,7 @@ shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col) } if (tx->info->fog_mode != D3DFOG_NONE) { - if (tx->wpos_is_sysval) { - depth = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0); - } else { - depth = 
ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0, - TGSI_INTERPOLATE_LINEAR); - } + depth = nine_get_position_input(tx); depth = ureg_scalar(depth, TGSI_SWIZZLE_Z); } diff --git a/src/gallium/targets/dri/Android.mk b/src/gallium/targets/dri/Android.mk index 2a7738e6979..7da0e8aabbb 100644 --- a/src/gallium/targets/dri/Android.mk +++ b/src/gallium/targets/dri/Android.mk @@ -28,9 +28,10 @@ include $(CLEAR_VARS) LOCAL_MODULE := gallium_dri ifeq ($(MESA_LOLLIPOP_BUILD),true) -LOCAL_MODULE_RELATIVE_PATH := $(notdir $(MESA_DRI_MODULE_PATH)) +LOCAL_MODULE_RELATIVE_PATH := $(MESA_DRI_MODULE_REL_PATH) else LOCAL_MODULE_PATH := $(MESA_DRI_MODULE_PATH) +LOCAL_UNSTRIPPED_PATH := $(MESA_DRI_MODULE_UNSTRIPPED_PATH) endif LOCAL_SRC_FILES := target.c @@ -109,6 +110,8 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \ libmesa_st_dri \ libmesa_st_mesa \ libmesa_glsl \ + libmesa_compiler \ + libmesa_nir \ libmesa_dri_common \ libmesa_megadriver_stub \ libmesa_gallium \ diff --git a/src/gallium/targets/opencl/Makefile.am b/src/gallium/targets/opencl/Makefile.am index f3ba1e32d1b..99532f06d81 100644 --- a/src/gallium/targets/opencl/Makefile.am +++ b/src/gallium/targets/opencl/Makefile.am @@ -2,9 +2,6 @@ include $(top_srcdir)/src/gallium/Automake.inc lib_LTLIBRARIES = lib@[email protected] -AM_CPPFLAGS = \ - $(LIBELF_CFLAGS) - lib@OPENCL_LIBNAME@_la_LDFLAGS = \ $(LLVM_LDFLAGS) \ -no-undefined \ diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 8151c447065..1accc6a1863 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -174,9 +174,9 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) /* Get DRM version. 
*/ version = drmGetVersion(ws->fd); if (version->version_major != 2 || - version->version_minor < 3) { + version->version_minor < 12) { fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is " - "only compatible with 2.3.x (kernel 2.6.34) or later.\n", + "only compatible with 2.12.0 (kernel 3.2) or later.\n", __FUNCTION__, version->version_major, version->version_minor, @@ -374,8 +374,7 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) else if (ws->gen >= DRV_R600) { uint32_t tiling_config = 0; - if (ws->info.drm_minor >= 9 && - !radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS, + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS, "num backends", &ws->info.num_render_backends)) return FALSE; @@ -401,27 +400,20 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) ws->info.pipe_interleave_bytes = ws->info.chip_class >= EVERGREEN ? 512 : 256; - if (ws->info.drm_minor >= 11) { - radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL, - &ws->info.num_tile_pipes); - - /* "num_tiles_pipes" must be equal to the number of pipes (Px) in the - * pipe config field of the GB_TILE_MODE array. Only one card (Tahiti) - * reports a different value (12). Fix it by setting what's in the - * GB_TILE_MODE array (8). - */ - if (ws->gen == DRV_SI && ws->info.num_tile_pipes == 12) - ws->info.num_tile_pipes = 8; - - if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL, - &ws->info.r600_gb_backend_map)) - ws->info.r600_gb_backend_map_valid = TRUE; - } else { - ws->info.num_tile_pipes = - ws->info.chip_class >= EVERGREEN ? - 1 << (tiling_config & 0xf) : - 1 << ((tiling_config & 0xe) >> 1); - } + radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL, + &ws->info.num_tile_pipes); + + /* "num_tiles_pipes" must be equal to the number of pipes (Px) in the + * pipe config field of the GB_TILE_MODE array. Only one card (Tahiti) + * reports a different value (12). Fix it by setting what's in the + * GB_TILE_MODE array (8). 
+ */ + if (ws->gen == DRV_SI && ws->info.num_tile_pipes == 12) + ws->info.num_tile_pipes = 8; + + if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL, + &ws->info.r600_gb_backend_map)) + ws->info.r600_gb_backend_map_valid = TRUE; ws->info.has_virtual_memory = FALSE; if (ws->info.drm_minor >= 13) { diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 3dab205e5cc..647f2eb96b0 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -396,13 +396,8 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, prog_data->binding_table.image_start = bias; /* Finish the optimization and compilation process */ - if (nir->stage != MESA_SHADER_VERTEX && - nir->stage != MESA_SHADER_TESS_CTRL && - nir->stage != MESA_SHADER_TESS_EVAL && - nir->stage != MESA_SHADER_FRAGMENT) { - nir = brw_nir_lower_io(nir, &pipeline->device->info, - compiler->scalar_stage[stage], false, NULL); - } + if (nir->stage == MESA_SHADER_COMPUTE) + brw_nir_lower_shared(nir); /* nir_lower_io will only handle the push constants; we need to set this * to the full number of possible uniforms. 
diff --git a/src/mesa/Android.libmesa_dricore.mk b/src/mesa/Android.libmesa_dricore.mk index ba952da4ad4..a3e6c6d55ae 100644 --- a/src/mesa/Android.libmesa_dricore.mk +++ b/src/mesa/Android.libmesa_dricore.mk @@ -59,7 +59,6 @@ endif LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/mesa/main \ - $(MESA_TOP)/src/glsl \ $(MESA_TOP)/src/compiler/nir \ $(MESA_TOP)/src/gallium/include \ $(MESA_TOP)/src/gallium/auxiliary diff --git a/src/mesa/Android.libmesa_glsl_utils.mk b/src/mesa/Android.libmesa_glsl_utils.mk index 894ae20a239..5a80f221c8f 100644 --- a/src/mesa/Android.libmesa_glsl_utils.mk +++ b/src/mesa/Android.libmesa_glsl_utils.mk @@ -36,7 +36,6 @@ include $(CLEAR_VARS) LOCAL_MODULE := libmesa_glsl_utils LOCAL_C_INCLUDES := \ - $(MESA_TOP)/src/glsl \ $(MESA_TOP)/src/compiler/nir \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/gallium/include \ @@ -62,7 +61,6 @@ LOCAL_IS_HOST_MODULE := true LOCAL_CFLAGS := -D_POSIX_C_SOURCE=199309L LOCAL_C_INCLUDES := \ - $(MESA_TOP)/src/glsl \ $(MESA_TOP)/src/compiler/nir \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/gallium/include \ diff --git a/src/mesa/Android.libmesa_st_mesa.mk b/src/mesa/Android.libmesa_st_mesa.mk index ee8887b73ec..9fd9460a5ba 100644 --- a/src/mesa/Android.libmesa_st_mesa.mk +++ b/src/mesa/Android.libmesa_st_mesa.mk @@ -54,7 +54,6 @@ endif LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/mesa/main \ - $(MESA_TOP)/src/glsl \ $(MESA_TOP)/src/compiler/nir \ $(MESA_TOP)/src/gallium/auxiliary \ $(MESA_TOP)/src/gallium/include diff --git a/src/mesa/Android.mesa_gen_matypes.mk b/src/mesa/Android.mesa_gen_matypes.mk index 6e301f90416..4fcf73a631d 100644 --- a/src/mesa/Android.mesa_gen_matypes.mk +++ b/src/mesa/Android.mesa_gen_matypes.mk @@ -35,8 +35,7 @@ LOCAL_MODULE := mesa_gen_matypes LOCAL_IS_HOST_MODULE := true LOCAL_C_INCLUDES := \ - $(MESA_TOP)/src/mapi \ - $(MESA_TOP)/src/glsl + $(MESA_TOP)/src/mapi LOCAL_SRC_FILES := \ x86/gen_matypes.c diff --git a/src/mesa/drivers/common/meta.c 
b/src/mesa/drivers/common/meta.c index 329e48f46f5..ab78f4565da 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -48,6 +48,7 @@ #include "main/feedback.h" #include "main/formats.h" #include "main/format_unpack.h" +#include "main/framebuffer.h" #include "main/glformats.h" #include "main/image.h" #include "main/macros.h" @@ -104,42 +105,20 @@ static void meta_drawpix_cleanup(struct gl_context *ctx, struct drawpix_state *drawpix); void -_mesa_meta_bind_fbo_image(GLenum fboTarget, GLenum attachment, - struct gl_texture_image *texImage, GLuint layer) +_mesa_meta_framebuffer_texture_image(struct gl_context *ctx, + struct gl_framebuffer *fb, + GLenum attachment, + struct gl_texture_image *texImage, + GLuint layer) { struct gl_texture_object *texObj = texImage->TexObject; int level = texImage->Level; - GLenum texTarget = texObj->Target; + const GLenum texTarget = texObj->Target == GL_TEXTURE_CUBE_MAP + ? GL_TEXTURE_CUBE_MAP_POSITIVE_X + texImage->Face + : texObj->Target; - switch (texTarget) { - case GL_TEXTURE_1D: - _mesa_FramebufferTexture1D(fboTarget, - attachment, - texTarget, - texObj->Name, - level); - break; - case GL_TEXTURE_1D_ARRAY: - case GL_TEXTURE_2D_ARRAY: - case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: - case GL_TEXTURE_CUBE_MAP_ARRAY: - case GL_TEXTURE_3D: - _mesa_FramebufferTextureLayer(fboTarget, - attachment, - texObj->Name, - level, - layer); - break; - default: /* 2D / cube */ - if (texTarget == GL_TEXTURE_CUBE_MAP) - texTarget = GL_TEXTURE_CUBE_MAP_POSITIVE_X + texImage->Face; - - _mesa_FramebufferTexture2D(fboTarget, - attachment, - texTarget, - texObj->Name, - level); - } + _mesa_framebuffer_texture(ctx, fb, attachment, texObj, texTarget, + level, layer, false, __func__); } GLuint @@ -847,8 +826,8 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) if (ctx->RasterDiscard) _mesa_set_enable(ctx, GL_RASTERIZER_DISCARD, GL_FALSE); - save->DrawBufferName = ctx->DrawBuffer->Name; - save->ReadBufferName = 
ctx->ReadBuffer->Name; + _mesa_reference_framebuffer(&save->DrawBuffer, ctx->DrawBuffer); + _mesa_reference_framebuffer(&save->ReadBuffer, ctx->ReadBuffer); } } @@ -1234,11 +1213,9 @@ _mesa_meta_end(struct gl_context *ctx) if (save->TransformFeedbackNeedsResume) _mesa_ResumeTransformFeedback(); - if (ctx->DrawBuffer->Name != save->DrawBufferName) - _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, save->DrawBufferName); - - if (ctx->ReadBuffer->Name != save->ReadBufferName) - _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, save->ReadBufferName); + _mesa_bind_framebuffers(ctx, save->DrawBuffer, save->ReadBuffer); + _mesa_reference_framebuffer(&save->DrawBuffer, NULL); + _mesa_reference_framebuffer(&save->ReadBuffer, NULL); if (state & MESA_META_DRAW_BUFFERS) { _mesa_drawbuffers(ctx, ctx->DrawBuffer, ctx->Const.MaxDrawBuffers, @@ -2807,7 +2784,7 @@ copytexsubimage_using_blit_framebuffer(struct gl_context *ctx, GLuint dims, GLint x, GLint y, GLsizei width, GLsizei height) { - GLuint fbo; + struct gl_framebuffer *drawFb; bool success = false; GLbitfield mask; GLenum status; @@ -2815,32 +2792,37 @@ copytexsubimage_using_blit_framebuffer(struct gl_context *ctx, GLuint dims, if (!ctx->Extensions.ARB_framebuffer_object) return false; - _mesa_meta_begin(ctx, MESA_META_ALL & ~MESA_META_DRAW_BUFFERS); + drawFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (drawFb == NULL) + return false; - _mesa_GenFramebuffers(1, &fbo); - _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); + _mesa_meta_begin(ctx, MESA_META_ALL & ~MESA_META_DRAW_BUFFERS); + _mesa_bind_framebuffers(ctx, drawFb, ctx->ReadBuffer); if (rb->_BaseFormat == GL_DEPTH_STENCIL || rb->_BaseFormat == GL_DEPTH_COMPONENT) { - _mesa_meta_bind_fbo_image(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, - texImage, zoffset); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, + GL_DEPTH_ATTACHMENT, + texImage, zoffset); mask = GL_DEPTH_BUFFER_BIT; if (rb->_BaseFormat == GL_DEPTH_STENCIL && texImage->_BaseFormat == GL_DEPTH_STENCIL) { - 
_mesa_meta_bind_fbo_image(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, - texImage, zoffset); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, + GL_STENCIL_ATTACHMENT, + texImage, zoffset); mask |= GL_STENCIL_BUFFER_BIT; } _mesa_DrawBuffer(GL_NONE); } else { - _mesa_meta_bind_fbo_image(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - texImage, zoffset); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, + GL_COLOR_ATTACHMENT0, + texImage, zoffset); mask = GL_COLOR_BUFFER_BIT; _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0); } - status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); + status = _mesa_check_framebuffer_status(ctx, ctx->DrawBuffer); if (status != GL_FRAMEBUFFER_COMPLETE) goto out; @@ -2866,7 +2848,7 @@ copytexsubimage_using_blit_framebuffer(struct gl_context *ctx, GLuint dims, success = mask == 0x0; out: - _mesa_DeleteFramebuffers(1, &fbo); + _mesa_reference_framebuffer(&drawFb, NULL); _mesa_meta_end(ctx); return success; } @@ -2961,8 +2943,8 @@ _mesa_meta_CopyTexSubImage(struct gl_context *ctx, GLuint dims, static void meta_decompress_fbo_cleanup(struct decompress_fbo_state *decompress_fbo) { - if (decompress_fbo->FBO != 0) { - _mesa_DeleteFramebuffers(1, &decompress_fbo->FBO); + if (decompress_fbo->fb != NULL) { + _mesa_reference_framebuffer(&decompress_fbo->fb, NULL); _mesa_reference_renderbuffer(&decompress_fbo->rb, NULL); } @@ -3065,7 +3047,7 @@ decompress_texture_image(struct gl_context *ctx, ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler); /* Create/bind FBO/renderbuffer */ - if (decompress_fbo->FBO == 0) { + if (decompress_fbo->fb == NULL) { decompress_fbo->rb = ctx->Driver.NewRenderbuffer(ctx, 0xDEADBEEF); if (decompress_fbo->rb == NULL) { _mesa_meta_end(ctx); @@ -3074,20 +3056,25 @@ decompress_texture_image(struct gl_context *ctx, decompress_fbo->rb->RefCount = 1; - _mesa_GenFramebuffers(1, &decompress_fbo->FBO); - _mesa_BindFramebuffer(GL_FRAMEBUFFER_EXT, decompress_fbo->FBO); + decompress_fbo->fb = ctx->Driver.NewFramebuffer(ctx, 
0xDEADBEEF); + if (decompress_fbo->fb == NULL) { + _mesa_meta_end(ctx); + return false; + } + + _mesa_bind_framebuffers(ctx, decompress_fbo->fb, decompress_fbo->fb); _mesa_framebuffer_renderbuffer(ctx, ctx->DrawBuffer, GL_COLOR_ATTACHMENT0, decompress_fbo->rb); } else { - _mesa_BindFramebuffer(GL_FRAMEBUFFER_EXT, decompress_fbo->FBO); + _mesa_bind_framebuffers(ctx, decompress_fbo->fb, decompress_fbo->fb); } /* alloc dest surface */ if (width > decompress_fbo->Width || height > decompress_fbo->Height) { _mesa_renderbuffer_storage(ctx, decompress_fbo->rb, rbFormat, width, height, 0); - status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); + status = _mesa_check_framebuffer_status(ctx, ctx->DrawBuffer); if (status != GL_FRAMEBUFFER_COMPLETE) { /* If the framebuffer isn't complete then we'll leave * decompress_fbo->Width as zero so that it will fail again next time @@ -3434,10 +3421,11 @@ cleartexsubimage_color(struct gl_context *ctx, GLenum datatype; GLenum status; - _mesa_meta_bind_fbo_image(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - texImage, zoffset); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, + GL_COLOR_ATTACHMENT0, + texImage, zoffset); - status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); + status = _mesa_check_framebuffer_status(ctx, ctx->DrawBuffer); if (status != GL_FRAMEBUFFER_COMPLETE) return false; @@ -3481,14 +3469,16 @@ cleartexsubimage_depth_stencil(struct gl_context *ctx, GLfloat depthValue; GLenum status; - _mesa_meta_bind_fbo_image(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, - texImage, zoffset); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, + GL_DEPTH_ATTACHMENT, + texImage, zoffset); if (texImage->_BaseFormat == GL_DEPTH_STENCIL) - _mesa_meta_bind_fbo_image(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, - texImage, zoffset); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, + GL_STENCIL_ATTACHMENT, + texImage, zoffset); - status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); + status = 
_mesa_check_framebuffer_status(ctx, ctx->DrawBuffer); if (status != GL_FRAMEBUFFER_COMPLETE) return false; @@ -3526,11 +3516,14 @@ cleartexsubimage_for_zoffset(struct gl_context *ctx, GLint zoffset, const GLvoid *clearValue) { - GLuint fbo; + struct gl_framebuffer *drawFb; bool success; - _mesa_GenFramebuffers(1, &fbo); - _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); + drawFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (drawFb == NULL) + return false; + + _mesa_bind_framebuffers(ctx, drawFb, ctx->ReadBuffer); switch(texImage->_BaseFormat) { case GL_DEPTH_STENCIL: @@ -3543,7 +3536,7 @@ cleartexsubimage_for_zoffset(struct gl_context *ctx, break; } - _mesa_DeleteFramebuffers(1, &fbo); + _mesa_reference_framebuffer(&drawFb, NULL); return success; } diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h index 7a120b6c44b..60ae5f7577f 100644 --- a/src/mesa/drivers/common/meta.h +++ b/src/mesa/drivers/common/meta.h @@ -186,7 +186,8 @@ struct save_state GLboolean RasterDiscard; GLboolean TransformFeedbackNeedsResume; - GLuint DrawBufferName, ReadBufferName; + struct gl_framebuffer *DrawBuffer; + struct gl_framebuffer *ReadBuffer; /** MESA_META_DRAW_BUFFERS */ GLenum ColorDrawBuffers[MAX_DRAW_BUFFERS]; @@ -368,7 +369,7 @@ struct gen_mipmap_state { GLuint VAO; struct gl_buffer_object *buf_obj; - GLuint FBO; + struct gl_framebuffer *fb; struct gl_sampler_object *samp_obj; struct blit_shader_table shaders; @@ -381,7 +382,7 @@ struct gen_mipmap_state struct decompress_fbo_state { struct gl_renderbuffer *rb; - GLuint FBO; + struct gl_framebuffer *fb; GLint Width, Height; }; @@ -661,7 +662,10 @@ _mesa_meta_glsl_generate_mipmap_cleanup(struct gl_context *ctx, struct gen_mipmap_state *mipmap); void -_mesa_meta_bind_fbo_image(GLenum target, GLenum attachment, - struct gl_texture_image *texImage, GLuint layer); +_mesa_meta_framebuffer_texture_image(struct gl_context *ctx, + struct gl_framebuffer *fb, + GLenum attachment, + struct gl_texture_image 
*texImage, + GLuint layer); #endif /* META_H */ diff --git a/src/mesa/drivers/common/meta_copy_image.c b/src/mesa/drivers/common/meta_copy_image.c index 57c3f686b0c..18b9681b710 100644 --- a/src/mesa/drivers/common/meta_copy_image.c +++ b/src/mesa/drivers/common/meta_copy_image.c @@ -30,6 +30,7 @@ #include "teximage.h" #include "texobj.h" #include "fbobject.h" +#include "framebuffer.h" #include "buffers.h" #include "state.h" #include "mtypes.h" @@ -166,7 +167,8 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx, GLint src_internal_format, dst_internal_format; GLuint src_view_texture = 0; struct gl_texture_image *src_view_tex_image; - GLuint fbos[2]; + struct gl_framebuffer *readFb; + struct gl_framebuffer *drawFb; bool success = false; GLbitfield mask; GLenum status, attachment; @@ -210,9 +212,15 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx, /* We really only need to stash the bound framebuffers and scissor. */ _mesa_meta_begin(ctx, MESA_META_SCISSOR); - _mesa_GenFramebuffers(2, fbos); - _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, fbos[0]); - _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbos[1]); + readFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (readFb == NULL) + goto meta_end; + + drawFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (drawFb == NULL) + goto meta_end; + + _mesa_bind_framebuffers(ctx, drawFb, readFb); switch (_mesa_get_format_base_format(src_format)) { case GL_DEPTH_COMPONENT: @@ -238,14 +246,14 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx, /* Prefer the tex image because, even if we have a renderbuffer, we may * have had to wrap it in a texture view. 
*/ - _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, attachment, - src_view_tex_image, src_z); + _mesa_meta_framebuffer_texture_image(ctx, ctx->ReadBuffer, attachment, + src_view_tex_image, src_z); } else { _mesa_framebuffer_renderbuffer(ctx, ctx->ReadBuffer, attachment, src_renderbuffer); } - status = _mesa_CheckFramebufferStatus(GL_READ_FRAMEBUFFER); + status = _mesa_check_framebuffer_status(ctx, ctx->ReadBuffer); if (status != GL_FRAMEBUFFER_COMPLETE) goto meta_end; @@ -253,11 +261,11 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx, _mesa_framebuffer_renderbuffer(ctx, ctx->DrawBuffer, attachment, dst_renderbuffer); } else { - _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, attachment, - dst_tex_image, dst_z); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, attachment, + dst_tex_image, dst_z); } - status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); + status = _mesa_check_framebuffer_status(ctx, ctx->DrawBuffer); if (status != GL_FRAMEBUFFER_COMPLETE) goto meta_end; @@ -281,7 +289,8 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx, success = true; meta_end: - _mesa_DeleteFramebuffers(2, fbos); + _mesa_reference_framebuffer(&readFb, NULL); + _mesa_reference_framebuffer(&drawFb, NULL); _mesa_meta_end(ctx); cleanup: diff --git a/src/mesa/drivers/common/meta_generate_mipmap.c b/src/mesa/drivers/common/meta_generate_mipmap.c index 27435b2b722..892d8d34619 100644 --- a/src/mesa/drivers/common/meta_generate_mipmap.c +++ b/src/mesa/drivers/common/meta_generate_mipmap.c @@ -35,6 +35,7 @@ #include "main/enums.h" #include "main/enable.h" #include "main/fbobject.h" +#include "main/framebuffer.h" #include "main/macros.h" #include "main/mipmap.h" #include "main/teximage.h" @@ -56,21 +57,11 @@ static bool fallback_required(struct gl_context *ctx, GLenum target, struct gl_texture_object *texObj) { - const GLuint fboSave = ctx->DrawBuffer->Name; struct gen_mipmap_state *mipmap = &ctx->Meta->Mipmap; struct gl_texture_image 
*baseImage; GLuint srcLevel; GLenum status; - /* GL_DRAW_FRAMEBUFFER does not exist in OpenGL ES 1.x, and since - * _mesa_meta_begin hasn't been called yet, we have to work-around API - * difficulties. The whole reason that GL_DRAW_FRAMEBUFFER is used instead - * of GL_FRAMEBUFFER is that the read framebuffer may be different. This - * is moot in OpenGL ES 1.x. - */ - const GLenum fbo_target = ctx->API == API_OPENGLES - ? GL_FRAMEBUFFER : GL_DRAW_FRAMEBUFFER; - /* check for fallbacks */ if (target == GL_TEXTURE_3D) { _mesa_perf_debug(ctx, MESA_DEBUG_SEVERITY_HIGH, @@ -109,16 +100,19 @@ fallback_required(struct gl_context *ctx, GLenum target, /* * Test that we can actually render in the texture's format. */ - if (!mipmap->FBO) - _mesa_GenFramebuffers(1, &mipmap->FBO); - _mesa_BindFramebuffer(fbo_target, mipmap->FBO); - - _mesa_meta_bind_fbo_image(fbo_target, GL_COLOR_ATTACHMENT0, baseImage, 0); - - status = _mesa_CheckFramebufferStatus(fbo_target); + if (mipmap->fb == NULL) { + mipmap->fb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (mipmap->fb == NULL) { + _mesa_perf_debug(ctx, MESA_DEBUG_SEVERITY_HIGH, + "glGenerateMipmap() ran out of memory\n"); + return true; + } + } - _mesa_BindFramebuffer(fbo_target, fboSave); + _mesa_meta_framebuffer_texture_image(ctx, mipmap->fb, + GL_COLOR_ATTACHMENT0, baseImage, 0); + status = _mesa_check_framebuffer_status(ctx, mipmap->fb); if (status != GL_FRAMEBUFFER_COMPLETE_EXT) { _mesa_perf_debug(ctx, MESA_DEBUG_SEVERITY_HIGH, "glGenerateMipmap() got incomplete FBO\n"); @@ -138,11 +132,7 @@ _mesa_meta_glsl_generate_mipmap_cleanup(struct gl_context *ctx, mipmap->VAO = 0; _mesa_reference_buffer_object(ctx, &mipmap->buf_obj, NULL); _mesa_reference_sampler_object(ctx, &mipmap->samp_obj, NULL); - - if (mipmap->FBO != 0) { - _mesa_DeleteFramebuffers(1, &mipmap->FBO); - mipmap->FBO = 0; - } + _mesa_reference_framebuffer(&mipmap->fb, NULL); _mesa_meta_blit_shader_table_cleanup(&mipmap->shaders); } @@ -251,8 +241,8 @@ 
_mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, _mesa_bind_sampler(ctx, ctx->Texture.CurrentUnit, mipmap->samp_obj); - assert(mipmap->FBO != 0); - _mesa_BindFramebuffer(GL_FRAMEBUFFER_EXT, mipmap->FBO); + assert(mipmap->fb != NULL); + _mesa_bind_framebuffers(ctx, mipmap->fb, mipmap->fb); _mesa_texture_parameteriv(ctx, texObj, GL_GENERATE_MIPMAP, &always_false, false); @@ -354,10 +344,12 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, _mesa_buffer_data(ctx, mipmap->buf_obj, GL_NONE, sizeof(verts), verts, GL_DYNAMIC_DRAW, __func__); - _mesa_meta_bind_fbo_image(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstImage, layer); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, + GL_COLOR_ATTACHMENT0, dstImage, + layer); /* sanity check */ - if (_mesa_CheckFramebufferStatus(GL_FRAMEBUFFER) != + if (_mesa_check_framebuffer_status(ctx, ctx->DrawBuffer) != GL_FRAMEBUFFER_COMPLETE) { _mesa_problem(ctx, "Unexpected incomplete framebuffer in " "_mesa_meta_GenerateMipmap()"); diff --git a/src/mesa/drivers/common/meta_tex_subimage.c b/src/mesa/drivers/common/meta_tex_subimage.c index 4adaad7777b..639d3236359 100644 --- a/src/mesa/drivers/common/meta_tex_subimage.c +++ b/src/mesa/drivers/common/meta_tex_subimage.c @@ -30,6 +30,7 @@ #include "buffers.h" #include "clear.h" #include "fbobject.h" +#include "framebuffer.h" #include "glformats.h" #include "glheader.h" #include "image.h" @@ -178,7 +179,9 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims, const struct gl_pixelstore_attrib *packing) { struct gl_buffer_object *pbo = NULL; - GLuint pbo_tex = 0, fbos[2] = { 0, 0 }; + GLuint pbo_tex = 0; + struct gl_framebuffer *readFb = NULL; + struct gl_framebuffer *drawFb = NULL; int image_height; struct gl_texture_image *pbo_tex_image; GLenum status; @@ -225,9 +228,15 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims, _mesa_meta_begin(ctx, ~(MESA_META_PIXEL_TRANSFER | MESA_META_PIXEL_STORE)); - _mesa_GenFramebuffers(2, 
fbos); - _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, fbos[0]); - _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbos[1]); + readFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (readFb == NULL) + goto fail; + + drawFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (drawFb == NULL) + goto fail; + + _mesa_bind_framebuffers(ctx, drawFb, tex_image ? readFb : ctx->ReadBuffer); if (tex_image->TexObject->Target == GL_TEXTURE_1D_ARRAY) { assert(depth == 1); @@ -239,17 +248,19 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims, yoffset = 0; } - _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - pbo_tex_image, 0); + _mesa_meta_framebuffer_texture_image(ctx, ctx->ReadBuffer, + GL_COLOR_ATTACHMENT0, + pbo_tex_image, 0); /* If this passes on the first layer it should pass on the others */ - status = _mesa_CheckFramebufferStatus(GL_READ_FRAMEBUFFER); + status = _mesa_check_framebuffer_status(ctx, ctx->ReadBuffer); if (status != GL_FRAMEBUFFER_COMPLETE) goto fail; - _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - tex_image, zoffset); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, + GL_COLOR_ATTACHMENT0, + tex_image, zoffset); /* If this passes on the first layer it should pass on the others */ - status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); + status = _mesa_check_framebuffer_status(ctx, ctx->DrawBuffer); if (status != GL_FRAMEBUFFER_COMPLETE) goto fail; @@ -263,8 +274,9 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims, goto fail; for (z = 1; z < depth; z++) { - _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - tex_image, zoffset + z); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, + GL_COLOR_ATTACHMENT0, + tex_image, zoffset + z); _mesa_update_state(ctx); @@ -279,7 +291,8 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims, success = true; fail: - _mesa_DeleteFramebuffers(2, fbos); + _mesa_reference_framebuffer(&readFb, 
NULL); + _mesa_reference_framebuffer(&drawFb, NULL); _mesa_DeleteTextures(1, &pbo_tex); _mesa_reference_buffer_object(ctx, &pbo, NULL); @@ -297,7 +310,9 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims, const struct gl_pixelstore_attrib *packing) { struct gl_buffer_object *pbo = NULL; - GLuint pbo_tex = 0, fbos[2] = { 0, 0 }; + GLuint pbo_tex = 0; + struct gl_framebuffer *readFb; + struct gl_framebuffer *drawFb; int image_height; struct gl_texture_image *pbo_tex_image; struct gl_renderbuffer *rb = NULL; @@ -360,7 +375,13 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims, if (ctx->Extensions.ARB_color_buffer_float) _mesa_ClampColor(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE); - _mesa_GenFramebuffers(2, fbos); + readFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (readFb == NULL) + goto fail; + + drawFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (drawFb == NULL) + goto fail; if (tex_image && tex_image->TexObject->Target == GL_TEXTURE_1D_ARRAY) { assert(depth == 1); @@ -376,23 +397,24 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims, * we're doing a ReadPixels and we should just use whatever framebuffer * the client has bound. */ + _mesa_bind_framebuffers(ctx, drawFb, tex_image ? 
readFb : ctx->ReadBuffer); if (tex_image) { - _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, fbos[0]); - _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - tex_image, zoffset); + _mesa_meta_framebuffer_texture_image(ctx, ctx->ReadBuffer, + GL_COLOR_ATTACHMENT0, + tex_image, zoffset); /* If this passes on the first layer it should pass on the others */ - status = _mesa_CheckFramebufferStatus(GL_READ_FRAMEBUFFER); + status = _mesa_check_framebuffer_status(ctx, ctx->ReadBuffer); if (status != GL_FRAMEBUFFER_COMPLETE) goto fail; } else { assert(depth == 1); } - _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbos[1]); - _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - pbo_tex_image, 0); + _mesa_meta_framebuffer_texture_image(ctx, ctx->DrawBuffer, + GL_COLOR_ATTACHMENT0, + pbo_tex_image, 0); /* If this passes on the first layer it should pass on the others */ - status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); + status = _mesa_check_framebuffer_status(ctx, ctx->DrawBuffer); if (status != GL_FRAMEBUFFER_COMPLETE) goto fail; @@ -427,8 +449,9 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims, } for (z = 1; z < depth; z++) { - _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - tex_image, zoffset + z); + _mesa_meta_framebuffer_texture_image(ctx, ctx->ReadBuffer, + GL_COLOR_ATTACHMENT0, + tex_image, zoffset + z); _mesa_update_state(ctx); @@ -452,7 +475,8 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims, success = true; fail: - _mesa_DeleteFramebuffers(2, fbos); + _mesa_reference_framebuffer(&drawFb, NULL); + _mesa_reference_framebuffer(&readFb, NULL); _mesa_DeleteTextures(1, &pbo_tex); _mesa_reference_buffer_object(ctx, &pbo, NULL); diff --git a/src/mesa/drivers/dri/Android.mk b/src/mesa/drivers/dri/Android.mk index 42bfd77d467..7e7587b0753 100644 --- a/src/mesa/drivers/dri/Android.mk +++ b/src/mesa/drivers/dri/Android.mk @@ -28,9 +28,6 @@ include 
$(LOCAL_PATH)/common/Makefile.sources #----------------------------------------------- # Variables common to all DRI drivers -MESA_DRI_MODULE_PATH := $(TARGET_OUT_SHARED_LIBRARIES)/dri -MESA_DRI_MODULE_UNSTRIPPED_PATH := $(TARGET_OUT_SHARED_LIBRARIES_UNSTRIPPED)/dri - MESA_DRI_CFLAGS := \ -DHAVE_ANDROID_PLATFORM @@ -42,6 +39,8 @@ MESA_DRI_C_INCLUDES := \ MESA_DRI_WHOLE_STATIC_LIBRARIES := \ libmesa_glsl \ + libmesa_compiler \ + libmesa_nir \ libmesa_megadriver_stub \ libmesa_dri_common \ libmesa_dricore \ diff --git a/src/mesa/drivers/dri/common/Android.mk b/src/mesa/drivers/dri/common/Android.mk index f1a733011b9..8a21e630325 100644 --- a/src/mesa/drivers/dri/common/Android.mk +++ b/src/mesa/drivers/dri/common/Android.mk @@ -74,20 +74,24 @@ $(intermediates)/xmlpool/%.po: $(LOCAL_PATH)/xmlpool/%.po $(POT) sed -i -e 's/charset=.*\\n/charset=UTF-8\\n/' $@; \ fi -$(intermediates)/xmlpool/%/LC_MESSAGES/options.mo: $(intermediates)/xmlpool/%.po +PRIVATE_SCRIPT := $(LOCAL_PATH)/xmlpool/gen_xmlpool.py +PRIVATE_LOCALEDIR := $(intermediates)/xmlpool +PRIVATE_TEMPLATE_HEADER := $(LOCAL_PATH)/xmlpool/t_options.h +PRIVATE_MO_FILES := $(MESA_DRI_OPTIONS_LANGS:%=$(intermediates)/xmlpool/%/LC_MESSAGES/options.mo) + +LOCAL_GENERATED_SOURCES += $(PRIVATE_MO_FILES) + +$(PRIVATE_MO_FILES): $(intermediates)/xmlpool/%/LC_MESSAGES/options.mo: $(intermediates)/xmlpool/%.po mkdir -p $(dir $@) msgfmt -o $@ $< -$(MESA_DRI_OPTIONS_H): PRIVATE_SCRIPT := $(LOCAL_PATH)/xmlpool/gen_xmlpool.py -$(MESA_DRI_OPTIONS_H): PRIVATE_LOCALEDIR := $(intermediates)/xmlpool -$(MESA_DRI_OPTIONS_H): PRIVATE_TEMPLATE_HEADER := $(LOCAL_PATH)/xmlpool/t_options.h -$(MESA_DRI_OPTIONS_H): PRIVATE_MO_FILES := $(MESA_DRI_OPTIONS_LANGS:%=$(intermediates)/xmlpool/%/LC_MESSAGES/options.mo) -.SECONDEXPANSION: -$(MESA_DRI_OPTIONS_H): $$(PRIVATE_SCRIPT) $$(PRIVATE_TEMPLATE_HEADER) $$(PRIVATE_MO_FILES) - @mkdir -p $(dir $@) - $(hide) $(MESA_PYTHON2) $(PRIVATE_SCRIPT) $(PRIVATE_TEMPLATE_HEADER) \ +$(LOCAL_GENERATED_SOURCES): 
PRIVATE_PYTHON := $(MESA_PYTHON2) +$(LOCAL_GENERATED_SOURCES): PRIVATE_CUSTOM_TOOL = $(PRIVATE_PYTHON) $^ $(PRIVATE_TEMPLATE_HEADER) \ $(PRIVATE_LOCALEDIR) $(MESA_DRI_OPTIONS_LANGS) > $@ +$(MESA_DRI_OPTIONS_H): $(PRIVATE_SCRIPT) $(PRIVATE_TEMPLATE_HEADER) $(PRIVATE_MO_FILES) + $(transform-generated-source) + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) diff --git a/src/mesa/drivers/dri/i915/Android.mk b/src/mesa/drivers/dri/i915/Android.mk index 741ea260e94..97359fa049d 100644 --- a/src/mesa/drivers/dri/i915/Android.mk +++ b/src/mesa/drivers/dri/i915/Android.mk @@ -27,7 +27,7 @@ include $(CLEAR_VARS) LOCAL_MODULE := i915_dri ifeq ($(MESA_LOLLIPOP_BUILD),true) -LOCAL_MODULE_RELATIVE_PATH := $(notdir $(MESA_DRI_MODULE_PATH)) +LOCAL_MODULE_RELATIVE_PATH := $(MESA_DRI_MODULE_REL_PATH) else LOCAL_MODULE_PATH := $(MESA_DRI_MODULE_PATH) LOCAL_UNSTRIPPED_PATH := $(MESA_DRI_MODULE_UNSTRIPPED_PATH) @@ -54,7 +54,8 @@ LOCAL_SHARED_LIBRARIES := \ libdrm_intel LOCAL_GENERATED_SOURCES := \ - $(MESA_DRI_OPTIONS_H) + $(MESA_DRI_OPTIONS_H) \ + $(MESA_GEN_NIR_H) include $(MESA_COMMON_MK) include $(BUILD_SHARED_LIBRARY) diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index 57b033c07ea..83aaf9ea5a6 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -254,7 +254,6 @@ i915CreateContext(int api, /* FINISHME: Are there other options that should be enabled for software * FINISHME: vertex shaders? 
*/ - ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitCondCodes = true; ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectSampler = true; diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 59d795998c6..691bae359fb 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -598,26 +598,6 @@ upload_program(struct i915_fragment_program *p) 0, src0, T0_TEXKILL); break; - case OPCODE_KIL_NV: - if (inst->DstReg.CondMask == COND_TR) { - tmp = i915_get_utemp(p); - - /* The KIL instruction discards the fragment if any component of - * the source is < 0. Emit an immediate operand of {-1}.xywz. - */ - i915_emit_texld(p, get_live_regs(p, inst), - tmp, A0_DEST_CHANNEL_ALL, - 0, /* use a dummy dest reg */ - negate(swizzle(tmp, ONE, ONE, ONE, ONE), - 1, 1, 1, 1), - T0_TEXKILL); - } else { - p->error = 1; - i915_program_error(p, "Unsupported KIL_NV condition code: %d", - inst->DstReg.CondMask); - } - break; - case OPCODE_LG2: src0 = src_vector(p, &inst->SrcReg[0], program); diff --git a/src/mesa/drivers/dri/i965/Android.mk b/src/mesa/drivers/dri/i965/Android.mk index d30a053e10f..056b223f2de 100644 --- a/src/mesa/drivers/dri/i965/Android.mk +++ b/src/mesa/drivers/dri/i965/Android.mk @@ -27,7 +27,7 @@ include $(CLEAR_VARS) LOCAL_MODULE := i965_dri ifeq ($(MESA_LOLLIPOP_BUILD),true) -LOCAL_MODULE_RELATIVE_PATH := $(notdir $(MESA_DRI_MODULE_PATH)) +LOCAL_MODULE_RELATIVE_PATH := $(MESA_DRI_MODULE_REL_PATH) else LOCAL_MODULE_PATH := $(MESA_DRI_MODULE_PATH) LOCAL_UNSTRIPPED_PATH := $(MESA_DRI_MODULE_UNSTRIPPED_PATH) @@ -59,7 +59,8 @@ LOCAL_SHARED_LIBRARIES := \ libdrm_intel LOCAL_GENERATED_SOURCES := \ - $(MESA_DRI_OPTIONS_H) + $(MESA_DRI_OPTIONS_H) \ + $(MESA_GEN_NIR_H) include $(MESA_COMMON_MK) include $(BUILD_SHARED_LIBRARY) diff --git a/src/mesa/drivers/dri/i965/brw_compiler.c b/src/mesa/drivers/dri/i965/brw_compiler.c index 00e44af2f8d..a95f51bfa4a 100644 
--- a/src/mesa/drivers/dri/i965/brw_compiler.c +++ b/src/mesa/drivers/dri/i965/brw_compiler.c @@ -144,7 +144,6 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) compiler->glsl_compiler_options[i].MaxIfDepth = devinfo->gen < 6 ? 16 : UINT_MAX; - compiler->glsl_compiler_options[i].EmitCondCodes = true; compiler->glsl_compiler_options[i].EmitNoNoise = true; compiler->glsl_compiler_options[i].EmitNoMainReturn = true; compiler->glsl_compiler_options[i].EmitNoIndirectInput = true; diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp index 61f25811cb2..2c1abaf255c 100644 --- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp +++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp @@ -32,8 +32,8 @@ /* Look for and eliminate dead control flow: * * - if/endif - * . else in else/endif - * - if/else/endif + * - else in else/endif + * - then in if/else/endif */ bool dead_control_flow_eliminate(backend_shader *s) @@ -41,61 +41,42 @@ dead_control_flow_eliminate(backend_shader *s) bool progress = false; foreach_block_safe (block, s->cfg) { - bblock_t *if_block = NULL, *else_block = NULL, *endif_block = block; - bool found = false; + bblock_t *prev_block = block->prev(); + backend_instruction *const inst = block->start(); + backend_instruction *const prev_inst = prev_block->end(); /* ENDIF instructions, by definition, can only be found at the start of * basic blocks. 
*/ - backend_instruction *endif_inst = endif_block->start(); - if (endif_inst->opcode != BRW_OPCODE_ENDIF) - continue; - - backend_instruction *if_inst = NULL, *else_inst = NULL; - backend_instruction *prev_inst = endif_block->prev()->end(); - if (prev_inst->opcode == BRW_OPCODE_ELSE) { - else_inst = prev_inst; - else_block = endif_block->prev(); - found = true; - - if (else_block->start_ip == else_block->end_ip) - prev_inst = else_block->prev()->end(); - } + if (inst->opcode == BRW_OPCODE_ENDIF && + prev_inst->opcode == BRW_OPCODE_ELSE) { + bblock_t *const else_block = prev_block; + backend_instruction *const else_inst = prev_inst; - if (prev_inst->opcode == BRW_OPCODE_IF) { - if_inst = prev_inst; - if_block = else_block != NULL ? else_block->prev() - : endif_block->prev(); - found = true; - } else { - /* Don't remove the ENDIF if we didn't find a dead IF. */ - endif_inst = NULL; - } + else_inst->remove(else_block); + progress = true; + } else if (inst->opcode == BRW_OPCODE_ENDIF && + prev_inst->opcode == BRW_OPCODE_IF) { + bblock_t *const endif_block = block; + bblock_t *const if_block = prev_block; + backend_instruction *const endif_inst = inst; + backend_instruction *const if_inst = prev_inst; - if (found) { bblock_t *earlier_block = NULL, *later_block = NULL; - if (if_inst) { - if (if_block->start_ip == if_block->end_ip) { - earlier_block = if_block->prev(); - } else { - earlier_block = if_block; - } - if_inst->remove(if_block); + if (if_block->start_ip == if_block->end_ip) { + earlier_block = if_block->prev(); + } else { + earlier_block = if_block; } + if_inst->remove(if_block); - if (else_inst) { - else_inst->remove(else_block); - } - - if (endif_inst) { - if (endif_block->start_ip == endif_block->end_ip) { - later_block = endif_block->next(); - } else { - later_block = endif_block; - } - endif_inst->remove(endif_block); + if (endif_block->start_ip == endif_block->end_ip) { + later_block = endif_block->next(); + } else { + later_block = endif_block; } + 
endif_inst->remove(endif_block); assert((earlier_block == NULL) == (later_block == NULL)); if (earlier_block && earlier_block->can_combine_with(later_block)) { @@ -111,6 +92,19 @@ dead_control_flow_eliminate(backend_shader *s) } progress = true; + } else if (inst->opcode == BRW_OPCODE_ELSE && + prev_inst->opcode == BRW_OPCODE_IF) { + bblock_t *const else_block = block; + backend_instruction *const if_inst = prev_inst; + backend_instruction *const else_inst = inst; + + /* Since the else-branch is becoming the new then-branch, the + * condition has to be inverted. + */ + if_inst->predicate_inverse = !if_inst->predicate_inverse; + else_inst->remove(else_block); + + progress = true; } } diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 35d8039ed13..2ef1d7bb825 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2526,6 +2526,8 @@ brw_send_indirect_message(struct brw_codegen *p, struct brw_inst *send; int setup; + dst = retype(dst, BRW_REGISTER_TYPE_UW); + assert(desc.type == BRW_REGISTER_TYPE_UD); /* We hold on to the setup instruction (the SEND in the direct case, the OR @@ -3207,6 +3209,7 @@ brw_memory_fence(struct brw_codegen *p, * message doesn't write anything back. 
*/ insn = next_insn(p, BRW_OPCODE_SEND); + dst = retype(dst, BRW_REGISTER_TYPE_UW); brw_set_dest(p, insn, dst); brw_set_src0(p, insn, dst); brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE, @@ -3473,7 +3476,7 @@ brw_barrier(struct brw_codegen *p, struct brw_reg src) assert(devinfo->gen >= 7); inst = next_insn(p, BRW_OPCODE_SEND); - brw_set_dest(p, inst, brw_null_reg()); + brw_set_dest(p, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW)); brw_set_src0(p, inst, src); brw_set_src1(p, inst, brw_null_reg()); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 81a83400ea0..0f9de30f05b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -5643,8 +5643,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex, true); - shader = brw_nir_lower_io(shader, compiler->devinfo, true, - false, NULL); + brw_nir_lower_fs_inputs(shader); + brw_nir_lower_fs_outputs(shader); shader = brw_postprocess_nir(shader, compiler->devinfo, true); /* key->alpha_test_func means simulating alpha testing via discards, diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index cde6566c05c..0e743de7faf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -139,6 +139,8 @@ operands_match(const fs_inst *a, const fs_inst *b, bool *negate) ys[1].f = ys1_imm; *negate = (xs0_negate != xs1_negate) != (ys0_negate != ys1_negate); + if (*negate && (a->saturate || b->saturate)) + return false; return ret; } else if (!a->is_commutative()) { bool match = true; diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index cac92b37bd5..75c29c597f5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -455,7 +455,7 @@ fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload) insn = brw_next_insn(p, BRW_OPCODE_SEND); - brw_set_dest(p, insn, brw_null_reg()); + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW)); brw_set_src0(p, insn, payload); brw_set_src1(p, insn, brw_imm_d(0)); diff --git a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp index 52570943996..dc2b0c8aa8d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp @@ -56,8 +56,7 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block) inst->dst.file != VGRF || inst->dst.type != inst->src[0].type || inst->src[0].file != VGRF || - inst->src[0].abs || - inst->src[0].negate) + inst->src[0].abs) continue; int src_var = v->live_intervals->var_from_reg(inst->src[0]); @@ -82,6 +81,31 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block) scan_inst->src[i].type = inst->dst.type; } } + + if (inst->src[0].negate) { + if (scan_inst->opcode == BRW_OPCODE_MUL) { + scan_inst->src[0].negate = !scan_inst->src[0].negate; + inst->src[0].negate = false; + } else if (scan_inst->opcode == BRW_OPCODE_MAD) { + scan_inst->src[0].negate = !scan_inst->src[0].negate; + scan_inst->src[1].negate = !scan_inst->src[1].negate; + inst->src[0].negate = false; + } else if (scan_inst->opcode == BRW_OPCODE_ADD) { + if (scan_inst->src[1].file == IMM) { + if (!brw_negate_immediate(scan_inst->src[1].type, + &scan_inst->src[1].as_brw_reg())) { + break; + } + } else { + scan_inst->src[1].negate = !scan_inst->src[1].negate; + } + scan_inst->src[0].negate = !scan_inst->src[0].negate; + inst->src[0].negate = false; + } else { + break; + } + } + scan_inst->saturate = true; inst->saturate = false; progress = true; @@ -96,7 +120,9 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block) if 
(scan_inst->opcode != BRW_OPCODE_MOV || !scan_inst->saturate || scan_inst->src[0].abs || - scan_inst->src[0].negate) { + scan_inst->src[0].negate || + scan_inst->src[0].abs != inst->src[0].abs || + scan_inst->src[0].negate != inst->src[0].negate) { interfered = true; break; } diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c index 488fa6c0c45..b81b1438ba3 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c @@ -36,6 +36,7 @@ #include "main/varray.h" #include "main/uniforms.h" #include "main/fbobject.h" +#include "main/framebuffer.h" #include "main/renderbuffer.h" #include "main/texobj.h" @@ -848,18 +849,23 @@ brw_meta_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt) { struct gl_context *ctx = &brw->ctx; - GLuint fbo; + struct gl_framebuffer *drawFb; struct gl_renderbuffer *rb; struct rect rect; brw_emit_mi_flush(brw); + drawFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (drawFb == NULL) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "in %s", __func__); + return; + } + _mesa_meta_begin(ctx, MESA_META_ALL); - _mesa_GenFramebuffers(1, &fbo); rb = brw_get_rb_for_slice(brw, mt, 0, 0, false); - _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); + _mesa_bind_framebuffers(ctx, drawFb, ctx->ReadBuffer); _mesa_framebuffer_renderbuffer(ctx, ctx->DrawBuffer, GL_COLOR_ATTACHMENT0, rb); _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0); @@ -888,7 +894,7 @@ brw_meta_resolve_color(struct brw_context *brw, use_rectlist(brw, false); _mesa_reference_renderbuffer(&rb, NULL); - _mesa_DeleteFramebuffers(1, &fbo); + _mesa_reference_framebuffer(&drawFb, NULL); _mesa_meta_end(ctx); diff --git a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c index 5cfaec673c0..5b0c2e9bdd5 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c +++ b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c @@ -49,6 +49,7 @@ 
#include "main/blit.h" #include "main/buffers.h" #include "main/fbobject.h" +#include "main/framebuffer.h" #include "main/uniforms.h" #include "main/texparam.h" #include "main/texobj.h" @@ -424,8 +425,9 @@ brw_meta_stencil_blit(struct brw_context *brw, struct gl_context *ctx = &brw->ctx; struct blit_dims dims = *orig_dims; struct fb_tex_blit_state blit; - GLuint prog, fbo; - struct gl_renderbuffer *rb; + GLuint prog; + struct gl_framebuffer *drawFb = NULL; + struct gl_renderbuffer *rb = NULL; GLenum target; _mesa_meta_fb_tex_blit_begin(ctx, &blit); @@ -436,13 +438,18 @@ brw_meta_stencil_blit(struct brw_context *brw, assert(ctx->Extensions.ARB_texture_stencil8 == false); ctx->Extensions.ARB_texture_stencil8 = true; - _mesa_GenFramebuffers(1, &fbo); + drawFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (drawFb == NULL) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "in %s", __func__); + goto error; + } + /* Force the surface to be configured for level zero. */ rb = brw_get_rb_for_slice(brw, dst_mt, 0, dst_layer, true); adjust_msaa(&dims, dst_mt->num_samples); adjust_tiling(&dims, dst_mt->num_samples); - _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); + _mesa_bind_framebuffers(ctx, drawFb, ctx->ReadBuffer); _mesa_framebuffer_renderbuffer(ctx, ctx->DrawBuffer, GL_COLOR_ATTACHMENT0, rb); _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0); @@ -477,7 +484,7 @@ error: _mesa_meta_end(ctx); _mesa_reference_renderbuffer(&rb, NULL); - _mesa_DeleteFramebuffers(1, &fbo); + _mesa_reference_framebuffer(&drawFb, NULL); } void @@ -534,19 +541,22 @@ brw_meta_stencil_updownsample(struct brw_context *brw, .dst_x0 = 0, .dst_y0 = 0, .dst_x1 = dst->logical_width0, .dst_y1 = dst->logical_height0, .mirror_x = 0, .mirror_y = 0 }; - GLuint fbo; + struct gl_framebuffer *readFb; struct gl_renderbuffer *rb; if (dst->stencil_mt) dst = dst->stencil_mt; + readFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + if (readFb == NULL) + return; + brw_emit_mi_flush(brw); _mesa_meta_begin(ctx, MESA_META_ALL); - 
_mesa_GenFramebuffers(1, &fbo); rb = brw_get_rb_for_slice(brw, src, 0, 0, false); - _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, fbo); + _mesa_bind_framebuffers(ctx, ctx->DrawBuffer, readFb); _mesa_framebuffer_renderbuffer(ctx, ctx->ReadBuffer, GL_STENCIL_ATTACHMENT, rb); @@ -554,5 +564,5 @@ brw_meta_stencil_updownsample(struct brw_context *brw, brw_emit_mi_flush(brw); _mesa_reference_renderbuffer(&rb, NULL); - _mesa_DeleteFramebuffers(1, &fbo); + _mesa_reference_framebuffer(&readFb, NULL); } diff --git a/src/mesa/drivers/dri/i965/brw_meta_updownsample.c b/src/mesa/drivers/dri/i965/brw_meta_updownsample.c index e90e6b1e326..f5fc2072dd7 100644 --- a/src/mesa/drivers/dri/i965/brw_meta_updownsample.c +++ b/src/mesa/drivers/dri/i965/brw_meta_updownsample.c @@ -29,6 +29,7 @@ #include "main/buffers.h" #include "main/enums.h" #include "main/fbobject.h" +#include "main/framebuffer.h" #include "main/renderbuffer.h" #include "drivers/common/meta.h" @@ -93,7 +94,8 @@ brw_meta_updownsample(struct brw_context *brw, struct intel_mipmap_tree *dst_mt) { struct gl_context *ctx = &brw->ctx; - GLuint fbos[2], src_fbo, dst_fbo; + struct gl_framebuffer *src_fb; + struct gl_framebuffer *dst_fb; struct gl_renderbuffer *src_rb; struct gl_renderbuffer *dst_rb; GLenum drawbuffer; @@ -113,17 +115,20 @@ brw_meta_updownsample(struct brw_context *brw, brw_emit_mi_flush(brw); _mesa_meta_begin(ctx, MESA_META_ALL); - _mesa_GenFramebuffers(2, fbos); src_rb = brw_get_rb_for_slice(brw, src_mt, 0, 0, false); dst_rb = brw_get_rb_for_slice(brw, dst_mt, 0, 0, false); - src_fbo = fbos[0]; - dst_fbo = fbos[1]; + src_fb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); + dst_fb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); - _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, src_fbo); + if (src_fb == NULL || dst_fb == NULL || src_rb == NULL || dst_rb == NULL) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "in %s", __func__); + goto error; + } + + _mesa_bind_framebuffers(ctx, dst_fb, src_fb); _mesa_framebuffer_renderbuffer(ctx, 
ctx->ReadBuffer, attachment, src_rb); _mesa_ReadBuffer(drawbuffer); - _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_fbo); _mesa_framebuffer_renderbuffer(ctx, ctx->DrawBuffer, attachment, dst_rb); _mesa_DrawBuffer(drawbuffer); @@ -133,9 +138,11 @@ brw_meta_updownsample(struct brw_context *brw, dst_mt->logical_width0, dst_mt->logical_height0, blit_bit, GL_NEAREST); +error: _mesa_reference_renderbuffer(&src_rb, NULL); _mesa_reference_renderbuffer(&dst_rb, NULL); - _mesa_DeleteFramebuffers(2, fbos); + _mesa_reference_framebuffer(&src_fb, NULL); + _mesa_reference_framebuffer(&dst_fb, NULL); _mesa_meta_end(ctx); diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index e9351a5556a..ba9cb3f608c 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -149,7 +149,7 @@ remap_inputs_with_vue_map(nir_block *block, void *closure) struct remap_patch_urb_offsets_state { nir_builder b; - struct brw_vue_map vue_map; + const struct brw_vue_map *vue_map; }; static bool @@ -167,7 +167,7 @@ remap_patch_urb_offsets(nir_block *block, void *closure) if ((stage == MESA_SHADER_TESS_CTRL && is_output(intrin)) || (stage == MESA_SHADER_TESS_EVAL && is_input(intrin))) { - int vue_slot = state->vue_map.varying_to_slot[intrin->const_index[0]]; + int vue_slot = state->vue_map->varying_to_slot[intrin->const_index[0]]; assert(vue_slot != -1); intrin->const_index[0] = vue_slot; @@ -176,7 +176,7 @@ remap_patch_urb_offsets(nir_block *block, void *closure) nir_const_value *const_vertex = nir_src_as_const_value(*vertex); if (const_vertex) { intrin->const_index[0] += const_vertex->u[0] * - state->vue_map.num_per_vertex_slots; + state->vue_map->num_per_vertex_slots; } else { state->b.cursor = nir_before_instr(&intrin->instr); @@ -185,7 +185,7 @@ remap_patch_urb_offsets(nir_block *block, void *closure) nir_imul(&state->b, nir_ssa_for_src(&state->b, *vertex, 1), nir_imm_int(&state->b, - state->vue_map.num_per_vertex_slots)); + 
state->vue_map->num_per_vertex_slots)); /* Add it to the existing offset */ nir_src *offset = nir_get_io_offset_src(intrin); @@ -202,110 +202,61 @@ remap_patch_urb_offsets(nir_block *block, void *closure) return true; } -static void -brw_nir_lower_inputs(nir_shader *nir, - const struct brw_device_info *devinfo, - bool is_scalar, - bool use_legacy_snorm_formula, - const uint8_t *vs_attrib_wa_flags) +void +brw_nir_lower_vs_inputs(nir_shader *nir, + const struct brw_device_info *devinfo, + bool is_scalar, + bool use_legacy_snorm_formula, + const uint8_t *vs_attrib_wa_flags) { - switch (nir->stage) { - case MESA_SHADER_VERTEX: - /* Start with the location of the variable's base. */ - foreach_list_typed(nir_variable, var, node, &nir->inputs) { - var->data.driver_location = var->data.location; - } - - /* Now use nir_lower_io to walk dereference chains. Attribute arrays - * are loaded as one vec4 per element (or matrix column), so we use - * type_size_vec4 here. - */ - nir_lower_io(nir, nir_var_shader_in, type_size_vec4); - - /* This pass needs actual constants */ - nir_opt_constant_folding(nir); - - add_const_offset_to_base(nir, nir_var_shader_in); - - brw_nir_apply_attribute_workarounds(nir, use_legacy_snorm_formula, - vs_attrib_wa_flags); + /* Start with the location of the variable's base. */ + foreach_list_typed(nir_variable, var, node, &nir->inputs) { + var->data.driver_location = var->data.location; + } - if (is_scalar) { - /* Finally, translate VERT_ATTRIB_* values into the actual registers. - * - * Note that we can use nir->info.inputs_read instead of - * key->inputs_read since the two are identical aside from Gen4-5 - * edge flag differences. - */ - GLbitfield64 inputs_read = nir->info.inputs_read; + /* Now use nir_lower_io to walk dereference chains. Attribute arrays + * are loaded as one vec4 per element (or matrix column), so we use + * type_size_vec4 here. 
+ */ + nir_lower_io(nir, nir_var_shader_in, type_size_vec4); - nir_foreach_function(nir, function) { - if (function->impl) { - nir_foreach_block(function->impl, remap_vs_attrs, &inputs_read); - } - } - } - break; - case MESA_SHADER_TESS_CTRL: - case MESA_SHADER_GEOMETRY: { - if (!is_scalar && nir->stage == MESA_SHADER_GEOMETRY) { - foreach_list_typed(nir_variable, var, node, &nir->inputs) { - var->data.driver_location = var->data.location; - } - } else { - /* The GLSL linker will have already matched up GS inputs and - * the outputs of prior stages. The driver does extend VS outputs - * in some cases, but only for legacy OpenGL or Gen4-5 hardware, - * neither of which offer geometry shader support. So we can - * safely ignore that. - * - * For SSO pipelines, we use a fixed VUE map layout based on variable - * locations, so we can rely on rendezvous-by-location to make this - * work. - * - * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not - * written by previous stages and shows up via payload magic. - */ - struct brw_vue_map input_vue_map; - GLbitfield64 inputs_read = - nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID; - brw_compute_vue_map(devinfo, &input_vue_map, inputs_read, - nir->info.separate_shader || - nir->stage == MESA_SHADER_TESS_CTRL); - - foreach_list_typed(nir_variable, var, node, &nir->inputs) { - var->data.driver_location = var->data.location; - } + /* This pass needs actual constants */ + nir_opt_constant_folding(nir); - /* Inputs are stored in vec4 slots, so use type_size_vec4(). */ - nir_lower_io(nir, nir_var_shader_in, type_size_vec4); + add_const_offset_to_base(nir, nir_var_shader_in); - /* This pass needs actual constants */ - nir_opt_constant_folding(nir); + brw_nir_apply_attribute_workarounds(nir, use_legacy_snorm_formula, + vs_attrib_wa_flags); - add_const_offset_to_base(nir, nir_var_shader_in); + if (is_scalar) { + /* Finally, translate VERT_ATTRIB_* values into the actual registers. 
+ * + * Note that we can use nir->info.inputs_read instead of + * key->inputs_read since the two are identical aside from Gen4-5 + * edge flag differences. + */ + GLbitfield64 inputs_read = nir->info.inputs_read; - nir_foreach_function(nir, function) { - if (function->impl) { - nir_foreach_block(function->impl, remap_inputs_with_vue_map, - &input_vue_map); - } + nir_foreach_function(nir, function) { + if (function->impl) { + nir_foreach_block(function->impl, remap_vs_attrs, &inputs_read); } } - break; } - case MESA_SHADER_TESS_EVAL: { - struct remap_patch_urb_offsets_state state; - brw_compute_tess_vue_map(&state.vue_map, - nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID, - nir->info.patch_inputs_read); +} - foreach_list_typed(nir_variable, var, node, &nir->inputs) { - var->data.driver_location = var->data.location; - } +void +brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar, + const struct brw_vue_map *vue_map) +{ + foreach_list_typed(nir_variable, var, node, &nir->inputs) { + var->data.driver_location = var->data.location; + } - nir_lower_io(nir, nir_var_shader_in, type_size_vec4); + /* Inputs are stored in vec4 slots, so use type_size_vec4(). */ + nir_lower_io(nir, nir_var_shader_in, type_size_vec4); + if (is_scalar || nir->stage != MESA_SHADER_GEOMETRY) { /* This pass needs actual constants */ nir_opt_constant_folding(nir); @@ -313,81 +264,93 @@ brw_nir_lower_inputs(nir_shader *nir, nir_foreach_function(nir, function) { if (function->impl) { - nir_builder_init(&state.b, function->impl); - nir_foreach_block(function->impl, remap_patch_urb_offsets, &state); + nir_foreach_block(function->impl, remap_inputs_with_vue_map, + (void *) vue_map); } } - break; - } - case MESA_SHADER_FRAGMENT: - assert(is_scalar); - nir_assign_var_locations(&nir->inputs, &nir->num_inputs, - type_size_scalar); - break; - case MESA_SHADER_COMPUTE: - /* Compute shaders have no inputs. 
*/ - assert(exec_list_is_empty(&nir->inputs)); - break; - default: - unreachable("unsupported shader stage"); } } -static void -brw_nir_lower_outputs(nir_shader *nir, - const struct brw_device_info *devinfo, - bool is_scalar) +void +brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue_map) { - switch (nir->stage) { - case MESA_SHADER_VERTEX: - case MESA_SHADER_TESS_EVAL: - case MESA_SHADER_GEOMETRY: - if (is_scalar) { - nir_assign_var_locations(&nir->outputs, &nir->num_outputs, - type_size_vec4_times_4); - nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4); - } else { - nir_foreach_variable(var, &nir->outputs) - var->data.driver_location = var->data.location; - } - break; - case MESA_SHADER_TESS_CTRL: { - struct remap_patch_urb_offsets_state state; - brw_compute_tess_vue_map(&state.vue_map, nir->info.outputs_written, - nir->info.patch_outputs_written); + struct remap_patch_urb_offsets_state state; + state.vue_map = vue_map; - nir_foreach_variable(var, &nir->outputs) { - var->data.driver_location = var->data.location; - } + foreach_list_typed(nir_variable, var, node, &nir->inputs) { + var->data.driver_location = var->data.location; + } - nir_lower_io(nir, nir_var_shader_out, type_size_vec4); + nir_lower_io(nir, nir_var_shader_in, type_size_vec4); - /* This pass needs actual constants */ - nir_opt_constant_folding(nir); + /* This pass needs actual constants */ + nir_opt_constant_folding(nir); - add_const_offset_to_base(nir, nir_var_shader_out); + add_const_offset_to_base(nir, nir_var_shader_in); - nir_foreach_function(nir, function) { - if (function->impl) { - nir_builder_init(&state.b, function->impl); - nir_foreach_block(function->impl, remap_patch_urb_offsets, &state); - } + nir_foreach_function(nir, function) { + if (function->impl) { + nir_builder_init(&state.b, function->impl); + nir_foreach_block(function->impl, remap_patch_urb_offsets, &state); } - break; } - case MESA_SHADER_FRAGMENT: +} + +void 
+brw_nir_lower_fs_inputs(nir_shader *nir) +{ + nir_assign_var_locations(&nir->inputs, &nir->num_inputs, type_size_scalar); + nir_lower_io(nir, nir_var_shader_in, type_size_scalar); +} + +void +brw_nir_lower_vue_outputs(nir_shader *nir, + bool is_scalar) +{ + if (is_scalar) { nir_assign_var_locations(&nir->outputs, &nir->num_outputs, - type_size_scalar); - break; - case MESA_SHADER_COMPUTE: - /* Compute shaders have no outputs. */ - assert(exec_list_is_empty(&nir->outputs)); - break; - default: - unreachable("unsupported shader stage"); + type_size_vec4_times_4); + nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4); + } else { + nir_foreach_variable(var, &nir->outputs) + var->data.driver_location = var->data.location; + nir_lower_io(nir, nir_var_shader_out, type_size_vec4); } } +void +brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue_map) +{ + struct remap_patch_urb_offsets_state state; + state.vue_map = vue_map; + + nir_foreach_variable(var, &nir->outputs) { + var->data.driver_location = var->data.location; + } + + nir_lower_io(nir, nir_var_shader_out, type_size_vec4); + + /* This pass needs actual constants */ + nir_opt_constant_folding(nir); + + add_const_offset_to_base(nir, nir_var_shader_out); + + nir_foreach_function(nir, function) { + if (function->impl) { + nir_builder_init(&state.b, function->impl); + nir_foreach_block(function->impl, remap_patch_urb_offsets, &state); + } + } +} + +void +brw_nir_lower_fs_outputs(nir_shader *nir) +{ + nir_assign_var_locations(&nir->outputs, &nir->num_outputs, + type_size_scalar); + nir_lower_io(nir, nir_var_shader_out, type_size_scalar); +} + static int type_size_scalar_bytes(const struct glsl_type *type) { @@ -414,7 +377,7 @@ brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar) } } -static void +void brw_nir_lower_shared(nir_shader *nir) { nir_assign_var_locations(&nir->shared, &nir->num_shared, @@ -510,27 +473,6 @@ brw_preprocess_nir(nir_shader *nir, bool is_scalar) return nir; } -/** 
Lower input and output loads and stores for i965. */ -nir_shader * -brw_nir_lower_io(nir_shader *nir, - const struct brw_device_info *devinfo, - bool is_scalar, - bool use_legacy_snorm_formula, - const uint8_t *vs_attrib_wa_flags) -{ - bool progress; /* Written by OPT and OPT_V */ - (void)progress; - - OPT_V(brw_nir_lower_inputs, devinfo, is_scalar, - use_legacy_snorm_formula, vs_attrib_wa_flags); - OPT_V(brw_nir_lower_outputs, devinfo, is_scalar); - if (nir->stage == MESA_SHADER_COMPUTE) - OPT_V(brw_nir_lower_shared); - OPT_V(nir_lower_io, nir_var_all, is_scalar ? type_size_scalar : type_size_vec4); - - return nir_optimize(nir, is_scalar); -} - /* Prepare the given shader for codegen * * This function is intended to be called right before going into the actual @@ -549,6 +491,8 @@ brw_postprocess_nir(nir_shader *nir, bool progress; /* Written by OPT and OPT_V */ (void)progress; + nir = nir_optimize(nir, is_scalar); + if (devinfo->gen >= 6) { /* Try and fuse multiply-adds */ // OPT(brw_nir_opt_peephole_ffma); @@ -608,7 +552,6 @@ brw_create_nir(struct brw_context *brw, bool is_scalar) { struct gl_context *ctx = &brw->ctx; - const struct brw_device_info *devinfo = brw->intelScreen->devinfo; const nir_shader_compiler_options *options = ctx->Const.ShaderCompilerOptions[stage].NirOptions; bool progress; @@ -635,12 +578,8 @@ brw_create_nir(struct brw_context *brw, OPT_V(nir_lower_atomics, shader_prog); } - if (nir->stage != MESA_SHADER_VERTEX && - nir->stage != MESA_SHADER_TESS_CTRL && - nir->stage != MESA_SHADER_TESS_EVAL && - nir->stage != MESA_SHADER_FRAGMENT) { - nir = brw_nir_lower_io(nir, devinfo, is_scalar, false, NULL); - } + if (nir->stage == MESA_SHADER_COMPUTE) + OPT_V(brw_nir_lower_shared); return nir; } diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h index 9a90e36964b..0ef34735561 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.h +++ b/src/mesa/drivers/dri/i965/brw_nir.h @@ -82,11 +82,21 @@ nir_shader 
*brw_create_nir(struct brw_context *brw, bool is_scalar); nir_shader *brw_preprocess_nir(nir_shader *nir, bool is_scalar); -nir_shader *brw_nir_lower_io(nir_shader *nir, - const struct brw_device_info *devinfo, - bool is_scalar, - bool use_legacy_snorm_formula, - const uint8_t *vs_attrib_wa_flags); + +void brw_nir_lower_shared(nir_shader *nir); +void brw_nir_lower_vs_inputs(nir_shader *nir, + const struct brw_device_info *devinfo, + bool is_scalar, + bool use_legacy_snorm_formula, + const uint8_t *vs_attrib_wa_flags); +void brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar, + const struct brw_vue_map *vue_map); +void brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue); +void brw_nir_lower_fs_inputs(nir_shader *nir); +void brw_nir_lower_vue_outputs(nir_shader *nir, bool is_scalar); +void brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue); +void brw_nir_lower_fs_outputs(nir_shader *nir); + nir_shader *brw_postprocess_nir(nir_shader *nir, const struct brw_device_info *devinfo, bool is_scalar); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 91e47800e1f..dfe6afcf6d0 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -1227,10 +1227,17 @@ brw_compile_tes(const struct brw_compiler *compiler, const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL]; nir_shader *nir = nir_shader_clone(mem_ctx, src_shader); - nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar); nir->info.inputs_read = key->inputs_read; nir->info.patch_inputs_read = key->patch_inputs_read; - nir = brw_nir_lower_io(nir, compiler->devinfo, is_scalar, false, NULL); + + struct brw_vue_map input_vue_map; + brw_compute_tess_vue_map(&input_vue_map, + nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID, + nir->info.patch_inputs_read); + + nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar); + 
brw_nir_lower_tes_inputs(nir, &input_vue_map); + brw_nir_lower_vue_outputs(nir, is_scalar); nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar); brw_compute_vue_map(devinfo, &prog_data->base.vue_map, @@ -1249,11 +1256,6 @@ brw_compile_tes(const struct brw_compiler *compiler, /* URB entry sizes are stored as a multiple of 64 bytes. */ prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; - struct brw_vue_map input_vue_map; - brw_compute_tess_vue_map(&input_vue_map, - nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID, - nir->info.patch_inputs_read); - bool need_patch_header = nir->info.system_values_read & (BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_OUTER) | BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_INNER)); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 8136339332b..0032634f023 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -2006,9 +2006,9 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex, is_scalar); - shader = brw_nir_lower_io(shader, compiler->devinfo, is_scalar, - use_legacy_snorm_formula, - key->gl_attrib_wa_flags); + brw_nir_lower_vs_inputs(shader, compiler->devinfo, is_scalar, + use_legacy_snorm_formula, key->gl_attrib_wa_flags); + brw_nir_lower_vue_outputs(shader, is_scalar); shader = brw_postprocess_nir(shader, compiler->devinfo, is_scalar); const unsigned *assembly = NULL; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 3f30f5b92d1..7df6c721430 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -594,11 +594,31 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, memset(&c, 0, sizeof(c)); c.key = *key; + const bool is_scalar = 
compiler->scalar_stage[MESA_SHADER_GEOMETRY]; nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); + + /* The GLSL linker will have already matched up GS inputs and the outputs + * of prior stages. The driver does extend VS outputs in some cases, but + * only for legacy OpenGL or Gen4-5 hardware, neither of which offer + * geometry shader support. So we can safely ignore that. + * + * For SSO pipelines, we use a fixed VUE map layout based on variable + * locations, so we can rely on rendezvous-by-location making this work. + * + * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not + * written by previous stages and shows up via payload magic. + */ + GLbitfield64 inputs_read = + shader->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID; + brw_compute_vue_map(compiler->devinfo, + &c.input_vue_map, inputs_read, + shader->info.separate_shader); + shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex, - compiler->scalar_stage[MESA_SHADER_GEOMETRY]); - shader = brw_postprocess_nir(shader, compiler->devinfo, - compiler->scalar_stage[MESA_SHADER_GEOMETRY]); + is_scalar); + brw_nir_lower_vue_inputs(shader, is_scalar, &c.input_vue_map); + brw_nir_lower_vue_outputs(shader, is_scalar); + shader = brw_postprocess_nir(shader, compiler->devinfo, is_scalar); prog_data->include_primitive_id = (shader->info.inputs_read & VARYING_BIT_PRIMITIVE_ID) != 0; @@ -775,23 +795,6 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, prog_data->vertices_in = shader->info.gs.vertices_in; - /* The GLSL linker will have already matched up GS inputs and the outputs - * of prior stages. The driver does extend VS outputs in some cases, but - * only for legacy OpenGL or Gen4-5 hardware, neither of which offer - * geometry shader support. So we can safely ignore that. - * - * For SSO pipelines, we use a fixed VUE map layout based on variable - * locations, so we can rely on rendezvous-by-location making this work. 
- * - * However, we need to ignore VARYING_SLOT_PRIMITIVE_ID, as it's not - * written by previous stages and shows up via payload magic. - */ - GLbitfield64 inputs_read = - shader->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID; - brw_compute_vue_map(compiler->devinfo, - &c.input_vue_map, inputs_read, - shader->info.separate_shader); - /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we * need to program a URB read length of ceiling(num_slots / 2). */ @@ -807,7 +810,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, brw_print_vue_map(stderr, &prog_data->base.vue_map); } - if (compiler->scalar_stage[MESA_SHADER_GEOMETRY]) { + if (is_scalar) { /* TODO: Support instanced GS. We have basically no tests... */ assert(prog_data->invocations == 1); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 2207909e710..17d5f2aeff4 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -676,9 +676,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) case nir_intrinsic_load_instance_id: case nir_intrinsic_load_base_instance: case nir_intrinsic_load_draw_id: - case nir_intrinsic_load_invocation_id: - case nir_intrinsic_load_tess_level_inner: - case nir_intrinsic_load_tess_level_outer: { + case nir_intrinsic_load_invocation_id: { gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic); src_reg val = src_reg(nir_system_values[sv]); assert(val.file != BAD_FILE); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp index df6b44dde14..f344eaad664 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp @@ -511,18 +511,25 @@ brw_compile_tcs(const struct brw_compiler *compiler, const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_CTRL]; nir_shader *nir = nir_shader_clone(mem_ctx, src_shader); - nir = 
brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar); nir->info.outputs_written = key->outputs_written; nir->info.patch_outputs_written = key->patch_outputs_written; - nir = brw_nir_lower_io(nir, compiler->devinfo, is_scalar, false, NULL); - nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar); - prog_data->instances = DIV_ROUND_UP(nir->info.tcs.vertices_out, 2); + struct brw_vue_map input_vue_map; + brw_compute_vue_map(devinfo, &input_vue_map, + nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID, + true); brw_compute_tess_vue_map(&vue_prog_data->vue_map, nir->info.outputs_written, nir->info.patch_outputs_written); + nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar); + brw_nir_lower_vue_inputs(nir, is_scalar, &input_vue_map); + brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map); + nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar); + + prog_data->instances = DIV_ROUND_UP(nir->info.tcs.vertices_out, 2); + /* Compute URB entry size. The maximum allowed URB entry size is 32k. * That divides up as follows: * @@ -549,11 +556,6 @@ brw_compile_tcs(const struct brw_compiler *compiler, /* URB entry sizes are stored as a multiple of 64 bytes. */ vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64; - struct brw_vue_map input_vue_map; - brw_compute_vue_map(devinfo, &input_vue_map, - nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID, - true); - /* HS does not use the usual payload pushing from URB to GRFs, * because we don't have enough registers for a full-size payload, and * the hardware is broken on Haswell anyway. 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp index ce5fefc75a9..e3c23f1a52f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp @@ -28,6 +28,7 @@ */ #include "brw_vec4_tes.h" +#include "brw_cfg.h" namespace brw { @@ -53,39 +54,10 @@ vec4_tes_visitor::make_reg_for_system_value(int location, const glsl_type *type) void vec4_tes_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr) { - const struct brw_tes_prog_data *tes_prog_data = - (const struct brw_tes_prog_data *) prog_data; - switch (instr->intrinsic) { - case nir_intrinsic_load_tess_level_outer: { - dst_reg dst(this, glsl_type::vec4_type); - nir_system_values[SYSTEM_VALUE_TESS_LEVEL_OUTER] = dst; - - dst_reg temp(this, glsl_type::vec4_type); - vec4_instruction *read = - emit(VEC4_OPCODE_URB_READ, temp, input_read_header); - read->offset = 1; - read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; - emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX))); - break; - } - case nir_intrinsic_load_tess_level_inner: { - dst_reg dst(this, glsl_type::vec2_type); - nir_system_values[SYSTEM_VALUE_TESS_LEVEL_INNER] = dst; - - /* Set up the message header to reference the proper parts of the URB */ - dst_reg temp(this, glsl_type::vec4_type); - vec4_instruction *read = - emit(VEC4_OPCODE_URB_READ, temp, input_read_header); - read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; - if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) { - emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX))); - } else { - read->offset = 1; - emit(MOV(dst, src_reg(temp))); - } + case nir_intrinsic_load_tess_level_outer: + case nir_intrinsic_load_tess_level_inner: break; - } default: vec4_visitor::nir_setup_system_value_intrinsic(instr); } @@ -105,6 +77,25 @@ vec4_tes_visitor::setup_payload() reg = setup_uniforms(reg); + foreach_block_and_inst(block, vec4_instruction, inst, cfg) { + for (int i = 0; i < 3; i++) { + if 
(inst->src[i].file != ATTR) + continue; + + struct brw_reg grf = + brw_vec4_grf(reg + inst->src[i].nr / 2, 4 * (inst->src[i].nr % 2)); + grf = stride(grf, 0, 4, 1); + grf.swizzle = inst->src[i].swizzle; + grf.type = inst->src[i].type; + grf.abs = inst->src[i].abs; + grf.negate = inst->src[i].negate; + + inst->src[i] = grf; + } + } + + reg += 8 * prog_data->urb_read_length; + this->first_non_payload_grf = reg; } @@ -148,12 +139,30 @@ vec4_tes_visitor::emit_urb_write_opcode(bool complete) void vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) { + const struct brw_tes_prog_data *tes_prog_data = + (const struct brw_tes_prog_data *) prog_data; + switch (instr->intrinsic) { case nir_intrinsic_load_tess_coord: /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. */ emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), src_reg(brw_vec8_grf(1, 0)))); break; + case nir_intrinsic_load_tess_level_outer: + emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), + swizzle(src_reg(ATTR, 1, glsl_type::vec4_type), + BRW_SWIZZLE_WZYX))); + break; + case nir_intrinsic_load_tess_level_inner: + if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) { + emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), + swizzle(src_reg(ATTR, 0, glsl_type::vec4_type), + BRW_SWIZZLE_WZYX))); + } else { + emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), + src_reg(ATTR, 1, glsl_type::float_type))); + } + break; case nir_intrinsic_load_primitive_id: emit(TES_OPCODE_GET_PRIMITIVE_ID, get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD)); @@ -169,6 +178,19 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) header = src_reg(this, glsl_type::uvec4_type); emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header), input_read_header, indirect_offset); + } else { + /* Arbitrarily only push up to 24 vec4 slots worth of data, + * which is 12 registers (since each holds 2 vec4 slots). 
+ */ + const unsigned max_push_slots = 24; + if (imm_offset < max_push_slots) { + emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D), + src_reg(ATTR, imm_offset, glsl_type::ivec4_type))); + prog_data->urb_read_length = + MAX2(prog_data->urb_read_length, + DIV_ROUND_UP(imm_offset + 1, 2)); + break; + } } dst_reg temp(this, glsl_type::ivec4_type); diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c index 2383401d14c..31354582964 100644 --- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c +++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c @@ -757,7 +757,9 @@ bool intel_get_memcpy(mesa_format tiledFormat, GLenum format, *cpp = 1; *mem_copy = memcpy; } else if ((tiledFormat == MESA_FORMAT_B8G8R8A8_UNORM) || - (tiledFormat == MESA_FORMAT_B8G8R8X8_UNORM)) { + (tiledFormat == MESA_FORMAT_B8G8R8X8_UNORM) || + (tiledFormat == MESA_FORMAT_B8G8R8A8_SRGB) || + (tiledFormat == MESA_FORMAT_B8G8R8X8_SRGB)) { *cpp = 4; if (format == GL_BGRA) { *mem_copy = memcpy; @@ -766,7 +768,9 @@ bool intel_get_memcpy(mesa_format tiledFormat, GLenum format, : rgba8_copy_aligned_src; } } else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) || - (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM)) { + (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM) || + (tiledFormat == MESA_FORMAT_R8G8B8A8_SRGB) || + (tiledFormat == MESA_FORMAT_R8G8B8X8_SRGB)) { *cpp = 4; if (format == GL_BGRA) { /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can diff --git a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp index 32e8b8f8867..7b3b9e534b3 100644 --- a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp @@ -215,6 +215,40 @@ TEST_F(saturate_propagation_test, neg_mov_sat) fs_reg dst0 = v->vgrf(glsl_type::float_type); fs_reg dst1 = v->vgrf(glsl_type::float_type); fs_reg src0 = v->vgrf(glsl_type::float_type); + 
bld.RNDU(dst0, src0); + dst0.negate = true; + set_saturate(true, bld.MOV(dst1, dst0)); + + /* = Before = + * + * 0: rndu(8) dst0 src0 + * 1: mov.sat(8) dst1 -dst0 + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_FALSE(saturate_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_RNDU, instruction(block0, 0)->opcode); + EXPECT_FALSE(instruction(block0, 0)->saturate); + EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 1)->opcode); + EXPECT_TRUE(instruction(block0, 1)->saturate); +} + +TEST_F(saturate_propagation_test, add_neg_mov_sat) +{ + const fs_builder &bld = v->bld; + fs_reg dst0 = v->vgrf(glsl_type::float_type); + fs_reg dst1 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); fs_reg src1 = v->vgrf(glsl_type::float_type); bld.ADD(dst0, src0, src1); dst0.negate = true; @@ -226,7 +260,8 @@ TEST_F(saturate_propagation_test, neg_mov_sat) * 1: mov.sat(8) dst1 -dst0 * * = After = - * (no changes) + * 0: add.sat(8) dst0 -src0 -src1 + * 1: mov(8) dst1 dst0 */ v->calculate_cfg(); @@ -235,13 +270,138 @@ TEST_F(saturate_propagation_test, neg_mov_sat) EXPECT_EQ(0, block0->start_ip); EXPECT_EQ(1, block0->end_ip); - EXPECT_FALSE(saturate_propagation(v)); + EXPECT_TRUE(saturate_propagation(v)); EXPECT_EQ(0, block0->start_ip); EXPECT_EQ(1, block0->end_ip); EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode); + EXPECT_TRUE(instruction(block0, 0)->saturate); + EXPECT_TRUE(instruction(block0, 0)->src[0].negate); + EXPECT_TRUE(instruction(block0, 0)->src[1].negate); + EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 1)->opcode); + EXPECT_FALSE(instruction(block0, 1)->saturate); +} + +TEST_F(saturate_propagation_test, mul_neg_mov_sat) +{ + const fs_builder &bld = v->bld; + fs_reg dst0 = v->vgrf(glsl_type::float_type); + fs_reg dst1 = v->vgrf(glsl_type::float_type); + 
fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + bld.MUL(dst0, src0, src1); + dst0.negate = true; + set_saturate(true, bld.MOV(dst1, dst0)); + + /* = Before = + * + * 0: mul(8) dst0 src0 src1 + * 1: mov.sat(8) dst1 -dst0 + * + * = After = + * 0: mul.sat(8) dst0 src0 -src1 + * 1: mov(8) dst1 dst0 + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(saturate_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode); + EXPECT_TRUE(instruction(block0, 0)->saturate); + EXPECT_TRUE(instruction(block0, 0)->src[0].negate); + EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 1)->opcode); + EXPECT_FALSE(instruction(block0, 1)->saturate); + EXPECT_FALSE(instruction(block0, 1)->src[0].negate); +} + +TEST_F(saturate_propagation_test, mul_mov_sat_neg_mov_sat) +{ + const fs_builder &bld = v->bld; + fs_reg dst0 = v->vgrf(glsl_type::float_type); + fs_reg dst1 = v->vgrf(glsl_type::float_type); + fs_reg dst2 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + bld.MUL(dst0, src0, src1); + set_saturate(true, bld.MOV(dst1, dst0)); + dst0.negate = true; + set_saturate(true, bld.MOV(dst2, dst0)); + + /* = Before = + * + * 0: mul(8) dst0 src0 src1 + * 1: mov.sat(8) dst1 dst0 + * 2: mov.sat(8) dst2 -dst0 + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_FALSE(saturate_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode); EXPECT_FALSE(instruction(block0, 0)->saturate); + EXPECT_FALSE(instruction(block0, 0)->src[1].negate); EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 
1)->opcode); EXPECT_TRUE(instruction(block0, 1)->saturate); + EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 2)->opcode); + EXPECT_TRUE(instruction(block0, 2)->src[0].negate); + EXPECT_TRUE(instruction(block0, 2)->saturate); +} + +TEST_F(saturate_propagation_test, mul_neg_mov_sat_neg_mov_sat) +{ + const fs_builder &bld = v->bld; + fs_reg dst0 = v->vgrf(glsl_type::float_type); + fs_reg dst1 = v->vgrf(glsl_type::float_type); + fs_reg dst2 = v->vgrf(glsl_type::float_type); + fs_reg src0 = v->vgrf(glsl_type::float_type); + fs_reg src1 = v->vgrf(glsl_type::float_type); + bld.MUL(dst0, src0, src1); + dst0.negate = true; + set_saturate(true, bld.MOV(dst1, dst0)); + set_saturate(true, bld.MOV(dst2, dst0)); + + /* = Before = + * + * 0: mul(8) dst0 src0 src1 + * 1: mov.sat(8) dst1 -dst0 + * 2: mov.sat(8) dst2 -dst0 + * + * = After = + * (no changes) + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + + EXPECT_FALSE(saturate_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(2, block0->end_ip); + EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode); + EXPECT_FALSE(instruction(block0, 0)->saturate); + EXPECT_FALSE(instruction(block0, 0)->src[1].negate); + EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 1)->opcode); + EXPECT_TRUE(instruction(block0, 1)->src[0].negate); + EXPECT_TRUE(instruction(block0, 1)->saturate); + EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 2)->opcode); + EXPECT_TRUE(instruction(block0, 2)->src[0].negate); + EXPECT_TRUE(instruction(block0, 2)->saturate); } TEST_F(saturate_propagation_test, abs_mov_sat) diff --git a/src/mesa/main/dlist.c b/src/mesa/main/dlist.c index afd2d83cb59..6dfb84be9aa 100644 --- a/src/mesa/main/dlist.c +++ b/src/mesa/main/dlist.c @@ -304,8 +304,8 @@ typedef enum OPCODE_SAMPLE_COVERAGE, /* GL_ARB_window_pos */ OPCODE_WINDOW_POS_ARB, - /* GL_NV_fragment_program */ - OPCODE_BIND_PROGRAM_NV, + /* GL_ARB_vertex_program */ + 
OPCODE_BIND_PROGRAM_ARB, OPCODE_PROGRAM_LOCAL_PARAMETER_ARB, /* GL_EXT_stencil_two_side */ OPCODE_ACTIVE_STENCIL_FACE_EXT, @@ -4957,15 +4957,15 @@ save_SampleCoverageARB(GLclampf value, GLboolean invert) /* - * GL_NV_fragment_program + * GL_ARB_vertex_program */ static void GLAPIENTRY -save_BindProgramNV(GLenum target, GLuint id) +save_BindProgramARB(GLenum target, GLuint id) { GET_CURRENT_CONTEXT(ctx); Node *n; ASSERT_OUTSIDE_SAVE_BEGIN_END_AND_FLUSH(ctx); - n = alloc_instruction(ctx, OPCODE_BIND_PROGRAM_NV, 2); + n = alloc_instruction(ctx, OPCODE_BIND_PROGRAM_ARB, 2); if (n) { n[1].e = target; n[2].ui = id; @@ -8607,7 +8607,7 @@ execute_list(struct gl_context *ctx, GLuint list) case OPCODE_WINDOW_POS_ARB: /* GL_ARB_window_pos */ CALL_WindowPos3f(ctx->Exec, (n[1].f, n[2].f, n[3].f)); break; - case OPCODE_BIND_PROGRAM_NV: /* GL_ARB_vertex_program */ + case OPCODE_BIND_PROGRAM_ARB: /* GL_ARB_vertex_program */ CALL_BindProgramARB(ctx->Exec, (n[1].e, n[2].ui)); break; case OPCODE_PROGRAM_LOCAL_PARAMETER_ARB: @@ -9787,13 +9787,6 @@ _mesa_initialize_save_table(const struct gl_context *ctx) SET_WindowPos4sMESA(table, save_WindowPos4sMESA); SET_WindowPos4svMESA(table, save_WindowPos4svMESA); - /* 233. GL_NV_vertex_program */ - /* The following commands DO NOT go into display lists: - * AreProgramsResidentNV, IsProgramNV, GenProgramsNV, DeleteProgramsNV, - * VertexAttribPointerNV, GetProgram*, GetVertexAttrib* - */ - SET_BindProgramARB(table, save_BindProgramNV); - /* 245. GL_ATI_fragment_shader */ SET_BindFragmentShaderATI(table, save_BindFragmentShaderATI); SET_SetFragmentShaderConstantATI(table, save_SetFragmentShaderConstantATI); @@ -9838,7 +9831,7 @@ _mesa_initialize_save_table(const struct gl_context *ctx) /* ARB 27. 
GL_ARB_fragment_program */ /* glVertexAttrib* functions alias the NV ones, handled elsewhere */ SET_ProgramStringARB(table, save_ProgramStringARB); - SET_BindProgramARB(table, save_BindProgramNV); + SET_BindProgramARB(table, save_BindProgramARB); SET_ProgramEnvParameter4dARB(table, save_ProgramEnvParameter4dARB); SET_ProgramEnvParameter4dvARB(table, save_ProgramEnvParameter4dvARB); SET_ProgramEnvParameter4fARB(table, save_ProgramEnvParameter4fARB); diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h index 68f36178f32..50e050e9009 100644 --- a/src/mesa/main/extensions_table.h +++ b/src/mesa/main/extensions_table.h @@ -200,6 +200,7 @@ EXT(EXT_framebuffer_object , dummy_true EXT(EXT_framebuffer_sRGB , EXT_framebuffer_sRGB , GLL, GLC, x , x , 1998) EXT(EXT_gpu_program_parameters , EXT_gpu_program_parameters , GLL, x , x , x , 2006) EXT(EXT_gpu_shader4 , EXT_gpu_shader4 , GLL, GLC, x , x , 2006) +EXT(EXT_gpu_shader5 , ARB_gpu_shader5 , x , x , x , 31, 2014) EXT(EXT_map_buffer_range , ARB_map_buffer_range , x , x , ES1, ES2, 2012) EXT(EXT_multi_draw_arrays , dummy_true , GLL, x , ES1, ES2, 1999) EXT(EXT_packed_depth_stencil , dummy_true , GLL, GLC, x , x , 2005) @@ -243,7 +244,7 @@ EXT(EXT_texture_object , dummy_true EXT(EXT_texture_rectangle , NV_texture_rectangle , GLL, x , x , x , 2004) EXT(EXT_texture_rg , ARB_texture_rg , x , x , x , ES2, 2011) EXT(EXT_texture_sRGB , EXT_texture_sRGB , GLL, GLC, x , x , 2004) -EXT(EXT_texture_sRGB_decode , EXT_texture_sRGB_decode , GLL, GLC, x , x , 2006) +EXT(EXT_texture_sRGB_decode , EXT_texture_sRGB_decode , GLL, GLC, x , 30, 2006) EXT(EXT_texture_shared_exponent , EXT_texture_shared_exponent , GLL, GLC, x , x , 2004) EXT(EXT_texture_snorm , EXT_texture_snorm , GLL, GLC, x , x , 2009) EXT(EXT_texture_swizzle , EXT_texture_swizzle , GLL, GLC, x , x , 2008) @@ -282,7 +283,6 @@ EXT(NV_depth_clamp , ARB_depth_clamp EXT(NV_draw_buffers , dummy_true , x , x , x , ES2, 2011) EXT(NV_fbo_color_attachments , 
dummy_true , x , x , x , ES2, 2010) EXT(NV_fog_distance , NV_fog_distance , GLL, x , x , x , 2001) -EXT(NV_fragment_program_option , NV_fragment_program_option , GLL, x , x , x , 2005) EXT(NV_light_max_exponent , dummy_true , GLL, x , x , x , 1999) EXT(NV_packed_depth_stencil , dummy_true , GLL, GLC, x , x , 2000) EXT(NV_point_sprite , NV_point_sprite , GLL, GLC, x , x , 2001) @@ -319,6 +319,7 @@ EXT(OES_framebuffer_object , dummy_true EXT(OES_geometry_point_size , OES_geometry_shader , x , x , x , 31, 2015) EXT(OES_geometry_shader , OES_geometry_shader , x , x , x , 31, 2015) EXT(OES_get_program_binary , dummy_true , x , x , x , ES2, 2008) +EXT(OES_gpu_shader5 , ARB_gpu_shader5 , x , x , x , 31, 2014) EXT(OES_mapbuffer , dummy_true , x , x , ES1, ES2, 2005) EXT(OES_packed_depth_stencil , dummy_true , x , x , ES1, ES2, 2007) EXT(OES_point_size_array , dummy_true , x , x , ES1, x , 2004) diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 1f10050c891..c9e1518ab23 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -2471,7 +2471,6 @@ static void bind_framebuffer(GLenum target, GLuint framebuffer, bool allow_user_names) { struct gl_framebuffer *newDrawFb, *newReadFb; - struct gl_framebuffer *oldDrawFb, *oldReadFb; GLboolean bindReadBuf, bindDrawBuf; GET_CURRENT_CONTEXT(ctx); @@ -2525,18 +2524,23 @@ bind_framebuffer(GLenum target, GLuint framebuffer, bool allow_user_names) newReadFb = ctx->WinSysReadBuffer; } - assert(newDrawFb); - assert(newDrawFb != &DummyFramebuffer); + _mesa_bind_framebuffers(ctx, + bindDrawBuf ? newDrawFb : ctx->DrawBuffer, + bindReadBuf ? 
newReadFb : ctx->ReadBuffer); +} - /* save pointers to current/old framebuffers */ - oldDrawFb = ctx->DrawBuffer; - oldReadFb = ctx->ReadBuffer; +void +_mesa_bind_framebuffers(struct gl_context *ctx, + struct gl_framebuffer *newDrawFb, + struct gl_framebuffer *newReadFb) +{ + struct gl_framebuffer *const oldDrawFb = ctx->DrawBuffer; + struct gl_framebuffer *const oldReadFb = ctx->ReadBuffer; + const bool bindDrawBuf = oldDrawFb != newDrawFb; + const bool bindReadBuf = oldReadFb != newReadFb; - /* check if really changing bindings */ - if (oldDrawFb == newDrawFb) - bindDrawBuf = GL_FALSE; - if (oldReadFb == newReadFb) - bindReadBuf = GL_FALSE; + assert(newDrawFb); + assert(newDrawFb != &DummyFramebuffer); /* * OK, now bind the new Draw/Read framebuffers, if they're changing. @@ -2573,7 +2577,12 @@ bind_framebuffer(GLenum target, GLuint framebuffer, bool allow_user_names) } if ((bindDrawBuf || bindReadBuf) && ctx->Driver.BindFramebuffer) { - ctx->Driver.BindFramebuffer(ctx, target, newDrawFb, newReadFb); + /* The few classic drivers that actually hook this function really only + * want to know if the draw framebuffer changed. + */ + ctx->Driver.BindFramebuffer(ctx, + bindDrawBuf ? 
GL_FRAMEBUFFER : GL_READ_FRAMEBUFFER, + newDrawFb, newReadFb); } } @@ -2815,6 +2824,7 @@ reuse_framebuffer_texture_attachment(struct gl_framebuffer *fb, dst_att->Complete = src_att->Complete; dst_att->TextureLevel = src_att->TextureLevel; dst_att->Zoffset = src_att->Zoffset; + dst_att->Layered = src_att->Layered; } diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h index f9a60605ea5..540bd9d40c3 100644 --- a/src/mesa/main/fbobject.h +++ b/src/mesa/main/fbobject.h @@ -137,6 +137,11 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx, GLint *params, const char *caller); +extern void +_mesa_bind_framebuffers(struct gl_context *ctx, + struct gl_framebuffer *newDrawFb, + struct gl_framebuffer *newReadFb); + extern GLboolean GLAPIENTRY _mesa_IsRenderbuffer(GLuint renderbuffer); diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c index 34cc9218add..d72bc71aa72 100644 --- a/src/mesa/main/ffvertex_prog.c +++ b/src/mesa/main/ffvertex_prog.c @@ -537,7 +537,6 @@ static void emit_arg( struct prog_src_register *src, src->File = reg.file; src->Index = reg.idx; src->Swizzle = reg.swz; - src->Abs = reg.abs; src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE; src->RelAddr = 0; /* Check that bitfield sizes aren't exceeded */ @@ -552,8 +551,6 @@ static void emit_dst( struct prog_dst_register *dst, dst->Index = reg.idx; /* allow zero as a shorthand for xyzw */ dst->WriteMask = mask ? mask : WRITEMASK_XYZW; - dst->CondMask = COND_TR; /* always pass cond test */ - dst->CondSwizzle = SWIZZLE_NOOP; /* Check that bitfield sizes aren't exceeded */ assert(dst->Index == reg.idx); } diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 2ca9cbf808a..a2e3c26c321 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2951,7 +2951,6 @@ struct gl_pipeline_shader_state struct gl_shader_compiler_options { /** Driver-selectable options: */ - GLboolean EmitCondCodes; /**< Use condition codes? 
*/ GLboolean EmitNoLoops; GLboolean EmitNoFunctions; GLboolean EmitNoCont; /**< Emit CONT opcode? */ @@ -3927,7 +3926,6 @@ struct gl_extensions GLboolean MESA_ycbcr_texture; GLboolean NV_conditional_render; GLboolean NV_fog_distance; - GLboolean NV_fragment_program_option; GLboolean NV_point_sprite; GLboolean NV_primitive_restart; GLboolean NV_texture_barrier; diff --git a/src/mesa/main/shaderobj.c b/src/mesa/main/shaderobj.c index 203ccef7fc4..9a4eb6b56fd 100644 --- a/src/mesa/main/shaderobj.c +++ b/src/mesa/main/shaderobj.c @@ -240,6 +240,8 @@ init_shader_program(struct gl_shader_program *prog) prog->TransformFeedback.BufferMode = GL_INTERLEAVED_ATTRIBS; + exec_list_make_empty(&prog->EmptyUniformLocations); + prog->InfoLog = ralloc_strdup(prog, ""); } diff --git a/src/mesa/main/texparam.c b/src/mesa/main/texparam.c index 20770a77e15..3b769f436b7 100644 --- a/src/mesa/main/texparam.c +++ b/src/mesa/main/texparam.c @@ -568,8 +568,7 @@ set_tex_parameteri(struct gl_context *ctx, goto invalid_pname; case GL_TEXTURE_SRGB_DECODE_EXT: - if (_mesa_is_desktop_gl(ctx) - && ctx->Extensions.EXT_texture_sRGB_decode) { + if (ctx->Extensions.EXT_texture_sRGB_decode) { GLenum decode = params[0]; if (!target_allows_setting_sampler_parameters(texObj->Target)) diff --git a/src/mesa/program/Android.mk b/src/mesa/program/Android.mk index 0bc97a6b3d4..6f485439d64 100644 --- a/src/mesa/program/Android.mk +++ b/src/mesa/program/Android.mk @@ -44,7 +44,7 @@ include $(CLEAR_VARS) LOCAL_MODULE := libmesa_program LOCAL_MODULE_CLASS := STATIC_LIBRARIES -LOCAL_STATIC_LIBRARIES := libmesa_glsl +LOCAL_STATIC_LIBRARIES := libmesa_nir intermediates := $(call local-generated-sources-dir) @@ -74,12 +74,13 @@ $(intermediates)/program/lex.yy.c: $(LOCAL_PATH)/program_lexer.l LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/mesa \ - $(MESA_TOP)/src/glsl \ $(MESA_TOP)/src/compiler/nir \ $(MESA_TOP)/src/gallium/auxiliary \ $(MESA_TOP)/src/gallium/include LOCAL_EXPORT_C_INCLUDE_DIRS := 
$(intermediates) +LOCAL_GENERATED_SOURCES += $(MESA_GEN_NIR_H) + include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 495048d4bfc..10d931c8b6b 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -105,7 +105,6 @@ public: this->file = file; this->index = 0; this->writemask = writemask; - this->cond_mask = COND_TR; this->reladdr = NULL; } @@ -114,7 +113,6 @@ public: this->file = PROGRAM_UNDEFINED; this->index = 0; this->writemask = 0; - this->cond_mask = COND_TR; this->reladdr = NULL; } @@ -123,7 +121,6 @@ public: gl_register_file file; /**< PROGRAM_* from Mesa */ int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ - GLuint cond_mask:4; /** Register index should be offset by the integer in this reg. */ src_reg *reladdr; }; @@ -144,7 +141,6 @@ dst_reg::dst_reg(src_reg reg) this->file = reg.file; this->index = reg.index; this->writemask = WRITEMASK_XYZW; - this->cond_mask = COND_TR; this->reladdr = reg.reladdr; } @@ -159,7 +155,6 @@ public: src_reg src[3]; /** Pointer to the ir source this tree came from for debugging */ ir_instruction *ir; - GLboolean cond_update; bool saturate; int sampler; /**< sampler index */ int tex_target; /**< One of TEXTURE_*_INDEX */ @@ -2112,44 +2107,23 @@ ir_to_mesa_visitor::visit(ir_return *ir) void ir_to_mesa_visitor::visit(ir_discard *ir) { - if (ir->condition) { - ir->condition->accept(this); - this->result.negate = ~this->result.negate; - emit(ir, OPCODE_KIL, undef_dst, this->result); - } else { - emit(ir, OPCODE_KIL_NV); - } + if (!ir->condition) + ir->condition = new(mem_ctx) ir_constant(true); + + ir->condition->accept(this); + this->result.negate = ~this->result.negate; + emit(ir, OPCODE_KIL, undef_dst, this->result); } void ir_to_mesa_visitor::visit(ir_if *ir) { - ir_to_mesa_instruction *cond_inst, *if_inst; - ir_to_mesa_instruction 
*prev_inst; - - prev_inst = (ir_to_mesa_instruction *)this->instructions.get_tail(); + ir_to_mesa_instruction *if_inst; ir->condition->accept(this); assert(this->result.file != PROGRAM_UNDEFINED); - if (this->options->EmitCondCodes) { - cond_inst = (ir_to_mesa_instruction *)this->instructions.get_tail(); - - /* See if we actually generated any instruction for generating - * the condition. If not, then cook up a move to a temp so we - * have something to set cond_update on. - */ - if (cond_inst == prev_inst) { - src_reg temp = get_temp(glsl_type::bool_type); - cond_inst = emit(ir->condition, OPCODE_MOV, dst_reg(temp), result); - } - cond_inst->cond_update = GL_TRUE; - - if_inst = emit(ir->condition, OPCODE_IF); - if_inst->dst.cond_mask = COND_NE; - } else { - if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result); - } + if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result); this->instructions.push_tail(if_inst); @@ -2206,10 +2180,6 @@ mesa_src_reg_from_ir_src_reg(src_reg reg) mesa_reg.Swizzle = reg.swizzle; mesa_reg.RelAddr = reg.reladdr != NULL; mesa_reg.Negate = reg.negate; - mesa_reg.Abs = 0; - mesa_reg.HasIndex2 = GL_FALSE; - mesa_reg.RelAddr2 = 0; - mesa_reg.Index2 = 0; return mesa_reg; } @@ -2793,12 +2763,10 @@ get_mesa_program(struct gl_context *ctx, i = 0; foreach_in_list(const ir_to_mesa_instruction, inst, &v.instructions) { mesa_inst->Opcode = inst->op; - mesa_inst->CondUpdate = inst->cond_update; if (inst->saturate) mesa_inst->Saturate = GL_TRUE; mesa_inst->DstReg.File = inst->dst.file; mesa_inst->DstReg.Index = inst->dst.index; - mesa_inst->DstReg.CondMask = inst->dst.cond_mask; mesa_inst->DstReg.WriteMask = inst->dst.writemask; mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL; mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src[0]); @@ -2998,8 +2966,6 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) if (linked_prog) { _mesa_copy_linked_program_data((gl_shader_stage) i, prog, 
linked_prog); - _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, - linked_prog); if (!ctx->Driver.ProgramStringNotify(ctx, _mesa_shader_stage_to_program(i), linked_prog)) { diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c index 2c52d0db508..8f167be60cd 100644 --- a/src/mesa/program/prog_execute.c +++ b/src/mesa/program/prog_execute.c @@ -200,12 +200,6 @@ fetch_vector4(const struct prog_src_register *source, result[3] = src[GET_SWZ(source->Swizzle, 3)]; } - if (source->Abs) { - result[0] = fabsf(result[0]); - result[1] = fabsf(result[1]); - result[2] = fabsf(result[2]); - result[3] = fabsf(result[3]); - } if (source->Negate) { assert(source->Negate == NEGATE_XYZW); result[0] = -result[0]; @@ -258,12 +252,6 @@ fetch_vector4_deriv(struct gl_context * ctx, result[2] = deriv[GET_SWZ(source->Swizzle, 2)]; result[3] = deriv[GET_SWZ(source->Swizzle, 3)]; - if (source->Abs) { - result[0] = fabsf(result[0]); - result[1] = fabsf(result[1]); - result[2] = fabsf(result[2]); - result[3] = fabsf(result[3]); - } if (source->Negate) { assert(source->Negate == NEGATE_XYZW); result[0] = -result[0]; @@ -289,9 +277,6 @@ fetch_vector1(const struct prog_src_register *source, result[0] = src[GET_SWZ(source->Swizzle, 0)]; - if (source->Abs) { - result[0] = fabsf(result[0]); - } if (source->Negate) { result[0] = -result[0]; } @@ -329,66 +314,6 @@ fetch_texel(struct gl_context *ctx, /** - * Test value against zero and return GT, LT, EQ or UN if NaN. - */ -static inline GLuint -generate_cc(float value) -{ - if (value != value) - return COND_UN; /* NaN */ - if (value > 0.0F) - return COND_GT; - if (value < 0.0F) - return COND_LT; - return COND_EQ; -} - - -/** - * Test if the ccMaskRule is satisfied by the given condition code. - * Used to mask destination writes according to the current condition code. 
- */ -static inline GLboolean -test_cc(GLuint condCode, GLuint ccMaskRule) -{ - switch (ccMaskRule) { - case COND_EQ: return (condCode == COND_EQ); - case COND_NE: return (condCode != COND_EQ); - case COND_LT: return (condCode == COND_LT); - case COND_GE: return (condCode == COND_GT || condCode == COND_EQ); - case COND_LE: return (condCode == COND_LT || condCode == COND_EQ); - case COND_GT: return (condCode == COND_GT); - case COND_TR: return GL_TRUE; - case COND_FL: return GL_FALSE; - default: return GL_TRUE; - } -} - - -/** - * Evaluate the 4 condition codes against a predicate and return GL_TRUE - * or GL_FALSE to indicate result. - */ -static inline GLboolean -eval_condition(const struct gl_program_machine *machine, - const struct prog_instruction *inst) -{ - const GLuint swizzle = inst->DstReg.CondSwizzle; - const GLuint condMask = inst->DstReg.CondMask; - if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) || - test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) || - test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) || - test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) { - return GL_TRUE; - } - else { - return GL_FALSE; - } -} - - - -/** * Store 4 floats into a register. Observe the instructions saturate and * set-condition-code flags. 
*/ @@ -418,30 +343,6 @@ store_vector4(const struct prog_instruction *inst, value = clampedValue; } - if (dstReg->CondMask != COND_TR) { - /* condition codes may turn off some writes */ - if (writeMask & WRITEMASK_X) { - if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)], - dstReg->CondMask)) - writeMask &= ~WRITEMASK_X; - } - if (writeMask & WRITEMASK_Y) { - if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)], - dstReg->CondMask)) - writeMask &= ~WRITEMASK_Y; - } - if (writeMask & WRITEMASK_Z) { - if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)], - dstReg->CondMask)) - writeMask &= ~WRITEMASK_Z; - } - if (writeMask & WRITEMASK_W) { - if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)], - dstReg->CondMask)) - writeMask &= ~WRITEMASK_W; - } - } - #ifdef NAN_CHECK assert(!IS_INF_OR_NAN(value[0])); assert(!IS_INF_OR_NAN(value[0])); @@ -457,24 +358,6 @@ store_vector4(const struct prog_instruction *inst, dst[2] = value[2]; if (writeMask & WRITEMASK_W) dst[3] = value[3]; - - if (inst->CondUpdate) { - if (writeMask & WRITEMASK_X) - machine->CondCodes[0] = generate_cc(value[0]); - if (writeMask & WRITEMASK_Y) - machine->CondCodes[1] = generate_cc(value[1]); - if (writeMask & WRITEMASK_Z) - machine->CondCodes[2] = generate_cc(value[2]); - if (writeMask & WRITEMASK_W) - machine->CondCodes[3] = generate_cc(value[3]); -#if DEBUG_PROG - printf("CondCodes=(%s,%s,%s,%s) for:\n", - _mesa_condcode_string(machine->CondCodes[0]), - _mesa_condcode_string(machine->CondCodes[1]), - _mesa_condcode_string(machine->CondCodes[2]), - _mesa_condcode_string(machine->CondCodes[3])); -#endif - } } @@ -572,31 +455,25 @@ _mesa_execute_program(struct gl_context * ctx, case OPCODE_BRK: /* break out of loop (conditional) */ assert(program->Instructions[inst->BranchTarget].Opcode == OPCODE_ENDLOOP); - if (eval_condition(machine, inst)) { - /* break out of loop */ - /* pc++ at end of for-loop will put us after the ENDLOOP inst */ - pc = 
inst->BranchTarget; - } + /* break out of loop */ + /* pc++ at end of for-loop will put us after the ENDLOOP inst */ + pc = inst->BranchTarget; break; case OPCODE_CONT: /* continue loop (conditional) */ assert(program->Instructions[inst->BranchTarget].Opcode == OPCODE_ENDLOOP); - if (eval_condition(machine, inst)) { - /* continue at ENDLOOP */ - /* Subtract 1 here since we'll do pc++ at end of for-loop */ - pc = inst->BranchTarget - 1; - } + /* continue at ENDLOOP */ + /* Subtract 1 here since we'll do pc++ at end of for-loop */ + pc = inst->BranchTarget - 1; break; case OPCODE_CAL: /* Call subroutine (conditional) */ - if (eval_condition(machine, inst)) { - /* call the subroutine */ - if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) { - return GL_TRUE; /* Per GL_NV_vertex_program2 spec */ - } - machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */ - /* Subtract 1 here since we'll do pc++ at end of for-loop */ - pc = inst->BranchTarget - 1; + /* call the subroutine */ + if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) { + return GL_TRUE; /* Per GL_NV_vertex_program2 spec */ } + machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */ + /* Subtract 1 here since we'll do pc++ at end of for-loop */ + pc = inst->BranchTarget - 1; break; case OPCODE_CMP: { @@ -778,9 +655,6 @@ _mesa_execute_program(struct gl_context * ctx, fetch_vector1(&inst->SrcReg[0], machine, a); cond = (a[0] != 0.0F); } - else { - cond = eval_condition(machine, inst); - } if (DEBUG_PROG) { printf("IF: %d\n", cond); } @@ -805,11 +679,6 @@ _mesa_execute_program(struct gl_context * ctx, case OPCODE_ENDIF: /* nothing */ break; - case OPCODE_KIL_NV: /* NV_f_p only (conditional) */ - if (eval_condition(machine, inst)) { - return GL_FALSE; - } - break; case OPCODE_KIL: /* ARB_f_p only */ { GLfloat a[4]; @@ -1071,13 +940,11 @@ _mesa_execute_program(struct gl_context * ctx, } break; case OPCODE_RET: /* return from subroutine (conditional) */ - if (eval_condition(machine, inst)) { 
- if (machine->StackDepth == 0) { - return GL_TRUE; /* Per GL_NV_vertex_program2 spec */ - } - /* subtract one because of pc++ in the for loop */ - pc = machine->CallStack[--machine->StackDepth] - 1; + if (machine->StackDepth == 0) { + return GL_TRUE; /* Per GL_NV_vertex_program2 spec */ } + /* subtract one because of pc++ in the for loop */ + pc = machine->CallStack[--machine->StackDepth] - 1; break; case OPCODE_RSQ: /* 1 / sqrt() */ { @@ -1321,7 +1188,7 @@ _mesa_execute_program(struct gl_context * ctx, store_vector4(inst, machine, color); } break; - case OPCODE_TXD: /* GL_NV_fragment_program only */ + case OPCODE_TXD: /* Texture lookup w/ partial derivatives for LOD */ { GLfloat texcoord[4], dtdx[4], dtdy[4], color[4]; @@ -1371,26 +1238,6 @@ _mesa_execute_program(struct gl_context * ctx, store_vector4(inst, machine, color); } break; - case OPCODE_TXP_NV: /* GL_NV_fragment_program only */ - /* Texture lookup w/ projective divide, as above, but do not - * do the divide by w if sampling from a cube map. 
- */ - { - GLfloat texcoord[4], color[4]; - - fetch_vector4(&inst->SrcReg[0], machine, texcoord); - if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX && - texcoord[3] != 0.0F) { - texcoord[0] /= texcoord[3]; - texcoord[1] /= texcoord[3]; - texcoord[2] /= texcoord[3]; - } - - fetch_texel(ctx, machine, inst, texcoord, 0.0, color); - - store_vector4(inst, machine, color); - } - break; case OPCODE_TRUNC: /* truncate toward zero */ { GLfloat a[4], result[4]; diff --git a/src/mesa/program/prog_execute.h b/src/mesa/program/prog_execute.h index ffaa0c94fa2..3603e3bdddf 100644 --- a/src/mesa/program/prog_execute.h +++ b/src/mesa/program/prog_execute.h @@ -63,7 +63,6 @@ struct gl_program_machine GLfloat Temporaries[MAX_PROGRAM_TEMPS][4]; GLfloat Outputs[MAX_PROGRAM_OUTPUTS][4]; GLfloat (*EnvParams)[4]; /**< Vertex or Fragment env parameters */ - GLuint CondCodes[4]; /**< COND_* value for x/y/z/w */ GLint AddressReg[MAX_PROGRAM_ADDRESS_REGS][4]; GLfloat SystemValues[SYSTEM_VALUE_MAX][4]; diff --git a/src/mesa/program/prog_instruction.c b/src/mesa/program/prog_instruction.c index 21ef35337f6..0b80383b147 100644 --- a/src/mesa/program/prog_instruction.c +++ b/src/mesa/program/prog_instruction.c @@ -52,11 +52,8 @@ _mesa_init_instructions(struct prog_instruction *inst, GLuint count) inst[i].DstReg.File = PROGRAM_UNDEFINED; inst[i].DstReg.WriteMask = WRITEMASK_XYZW; - inst[i].DstReg.CondMask = COND_TR; - inst[i].DstReg.CondSwizzle = SWIZZLE_NOOP; inst[i].Saturate = GL_FALSE; - inst[i].Precision = FLOAT32; } } @@ -154,7 +151,6 @@ static const struct instruction_info InstInfo[MAX_OPCODE] = { { OPCODE_FRC, "FRC", 1, 1 }, { OPCODE_IF, "IF", 1, 0 }, { OPCODE_KIL, "KIL", 1, 0 }, - { OPCODE_KIL_NV, "KIL_NV", 0, 0 }, { OPCODE_LG2, "LG2", 1, 1 }, { OPCODE_LIT, "LIT", 1, 1 }, { OPCODE_LOG, "LOG", 1, 1 }, @@ -188,7 +184,6 @@ static const struct instruction_info InstInfo[MAX_OPCODE] = { { OPCODE_TXD, "TXD", 3, 1 }, { OPCODE_TXL, "TXL", 1, 1 }, { OPCODE_TXP, "TXP", 1, 1 }, - { OPCODE_TXP_NV, 
"TXP_NV", 1, 1 }, { OPCODE_TRUNC, "TRUNC", 1, 1 }, { OPCODE_XPD, "XPD", 2, 1 } }; diff --git a/src/mesa/program/prog_instruction.h b/src/mesa/program/prog_instruction.h index d56f96cfaa1..0120227711a 100644 --- a/src/mesa/program/prog_instruction.h +++ b/src/mesa/program/prog_instruction.h @@ -92,32 +92,6 @@ /** - * Condition codes - */ -/*@{*/ -#define COND_GT 1 /**< greater than zero */ -#define COND_EQ 2 /**< equal to zero */ -#define COND_LT 3 /**< less than zero */ -#define COND_UN 4 /**< unordered (NaN) */ -#define COND_GE 5 /**< greater than or equal to zero */ -#define COND_LE 6 /**< less than or equal to zero */ -#define COND_NE 7 /**< not equal to zero */ -#define COND_TR 8 /**< always true */ -#define COND_FL 9 /**< always false */ -/*@}*/ - - -/** - * Instruction precision for GL_NV_fragment_program - */ -/*@{*/ -#define FLOAT32 0x1 -#define FLOAT16 0x2 -#define FIXED12 0x4 -/*@}*/ - - -/** * Per-component negation masks */ /*@{*/ @@ -166,7 +140,6 @@ enum prog_opcode { OPCODE_FRC, /* X X 2 X X */ OPCODE_IF, /* opt */ OPCODE_KIL, /* X X */ - OPCODE_KIL_NV, /* X X */ OPCODE_LG2, /* X X 2 X X */ OPCODE_LIT, /* X X X X */ OPCODE_LOG, /* X X */ @@ -200,7 +173,6 @@ enum prog_opcode { OPCODE_TXD, /* X X */ OPCODE_TXL, /* 3 2 X */ OPCODE_TXP, /* X X */ - OPCODE_TXP_NV, /* 3 X */ OPCODE_TRUNC, /* X */ OPCODE_XPD, /* X X */ MAX_OPCODE @@ -226,31 +198,12 @@ struct prog_src_register GLuint Swizzle:12; GLuint RelAddr:1; - /** Take the component-wise absolute value */ - GLuint Abs:1; - /** - * Post-Abs negation. + * Negation. * This will either be NEGATE_NONE or NEGATE_XYZW, except for the SWZ * instruction which allows per-component negation. */ GLuint Negate:4; - - /** - * Is the register two-dimensional. - * Two dimensional registers are of the - * REGISTER[index][index2] format. - * They are used by the geometry shaders where - * the first index is the index within an array - * and the second index is the semantic of the - * array, e.g. 
gl_PositionIn[index] would become - * INPUT[index][gl_PositionIn] - */ - GLuint HasIndex2:1; - GLuint RelAddr2:1; - GLint Index2:(INST_INDEX_BITS+1); /**< Extra bit here for sign bit. - * May be negative for relative - * addressing. */ }; @@ -263,26 +216,6 @@ struct prog_dst_register GLuint Index:INST_INDEX_BITS; /**< Unsigned, never negative */ GLuint WriteMask:4; GLuint RelAddr:1; - - /** - * \name Conditional destination update control. - * - * \since - * NV_fragment_program_option, NV_vertex_program2, NV_vertex_program2_option. - */ - /*@{*/ - /** - * Takes one of the 9 possible condition values (EQ, FL, GT, GE, LE, LT, - * NE, TR, or UN). Dest reg is only written to if the matching - * (swizzled) condition code value passes. When a conditional update mask - * is not specified, this will be \c COND_TR. - */ - GLuint CondMask:4; - - /** - * Condition code swizzle value. - */ - GLuint CondSwizzle:12; }; @@ -296,44 +229,14 @@ struct prog_instruction struct prog_dst_register DstReg; /** - * Indicates that the instruction should update the condition code - * register. - * - * \since - * NV_fragment_program_option, NV_vertex_program2, NV_vertex_program2_option. - */ - GLuint CondUpdate:1; - - /** - * If prog_instruction::CondUpdate is \c GL_TRUE, this value selects the - * condition code register that is to be updated. - * - * In GL_NV_fragment_program or GL_NV_vertex_program2 mode, only condition - * code register 0 is available. In GL_NV_vertex_program3 mode, condition - * code registers 0 and 1 are available. - * - * \since - * NV_fragment_program_option, NV_vertex_program2, NV_vertex_program2_option. - */ - GLuint CondDst:1; - - /** * Saturate each value of the vectored result to the range [0,1]. * * \since - * NV_fragment_program_option, NV_vertex_program3. + * ARB_fragment_program */ GLuint Saturate:1; /** - * Per-instruction selectable precision: FLOAT32, FLOAT16, FIXED12. - * - * \since - * NV_fragment_program_option. 
- */ - GLuint Precision:3; - - /** * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions. */ /*@{*/ diff --git a/src/mesa/program/prog_noise.c b/src/mesa/program/prog_noise.c index ac920c23f34..e2e209e5bd3 100644 --- a/src/mesa/program/prog_noise.c +++ b/src/mesa/program/prog_noise.c @@ -188,7 +188,7 @@ grad4(int hash, float x, float y, float z, float t) * Details can be found where this table is used, in the 4D noise method. * TODO: This should not be required, backport it from Bill's GLSL code! */ -static unsigned char simplex[64][4] = { +static const unsigned char simplex[64][4] = { {0, 1, 2, 3}, {0, 1, 3, 2}, {0, 0, 0, 0}, {0, 2, 3, 1}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {1, 2, 3, 0}, {0, 2, 1, 3}, {0, 0, 0, 0}, {0, 3, 1, 2}, {0, 3, 2, 1}, diff --git a/src/mesa/program/prog_opt_constant_fold.c b/src/mesa/program/prog_opt_constant_fold.c index e2518e660e6..1d796dcf2e4 100644 --- a/src/mesa/program/prog_opt_constant_fold.c +++ b/src/mesa/program/prog_opt_constant_fold.c @@ -82,7 +82,6 @@ src_regs_are_same(const struct prog_src_register *a, return (a->File == b->File) && (a->Index == b->Index) && (a->Swizzle == b->Swizzle) - && (a->Abs == b->Abs) && (a->Negate == b->Negate) && (a->RelAddr == 0) && (b->RelAddr == 0); @@ -99,13 +98,6 @@ get_value(struct gl_program *prog, struct prog_src_register *r, float *data) data[2] = value[GET_SWZ(r->Swizzle, 2)].f; data[3] = value[GET_SWZ(r->Swizzle, 3)].f; - if (r->Abs) { - data[0] = fabsf(data[0]); - data[1] = fabsf(data[1]); - data[2] = fabsf(data[2]); - data[3] = fabsf(data[3]); - } - if (r->Negate & 0x01) { data[0] = -data[0]; } diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c index f9e9035fc3e..c6d22644419 100644 --- a/src/mesa/program/prog_optimize.c +++ b/src/mesa/program/prog_optimize.c @@ -60,52 +60,47 @@ get_src_arg_mask(const struct prog_instruction *inst, assert(arg < _mesa_num_inst_src_regs(inst->Opcode)); /* Form the dst register, find the written channels */ - if 
(inst->CondUpdate) { + switch (inst->Opcode) { + case OPCODE_MOV: + case OPCODE_MIN: + case OPCODE_MAX: + case OPCODE_ABS: + case OPCODE_ADD: + case OPCODE_MAD: + case OPCODE_MUL: + case OPCODE_SUB: + case OPCODE_CMP: + case OPCODE_FLR: + case OPCODE_FRC: + case OPCODE_LRP: + case OPCODE_SEQ: + case OPCODE_SGE: + case OPCODE_SGT: + case OPCODE_SLE: + case OPCODE_SLT: + case OPCODE_SNE: + case OPCODE_SSG: + channel_mask = inst->DstReg.WriteMask & dst_mask; + break; + case OPCODE_RCP: + case OPCODE_SIN: + case OPCODE_COS: + case OPCODE_RSQ: + case OPCODE_POW: + case OPCODE_EX2: + case OPCODE_LOG: + channel_mask = WRITEMASK_X; + break; + case OPCODE_DP2: + channel_mask = WRITEMASK_XY; + break; + case OPCODE_DP3: + case OPCODE_XPD: + channel_mask = WRITEMASK_XYZ; + break; + default: channel_mask = WRITEMASK_XYZW; - } - else { - switch (inst->Opcode) { - case OPCODE_MOV: - case OPCODE_MIN: - case OPCODE_MAX: - case OPCODE_ABS: - case OPCODE_ADD: - case OPCODE_MAD: - case OPCODE_MUL: - case OPCODE_SUB: - case OPCODE_CMP: - case OPCODE_FLR: - case OPCODE_FRC: - case OPCODE_LRP: - case OPCODE_SEQ: - case OPCODE_SGE: - case OPCODE_SGT: - case OPCODE_SLE: - case OPCODE_SLT: - case OPCODE_SNE: - case OPCODE_SSG: - channel_mask = inst->DstReg.WriteMask & dst_mask; - break; - case OPCODE_RCP: - case OPCODE_SIN: - case OPCODE_COS: - case OPCODE_RSQ: - case OPCODE_POW: - case OPCODE_EX2: - case OPCODE_LOG: - channel_mask = WRITEMASK_X; - break; - case OPCODE_DP2: - channel_mask = WRITEMASK_XY; - break; - case OPCODE_DP3: - case OPCODE_XPD: - channel_mask = WRITEMASK_XYZ; - break; - default: - channel_mask = WRITEMASK_XYZW; - break; - } + break; } /* Now, given the src swizzle and the written channels, find which @@ -302,17 +297,6 @@ _mesa_remove_dead_code_global(struct gl_program *prog) printf("abort remove dead code (indirect temp)\n"); goto done; } - - if (inst->CondUpdate) { - /* If we're writing to this register and setting condition - * codes we cannot remove the 
instruction. Prevent removal - * by setting the 'read' flag. - */ - tempRead[index][0] = GL_TRUE; - tempRead[index][1] = GL_TRUE; - tempRead[index][2] = GL_TRUE; - tempRead[index][3] = GL_TRUE; - } } } @@ -461,14 +445,9 @@ can_downward_mov_be_modifed(const struct prog_instruction *mov) { return mov->Opcode == OPCODE_MOV && - mov->CondUpdate == GL_FALSE && mov->SrcReg[0].RelAddr == 0 && mov->SrcReg[0].Negate == 0 && - mov->SrcReg[0].Abs == 0 && - mov->SrcReg[0].HasIndex2 == 0 && - mov->SrcReg[0].RelAddr2 == 0 && - mov->DstReg.RelAddr == 0 && - mov->DstReg.CondMask == COND_TR; + mov->DstReg.RelAddr == 0; } @@ -536,8 +515,7 @@ _mesa_remove_extra_move_use(struct gl_program *prog) if (inst2->SrcReg[arg].File != mov->DstReg.File || inst2->SrcReg[arg].Index != mov->DstReg.Index || - inst2->SrcReg[arg].RelAddr || - inst2->SrcReg[arg].Abs) + inst2->SrcReg[arg].RelAddr) continue; read_mask = get_src_arg_mask(inst2, arg, NO_MASK); @@ -786,8 +764,7 @@ _mesa_remove_extra_moves(struct gl_program *prog) if (prevInst->DstReg.File == PROGRAM_TEMPORARY && prevInst->DstReg.Index == id && - prevInst->DstReg.RelAddr == 0 && - prevInst->DstReg.CondMask == COND_TR) { + prevInst->DstReg.RelAddr == 0) { const GLuint dst_mask = prevInst->DstReg.WriteMask; enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask); diff --git a/src/mesa/program/prog_print.c b/src/mesa/program/prog_print.c index bb7c2c6e527..08381b42523 100644 --- a/src/mesa/program/prog_print.c +++ b/src/mesa/program/prog_print.c @@ -354,8 +354,7 @@ arb_output_attrib_string(GLuint index, GLenum progType) */ static const char * reg_string(gl_register_file f, GLint index, gl_prog_print_mode mode, - GLboolean relAddr, const struct gl_program *prog, - GLboolean hasIndex2, GLboolean relAddr2, GLint index2) + GLboolean relAddr, const struct gl_program *prog) { static char str[100]; const char *addr = relAddr ? 
"ADDR+" : ""; @@ -366,11 +365,6 @@ reg_string(gl_register_file f, GLint index, gl_prog_print_mode mode, case PROG_PRINT_DEBUG: sprintf(str, "%s[%s%d]", _mesa_register_file_name(f), addr, index); - if (hasIndex2) { - int offset = strlen(str); - const char *addr2 = relAddr2 ? "ADDR+" : ""; - sprintf(str+offset, "[%s%d]", addr2, index2); - } break; case PROG_PRINT_ARB: @@ -508,24 +502,6 @@ _mesa_writemask_string(GLuint writeMask) } -const char * -_mesa_condcode_string(GLuint condcode) -{ - switch (condcode) { - case COND_GT: return "GT"; - case COND_EQ: return "EQ"; - case COND_LT: return "LT"; - case COND_UN: return "UN"; - case COND_GE: return "GE"; - case COND_LE: return "LE"; - case COND_NE: return "NE"; - case COND_TR: return "TR"; - case COND_FL: return "FL"; - default: return "cond???"; - } -} - - static void fprint_dst_reg(FILE * f, const struct prog_dst_register *dstReg, @@ -534,17 +510,9 @@ fprint_dst_reg(FILE * f, { fprintf(f, "%s%s", reg_string((gl_register_file) dstReg->File, - dstReg->Index, mode, dstReg->RelAddr, prog, - GL_FALSE, GL_FALSE, 0), + dstReg->Index, mode, dstReg->RelAddr, prog), _mesa_writemask_string(dstReg->WriteMask)); - if (dstReg->CondMask != COND_TR) { - fprintf(f, " (%s.%s)", - _mesa_condcode_string(dstReg->CondMask), - _mesa_swizzle_string(dstReg->CondSwizzle, - GL_FALSE, GL_FALSE)); - } - #if 0 fprintf(f, "%s[%d]%s", _mesa_register_file_name((gl_register_file) dstReg->File), @@ -560,16 +528,11 @@ fprint_src_reg(FILE *f, gl_prog_print_mode mode, const struct gl_program *prog) { - const char *abs = srcReg->Abs ? 
"|" : ""; - - fprintf(f, "%s%s%s%s", - abs, + fprintf(f, "%s%s", reg_string((gl_register_file) srcReg->File, - srcReg->Index, mode, srcReg->RelAddr, prog, - srcReg->HasIndex2, srcReg->RelAddr2, srcReg->Index2), + srcReg->Index, mode, srcReg->RelAddr, prog), _mesa_swizzle_string(srcReg->Swizzle, - srcReg->Negate, GL_FALSE), - abs); + srcReg->Negate, GL_FALSE)); #if 0 fprintf(f, "%s[%d]%s", _mesa_register_file_name((gl_register_file) srcReg->File), @@ -600,8 +563,6 @@ _mesa_fprint_alu_instruction(FILE *f, GLuint j; fprintf(f, "%s", opcode_string); - if (inst->CondUpdate) - fprintf(f, ".C"); /* frag prog only */ if (inst->Saturate) @@ -714,16 +675,6 @@ _mesa_fprint_instruction_opt(FILE *f, fprint_src_reg(f, &inst->SrcReg[0], mode, prog); fprint_comment(f, inst); break; - case OPCODE_KIL_NV: - fprintf(f, "%s", _mesa_opcode_string(inst->Opcode)); - fprintf(f, " "); - fprintf(f, "%s.%s", - _mesa_condcode_string(inst->DstReg.CondMask), - _mesa_swizzle_string(inst->DstReg.CondSwizzle, - GL_FALSE, GL_FALSE)); - fprint_comment(f, inst); - break; - case OPCODE_ARL: fprintf(f, "ARL "); fprint_dst_reg(f, &inst->DstReg, mode, prog); @@ -732,19 +683,9 @@ _mesa_fprint_instruction_opt(FILE *f, fprint_comment(f, inst); break; case OPCODE_IF: - if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) { - /* Use ordinary register */ - fprintf(f, "IF "); - fprint_src_reg(f, &inst->SrcReg[0], mode, prog); - fprintf(f, "; "); - } - else { - /* Use cond codes */ - fprintf(f, "IF (%s%s);", - _mesa_condcode_string(inst->DstReg.CondMask), - _mesa_swizzle_string(inst->DstReg.CondSwizzle, - 0, GL_FALSE)); - } + fprintf(f, "IF "); + fprint_src_reg(f, &inst->SrcReg[0], mode, prog); + fprintf(f, "; "); fprintf(f, " # (if false, goto %d)", inst->BranchTarget); fprint_comment(f, inst); return indent + 3; @@ -762,10 +703,8 @@ _mesa_fprint_instruction_opt(FILE *f, break; case OPCODE_BRK: case OPCODE_CONT: - fprintf(f, "%s (%s%s); # (goto %d)", + fprintf(f, "%s; # (goto %d)", _mesa_opcode_string(inst->Opcode), 
- _mesa_condcode_string(inst->DstReg.CondMask), - _mesa_swizzle_string(inst->DstReg.CondSwizzle, 0, GL_FALSE), inst->BranchTarget); fprint_comment(f, inst); break; @@ -785,9 +724,7 @@ _mesa_fprint_instruction_opt(FILE *f, fprint_comment(f, inst); break; case OPCODE_RET: - fprintf(f, "RET (%s%s)", - _mesa_condcode_string(inst->DstReg.CondMask), - _mesa_swizzle_string(inst->DstReg.CondSwizzle, 0, GL_FALSE)); + fprintf(f, "RET"); fprint_comment(f, inst); break; diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index 74cbbfb1cc6..1f916ab9299 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -230,9 +230,6 @@ ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src) def = nir_fmov_alu(b, src, 4); - if (prog_src->Abs) - def = nir_fabs(b, def); - if (prog_src->Negate) def = nir_fneg(b, def); } else { @@ -258,9 +255,6 @@ ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src) chans[i] = &mov->dest.dest.ssa; } - if (prog_src->Abs) - chans[i] = nir_fabs(b, chans[i]); - if (prog_src->Negate & (1 << i)) chans[i] = nir_fneg(b, chans[i]); } @@ -592,11 +586,6 @@ ptn_tex(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src, op = nir_texop_tex; num_srcs = 2; break; - case OPCODE_TXP_NV: - assert(!"not handled"); - op = nir_texop_tex; - num_srcs = 2; - break; default: fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode); abort(); @@ -743,7 +732,6 @@ static const nir_op op_trans[MAX_OPCODE] = { [OPCODE_TXD] = 0, [OPCODE_TXL] = 0, [OPCODE_TXP] = 0, - [OPCODE_TXP_NV] = 0, [OPCODE_XPD] = 0, }; @@ -882,7 +870,6 @@ ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst) case OPCODE_TXD: case OPCODE_TXL: case OPCODE_TXP: - case OPCODE_TXP_NV: ptn_tex(b, dest, src, prog_inst); break; diff --git a/src/mesa/program/program_lexer.l b/src/mesa/program/program_lexer.l index 2fcd71f2288..bb169b930ce 100644 --- a/src/mesa/program/program_lexer.l +++ 
b/src/mesa/program/program_lexer.l @@ -32,7 +32,6 @@ #define require_ARB_vp (yyextra->mode == ARB_vertex) #define require_ARB_fp (yyextra->mode == ARB_fragment) -#define require_NV_fp (yyextra->option.NV_fragment) #define require_shadow (yyextra->option.Shadow) #define require_rect (yyextra->option.TexRect) #define require_texarray (yyextra->option.TexArray) @@ -161,9 +160,6 @@ exp [Ee][-+]?[0-9]+ frac "."[0-9]+ dot "."[ \t]* -sz [HRX]? -szf [HR]? -cc C? sat (_SAT)? %option prefix="_mesa_program_lexer_" @@ -184,59 +180,52 @@ OUTPUT { return OUTPUT; } PARAM { return PARAM; } TEMP { yylval->integer = at_temp; return TEMP; } -ABS{sz}{cc}{sat} { return_opcode( 1, VECTOR_OP, ABS, 3); } -ADD{sz}{cc}{sat} { return_opcode( 1, BIN_OP, ADD, 3); } +ABS{sat} { return_opcode( 1, VECTOR_OP, ABS, 3); } +ADD{sat} { return_opcode( 1, BIN_OP, ADD, 3); } ARL { return_opcode(require_ARB_vp, ARL, ARL, 3); } CMP{sat} { return_opcode(require_ARB_fp, TRI_OP, CMP, 3); } -COS{szf}{cc}{sat} { return_opcode(require_ARB_fp, SCALAR_OP, COS, 3); } +COS{sat} { return_opcode(require_ARB_fp, SCALAR_OP, COS, 3); } -DDX{szf}{cc}{sat} { return_opcode(require_NV_fp, VECTOR_OP, DDX, 3); } -DDY{szf}{cc}{sat} { return_opcode(require_NV_fp, VECTOR_OP, DDY, 3); } -DP3{sz}{cc}{sat} { return_opcode( 1, BIN_OP, DP3, 3); } -DP4{sz}{cc}{sat} { return_opcode( 1, BIN_OP, DP4, 3); } -DPH{sz}{cc}{sat} { return_opcode( 1, BIN_OP, DPH, 3); } -DST{szf}{cc}{sat} { return_opcode( 1, BIN_OP, DST, 3); } +DP3{sat} { return_opcode( 1, BIN_OP, DP3, 3); } +DP4{sat} { return_opcode( 1, BIN_OP, DP4, 3); } +DPH{sat} { return_opcode( 1, BIN_OP, DPH, 3); } +DST{sat} { return_opcode( 1, BIN_OP, DST, 3); } -EX2{szf}{cc}{sat} { return_opcode( 1, SCALAR_OP, EX2, 3); } +EX2{sat} { return_opcode( 1, SCALAR_OP, EX2, 3); } EXP { return_opcode(require_ARB_vp, SCALAR_OP, EXP, 3); } -FLR{sz}{cc}{sat} { return_opcode( 1, VECTOR_OP, FLR, 3); } -FRC{sz}{cc}{sat} { return_opcode( 1, VECTOR_OP, FRC, 3); } +FLR{sat} { return_opcode( 1, VECTOR_OP, 
FLR, 3); } +FRC{sat} { return_opcode( 1, VECTOR_OP, FRC, 3); } KIL { return_opcode(require_ARB_fp, KIL, KIL, 3); } -LIT{szf}{cc}{sat} { return_opcode( 1, VECTOR_OP, LIT, 3); } -LG2{szf}{cc}{sat} { return_opcode( 1, SCALAR_OP, LG2, 3); } +LIT{sat} { return_opcode( 1, VECTOR_OP, LIT, 3); } +LG2{sat} { return_opcode( 1, SCALAR_OP, LG2, 3); } LOG { return_opcode(require_ARB_vp, SCALAR_OP, LOG, 3); } -LRP{sz}{cc}{sat} { return_opcode(require_ARB_fp, TRI_OP, LRP, 3); } +LRP{sat} { return_opcode(require_ARB_fp, TRI_OP, LRP, 3); } -MAD{sz}{cc}{sat} { return_opcode( 1, TRI_OP, MAD, 3); } -MAX{sz}{cc}{sat} { return_opcode( 1, BIN_OP, MAX, 3); } -MIN{sz}{cc}{sat} { return_opcode( 1, BIN_OP, MIN, 3); } -MOV{sz}{cc}{sat} { return_opcode( 1, VECTOR_OP, MOV, 3); } -MUL{sz}{cc}{sat} { return_opcode( 1, BIN_OP, MUL, 3); } +MAD{sat} { return_opcode( 1, TRI_OP, MAD, 3); } +MAX{sat} { return_opcode( 1, BIN_OP, MAX, 3); } +MIN{sat} { return_opcode( 1, BIN_OP, MIN, 3); } +MOV{sat} { return_opcode( 1, VECTOR_OP, MOV, 3); } +MUL{sat} { return_opcode( 1, BIN_OP, MUL, 3); } -POW{szf}{cc}{sat} { return_opcode( 1, BINSC_OP, POW, 3); } +POW{sat} { return_opcode( 1, BINSC_OP, POW, 3); } -RCP{szf}{cc}{sat} { return_opcode( 1, SCALAR_OP, RCP, 3); } -RSQ{szf}{cc}{sat} { return_opcode( 1, SCALAR_OP, RSQ, 3); } +RCP{sat} { return_opcode( 1, SCALAR_OP, RCP, 3); } +RSQ{sat} { return_opcode( 1, SCALAR_OP, RSQ, 3); } SCS{sat} { return_opcode(require_ARB_fp, SCALAR_OP, SCS, 3); } -SEQ{sz}{cc}{sat} { return_opcode(require_NV_fp, BIN_OP, SEQ, 3); } -SGE{sz}{cc}{sat} { return_opcode( 1, BIN_OP, SGE, 3); } -SGT{sz}{cc}{sat} { return_opcode(require_NV_fp, BIN_OP, SGT, 3); } -SIN{szf}{cc}{sat} { return_opcode(require_ARB_fp, SCALAR_OP, SIN, 3); } -SLE{sz}{cc}{sat} { return_opcode(require_NV_fp, BIN_OP, SLE, 3); } -SLT{sz}{cc}{sat} { return_opcode( 1, BIN_OP, SLT, 3); } -SNE{sz}{cc}{sat} { return_opcode(require_NV_fp, BIN_OP, SNE, 3); } -SUB{sz}{cc}{sat} { return_opcode( 1, BIN_OP, SUB, 3); } +SGE{sat} { 
return_opcode( 1, BIN_OP, SGE, 3); } +SIN{sat} { return_opcode(require_ARB_fp, SCALAR_OP, SIN, 3); } +SLT{sat} { return_opcode( 1, BIN_OP, SLT, 3); } +SUB{sat} { return_opcode( 1, BIN_OP, SUB, 3); } SWZ{sat} { return_opcode( 1, SWZ, SWZ, 3); } -TEX{cc}{sat} { return_opcode(require_ARB_fp, SAMPLE_OP, TEX, 3); } -TXB{cc}{sat} { return_opcode(require_ARB_fp, SAMPLE_OP, TXB, 3); } -TXD{cc}{sat} { return_opcode(require_NV_fp, TXD_OP, TXD, 3); } -TXP{cc}{sat} { return_opcode(require_ARB_fp, SAMPLE_OP, TXP, 3); } +TEX{sat} { return_opcode(require_ARB_fp, SAMPLE_OP, TEX, 3); } +TXB{sat} { return_opcode(require_ARB_fp, SAMPLE_OP, TXB, 3); } +TXP{sat} { return_opcode(require_ARB_fp, SAMPLE_OP, TXP, 3); } XPD{sat} { return_opcode( 1, BIN_OP, XPD, 3); } diff --git a/src/mesa/program/program_parse.y b/src/mesa/program/program_parse.y index 635f5d09d60..ad94fe032bc 100644 --- a/src/mesa/program/program_parse.y +++ b/src/mesa/program/program_parse.y @@ -212,8 +212,6 @@ static struct asm_instruction *asm_instruction_copy_ctor( %type <sym> addrReg %type <swiz_mask> addrComponent addrWriteMask -%type <dst_reg> ccMaskRule ccTest ccMaskRule2 ccTest2 optionalCcMask - %type <result> resultBinding resultColBinding %type <integer> optFaceType optColorType %type <integer> optResultFaceType optResultColorType @@ -471,13 +469,6 @@ KIL_instruction: KIL swizzleSrcReg $$ = asm_instruction_ctor(OPCODE_KIL, NULL, & $2, NULL, NULL); state->fragment.UsesKill = 1; } - | KIL ccTest - { - $$ = asm_instruction_ctor(OPCODE_KIL_NV, NULL, NULL, NULL, NULL); - $$->Base.DstReg.CondMask = $2.CondMask; - $$->Base.DstReg.CondSwizzle = $2.CondSwizzle; - state->fragment.UsesKill = 1; - } ; TXD_instruction: TXD_OP maskedDstReg ',' swizzleSrcReg ',' swizzleSrcReg ',' swizzleSrcReg ',' texImageUnit ',' texTarget @@ -564,21 +555,6 @@ scalarSrcReg: optionalSign scalarUse $$.Base.Negate = ~$$.Base.Negate; } } - | optionalSign '|' scalarUse '|' - { - $$ = $3; - - if (!state->option.NV_fragment) { - yyerror(& @2, state, 
"unexpected character '|'"); - YYERROR; - } - - if ($1) { - $$.Base.Negate = ~$$.Base.Negate; - } - - $$.Base.Abs = 1; - } ; scalarUse: srcReg scalarSuffix @@ -588,23 +564,6 @@ scalarUse: srcReg scalarSuffix $$.Base.Swizzle = _mesa_combine_swizzles($$.Base.Swizzle, $2.swizzle); } - | paramConstScalarUse - { - struct asm_symbol temp_sym; - - if (!state->option.NV_fragment) { - yyerror(& @1, state, "expected scalar suffix"); - YYERROR; - } - - memset(& temp_sym, 0, sizeof(temp_sym)); - temp_sym.param_binding_begin = ~0; - initialize_symbol_from_const(state->prog, & temp_sym, & $1, GL_TRUE); - - set_src_reg_swz(& $$, PROGRAM_CONSTANT, - temp_sym.param_binding_begin, - temp_sym.param_binding_swizzle); - } ; swizzleSrcReg: optionalSign srcReg swizzleSuffix @@ -618,32 +577,12 @@ swizzleSrcReg: optionalSign srcReg swizzleSuffix $$.Base.Swizzle = _mesa_combine_swizzles($$.Base.Swizzle, $3.swizzle); } - | optionalSign '|' srcReg swizzleSuffix '|' - { - $$ = $3; - - if (!state->option.NV_fragment) { - yyerror(& @2, state, "unexpected character '|'"); - YYERROR; - } - - if ($1) { - $$.Base.Negate = ~$$.Base.Negate; - } - - $$.Base.Abs = 1; - $$.Base.Swizzle = _mesa_combine_swizzles($$.Base.Swizzle, - $4.swizzle); - } - ; -maskedDstReg: dstReg optionalMask optionalCcMask +maskedDstReg: dstReg optionalMask { $$ = $1; $$.WriteMask = $2.mask; - $$.CondMask = $3.CondMask; - $$.CondSwizzle = $3.CondSwizzle; if ($$.File == PROGRAM_OUTPUT) { /* Technically speaking, this should check that it is in @@ -1029,79 +968,6 @@ optionalMask: MASK4 | MASK3 | MASK2 | MASK1 | { $$.swizzle = SWIZZLE_NOOP; $$.mask = WRITEMASK_XYZW; } ; -optionalCcMask: '(' ccTest ')' - { - $$ = $2; - } - | '(' ccTest2 ')' - { - $$ = $2; - } - | - { - $$.CondMask = COND_TR; - $$.CondSwizzle = SWIZZLE_NOOP; - } - ; - -ccTest: ccMaskRule swizzleSuffix - { - $$ = $1; - $$.CondSwizzle = $2.swizzle; - } - ; - -ccTest2: ccMaskRule2 swizzleSuffix - { - $$ = $1; - $$.CondSwizzle = $2.swizzle; - } - ; - -ccMaskRule: 
IDENTIFIER - { - const int cond = _mesa_parse_cc($1); - if ((cond == 0) || ($1[2] != '\0')) { - char *const err_str = - make_error_string("invalid condition code \"%s\"", $1); - - yyerror(& @1, state, (err_str != NULL) - ? err_str : "invalid condition code"); - - if (err_str != NULL) { - free(err_str); - } - - YYERROR; - } - - $$.CondMask = cond; - $$.CondSwizzle = SWIZZLE_NOOP; - } - ; - -ccMaskRule2: USED_IDENTIFIER - { - const int cond = _mesa_parse_cc($1); - if ((cond == 0) || ($1[2] != '\0')) { - char *const err_str = - make_error_string("invalid condition code \"%s\"", $1); - - yyerror(& @1, state, (err_str != NULL) - ? err_str : "invalid condition code"); - - if (err_str != NULL) { - free(err_str); - } - - YYERROR; - } - - $$.CondMask = cond; - $$.CondSwizzle = SWIZZLE_NOOP; - } - ; - namingStatement: ATTRIB_statement | PARAM_statement | TEMP_statement @@ -1930,46 +1796,7 @@ optionalSign: '+' { $$ = FALSE; } | { $$ = FALSE; } ; -TEMP_statement: optVarSize TEMP { $<integer>$ = $2; } varNameList - ; - -optVarSize: string - { - /* NV_fragment_program_option defines the size qualifiers in a - * fairly broken way. "SHORT" or "LONG" can optionally be used - * before TEMP or OUTPUT. However, neither is a reserved word! - * This means that we have to parse it as an identifier, then check - * to make sure it's one of the valid values. *sigh* - * - * In addition, the grammar in the extension spec does *not* allow - * the size specifier to be optional, but all known implementations - * do. - */ - if (!state->option.NV_fragment) { - yyerror(& @1, state, "unexpected IDENTIFIER"); - YYERROR; - } - - if (strcmp("SHORT", $1) == 0) { - } else if (strcmp("LONG", $1) == 0) { - } else { - char *const err_str = - make_error_string("invalid storage size specifier \"%s\"", - $1); - - yyerror(& @1, state, (err_str != NULL) - ? 
err_str : "invalid storage size specifier"); - - if (err_str != NULL) { - free(err_str); - } - - YYERROR; - } - } - | - { - } +TEMP_statement: TEMP { $<integer>$ = $1; } varNameList ; ADDRESS_statement: ADDRESS { $<integer>$ = $1; } varNameList @@ -1991,16 +1818,16 @@ varNameList: varNameList ',' IDENTIFIER } ; -OUTPUT_statement: optVarSize OUTPUT IDENTIFIER '=' resultBinding +OUTPUT_statement: OUTPUT IDENTIFIER '=' resultBinding { struct asm_symbol *const s = - declare_variable(state, $3, at_output, & @3); + declare_variable(state, $2, at_output, & @2); if (s == NULL) { - free($3); + free($2); YYERROR; } else { - s->output_binding = $5; + s->output_binding = $4; } } ; @@ -2248,9 +2075,6 @@ asm_instruction_set_operands(struct asm_instruction *inst, inst->Base.DstReg = *dst; } - /* The only instruction that doesn't have any source registers is the - * condition-code based KIL instruction added by NV_fragment_program_option. - */ if (src0 != NULL) { inst->Base.SrcReg[0] = src0->Base; inst->SrcReg[0] = *src0; @@ -2306,10 +2130,7 @@ asm_instruction_copy_ctor(const struct prog_instruction *base, if (inst) { _mesa_init_instructions(& inst->Base, 1); inst->Base.Opcode = base->Opcode; - inst->Base.CondUpdate = base->CondUpdate; - inst->Base.CondDst = base->CondDst; inst->Base.Saturate = base->Saturate; - inst->Base.Precision = base->Precision; asm_instruction_set_operands(inst, dst, src0, src1, src2); } @@ -2324,8 +2145,6 @@ init_dst_reg(struct prog_dst_register *r) memset(r, 0, sizeof(*r)); r->File = PROGRAM_UNDEFINED; r->WriteMask = WRITEMASK_XYZW; - r->CondMask = COND_TR; - r->CondSwizzle = SWIZZLE_NOOP; } @@ -2346,8 +2165,6 @@ set_dst_reg(struct prog_dst_register *r, gl_register_file file, GLint index) r->File = file; r->Index = index; r->WriteMask = WRITEMASK_XYZW; - r->CondMask = COND_TR; - r->CondSwizzle = SWIZZLE_NOOP; } diff --git a/src/mesa/program/program_parse_extra.c b/src/mesa/program/program_parse_extra.c index 71f86d13ace..1c5f5794dd5 100644 --- 
a/src/mesa/program/program_parse_extra.c +++ b/src/mesa/program/program_parse_extra.c @@ -38,46 +38,10 @@ _mesa_parse_instruction_suffix(const struct asm_parser_state *state, const char *suffix, struct prog_instruction *inst) { - inst->CondUpdate = 0; - inst->CondDst = 0; inst->Saturate = GL_FALSE; - inst->Precision = FLOAT32; - /* The first possible suffix element is the precision specifier from - * NV_fragment_program_option. - */ - if (state->option.NV_fragment) { - switch (suffix[0]) { - case 'H': - inst->Precision = FLOAT16; - suffix++; - break; - case 'R': - inst->Precision = FLOAT32; - suffix++; - break; - case 'X': - inst->Precision = FIXED12; - suffix++; - break; - default: - break; - } - } - - /* The next possible suffix element is the condition code modifier selection - * from NV_fragment_program_option. - */ - if (state->option.NV_fragment) { - if (suffix[0] == 'C') { - inst->CondUpdate = 1; - suffix++; - } - } - - - /* The final possible suffix element is the saturation selector from + /* The only possible suffix element is the saturation selector from * ARB_fragment_program. */ if (state->mode == ARB_fragment) { @@ -95,60 +59,6 @@ _mesa_parse_instruction_suffix(const struct asm_parser_state *state, int -_mesa_parse_cc(const char *s) -{ - int cond = 0; - - switch (s[0]) { - case 'E': - if (s[1] == 'Q') { - cond = COND_EQ; - } - break; - - case 'F': - if (s[1] == 'L') { - cond = COND_FL; - } - break; - - case 'G': - if (s[1] == 'E') { - cond = COND_GE; - } else if (s[1] == 'T') { - cond = COND_GT; - } - break; - - case 'L': - if (s[1] == 'E') { - cond = COND_LE; - } else if (s[1] == 'T') { - cond = COND_LT; - } - break; - - case 'N': - if (s[1] == 'E') { - cond = COND_NE; - } - break; - - case 'T': - if (s[1] == 'R') { - cond = COND_TR; - } - break; - - default: - break; - } - - return ((cond == 0) || (s[2] != '\0')) ? 
0 : cond; -} - - -int _mesa_ARBvp_parse_option(struct asm_parser_state *state, const char *option) { if (strcmp(option, "ARB_position_invariant") == 0) { @@ -269,17 +179,6 @@ _mesa_ARBfp_parse_option(struct asm_parser_state *state, const char *option) state->option.DrawBuffers = 1; return 1; } - } else if (strncmp(option, "NV_fragment_program", 19) == 0) { - option += 19; - - /* Other NV_fragment_program strings may be supported later. - */ - if (option[0] == '\0') { - if (state->ctx->Extensions.NV_fragment_program_option) { - state->option.NV_fragment = 1; - return 1; - } - } } return 0; diff --git a/src/mesa/program/program_parser.h b/src/mesa/program/program_parser.h index 04c64f446be..af7b2a0be3e 100644 --- a/src/mesa/program/program_parser.h +++ b/src/mesa/program/program_parser.h @@ -207,7 +207,6 @@ struct asm_parser_state { unsigned Shadow:1; unsigned TexRect:1; unsigned TexArray:1; - unsigned NV_fragment:1; unsigned OriginUpperLeft:1; unsigned PixelCenterInteger:1; } option; @@ -286,17 +285,4 @@ extern int _mesa_ARBfp_parse_option(struct asm_parser_state *state, extern int _mesa_parse_instruction_suffix(const struct asm_parser_state *state, const char *suffix, struct prog_instruction *inst); -/** - * Parses a condition code name - * - * The condition code names (e.g., \c LT, \c GT, \c NE) were added to assembly - * shaders with the \c GL_NV_fragment_program_option extension. This function - * converts a string representation into one of the \c COND_ macros. - * - * \return - * One of the \c COND_ macros defined in prog_instruction.h on success or zero - * on failure. 
- */ -extern int _mesa_parse_cc(const char *s); - /*@}*/ diff --git a/src/mesa/program/programopt.c b/src/mesa/program/programopt.c index 24dde57725e..4dd9998a887 100644 --- a/src/mesa/program/programopt.c +++ b/src/mesa/program/programopt.c @@ -426,7 +426,6 @@ is_texture_instruction(const struct prog_instruction *inst) case OPCODE_TXD: case OPCODE_TXL: case OPCODE_TXP: - case OPCODE_TXP_NV: return GL_TRUE; default: return GL_FALSE; diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 943582d447a..26e463e0437 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -166,7 +166,6 @@ public: this->index = index; this->index2D = 0; this->writemask = writemask; - this->cond_mask = COND_TR; this->reladdr = NULL; this->reladdr2 = NULL; this->has_index2 = false; @@ -180,7 +179,6 @@ public: this->index = 0; this->index2D = 0; this->writemask = writemask; - this->cond_mask = COND_TR; this->reladdr = NULL; this->reladdr2 = NULL; this->has_index2 = false; @@ -195,7 +193,6 @@ public: this->index = 0; this->index2D = 0; this->writemask = 0; - this->cond_mask = COND_TR; this->reladdr = NULL; this->reladdr2 = NULL; this->has_index2 = false; @@ -208,7 +205,6 @@ public: int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ int index2D; int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ - GLuint cond_mask:4; int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */ /** Register index should be offset by the integer in this reg. 
*/ st_src_reg *reladdr; @@ -239,7 +235,6 @@ st_dst_reg::st_dst_reg(st_src_reg reg) this->file = reg.file; this->index = reg.index; this->writemask = WRITEMASK_XYZW; - this->cond_mask = COND_TR; this->reladdr = reg.reladdr; this->index2D = reg.index2D; this->reladdr2 = reg.reladdr2; @@ -3675,7 +3670,6 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) l.index = storage->index; l.reladdr = NULL; l.writemask = WRITEMASK_XYZW; - l.cond_mask = COND_TR; for (i = 0; i < type_size(param->type); i++) { emit_asm(ir, TGSI_OPCODE_MOV, l, r); diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index be47823a048..8772efb0944 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -311,15 +311,6 @@ translate_src( struct st_translate *t, { struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index ); - if (t->procType == TGSI_PROCESSOR_GEOMETRY && SrcReg->HasIndex2) { - src = src_register( t, SrcReg->File, SrcReg->Index2 ); - if (SrcReg->RelAddr2) - src = ureg_src_dimension_indirect( src, ureg_src(t->address[0]), - SrcReg->Index); - else - src = ureg_src_dimension( src, SrcReg->Index); - } - src = ureg_swizzle( src, GET_SWZ( SrcReg->Swizzle, 0 ) & 0x3, GET_SWZ( SrcReg->Swizzle, 1 ) & 0x3, @@ -329,9 +320,6 @@ translate_src( struct st_translate *t, if (SrcReg->Negate == NEGATE_XYZW) src = ureg_negate(src); - if (SrcReg->Abs) - src = ureg_abs(src); - if (SrcReg->RelAddr) { src = ureg_src_indirect( src, ureg_src(t->address[0])); if (SrcReg->File != PROGRAM_INPUT && @@ -535,9 +523,6 @@ translate_opcode( unsigned op ) return TGSI_OPCODE_TRUNC; case OPCODE_KIL: return TGSI_OPCODE_KILL_IF; - case OPCODE_KIL_NV: - /* XXX we don't support condition codes in TGSI */ - return TGSI_OPCODE_KILL; case OPCODE_LG2: return TGSI_OPCODE_LG2; case OPCODE_LOG: diff --git a/src/mesa/swrast/s_fragprog.c b/src/mesa/swrast/s_fragprog.c index 4fbf66b9db7..022fb17b6fd 100644 --- a/src/mesa/swrast/s_fragprog.c +++ 
b/src/mesa/swrast/s_fragprog.c @@ -189,12 +189,6 @@ init_machine(struct gl_context *ctx, struct gl_program_machine *machine, machine->CurElement = col; - /* init condition codes */ - machine->CondCodes[0] = COND_EQ; - machine->CondCodes[1] = COND_EQ; - machine->CondCodes[2] = COND_EQ; - machine->CondCodes[3] = COND_EQ; - /* init call stack */ machine->StackDepth = 0; diff --git a/src/mesa/tnl/t_vb_program.c b/src/mesa/tnl/t_vb_program.c index 1494349716c..21fd6cd1a06 100644 --- a/src/mesa/tnl/t_vb_program.c +++ b/src/mesa/tnl/t_vb_program.c @@ -232,12 +232,6 @@ init_machine(struct gl_context *ctx, struct gl_program_machine *machine, machine->NumDeriv = 0; - /* init condition codes */ - machine->CondCodes[0] = COND_EQ; - machine->CondCodes[1] = COND_EQ; - machine->CondCodes[2] = COND_EQ; - machine->CondCodes[3] = COND_EQ; - /* init call stack */ machine->StackDepth = 0; |