diff options
Diffstat (limited to 'src/intel/common')
-rw-r--r-- | src/intel/common/gen_decoder.h | 2 | ||||
-rw-r--r-- | src/intel/common/gen_device_info.c | 902 | ||||
-rw-r--r-- | src/intel/common/gen_device_info.h | 213 | ||||
-rw-r--r-- | src/intel/common/gen_l3_config.h | 2 | ||||
-rw-r--r-- | src/intel/common/meson.build | 2 |
5 files changed, 2 insertions, 1119 deletions
diff --git a/src/intel/common/gen_decoder.h b/src/intel/common/gen_decoder.h index ff388700287..7ae80cd23ed 100644 --- a/src/intel/common/gen_decoder.h +++ b/src/intel/common/gen_decoder.h @@ -28,7 +28,7 @@ #include <stdbool.h> #include <stdio.h> -#include "common/gen_device_info.h" +#include "dev/gen_device_info.h" #include "util/hash_table.h" #ifdef __cplusplus diff --git a/src/intel/common/gen_device_info.c b/src/intel/common/gen_device_info.c deleted file mode 100644 index 1773009d33c..00000000000 --- a/src/intel/common/gen_device_info.c +++ /dev/null @@ -1,902 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include "gen_device_info.h" -#include "compiler/shader_enums.h" -#include "util/macros.h" - -/** - * Get the PCI ID for the device name. - * - * Returns -1 if the device is not known. - */ -int -gen_device_name_to_pci_device_id(const char *name) -{ - static const struct { - const char *name; - int pci_id; - } name_map[] = { - { "brw", 0x2a02 }, - { "g4x", 0x2a42 }, - { "ilk", 0x0042 }, - { "snb", 0x0126 }, - { "ivb", 0x016a }, - { "hsw", 0x0d2e }, - { "byt", 0x0f33 }, - { "bdw", 0x162e }, - { "chv", 0x22B3 }, - { "skl", 0x1912 }, - { "bxt", 0x5A85 }, - { "kbl", 0x5912 }, - { "glk", 0x3185 }, - { "cnl", 0x5a52 }, - { "icl", 0x8a52 }, - }; - - for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) { - if (!strcmp(name_map[i].name, name)) - return name_map[i].pci_id; - } - - return -1; -} - -/** - * Get the overridden PCI ID for the device. This is set with the - * INTEL_DEVID_OVERRIDE environment variable. - * - * Returns -1 if the override is not set. - */ -int -gen_get_pci_device_id_override(void) -{ - if (geteuid() == getuid()) { - const char *devid_override = getenv("INTEL_DEVID_OVERRIDE"); - if (devid_override) { - const int id = gen_device_name_to_pci_device_id(devid_override); - return id >= 0 ? id : strtol(devid_override, NULL, 0); - } - } - - return -1; -} - -static const struct gen_device_info gen_device_info_i965 = { - .gen = 4, - .has_negative_rhw_bug = true, - .num_slices = 1, - .num_subslices = { 1, }, - .num_thread_per_eu = 4, - .max_vs_threads = 16, - .max_gs_threads = 2, - .max_wm_threads = 8 * 4, - .urb = { - .size = 256, - }, - .timestamp_frequency = 12500000, -}; - -static const struct gen_device_info gen_device_info_g4x = { - .gen = 4, - .has_pln = true, - .has_compr4 = true, - .has_surface_tile_offset = true, - .is_g4x = true, - .num_slices = 1, - .num_subslices = { 1, }, - .num_thread_per_eu = 5, - .max_vs_threads = 32, - .max_gs_threads = 2, - .max_wm_threads = 10 * 5, - .urb = { - .size = 384, - }, - .timestamp_frequency = 12500000, -}; - -static const struct gen_device_info gen_device_info_ilk = { - .gen = 5, - .has_pln = true, - .has_compr4 = true, - .has_surface_tile_offset = true, - .num_slices = 1, - .num_subslices = { 1, }, - .num_thread_per_eu = 6, - .max_vs_threads = 72, - .max_gs_threads = 32, - .max_wm_threads = 12 * 6, - .urb = { - .size = 1024, - }, - .timestamp_frequency = 12500000, -}; - -static const struct gen_device_info gen_device_info_snb_gt1 = { - .gen = 6, - .gt = 1, - .has_hiz_and_separate_stencil = true, - .has_llc = true, - .has_pln = true, - .has_surface_tile_offset = true, - .needs_unlit_centroid_workaround = true, - .num_slices = 1, - .num_subslices = { 1, }, - .num_thread_per_eu = 6, /* Not confirmed */ - .max_vs_threads = 24, - .max_gs_threads = 21, /* conservative; 24 if rendering disabled. */ - .max_wm_threads = 40, - .urb = { - .size = 32, - .min_entries = { - [MESA_SHADER_VERTEX] = 24, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 256, - [MESA_SHADER_GEOMETRY] = 256, - }, - }, - .timestamp_frequency = 12500000, -}; - -static const struct gen_device_info gen_device_info_snb_gt2 = { - .gen = 6, - .gt = 2, - .has_hiz_and_separate_stencil = true, - .has_llc = true, - .has_pln = true, - .has_surface_tile_offset = true, - .needs_unlit_centroid_workaround = true, - .num_slices = 1, - .num_subslices = { 1, }, - .num_thread_per_eu = 6, /* Not confirmed */ - .max_vs_threads = 60, - .max_gs_threads = 60, - .max_wm_threads = 80, - .urb = { - .size = 64, - .min_entries = { - [MESA_SHADER_VERTEX] = 24, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 256, - [MESA_SHADER_GEOMETRY] = 256, - }, - }, - .timestamp_frequency = 12500000, -}; - -#define GEN7_FEATURES \ - .gen = 7, \ - .has_hiz_and_separate_stencil = true, \ - .must_use_separate_stencil = true, \ - .has_llc = true, \ - .has_pln = true, \ - .has_64bit_types = true, \ - .has_surface_tile_offset = true, \ - .timestamp_frequency = 12500000 - -static const struct gen_device_info gen_device_info_ivb_gt1 = { - GEN7_FEATURES, .is_ivybridge = true, .gt = 1, - .num_slices = 1, - .num_subslices = { 1, }, - .num_thread_per_eu = 6, - .l3_banks = 2, - .max_vs_threads = 36, - .max_tcs_threads = 36, - .max_tes_threads = 36, - .max_gs_threads = 36, - .max_wm_threads = 48, - .max_cs_threads = 36, - .urb = { - .size = 128, - .min_entries = { - [MESA_SHADER_VERTEX] = 32, - [MESA_SHADER_TESS_EVAL] = 10, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 512, - [MESA_SHADER_TESS_CTRL] = 32, - [MESA_SHADER_TESS_EVAL] = 288, - [MESA_SHADER_GEOMETRY] = 192, - }, - }, -}; - -static const struct gen_device_info gen_device_info_ivb_gt2 = { - GEN7_FEATURES, .is_ivybridge = true, .gt = 2, - .num_slices = 1, - .num_subslices = { 1, }, - .num_thread_per_eu = 8, /* Not sure why this isn't a multiple of - * @max_wm_threads ... */ - .l3_banks = 4, - .max_vs_threads = 128, - .max_tcs_threads = 128, - .max_tes_threads = 128, - .max_gs_threads = 128, - .max_wm_threads = 172, - .max_cs_threads = 64, - .urb = { - .size = 256, - .min_entries = { - [MESA_SHADER_VERTEX] = 32, - [MESA_SHADER_TESS_EVAL] = 10, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 704, - [MESA_SHADER_TESS_CTRL] = 64, - [MESA_SHADER_TESS_EVAL] = 448, - [MESA_SHADER_GEOMETRY] = 320, - }, - }, -}; - -static const struct gen_device_info gen_device_info_byt = { - GEN7_FEATURES, .is_baytrail = true, .gt = 1, - .num_slices = 1, - .num_subslices = { 1, }, - .num_thread_per_eu = 8, - .l3_banks = 1, - .has_llc = false, - .max_vs_threads = 36, - .max_tcs_threads = 36, - .max_tes_threads = 36, - .max_gs_threads = 36, - .max_wm_threads = 48, - .max_cs_threads = 32, - .urb = { - .size = 128, - .min_entries = { - [MESA_SHADER_VERTEX] = 32, - [MESA_SHADER_TESS_EVAL] = 10, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 512, - [MESA_SHADER_TESS_CTRL] = 32, - [MESA_SHADER_TESS_EVAL] = 288, - [MESA_SHADER_GEOMETRY] = 192, - }, - }, -}; - -#define HSW_FEATURES \ - GEN7_FEATURES, \ - .is_haswell = true, \ - .supports_simd16_3src = true, \ - .has_resource_streamer = true - -static const struct gen_device_info gen_device_info_hsw_gt1 = { - HSW_FEATURES, .gt = 1, - .num_slices = 1, - .num_subslices = { 1, }, - .num_thread_per_eu = 7, - .l3_banks = 2, - .max_vs_threads = 70, - .max_tcs_threads = 70, - .max_tes_threads = 70, - .max_gs_threads = 70, - .max_wm_threads = 102, - .max_cs_threads = 70, - .urb = { - .size = 128, - .min_entries = { - [MESA_SHADER_VERTEX] = 32, - [MESA_SHADER_TESS_EVAL] = 10, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 640, - [MESA_SHADER_TESS_CTRL] = 64, - [MESA_SHADER_TESS_EVAL] = 384, - [MESA_SHADER_GEOMETRY] = 256, - }, - }, -}; - -static const struct gen_device_info gen_device_info_hsw_gt2 = { - HSW_FEATURES, .gt = 2, - .num_slices = 1, - .num_subslices = { 2, }, - .num_thread_per_eu = 7, - .l3_banks = 4, - .max_vs_threads = 280, - .max_tcs_threads = 256, - .max_tes_threads = 280, - .max_gs_threads = 256, - .max_wm_threads = 204, - .max_cs_threads = 70, - .urb = { - .size = 256, - .min_entries = { - [MESA_SHADER_VERTEX] = 64, - [MESA_SHADER_TESS_EVAL] = 10, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 1664, - [MESA_SHADER_TESS_CTRL] = 128, - [MESA_SHADER_TESS_EVAL] = 960, - [MESA_SHADER_GEOMETRY] = 640, - }, - }, -}; - -static const struct gen_device_info gen_device_info_hsw_gt3 = { - HSW_FEATURES, .gt = 3, - .num_slices = 2, - .num_subslices = { 2, }, - .num_thread_per_eu = 7, - .l3_banks = 8, - .max_vs_threads = 280, - .max_tcs_threads = 256, - .max_tes_threads = 280, - .max_gs_threads = 256, - .max_wm_threads = 408, - .max_cs_threads = 70, - .urb = { - .size = 512, - .min_entries = { - [MESA_SHADER_VERTEX] = 64, - [MESA_SHADER_TESS_EVAL] = 10, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 1664, - [MESA_SHADER_TESS_CTRL] = 128, - [MESA_SHADER_TESS_EVAL] = 960, - [MESA_SHADER_GEOMETRY] = 640, - }, - }, -}; - -/* It's unclear how well supported sampling from the hiz buffer is on GEN8, - * so keep things conservative for now and set has_sample_with_hiz = false. - */ -#define GEN8_FEATURES \ - .gen = 8, \ - .has_hiz_and_separate_stencil = true, \ - .has_resource_streamer = true, \ - .must_use_separate_stencil = true, \ - .has_llc = true, \ - .has_sample_with_hiz = false, \ - .has_pln = true, \ - .has_integer_dword_mul = true, \ - .has_64bit_types = true, \ - .supports_simd16_3src = true, \ - .has_surface_tile_offset = true, \ - .max_vs_threads = 504, \ - .max_tcs_threads = 504, \ - .max_tes_threads = 504, \ - .max_gs_threads = 504, \ - .max_wm_threads = 384, \ - .timestamp_frequency = 12500000 - -static const struct gen_device_info gen_device_info_bdw_gt1 = { - GEN8_FEATURES, .gt = 1, - .is_broadwell = true, - .num_slices = 1, - .num_subslices = { 2, }, - .num_thread_per_eu = 7, - .l3_banks = 2, - .max_cs_threads = 42, - .urb = { - .size = 192, - .min_entries = { - [MESA_SHADER_VERTEX] = 64, - [MESA_SHADER_TESS_EVAL] = 34, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 2560, - [MESA_SHADER_TESS_CTRL] = 504, - [MESA_SHADER_TESS_EVAL] = 1536, - [MESA_SHADER_GEOMETRY] = 960, - }, - } -}; - -static const struct gen_device_info gen_device_info_bdw_gt2 = { - GEN8_FEATURES, .gt = 2, - .is_broadwell = true, - .num_slices = 1, - .num_subslices = { 3, }, - .num_thread_per_eu = 7, - .l3_banks = 4, - .max_cs_threads = 56, - .urb = { - .size = 384, - .min_entries = { - [MESA_SHADER_VERTEX] = 64, - [MESA_SHADER_TESS_EVAL] = 34, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 2560, - [MESA_SHADER_TESS_CTRL] = 504, - [MESA_SHADER_TESS_EVAL] = 1536, - [MESA_SHADER_GEOMETRY] = 960, - }, - } -}; - -static const struct gen_device_info gen_device_info_bdw_gt3 = { - GEN8_FEATURES, .gt = 3, - .is_broadwell = true, - .num_slices = 2, - .num_subslices = { 3, 3, }, - .num_thread_per_eu = 7, - .l3_banks = 8, - .max_cs_threads = 56, - .urb = { - .size = 384, - .min_entries = { - [MESA_SHADER_VERTEX] = 64, - [MESA_SHADER_TESS_EVAL] = 34, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 2560, - [MESA_SHADER_TESS_CTRL] = 504, - [MESA_SHADER_TESS_EVAL] = 1536, - [MESA_SHADER_GEOMETRY] = 960, - }, - } -}; - -static const struct gen_device_info gen_device_info_chv = { - GEN8_FEATURES, .is_cherryview = 1, .gt = 1, - .has_llc = false, - .has_integer_dword_mul = false, - .num_slices = 1, - .num_subslices = { 2, }, - .num_thread_per_eu = 7, - .l3_banks = 2, - .max_vs_threads = 80, - .max_tcs_threads = 80, - .max_tes_threads = 80, - .max_gs_threads = 80, - .max_wm_threads = 128, - .max_cs_threads = 6 * 7, - .urb = { - .size = 192, - .min_entries = { - [MESA_SHADER_VERTEX] = 34, - [MESA_SHADER_TESS_EVAL] = 34, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 640, - [MESA_SHADER_TESS_CTRL] = 80, - [MESA_SHADER_TESS_EVAL] = 384, - [MESA_SHADER_GEOMETRY] = 256, - }, - } -}; - -#define GEN9_HW_INFO \ - .gen = 9, \ - .max_vs_threads = 336, \ - .max_gs_threads = 336, \ - .max_tcs_threads = 336, \ - .max_tes_threads = 336, \ - .max_cs_threads = 56, \ - .timestamp_frequency = 12000000, \ - .urb = { \ - .size = 384, \ - .min_entries = { \ - [MESA_SHADER_VERTEX] = 64, \ - [MESA_SHADER_TESS_EVAL] = 34, \ - }, \ - .max_entries = { \ - [MESA_SHADER_VERTEX] = 1856, \ - [MESA_SHADER_TESS_CTRL] = 672, \ - [MESA_SHADER_TESS_EVAL] = 1120, \ - [MESA_SHADER_GEOMETRY] = 640, \ - }, \ - } - -#define GEN9_LP_FEATURES \ - GEN8_FEATURES, \ - GEN9_HW_INFO, \ - .has_integer_dword_mul = false, \ - .gt = 1, \ - .has_llc = false, \ - .has_sample_with_hiz = true, \ - .num_slices = 1, \ - .num_thread_per_eu = 6, \ - .max_vs_threads = 112, \ - .max_tcs_threads = 112, \ - .max_tes_threads = 112, \ - .max_gs_threads = 112, \ - .max_cs_threads = 6 * 6, \ - .timestamp_frequency = 19200000, \ - .urb = { \ - .size = 192, \ - .min_entries = { \ - [MESA_SHADER_VERTEX] = 34, \ - [MESA_SHADER_TESS_EVAL] = 34, \ - }, \ - .max_entries = { \ - [MESA_SHADER_VERTEX] = 704, \ - [MESA_SHADER_TESS_CTRL] = 256, \ - [MESA_SHADER_TESS_EVAL] = 416, \ - [MESA_SHADER_GEOMETRY] = 256, \ - }, \ - } - -#define GEN9_LP_FEATURES_3X6 \ - GEN9_LP_FEATURES, \ - .num_subslices = { 3, } - -#define GEN9_LP_FEATURES_2X6 \ - GEN9_LP_FEATURES, \ - .num_subslices = { 2, }, \ - .max_vs_threads = 56, \ - .max_tcs_threads = 56, \ - .max_tes_threads = 56, \ - .max_gs_threads = 56, \ - .max_cs_threads = 6 * 6, \ - .urb = { \ - .size = 128, \ - .min_entries = { \ - [MESA_SHADER_VERTEX] = 34, \ - [MESA_SHADER_TESS_EVAL] = 34, \ - }, \ - .max_entries = { \ - [MESA_SHADER_VERTEX] = 352, \ - [MESA_SHADER_TESS_CTRL] = 128, \ - [MESA_SHADER_TESS_EVAL] = 208, \ - [MESA_SHADER_GEOMETRY] = 128, \ - }, \ - } - -#define GEN9_FEATURES \ - GEN8_FEATURES, \ - GEN9_HW_INFO, \ - .has_sample_with_hiz = true, \ - .num_thread_per_eu = 7 - -static const struct gen_device_info gen_device_info_skl_gt1 = { - GEN9_FEATURES, .gt = 1, - .is_skylake = true, - .num_slices = 1, - .num_subslices = { 2, }, - .l3_banks = 2, - .urb.size = 192, -}; - -static const struct gen_device_info gen_device_info_skl_gt2 = { - GEN9_FEATURES, .gt = 2, - .is_skylake = true, - .num_slices = 1, - .num_subslices = { 3, }, - .l3_banks = 4, -}; - -static const struct gen_device_info gen_device_info_skl_gt3 = { - GEN9_FEATURES, .gt = 3, - .is_skylake = true, - .num_slices = 2, - .num_subslices = { 3, 3, }, - .l3_banks = 8, -}; - -static const struct gen_device_info gen_device_info_skl_gt4 = { - GEN9_FEATURES, .gt = 4, - .is_skylake = true, - .num_slices = 3, - .num_subslices = { 3, 3, 3, }, - .l3_banks = 12, - /* From the "L3 Allocation and Programming" documentation: - * - * "URB is limited to 1008KB due to programming restrictions. This is not a - * restriction of the L3 implementation, but of the FF and other clients. - * Therefore, in a GT4 implementation it is possible for the programmed - * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but - * only 1008KB of this will be used." - */ - .urb.size = 1008 / 3, -}; - -static const struct gen_device_info gen_device_info_bxt = { - GEN9_LP_FEATURES_3X6, - .is_broxton = true, - .l3_banks = 2, -}; - -static const struct gen_device_info gen_device_info_bxt_2x6 = { - GEN9_LP_FEATURES_2X6, - .is_broxton = true, - .l3_banks = 1, -}; -/* - * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+. - * There's no KBL entry. Using the default SKL (GEN9) GS entries value. - */ - -static const struct gen_device_info gen_device_info_kbl_gt1 = { - GEN9_FEATURES, - .is_kabylake = true, - .gt = 1, - - .max_cs_threads = 7 * 6, - .urb.size = 192, - .num_slices = 1, - .num_subslices = { 2, }, - .l3_banks = 2, -}; - -static const struct gen_device_info gen_device_info_kbl_gt1_5 = { - GEN9_FEATURES, - .is_kabylake = true, - .gt = 1, - - .max_cs_threads = 7 * 6, - .num_slices = 1, - .num_subslices = { 3, }, - .l3_banks = 4, -}; - -static const struct gen_device_info gen_device_info_kbl_gt2 = { - GEN9_FEATURES, - .is_kabylake = true, - .gt = 2, - - .num_slices = 1, - .num_subslices = { 3, }, - .l3_banks = 4, -}; - -static const struct gen_device_info gen_device_info_kbl_gt3 = { - GEN9_FEATURES, - .is_kabylake = true, - .gt = 3, - - .num_slices = 2, - .num_subslices = { 3, 3, }, - .l3_banks = 8, -}; - -static const struct gen_device_info gen_device_info_kbl_gt4 = { - GEN9_FEATURES, - .is_kabylake = true, - .gt = 4, - - /* - * From the "L3 Allocation and Programming" documentation: - * - * "URB is limited to 1008KB due to programming restrictions. This - * is not a restriction of the L3 implementation, but of the FF and - * other clients. Therefore, in a GT4 implementation it is - * possible for the programmed allocation of the L3 data array to - * provide 3*384KB=1152KB for URB, but only 1008KB of this - * will be used." - */ - .urb.size = 1008 / 3, - .num_slices = 3, - .num_subslices = { 3, 3, 3, }, - .l3_banks = 12, -}; - -static const struct gen_device_info gen_device_info_glk = { - GEN9_LP_FEATURES_3X6, - .is_geminilake = true, - .l3_banks = 2, -}; - -/*TODO: Initialize l3_banks when we know the number. */ -static const struct gen_device_info gen_device_info_glk_2x6 = { - GEN9_LP_FEATURES_2X6, - .is_geminilake = true, -}; - -static const struct gen_device_info gen_device_info_cfl_gt1 = { - GEN9_FEATURES, - .is_coffeelake = true, - .gt = 1, - - .num_slices = 1, - .num_subslices = { 2, }, - .l3_banks = 2, -}; -static const struct gen_device_info gen_device_info_cfl_gt2 = { - GEN9_FEATURES, - .is_coffeelake = true, - .gt = 2, - - .num_slices = 1, - .num_subslices = { 3, }, - .l3_banks = 4, -}; - -static const struct gen_device_info gen_device_info_cfl_gt3 = { - GEN9_FEATURES, - .is_coffeelake = true, - .gt = 3, - - .num_slices = 2, - .num_subslices = { 3, 3, }, - .l3_banks = 8, -}; - -#define GEN10_HW_INFO \ - .gen = 10, \ - .num_thread_per_eu = 7, \ - .max_vs_threads = 728, \ - .max_gs_threads = 432, \ - .max_tcs_threads = 432, \ - .max_tes_threads = 624, \ - .max_cs_threads = 56, \ - .timestamp_frequency = 19200000, \ - .urb = { \ - .size = 256, \ - .min_entries = { \ - [MESA_SHADER_VERTEX] = 64, \ - [MESA_SHADER_TESS_EVAL] = 34, \ - }, \ - .max_entries = { \ - [MESA_SHADER_VERTEX] = 3936, \ - [MESA_SHADER_TESS_CTRL] = 896, \ - [MESA_SHADER_TESS_EVAL] = 2064, \ - [MESA_SHADER_GEOMETRY] = 832, \ - }, \ - } - -#define subslices(args...) { args, } - -#define GEN10_FEATURES(_gt, _slices, _subslices, _l3) \ - GEN8_FEATURES, \ - GEN10_HW_INFO, \ - .has_sample_with_hiz = true, \ - .gt = _gt, \ - .num_slices = _slices, \ - .num_subslices = _subslices, \ - .l3_banks = _l3 - -static const struct gen_device_info gen_device_info_cnl_2x8 = { - /* GT0.5 */ - GEN10_FEATURES(1, 1, subslices(2), 2), - .is_cannonlake = true, -}; - -static const struct gen_device_info gen_device_info_cnl_3x8 = { - /* GT1 */ - GEN10_FEATURES(1, 1, subslices(3), 3), - .is_cannonlake = true, -}; - -static const struct gen_device_info gen_device_info_cnl_4x8 = { - /* GT 1.5 */ - GEN10_FEATURES(1, 2, subslices(2, 2), 6), - .is_cannonlake = true, -}; - -static const struct gen_device_info gen_device_info_cnl_5x8 = { - /* GT2 */ - GEN10_FEATURES(2, 2, subslices(3, 2), 6), - .is_cannonlake = true, -}; - -#define GEN11_HW_INFO \ - .gen = 11, \ - .has_pln = false, \ - .max_vs_threads = 364, \ - .max_gs_threads = 224, \ - .max_tcs_threads = 224, \ - .max_tes_threads = 364, \ - .max_cs_threads = 56, \ - .urb = { \ - .size = 1024, \ - .min_entries = { \ - [MESA_SHADER_VERTEX] = 64, \ - [MESA_SHADER_TESS_EVAL] = 34, \ - }, \ - .max_entries = { \ - [MESA_SHADER_VERTEX] = 2384, \ - [MESA_SHADER_TESS_CTRL] = 1032, \ - [MESA_SHADER_TESS_EVAL] = 2384, \ - [MESA_SHADER_GEOMETRY] = 1032, \ - }, \ - } - -#define GEN11_FEATURES(_gt, _slices, _subslices, _l3) \ - GEN8_FEATURES, \ - GEN11_HW_INFO, \ - .has_64bit_types = false, \ - .has_integer_dword_mul = false, \ - .gt = _gt, .num_slices = _slices, .l3_banks = _l3, \ - .num_subslices = _subslices - -static const struct gen_device_info gen_device_info_icl_8x8 = { - GEN11_FEATURES(2, 1, subslices(8), 8), -}; - -static const struct gen_device_info gen_device_info_icl_6x8 = { - GEN11_FEATURES(1, 1, subslices(6), 6), -}; - -static const struct gen_device_info gen_device_info_icl_4x8 = { - GEN11_FEATURES(1, 1, subslices(4), 6), -}; - -static const struct gen_device_info gen_device_info_icl_1x8 = { - GEN11_FEATURES(1, 1, subslices(1), 6), -}; - -bool -gen_get_device_info(int devid, struct gen_device_info *devinfo) -{ - switch (devid) { -#undef CHIPSET -#define CHIPSET(id, family, name) \ - case id: *devinfo = gen_device_info_##family; break; -#include "pci_ids/i965_pci_ids.h" - default: - fprintf(stderr, "i965_dri.so does not support the 0x%x PCI ID.\n", devid); - return false; - } - - /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer: - * - * "Scratch Space per slice is computed based on 4 sub-slices. SW must - * allocate scratch space enough so that each slice has 4 slices allowed." - * - * The equivalent internal documentation says that this programming note - * applies to all Gen9+ platforms. - * - * The hardware typically calculates the scratch space pointer by taking - * the base address, and adding per-thread-scratch-space * thread ID. - * Extra padding can be necessary depending how the thread IDs are - * calculated for a particular shader stage. - */ - - switch(devinfo->gen) { - case 9: - case 10: - devinfo->max_wm_threads = 64 /* threads-per-PSD */ - * devinfo->num_slices - * 4; /* effective subslices per slice */ - break; - case 11: - devinfo->max_wm_threads = 128 /* threads-per-PSD */ - * devinfo->num_slices - * 8; /* subslices per slice */ - break; - default: - break; - } - - assert(devinfo->num_slices <= ARRAY_SIZE(devinfo->num_subslices)); - - return true; -} - -const char * -gen_get_device_name(int devid) -{ - switch (devid) { -#undef CHIPSET -#define CHIPSET(id, family, name) case id: return name; -#include "pci_ids/i965_pci_ids.h" - default: - return NULL; - } -} diff --git a/src/intel/common/gen_device_info.h b/src/intel/common/gen_device_info.h deleted file mode 100644 index b8044d00032..00000000000 --- a/src/intel/common/gen_device_info.h +++ /dev/null @@ -1,213 +0,0 @@ - /* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#ifndef GEN_DEVICE_INFO_H -#define GEN_DEVICE_INFO_H - -#include <stdbool.h> -#include <stdint.h> - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Intel hardware information and quirks - */ -struct gen_device_info -{ - int gen; /**< Generation number: 4, 5, 6, 7, ... */ - int gt; - - bool is_g4x; - bool is_ivybridge; - bool is_baytrail; - bool is_haswell; - bool is_broadwell; - bool is_cherryview; - bool is_skylake; - bool is_broxton; - bool is_kabylake; - bool is_geminilake; - bool is_coffeelake; - bool is_cannonlake; - - bool has_hiz_and_separate_stencil; - bool must_use_separate_stencil; - bool has_sample_with_hiz; - bool has_llc; - - bool has_pln; - bool has_64bit_types; - bool has_integer_dword_mul; - bool has_compr4; - bool has_surface_tile_offset; - bool supports_simd16_3src; - bool has_resource_streamer; - - /** - * \name Intel hardware quirks - * @{ - */ - bool has_negative_rhw_bug; - - /** - * Some versions of Gen hardware don't do centroid interpolation correctly - * on unlit pixels, causing incorrect values for derivatives near triangle - * edges. Enabling this flag causes the fragment shader to use - * non-centroid interpolation for unlit pixels, at the expense of two extra - * fragment shader instructions. - */ - bool needs_unlit_centroid_workaround; - /** @} */ - - /** - * \name GPU hardware limits - * - * In general, you can find shader thread maximums by looking at the "Maximum - * Number of Threads" field in the Intel PRM description of the 3DSTATE_VS, - * 3DSTATE_GS, 3DSTATE_HS, 3DSTATE_DS, and 3DSTATE_PS commands. URB entry - * limits come from the "Number of URB Entries" field in the - * 3DSTATE_URB_VS command and friends. - * - * These fields are used to calculate the scratch space to allocate. The - * amount of scratch space can be larger without being harmful on modern - * GPUs, however, prior to Haswell, programming the maximum number of threads - * to greater than the hardware maximum would cause GPU performance to tank. - * - * @{ - */ - /** - * Total number of slices present on the device whether or not they've been - * fused off. - * - * XXX: CS thread counts are limited by the inability to do cross subslice - * communication. It is the effectively the number of logical threads which - * can be executed in a subslice. Fuse configurations may cause this number - * to change, so we program @max_cs_threads as the lower maximum. - */ - unsigned num_slices; - - /** - * Number of subslices for each slice (used to be uniform until CNL). - */ - unsigned num_subslices[3]; - - /** - * Number of threads per eu, varies between 4 and 8 between generations. - */ - unsigned num_thread_per_eu; - - unsigned l3_banks; - unsigned max_vs_threads; /**< Maximum Vertex Shader threads */ - unsigned max_tcs_threads; /**< Maximum Hull Shader threads */ - unsigned max_tes_threads; /**< Maximum Domain Shader threads */ - unsigned max_gs_threads; /**< Maximum Geometry Shader threads. */ - /** - * Theoretical maximum number of Pixel Shader threads. - * - * PSD means Pixel Shader Dispatcher. On modern Intel GPUs, hardware will - * automatically scale pixel shader thread count, based on a single value - * programmed into 3DSTATE_PS. - * - * To calculate the maximum number of threads for Gen8 beyond (which have - * multiple Pixel Shader Dispatchers): - * - * - Look up 3DSTATE_PS and find "Maximum Number of Threads Per PSD" - * - Usually there's only one PSD per subslice, so use the number of - * subslices for number of PSDs. - * - For max_wm_threads, the total should be PSD threads * #PSDs. - */ - unsigned max_wm_threads; - - /** - * Maximum Compute Shader threads. - * - * Thread count * number of EUs per subslice - */ - unsigned max_cs_threads; - - struct { - /** - * Hardware default URB size. - * - * The units this is expressed in are somewhat inconsistent: 512b units - * on Gen4-5, KB on Gen6-7, and KB times the slice count on Gen8+. - * - * Look up "URB Size" in the "Device Attributes" page, and take the - * maximum. Look up the slice count for each GT SKU on the same page. - * urb.size = URB Size (kbytes) / slice count - */ - unsigned size; - - /** - * The minimum number of URB entries. See the 3DSTATE_URB_<XS> docs. - */ - unsigned min_entries[4]; - - /** - * The maximum number of URB entries. See the 3DSTATE_URB_<XS> docs. - */ - unsigned max_entries[4]; - } urb; - - /** - * For the longest time the timestamp frequency for Gen's timestamp counter - * could be assumed to be 12.5MHz, where the least significant bit neatly - * corresponded to 80 nanoseconds. - * - * Since Gen9 the numbers aren't so round, with a a frequency of 12MHz for - * SKL (or scale factor of 83.33333333) and a frequency of 19200000Hz for - * BXT. - * - * For simplicty to fit with the current code scaling by a single constant - * to map from raw timestamps to nanoseconds we now do the conversion in - * floating point instead of integer arithmetic. - * - * In general it's probably worth noting that the documented constants we - * have for the per-platform timestamp frequencies aren't perfect and - * shouldn't be trusted for scaling and comparing timestamps with a large - * delta. - * - * E.g. with crude testing on my system using the 'correct' scale factor I'm - * seeing a drift of ~2 milliseconds per second. - */ - uint64_t timestamp_frequency; - - /** @} */ -}; - -#define gen_device_info_is_9lp(devinfo) \ - ((devinfo)->is_broxton || (devinfo)->is_geminilake) - -int gen_get_pci_device_id_override(void); -int gen_device_name_to_pci_device_id(const char *name); -bool gen_get_device_info(int devid, struct gen_device_info *devinfo); -const char *gen_get_device_name(int devid); - -#ifdef __cplusplus -} -#endif - -#endif /* GEN_DEVICE_INFO_H */ diff --git a/src/intel/common/gen_l3_config.h b/src/intel/common/gen_l3_config.h index 8dc7dda0fcc..33da8bb19de 100644 --- a/src/intel/common/gen_l3_config.h +++ b/src/intel/common/gen_l3_config.h @@ -26,7 +26,7 @@ #include <stdio.h> -#include "gen_device_info.h" +#include "dev/gen_device_info.h" /** * Chunk of L3 cache reserved for some specific purpose. diff --git a/src/intel/common/meson.build b/src/intel/common/meson.build index 19472e306f4..d35d5e8f78e 100644 --- a/src/intel/common/meson.build +++ b/src/intel/common/meson.build @@ -26,8 +26,6 @@ files_libintel_common = files( 'gen_debug.h', 'gen_decoder.c', 'gen_decoder.h', - 'gen_device_info.c', - 'gen_device_info.h', 'gen_l3_config.c', 'gen_l3_config.h', 'gen_urb_config.c', |