diff options
author | Chia-I Wu <olvaffe@gmail.com> | 2015-05-29 13:08:18 +0800 |
---|---|---|
committer | Chia-I Wu <olvaffe@gmail.com> | 2015-06-15 01:06:44 +0800 |
commit | 3ff40be0eecfd6bbcc17471590e44042b3ffa5d3 (patch) | |
tree | d6780e019b2396bb5abfe75c780fafd6c65f4cff | |
parent | 62bb6437187b439d5959ccab094762163713a992 (diff) |
ilo: add ilo_state_sol
-rw-r--r-- | src/gallium/drivers/ilo/Makefile.sources | 2 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/core/ilo_state_sol.c | 320 | ||||
-rw-r--r-- | src/gallium/drivers/ilo/core/ilo_state_sol.h | 126 |
3 files changed, 448 insertions, 0 deletions
diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index e1f6d22b0f6..79fb0c8d9a0 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -29,6 +29,8 @@ C_SOURCES := \ core/ilo_state_raster.h \ core/ilo_state_sampler.c \ core/ilo_state_sampler.h \ + core/ilo_state_sol.c \ + core/ilo_state_sol.h \ core/ilo_state_surface.c \ core/ilo_state_surface.h \ core/ilo_state_viewport.c \ diff --git a/src/gallium/drivers/ilo/core/ilo_state_sol.c b/src/gallium/drivers/ilo/core/ilo_state_sol.c new file mode 100644 index 00000000000..dbc4b894f6a --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_sol.c @@ -0,0 +1,320 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu <olv@lunarg.com> + */ + +#include "ilo_debug.h" +#include "ilo_state_sol.h" + +static bool +sol_stream_validate_gen7(const struct ilo_dev *dev, + const struct ilo_state_sol_stream_info *stream) +{ + uint8_t i; + + ILO_DEV_ASSERT(dev, 7, 8); + + assert(stream->vue_read_base + stream->vue_read_count <= + stream->cv_vue_attr_count); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 200: + * + * "(Stream 0 Vertex Read Offset) + * Format: U1 count of 256-bit units + * + * Specifies amount of data to skip over before reading back Stream 0 + * vertex data. Must be zero if the GS is enabled and the Output + * Vertex Size field in 3DSTATE_GS is programmed to 0 (i.e., one 16B + * unit)." + * + * "(Stream 0 Vertex Read Length) + * Format: U5-1 count of 256-bit units + * + * Specifies amount of vertex data to read back for Stream 0 vertices, + * starting at the Stream 0 Vertex Read Offset location. Maximum + * readback is 17 256-bit units (34 128-bit vertex attributes). Read + * data past the end of the valid vertex data has undefined contents, + * and therefore shouldn't be used to source stream out data. Must be + * zero (i.e., read length = 256b) if the GS is enabled and the Output + * Vertex Size field in 3DSTATE_GS is programmed to 0 (i.e., one 16B + * unit)." + */ + assert(stream->vue_read_base == 0 || stream->vue_read_base == 2); + assert(stream->vue_read_count <= 34); + + assert(stream->decl_count <= ILO_STATE_SOL_MAX_DECL_COUNT); + + for (i = 0; i < stream->decl_count; i++) { + const struct ilo_state_sol_decl_info *decl = &stream->decls[i]; + + assert(decl->is_hole || decl->attr < stream->vue_read_count); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 205: + * + * "There is only enough internal storage for the 128-bit vertex + * header and 32 128-bit vertex attributes." 
+ */ + assert(decl->attr < 33); + + assert(decl->component_base < 4 && + decl->component_base + decl->component_count <= 4); + assert(decl->buffer < ILO_STATE_SOL_MAX_BUFFER_COUNT); + } + + return true; +} + +static bool +sol_validate_gen7(const struct ilo_dev *dev, + const struct ilo_state_sol_info *info) +{ + uint8_t i; + + ILO_DEV_ASSERT(dev, 7, 8); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 198: + * + * "This bit (Render Stream Select) is used even if SO Function Enable + * is DISABLED." + * + * From the Haswell PRM, volume 2b, page 796: + * + * "SO Function Enable must also be ENABLED in order for this field + * (Render Stream Select) to select a stream for rendering. When SO + * Function Enable is DISABLED and Rendering Disable is cleared (i.e., + * rendering is enabled), StreamID is ignored downstream of the SO + * stage, allowing any stream to be rendered." + * + * We want Gen7 behavior, but we have to require users to follow Gen7.5 + * behavior: info->sol_enable must be set for info->render_stream to work. + */ + + for (i = 0; i < ARRAY_SIZE(info->streams); i++) { + if (!sol_stream_validate_gen7(dev, &info->streams[i])) + return false; + } + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 208: + * + * "(Surface Pitch) + * [0,2048] Must be 0 or a multiple of 4 Bytes." 
+ */ + for (i = 0; i < ARRAY_SIZE(info->buffer_strides); i++) { + assert(info->buffer_strides[i] <= 2048 && + info->buffer_strides[i] % 4 == 0); + } + + return true; +} + +static bool +sol_set_gen7_3DSTATE_STREAMOUT(struct ilo_state_sol *so, + const struct ilo_dev *dev, + const struct ilo_state_sol_info *info) +{ + struct { + uint8_t offset; + uint8_t len; + } vue_read[ILO_STATE_SOL_MAX_STREAM_COUNT]; + uint8_t i; + uint32_t dw1, dw2; + + ILO_DEV_ASSERT(dev, 7, 8); + + if (!sol_validate_gen7(dev, info)) + return false; + + for (i = 0; i < ARRAY_SIZE(info->streams); i++) { + const struct ilo_state_sol_stream_info *stream = &info->streams[i]; + + vue_read[i].offset = stream->vue_read_base / 2; + /* + * In pairs minus 1. URB entries are aligned to 512-bits. There is no + * need to worry about reading past entries. + */ + vue_read[i].len = (stream->vue_read_count + 1) / 2; + if (vue_read[i].len) + vue_read[i].len--; + } + + dw1 = info->render_stream << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT | + info->tristrip_reorder << GEN7_SO_DW1_REORDER_MODE__SHIFT; + + if (info->sol_enable) + dw1 |= GEN7_SO_DW1_SO_ENABLE; + + if (info->render_disable) + dw1 |= GEN7_SO_DW1_RENDER_DISABLE; + + if (info->stats_enable) + dw1 |= GEN7_SO_DW1_STATISTICS; + + if (ilo_dev_gen(dev) < ILO_GEN(8)) { + const uint8_t buffer_enables = + ((bool) info->buffer_strides[3]) << 3 | + ((bool) info->buffer_strides[2]) << 2 | + ((bool) info->buffer_strides[1]) << 1 | + ((bool) info->buffer_strides[0]); + + dw1 |= buffer_enables << GEN7_SO_DW1_BUFFER_ENABLES__SHIFT; + } + + dw2 = vue_read[3].offset << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT | + vue_read[3].len << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT | + vue_read[2].offset << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT | + vue_read[2].len << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT | + vue_read[1].offset << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT | + vue_read[1].len << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT | + vue_read[0].offset << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT 
| + vue_read[0].len << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT; + + STATIC_ASSERT(ARRAY_SIZE(so->so) >= 4); + so->so[0] = dw1; + so->so[1] = dw2; + + if (ilo_dev_gen(dev) >= ILO_GEN(8)) { + uint32_t dw3, dw4; + + dw3 = info->buffer_strides[1] << GEN8_SO_DW3_BUFFER1_PITCH__SHIFT | + info->buffer_strides[0] << GEN8_SO_DW3_BUFFER0_PITCH__SHIFT; + dw4 = info->buffer_strides[3] << GEN8_SO_DW4_BUFFER3_PITCH__SHIFT | + info->buffer_strides[2] << GEN8_SO_DW4_BUFFER2_PITCH__SHIFT; + + so->so[2] = dw3; + so->so[3] = dw4; + } + + return true; +} + +static bool +sol_set_gen7_3DSTATE_SO_DECL_LIST(struct ilo_state_sol *so, + const struct ilo_dev *dev, + const struct ilo_state_sol_info *info, + uint8_t max_decl_count) +{ + uint64_t decl_list[ILO_STATE_SOL_MAX_DECL_COUNT]; + uint8_t decl_counts[ILO_STATE_SOL_MAX_STREAM_COUNT]; + uint8_t buffer_selects[ILO_STATE_SOL_MAX_STREAM_COUNT]; + uint32_t dw1, dw2; + uint8_t i, j; + + ILO_DEV_ASSERT(dev, 7, 8); + + memset(decl_list, 0, sizeof(decl_list[0]) * max_decl_count); + + for (i = 0; i < ARRAY_SIZE(info->streams); i++) { + const struct ilo_state_sol_stream_info *stream = &info->streams[i]; + + assert(stream->decl_count <= max_decl_count); + decl_counts[i] = stream->decl_count; + buffer_selects[i] = 0; + + for (j = 0; j < stream->decl_count; j++) { + const struct ilo_state_sol_decl_info *decl = &stream->decls[j]; + const uint8_t mask = ((1 << decl->component_count) - 1) << + decl->component_base; + uint16_t val; + + val = decl->buffer << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT | + mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT; + + if (decl->is_hole) + val |= GEN7_SO_DECL_HOLE_FLAG; + else + val |= decl->attr << GEN7_SO_DECL_REG_INDEX__SHIFT; + + decl_list[j] |= (uint64_t) val << (16 * i); + buffer_selects[i] |= 1 << decl->buffer; + } + } + + dw1 = buffer_selects[3] << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT | + buffer_selects[2] << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT | + buffer_selects[1] << 
GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT | + buffer_selects[0] << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT; + dw2 = decl_counts[3] << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT | + decl_counts[2] << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT | + decl_counts[1] << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT | + decl_counts[0] << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT; + + STATIC_ASSERT(ARRAY_SIZE(so->so) >= 6); + so->so[4] = dw1; + so->so[5] = dw2; + + STATIC_ASSERT(ARRAY_SIZE(so->decl[0]) == 2); + memcpy(so->decl, decl_list, sizeof(so->decl[0]) * max_decl_count); + so->decl_count = max_decl_count; + + return true; +} + +bool +ilo_state_sol_init(struct ilo_state_sol *so, + const struct ilo_dev *dev, + const struct ilo_state_sol_info *info) +{ + bool ret = true; + + assert(ilo_is_zeroed(so, sizeof(*so))); + assert(ilo_is_zeroed(info->data, info->data_size)); + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + uint8_t max_decl_count, i; + + max_decl_count = info->streams[0].decl_count; + for (i = 1; i < ARRAY_SIZE(info->streams); i++) { + if (max_decl_count < info->streams[i].decl_count) + max_decl_count = info->streams[i].decl_count; + } + + assert(ilo_state_sol_data_size(dev, max_decl_count) <= info->data_size); + so->decl = (uint32_t (*)[2]) info->data; + + ret &= sol_set_gen7_3DSTATE_STREAMOUT(so, dev, info); + ret &= sol_set_gen7_3DSTATE_SO_DECL_LIST(so, dev, info, max_decl_count); + } + + assert(ret); + + return ret; +} + +bool +ilo_state_sol_init_disabled(struct ilo_state_sol *sol, + const struct ilo_dev *dev, + bool render_disable) +{ + struct ilo_state_sol_info info; + + memset(&info, 0, sizeof(info)); + info.render_disable = render_disable; + + return ilo_state_sol_init(sol, dev, &info); +} diff --git a/src/gallium/drivers/ilo/core/ilo_state_sol.h b/src/gallium/drivers/ilo/core/ilo_state_sol.h new file mode 100644 index 00000000000..c5c693e5e56 --- /dev/null +++ b/src/gallium/drivers/ilo/core/ilo_state_sol.h @@ -0,0 +1,126 @@ +/* + * Mesa 3-D 
graphics library + * + * Copyright (C) 2015 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu <olv@lunarg.com> + */ + +#ifndef ILO_STATE_SOL_H +#define ILO_STATE_SOL_H + +#include "genhw/genhw.h" + +#include "ilo_core.h" +#include "ilo_dev.h" + +/* + * From the Ivy Bridge PRM, volume 2 part 1, page 193: + * + * "Incoming topologies are tagged with a 2-bit StreamID." + */ +#define ILO_STATE_SOL_MAX_STREAM_COUNT 4 + +/* + * From the Ivy Bridge PRM, volume 2 part 1, page 195: + * + * "Up to four SO buffers are supported." + */ +#define ILO_STATE_SOL_MAX_BUFFER_COUNT 4 + +/* + * From the Ivy Bridge PRM, volume 2 part 1, page 201: + * + * "All 128 decls..." + */ +#define ILO_STATE_SOL_MAX_DECL_COUNT 128 + +/** + * Output a vertex attribute. 
+ */ +struct ilo_state_sol_decl_info { + /* select an attribute from read ones */ + uint8_t attr; + bool is_hole; + + /* which components to write */ + uint8_t component_base; + uint8_t component_count; + + /* destination buffer */ + uint8_t buffer; +}; + +struct ilo_state_sol_stream_info { + /* which VUE attributes to read */ + uint8_t cv_vue_attr_count; + uint8_t vue_read_base; + uint8_t vue_read_count; + + uint8_t decl_count; + const struct ilo_state_sol_decl_info *decls; +}; + +struct ilo_state_sol_info { + void *data; + size_t data_size; + + bool sol_enable; + bool stats_enable; + enum gen_reorder_mode tristrip_reorder; + + bool render_disable; + /* ignored when SOL is disabled */ + uint8_t render_stream; + + /* a buffer is disabled when its stride is zero */ + uint16_t buffer_strides[ILO_STATE_SOL_MAX_BUFFER_COUNT]; + + struct ilo_state_sol_stream_info streams[ILO_STATE_SOL_MAX_STREAM_COUNT]; +}; + +struct ilo_state_sol { + uint32_t so[6]; + + uint32_t (*decl)[2]; + uint8_t decl_count; +}; + +static inline size_t +ilo_state_sol_data_size(const struct ilo_dev *dev, uint8_t max_decl_count) +{ + const struct ilo_state_sol *so = NULL; + return (ilo_dev_gen(dev) >= ILO_GEN(7)) ? + sizeof(so->decl[0]) * max_decl_count : 0; +} + +bool +ilo_state_sol_init(struct ilo_state_sol *sol, + const struct ilo_dev *dev, + const struct ilo_state_sol_info *info); + +bool +ilo_state_sol_init_disabled(struct ilo_state_sol *sol, + const struct ilo_dev *dev, + bool render_disable); + +#endif /* ILO_STATE_SOL_H */ |