aboutsummaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
diff options
context:
space:
mode:
author Samuel Pitoiset <[email protected]> 2015-10-09 11:22:20 +0200
committer Samuel Pitoiset <[email protected]> 2015-10-16 21:57:44 +0200
commit c4896c99cbe10b829981250465baf0b00e18ba40 (patch)
tree 7d434918a651d126688dc266d928a095632466ce /src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
parent 7abd707251f29aaf27f83644e47d2dc8b75e10c6 (diff)
nvc0: fix unaligned mem access when reading MP counters on Fermi
Memory accesses have to be aligned to 128 bits. Note that this doesn't happen when the card only has one TPC. This patch fixes the following dmesg failure: gr: GPC0/TPC1/MP trap: global 00000004 [MULTIPLE_WARP_ERRORS] warp 000f [UNALIGNED_MEM_ACCESS] Signed-off-by: Samuel Pitoiset <[email protected]> Reviewed-by: Ilia Mirkin <[email protected]>
Diffstat (limited to 'src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c')
-rw-r--r-- src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c 18
1 files changed, 12 insertions, 6 deletions
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index 2f827b0a944..8eb3b3ef14a 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -247,7 +247,7 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] =
* mov b32 $r11 c0[0x4]
* ext u32 $r8 $r9 0x414
* (not $p0) exit
- * mul $r8 u32 $r8 u32 36
+ * mul $r8 u32 $r8 u32 48
* add b32 $r10 $c $r10 $r8
* add b32 $r11 $r11 0x0 $c
* mov b32 $r8 c0[0x8]
@@ -270,7 +270,7 @@ static const uint64_t nvc0_read_hw_sm_counters_code[] =
0x280040001002dde4ULL,
0x7000c01050921c03ULL,
0x80000000000021e7ULL,
- 0x1000000090821c02ULL,
+ 0x10000000c0821c02ULL,
0x4801000020a29c03ULL,
0x0800000000b2dc42ULL,
0x2800400020021de4ULL,
@@ -473,7 +473,7 @@ nvc0_hw_sm_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
/* set sequence field to 0 (used to check if result is available) */
for (i = 0; i < screen->mp_count; ++i) {
- const unsigned b = (0x24 / 4) * i;
+ const unsigned b = (0x30 / 4) * i;
hq->data[b + 8] = 0;
}
hq->sequence++;
@@ -617,7 +617,7 @@ nvc0_hw_sm_query_read_data(uint32_t count[32][8],
unsigned p, c;
for (p = 0; p < mp_count; ++p) {
- const unsigned b = (0x24 / 4) * p;
+ const unsigned b = (0x30 / 4) * p;
for (c = 0; c < cfg->num_counters; ++c) {
if (hq->data[b + 8] != hq->sequence) {
@@ -815,7 +815,10 @@ nvc0_hw_sm_create_query(struct nvc0_context *nvc0, unsigned type)
*/
space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t);
} else {
- /* for each MP:
+ /*
+ * Note that padding is used to align memory access to 128 bits.
+ *
+ * for each MP:
* [00] = MP.C0
* [04] = MP.C1
* [08] = MP.C2
@@ -825,8 +828,11 @@ nvc0_hw_sm_create_query(struct nvc0_context *nvc0, unsigned type)
* [18] = MP.C6
* [1c] = MP.C7
* [20] = MP.sequence
+ * [24] = padding
+ * [28] = padding
+ * [2c] = padding
*/
- space = (8 + 1) * nvc0->screen->mp_count * sizeof(uint32_t);
+ space = (8 + 1 + 3) * nvc0->screen->mp_count * sizeof(uint32_t);
}
if (!nvc0_hw_query_allocate(nvc0, &hq->base, space)) {