diff options
author | Ben Skeggs <[email protected]> | 2008-02-13 14:21:23 +1100 |
---|---|---|
committer | Ben Skeggs <[email protected]> | 2008-02-13 14:21:23 +1100 |
commit | b397a2bb203c2b28b746af7828d9ad192cde0bc1 (patch) | |
tree | 77448bf1db79ae112f057717109aaa98e5696b15 | |
parent | 5ba3dbe2cc8a9af5cae01f45eaf497f834400170 (diff) | |
parent | e20e89e48287808068086ec148920dd89495d813 (diff) |
Merge branch 'upstream-gallium-0.1' into darktama-gallium-0.1
53 files changed, 3298 insertions, 2092 deletions
diff --git a/progs/demos/lodbias.c b/progs/demos/lodbias.c index a4db22e26e4..c5a2a1b4573 100644 --- a/progs/demos/lodbias.c +++ b/progs/demos/lodbias.c @@ -159,6 +159,18 @@ static void Key( unsigned char key, int x, int y ) case 'B': Bias += 10; break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + Bias = 100.0 * (key - '0'); + break; case 27: exit(0); break; diff --git a/progs/tests/arbnpot.c b/progs/tests/arbnpot.c index 8107717e271..05ba85dad9e 100644 --- a/progs/tests/arbnpot.c +++ b/progs/tests/arbnpot.c @@ -113,44 +113,67 @@ static void Init( void ) minDim = imgWidth < imgHeight ? imgWidth : imgHeight; glPixelStorei(GL_UNPACK_ALIGNMENT, 1); - glTexImage1D(GL_TEXTURE_1D, 0, GL_RGB, imgWidth, 0, - imgFormat, GL_UNSIGNED_BYTE, image); - assert(glGetError() == GL_NO_ERROR); + /* + * 1D Texture. Test proxy first, if that works, test non-proxy target. + */ glTexImage1D(GL_PROXY_TEXTURE_1D, 0, GL_RGB, imgWidth, 0, imgFormat, GL_UNSIGNED_BYTE, image); glGetTexLevelParameteriv(GL_PROXY_TEXTURE_1D, 0, GL_TEXTURE_WIDTH, &w); - assert(w == imgWidth); + assert(w == imgWidth || w == 0); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, imgWidth, imgHeight, 0, - imgFormat, GL_UNSIGNED_BYTE, image); - assert(glGetError() == GL_NO_ERROR); + if (w) { + glTexImage1D(GL_TEXTURE_1D, 0, GL_RGB, imgWidth, 0, + imgFormat, GL_UNSIGNED_BYTE, image); + assert(glGetError() == GL_NO_ERROR); + } + + /* + * 2D Texture + */ glTexImage2D(GL_PROXY_TEXTURE_2D, 0, GL_RGB, imgWidth, imgHeight, 0, imgFormat, GL_UNSIGNED_BYTE, image); glGetTexLevelParameteriv(GL_PROXY_TEXTURE_2D, 0, GL_TEXTURE_WIDTH, &w); - assert(w == imgWidth); + assert(w == imgWidth || w == 0); - glTexImage3D(GL_TEXTURE_3D, 0, GL_RGB, imgWidth, imgHeight, 1, 0, - imgFormat, GL_UNSIGNED_BYTE, image); - assert(glGetError() == GL_NO_ERROR); + if (w) { + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, imgWidth, imgHeight, 0, + imgFormat, GL_UNSIGNED_BYTE, image); + assert(glGetError() == GL_NO_ERROR); + } + + /* + * 3D Texture + */ glTexImage3D(GL_PROXY_TEXTURE_3D, 0, GL_RGB, imgWidth, imgHeight, 1, 0, imgFormat, GL_UNSIGNED_BYTE, image); glGetTexLevelParameteriv(GL_PROXY_TEXTURE_3D, 0, GL_TEXTURE_WIDTH, &w); - assert(w == imgWidth); + assert(w == imgWidth || w == 0); + + if (w) { + glTexImage3D(GL_TEXTURE_3D, 0, GL_RGB, imgWidth, imgHeight, 1, 0, + imgFormat, GL_UNSIGNED_BYTE, image); + assert(glGetError() == GL_NO_ERROR); + } - glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X, 0, GL_RGB, - minDim, minDim, 0, - imgFormat, GL_UNSIGNED_BYTE, image); - assert(glGetError() == GL_NO_ERROR); + /* + * Cube Texture + */ glTexImage2D(GL_PROXY_TEXTURE_CUBE_MAP, 0, GL_RGB, minDim, minDim, 0, imgFormat, GL_UNSIGNED_BYTE, image); glGetTexLevelParameteriv(GL_PROXY_TEXTURE_CUBE_MAP, 0, GL_TEXTURE_WIDTH, &w); - assert(w == minDim); + assert(w == minDim || w == 0); + if (w) { + glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X, 0, GL_RGB, + minDim, minDim, 0, + imgFormat, GL_UNSIGNED_BYTE, image); + assert(glGetError() == GL_NO_ERROR); + } glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); diff --git a/progs/tests/mipmap_limits.c b/progs/tests/mipmap_limits.c index dc066cab1f0..d6d6e467b0f 100644 --- a/progs/tests/mipmap_limits.c +++ b/progs/tests/mipmap_limits.c @@ -131,8 +131,8 @@ static void display(void) glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, BaseLevel); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, MaxLevel); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_LOD, MinLod); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LOD, MaxLod); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_LOD, MinLod); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LOD, MaxLod); if (NearestFilter) { glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); @@ -194,22 +194,22 @@ key(unsigned char k, int x, int y) MaxLevel = 10; break; case 'l': - LodBias -= 0.02; + LodBias -= 0.25; break; case 'L': - LodBias += 0.02; + LodBias += 0.25; break; case 'n': - MinLod -= 0.02; + MinLod -= 0.25; break; case 'N': - MinLod += 0.02; + MinLod += 0.25; break; case 'x': - MaxLod -= 0.02; + MaxLod -= 0.25; break; case 'X': - MaxLod += 0.02; + MaxLod += 0.25; break; case 'f': NearestFilter = !NearestFilter; diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c index 1e61829e8f1..d3d19589518 100644 --- a/src/mesa/main/mipmap.c +++ b/src/mesa/main/mipmap.c @@ -36,462 +36,377 @@ +static GLint +bytes_per_pixel(GLenum datatype, GLuint comps) +{ + GLint b = _mesa_sizeof_packed_type(datatype); + assert(b >= 0); + return b * comps; +} + + /** * Average together two rows of a source image to produce a single new * row in the dest image. It's legal for the two source rows to point * to the same data. The source width must be equal to either the * dest width or two times the dest width. + * \param datatype GL_UNSIGNED_BYTE, GL_UNSIGNED_SHORT, GL_FLOAT, etc. + * \param comps number of components per pixel (1..4) */ static void -do_row(const struct gl_texture_format *format, GLint srcWidth, +do_row(GLenum datatype, GLuint comps, GLint srcWidth, const GLvoid *srcRowA, const GLvoid *srcRowB, GLint dstWidth, GLvoid *dstRow) { const GLuint k0 = (srcWidth == dstWidth) ? 0 : 1; const GLuint colStride = (srcWidth == dstWidth) ? 1 : 2; + ASSERT(comps >= 1); + ASSERT(comps <= 4); + /* This assertion is no longer valid with non-power-of-2 textures assert(srcWidth == dstWidth || srcWidth == 2 * dstWidth); */ - switch (format->MesaFormat) { - case MESA_FORMAT_RGBA: - { - GLuint i, j, k; - const GLchan (*rowA)[4] = (const GLchan (*)[4]) srcRowA; - const GLchan (*rowB)[4] = (const GLchan (*)[4]) srcRowB; - GLchan (*dst)[4] = (GLchan (*)[4]) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - dst[i][0] = (rowA[j][0] + rowA[k][0] + - rowB[j][0] + rowB[k][0]) / 4; - dst[i][1] = (rowA[j][1] + rowA[k][1] + - rowB[j][1] + rowB[k][1]) / 4; - dst[i][2] = (rowA[j][2] + rowA[k][2] + - rowB[j][2] + rowB[k][2]) / 4; - dst[i][3] = (rowA[j][3] + rowA[k][3] + - rowB[j][3] + rowB[k][3]) / 4; - } + if (datatype == GL_UNSIGNED_BYTE && comps == 4) { + GLuint i, j, k; + const GLubyte(*rowA)[4] = (const GLubyte(*)[4]) srcRowA; + const GLubyte(*rowB)[4] = (const GLubyte(*)[4]) srcRowB; + GLubyte(*dst)[4] = (GLubyte(*)[4]) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4; + dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4; + dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4; + dst[i][3] = (rowA[j][3] + rowA[k][3] + rowB[j][3] + rowB[k][3]) / 4; } - return; - case MESA_FORMAT_RGB: - { - GLuint i, j, k; - const GLchan (*rowA)[3] = (const GLchan (*)[3]) srcRowA; - const GLchan (*rowB)[3] = (const GLchan (*)[3]) srcRowB; - GLchan (*dst)[3] = (GLchan (*)[3]) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - dst[i][0] = (rowA[j][0] + rowA[k][0] + - rowB[j][0] + rowB[k][0]) / 4; - dst[i][1] = (rowA[j][1] + rowA[k][1] + - rowB[j][1] + rowB[k][1]) / 4; - dst[i][2] = (rowA[j][2] + rowA[k][2] + - rowB[j][2] + rowB[k][2]) / 4; - } - } - return; - case MESA_FORMAT_ALPHA: - case MESA_FORMAT_LUMINANCE: - case MESA_FORMAT_INTENSITY: - { - GLuint i, j, k; - const GLchan *rowA = (const GLchan *) srcRowA; - const GLchan *rowB = (const GLchan *) srcRowB; - GLchan *dst = (GLchan *) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) / 4; - } + } + else if (datatype == GL_UNSIGNED_BYTE && comps == 3) { + GLuint i, j, k; + const GLubyte(*rowA)[3] = (const GLubyte(*)[3]) srcRowA; + const GLubyte(*rowB)[3] = (const GLubyte(*)[3]) srcRowB; + GLubyte(*dst)[3] = (GLubyte(*)[3]) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4; + dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4; + dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4; } - return; - case MESA_FORMAT_LUMINANCE_ALPHA: - { - GLuint i, j, k; - const GLchan (*rowA)[2] = (const GLchan (*)[2]) srcRowA; - const GLchan (*rowB)[2] = (const GLchan (*)[2]) srcRowB; - GLchan (*dst)[2] = (GLchan (*)[2]) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - dst[i][0] = (rowA[j][0] + rowA[k][0] + - rowB[j][0] + rowB[k][0]) / 4; - dst[i][1] = (rowA[j][1] + rowA[k][1] + - rowB[j][1] + rowB[k][1]) / 4; - } + } + else if (datatype == GL_UNSIGNED_BYTE && comps == 2) { + GLuint i, j, k; + const GLubyte(*rowA)[2] = (const GLubyte(*)[2]) srcRowA; + const GLubyte(*rowB)[2] = (const GLubyte(*)[2]) srcRowB; + GLubyte(*dst)[2] = (GLubyte(*)[2]) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) >> 2; + dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) >> 2; } - return; - case MESA_FORMAT_Z32: - { - GLuint i, j, k; - const GLuint *rowA = (const GLuint *) srcRowA; - const GLuint *rowB = (const GLuint *) srcRowB; - GLfloat *dst = (GLfloat *) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - dst[i] = rowA[j] / 4 + rowA[k] / 4 + rowB[j] / 4 + rowB[k] / 4; - } + } + else if (datatype == GL_UNSIGNED_BYTE && comps == 1) { + GLuint i, j, k; + const GLubyte *rowA = (const GLubyte *) srcRowA; + const GLubyte *rowB = (const GLubyte *) srcRowB; + GLubyte *dst = (GLubyte *) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) >> 2; } - return; - case MESA_FORMAT_Z16: - { - GLuint i, j, k; - const GLushort *rowA = (const GLushort *) srcRowA; - const GLushort *rowB = (const GLushort *) srcRowB; - GLushort *dst = (GLushort *) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) / 4; - } + } + + else if (datatype == GL_UNSIGNED_SHORT && comps == 4) { + GLuint i, j, k; + const GLushort(*rowA)[4] = (const GLushort(*)[4]) srcRowA; + const GLushort(*rowB)[4] = (const GLushort(*)[4]) srcRowB; + GLushort(*dst)[4] = (GLushort(*)[4]) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4; + dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4; + dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4; + dst[i][3] = (rowA[j][3] + rowA[k][3] + rowB[j][3] + rowB[k][3]) / 4; } - return; - /* Begin hardware formats */ - case MESA_FORMAT_RGBA8888: - case MESA_FORMAT_RGBA8888_REV: - case MESA_FORMAT_ARGB8888: - case MESA_FORMAT_ARGB8888_REV: -#if FEATURE_EXT_texture_sRGB - case MESA_FORMAT_SRGBA8: -#endif - { - GLuint i, j, k; - const GLubyte (*rowA)[4] = (const GLubyte (*)[4]) srcRowA; - const GLubyte (*rowB)[4] = (const GLubyte (*)[4]) srcRowB; - GLubyte (*dst)[4] = (GLubyte (*)[4]) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - dst[i][0] = (rowA[j][0] + rowA[k][0] + - rowB[j][0] + rowB[k][0]) / 4; - dst[i][1] = (rowA[j][1] + rowA[k][1] + - rowB[j][1] + rowB[k][1]) / 4; - dst[i][2] = (rowA[j][2] + rowA[k][2] + - rowB[j][2] + rowB[k][2]) / 4; - dst[i][3] = (rowA[j][3] + rowA[k][3] + - rowB[j][3] + rowB[k][3]) / 4; - } + } + else if (datatype == GL_UNSIGNED_SHORT && comps == 3) { + GLuint i, j, k; + const GLushort(*rowA)[3] = (const GLushort(*)[3]) srcRowA; + const GLushort(*rowB)[3] = (const GLushort(*)[3]) srcRowB; + GLushort(*dst)[3] = (GLushort(*)[3]) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4; + dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4; + dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4; } - return; - case MESA_FORMAT_RGB888: - case MESA_FORMAT_BGR888: -#if FEATURE_EXT_texture_sRGB - case MESA_FORMAT_SRGB8: -#endif - { - GLuint i, j, k; - const GLubyte (*rowA)[3] = (const GLubyte (*)[3]) srcRowA; - const GLubyte (*rowB)[3] = (const GLubyte (*)[3]) srcRowB; - GLubyte (*dst)[3] = (GLubyte (*)[3]) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - dst[i][0] = (rowA[j][0] + rowA[k][0] + - rowB[j][0] + rowB[k][0]) / 4; - dst[i][1] = (rowA[j][1] + rowA[k][1] + - rowB[j][1] + rowB[k][1]) / 4; - dst[i][2] = (rowA[j][2] + rowA[k][2] + - rowB[j][2] + rowB[k][2]) / 4; - } + } + else if (datatype == GL_UNSIGNED_SHORT && comps == 2) { + GLuint i, j, k; + const GLushort(*rowA)[2] = (const GLushort(*)[2]) srcRowA; + const GLushort(*rowB)[2] = (const GLushort(*)[2]) srcRowB; + GLushort(*dst)[2] = (GLushort(*)[2]) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4; + dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4; } - return; - case MESA_FORMAT_RGB565: - case MESA_FORMAT_RGB565_REV: - { - GLuint i, j, k; - const GLushort *rowA = (const GLushort *) srcRowA; - const GLushort *rowB = (const GLushort *) srcRowB; - GLushort *dst = (GLushort *) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - const GLint rowAr0 = rowA[j] & 0x1f; - const GLint rowAr1 = rowA[k] & 0x1f; - const GLint rowBr0 = rowB[j] & 0x1f; - const GLint rowBr1 = rowB[k] & 0x1f; - const GLint rowAg0 = (rowA[j] >> 5) & 0x3f; - const GLint rowAg1 = (rowA[k] >> 5) & 0x3f; - const GLint rowBg0 = (rowB[j] >> 5) & 0x3f; - const GLint rowBg1 = (rowB[k] >> 5) & 0x3f; - const GLint rowAb0 = (rowA[j] >> 11) & 0x1f; - const GLint rowAb1 = (rowA[k] >> 11) & 0x1f; - const GLint rowBb0 = (rowB[j] >> 11) & 0x1f; - const GLint rowBb1 = (rowB[k] >> 11) & 0x1f; - const GLint red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2; - const GLint green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2; - const GLint blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2; - dst[i] = (blue << 11) | (green << 5) | red; - } + } + else if (datatype == GL_UNSIGNED_SHORT && comps == 1) { + GLuint i, j, k; + const GLushort *rowA = (const GLushort *) srcRowA; + const GLushort *rowB = (const GLushort *) srcRowB; + GLushort *dst = (GLushort *) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) / 4; } - return; - case MESA_FORMAT_ARGB4444: - case MESA_FORMAT_ARGB4444_REV: - { - GLuint i, j, k; - const GLushort *rowA = (const GLushort *) srcRowA; - const GLushort *rowB = (const GLushort *) srcRowB; - GLushort *dst = (GLushort *) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - const GLint rowAr0 = rowA[j] & 0xf; - const GLint rowAr1 = rowA[k] & 0xf; - const GLint rowBr0 = rowB[j] & 0xf; - const GLint rowBr1 = rowB[k] & 0xf; - const GLint rowAg0 = (rowA[j] >> 4) & 0xf; - const GLint rowAg1 = (rowA[k] >> 4) & 0xf; - const GLint rowBg0 = (rowB[j] >> 4) & 0xf; - const GLint rowBg1 = (rowB[k] >> 4) & 0xf; - const GLint rowAb0 = (rowA[j] >> 8) & 0xf; - const GLint rowAb1 = (rowA[k] >> 8) & 0xf; - const GLint rowBb0 = (rowB[j] >> 8) & 0xf; - const GLint rowBb1 = (rowB[k] >> 8) & 0xf; - const GLint rowAa0 = (rowA[j] >> 12) & 0xf; - const GLint rowAa1 = (rowA[k] >> 12) & 0xf; - const GLint rowBa0 = (rowB[j] >> 12) & 0xf; - const GLint rowBa1 = (rowB[k] >> 12) & 0xf; - const GLint red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2; - const GLint green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2; - const GLint blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2; - const GLint alpha = (rowAa0 + rowAa1 + rowBa0 + rowBa1) >> 2; - dst[i] = (alpha << 12) | (blue << 8) | (green << 4) | red; - } + } + + else if (datatype == GL_FLOAT && comps == 4) { + GLuint i, j, k; + const GLfloat(*rowA)[4] = (const GLfloat(*)[4]) srcRowA; + const GLfloat(*rowB)[4] = (const GLfloat(*)[4]) srcRowB; + GLfloat(*dst)[4] = (GLfloat(*)[4]) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + + rowB[j][0] + rowB[k][0]) * 0.25F; + dst[i][1] = (rowA[j][1] + rowA[k][1] + + rowB[j][1] + rowB[k][1]) * 0.25F; + dst[i][2] = (rowA[j][2] + rowA[k][2] + + rowB[j][2] + rowB[k][2]) * 0.25F; + dst[i][3] = (rowA[j][3] + rowA[k][3] + + rowB[j][3] + rowB[k][3]) * 0.25F; } - return; - case MESA_FORMAT_ARGB1555: - case MESA_FORMAT_ARGB1555_REV: /* XXX broken? */ - { - GLuint i, j, k; - const GLushort *rowA = (const GLushort *) srcRowA; - const GLushort *rowB = (const GLushort *) srcRowB; - GLushort *dst = (GLushort *) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - const GLint rowAr0 = rowA[j] & 0x1f; - const GLint rowAr1 = rowA[k] & 0x1f; - const GLint rowBr0 = rowB[j] & 0x1f; - const GLint rowBr1 = rowB[k] & 0xf; - const GLint rowAg0 = (rowA[j] >> 5) & 0x1f; - const GLint rowAg1 = (rowA[k] >> 5) & 0x1f; - const GLint rowBg0 = (rowB[j] >> 5) & 0x1f; - const GLint rowBg1 = (rowB[k] >> 5) & 0x1f; - const GLint rowAb0 = (rowA[j] >> 10) & 0x1f; - const GLint rowAb1 = (rowA[k] >> 10) & 0x1f; - const GLint rowBb0 = (rowB[j] >> 10) & 0x1f; - const GLint rowBb1 = (rowB[k] >> 10) & 0x1f; - const GLint rowAa0 = (rowA[j] >> 15) & 0x1; - const GLint rowAa1 = (rowA[k] >> 15) & 0x1; - const GLint rowBa0 = (rowB[j] >> 15) & 0x1; - const GLint rowBa1 = (rowB[k] >> 15) & 0x1; - const GLint red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2; - const GLint green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2; - const GLint blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2; - const GLint alpha = (rowAa0 + rowAa1 + rowBa0 + rowBa1) >> 2; - dst[i] = (alpha << 15) | (blue << 10) | (green << 5) | red; - } + } + else if (datatype == GL_FLOAT && comps == 3) { + GLuint i, j, k; + const GLfloat(*rowA)[3] = (const GLfloat(*)[3]) srcRowA; + const GLfloat(*rowB)[3] = (const GLfloat(*)[3]) srcRowB; + GLfloat(*dst)[3] = (GLfloat(*)[3]) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + + rowB[j][0] + rowB[k][0]) * 0.25F; + dst[i][1] = (rowA[j][1] + rowA[k][1] + + rowB[j][1] + rowB[k][1]) * 0.25F; + dst[i][2] = (rowA[j][2] + rowA[k][2] + + rowB[j][2] + rowB[k][2]) * 0.25F; } - return; - case MESA_FORMAT_AL88: - case MESA_FORMAT_AL88_REV: -#if FEATURE_EXT_texture_sRGB - case MESA_FORMAT_SLA8: -#endif - { - GLuint i, j, k; - const GLubyte (*rowA)[2] = (const GLubyte (*)[2]) srcRowA; - const GLubyte (*rowB)[2] = (const GLubyte (*)[2]) srcRowB; - GLubyte (*dst)[2] = (GLubyte (*)[2]) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - dst[i][0] = (rowA[j][0] + rowA[k][0] + - rowB[j][0] + rowB[k][0]) >> 2; - dst[i][1] = (rowA[j][1] + rowA[k][1] + - rowB[j][1] + rowB[k][1]) >> 2; - } + } + else if (datatype == GL_FLOAT && comps == 2) { + GLuint i, j, k; + const GLfloat(*rowA)[2] = (const GLfloat(*)[2]) srcRowA; + const GLfloat(*rowB)[2] = (const GLfloat(*)[2]) srcRowB; + GLfloat(*dst)[2] = (GLfloat(*)[2]) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + + rowB[j][0] + rowB[k][0]) * 0.25F; + dst[i][1] = (rowA[j][1] + rowA[k][1] + + rowB[j][1] + rowB[k][1]) * 0.25F; } - return; - case MESA_FORMAT_RGB332: - { - GLuint i, j, k; - const GLubyte *rowA = (const GLubyte *) srcRowA; - const GLubyte *rowB = (const GLubyte *) srcRowB; - GLubyte *dst = (GLubyte *) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - const GLint rowAr0 = rowA[j] & 0x3; - const GLint rowAr1 = rowA[k] & 0x3; - const GLint rowBr0 = rowB[j] & 0x3; - const GLint rowBr1 = rowB[k] & 0x3; - const GLint rowAg0 = (rowA[j] >> 2) & 0x7; - const GLint rowAg1 = (rowA[k] >> 2) & 0x7; - const GLint rowBg0 = (rowB[j] >> 2) & 0x7; - const GLint rowBg1 = (rowB[k] >> 2) & 0x7; - const GLint rowAb0 = (rowA[j] >> 5) & 0x7; - const GLint rowAb1 = (rowA[k] >> 5) & 0x7; - const GLint rowBb0 = (rowB[j] >> 5) & 0x7; - const GLint rowBb1 = (rowB[k] >> 5) & 0x7; - const GLint red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2; - const GLint green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2; - const GLint blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2; - dst[i] = (blue << 5) | (green << 2) | red; - } + } + else if (datatype == GL_FLOAT && comps == 1) { + GLuint i, j, k; + const GLfloat *rowA = (const GLfloat *) srcRowA; + const GLfloat *rowB = (const GLfloat *) srcRowB; + GLfloat *dst = (GLfloat *) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) * 0.25F; } - return; - case MESA_FORMAT_A8: - case MESA_FORMAT_L8: - case MESA_FORMAT_I8: - case MESA_FORMAT_CI8: -#if FEATURE_EXT_texture_sRGB - case MESA_FORMAT_SL8: -#endif - { - GLuint i, j, k; - const GLubyte *rowA = (const GLubyte *) srcRowA; - const GLubyte *rowB = (const GLubyte *) srcRowB; - GLubyte *dst = (GLubyte *) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) >> 2; + } + + else if (datatype == GL_HALF_FLOAT_ARB && comps == 4) { + GLuint i, j, k, comp; + const GLhalfARB(*rowA)[4] = (const GLhalfARB(*)[4]) srcRowA; + const GLhalfARB(*rowB)[4] = (const GLhalfARB(*)[4]) srcRowB; + GLhalfARB(*dst)[4] = (GLhalfARB(*)[4]) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + for (comp = 0; comp < 4; comp++) { + GLfloat aj, ak, bj, bk; + aj = _mesa_half_to_float(rowA[j][comp]); + ak = _mesa_half_to_float(rowA[k][comp]); + bj = _mesa_half_to_float(rowB[j][comp]); + bk = _mesa_half_to_float(rowB[k][comp]); + dst[i][comp] = _mesa_float_to_half((aj + ak + bj + bk) * 0.25F); } } - return; - case MESA_FORMAT_RGBA_FLOAT32: - { - GLuint i, j, k; - const GLfloat (*rowA)[4] = (const GLfloat (*)[4]) srcRowA; - const GLfloat (*rowB)[4] = (const GLfloat (*)[4]) srcRowB; - GLfloat (*dst)[4] = (GLfloat (*)[4]) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - dst[i][0] = (rowA[j][0] + rowA[k][0] + - rowB[j][0] + rowB[k][0]) * 0.25F; - dst[i][1] = (rowA[j][1] + rowA[k][1] + - rowB[j][1] + rowB[k][1]) * 0.25F; - dst[i][2] = (rowA[j][2] + rowA[k][2] + - rowB[j][2] + rowB[k][2]) * 0.25F; - dst[i][3] = (rowA[j][3] + rowA[k][3] + - rowB[j][3] + rowB[k][3]) * 0.25F; + } + else if (datatype == GL_HALF_FLOAT_ARB && comps == 3) { + GLuint i, j, k, comp; + const GLhalfARB(*rowA)[3] = (const GLhalfARB(*)[3]) srcRowA; + const GLhalfARB(*rowB)[3] = (const GLhalfARB(*)[3]) srcRowB; + GLhalfARB(*dst)[3] = (GLhalfARB(*)[3]) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + for (comp = 0; comp < 3; comp++) { + GLfloat aj, ak, bj, bk; + aj = _mesa_half_to_float(rowA[j][comp]); + ak = _mesa_half_to_float(rowA[k][comp]); + bj = _mesa_half_to_float(rowB[j][comp]); + bk = _mesa_half_to_float(rowB[k][comp]); + dst[i][comp] = _mesa_float_to_half((aj + ak + bj + bk) * 0.25F); } } - return; - case MESA_FORMAT_RGBA_FLOAT16: - { - GLuint i, j, k, comp; - const GLhalfARB (*rowA)[4] = (const GLhalfARB (*)[4]) srcRowA; - const GLhalfARB (*rowB)[4] = (const GLhalfARB (*)[4]) srcRowB; - GLhalfARB (*dst)[4] = (GLhalfARB (*)[4]) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - for (comp = 0; comp < 4; comp++) { - GLfloat aj, ak, bj, bk; - aj = _mesa_half_to_float(rowA[j][comp]); - ak = _mesa_half_to_float(rowA[k][comp]); - bj = _mesa_half_to_float(rowB[j][comp]); - bk = _mesa_half_to_float(rowB[k][comp]); - dst[i][comp] = _mesa_float_to_half((aj + ak + bj + bk) * 0.25F); - } + } + else if (datatype == GL_HALF_FLOAT_ARB && comps == 2) { + GLuint i, j, k, comp; + const GLhalfARB(*rowA)[2] = (const GLhalfARB(*)[2]) srcRowA; + const GLhalfARB(*rowB)[2] = (const GLhalfARB(*)[2]) srcRowB; + GLhalfARB(*dst)[2] = (GLhalfARB(*)[2]) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + for (comp = 0; comp < 2; comp++) { + GLfloat aj, ak, bj, bk; + aj = _mesa_half_to_float(rowA[j][comp]); + ak = _mesa_half_to_float(rowA[k][comp]); + bj = _mesa_half_to_float(rowB[j][comp]); + bk = _mesa_half_to_float(rowB[k][comp]); + dst[i][comp] = _mesa_float_to_half((aj + ak + bj + bk) * 0.25F); } } - return; - case MESA_FORMAT_RGB_FLOAT32: - { - GLuint i, j, k; - const GLfloat (*rowA)[3] = (const GLfloat (*)[3]) srcRowA; - const GLfloat (*rowB)[3] = (const GLfloat (*)[3]) srcRowB; - GLfloat (*dst)[3] = (GLfloat (*)[3]) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - dst[i][0] = (rowA[j][0] + rowA[k][0] + - rowB[j][0] + rowB[k][0]) * 0.25F; - dst[i][1] = (rowA[j][1] + rowA[k][1] + - rowB[j][1] + rowB[k][1]) * 0.25F; - dst[i][2] = (rowA[j][2] + rowA[k][2] + - rowB[j][2] + rowB[k][2]) * 0.25F; - } + } + else if (datatype == GL_HALF_FLOAT_ARB && comps == 1) { + GLuint i, j, k; + const GLhalfARB *rowA = (const GLhalfARB *) srcRowA; + const GLhalfARB *rowB = (const GLhalfARB *) srcRowB; + GLhalfARB *dst = (GLhalfARB *) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + GLfloat aj, ak, bj, bk; + aj = _mesa_half_to_float(rowA[j]); + ak = _mesa_half_to_float(rowA[k]); + bj = _mesa_half_to_float(rowB[j]); + bk = _mesa_half_to_float(rowB[k]); + dst[i] = _mesa_float_to_half((aj + ak + bj + bk) * 0.25F); } - return; - case MESA_FORMAT_RGB_FLOAT16: - { - GLuint i, j, k, comp; - const GLhalfARB (*rowA)[3] = (const GLhalfARB (*)[3]) srcRowA; - const GLhalfARB (*rowB)[3] = (const GLhalfARB (*)[3]) srcRowB; - GLhalfARB (*dst)[3] = (GLhalfARB (*)[3]) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - for (comp = 0; comp < 3; comp++) { - GLfloat aj, ak, bj, bk; - aj = _mesa_half_to_float(rowA[j][comp]); - ak = _mesa_half_to_float(rowA[k][comp]); - bj = _mesa_half_to_float(rowB[j][comp]); - bk = _mesa_half_to_float(rowB[k][comp]); - dst[i][comp] = _mesa_float_to_half((aj + ak + bj + bk) * 0.25F); - } - } + } + + else if (datatype == GL_UNSIGNED_INT && comps == 1) { + GLuint i, j, k; + const GLuint *rowA = (const GLuint *) srcRowA; + const GLuint *rowB = (const GLuint *) srcRowB; + GLfloat *dst = (GLfloat *) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i] = rowA[j] / 4 + rowA[k] / 4 + rowB[j] / 4 + rowB[k] / 4; } - return; - case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32: - { - GLuint i, j, k; - const GLfloat (*rowA)[2] = (const GLfloat (*)[2]) srcRowA; - const GLfloat (*rowB)[2] = (const GLfloat (*)[2]) srcRowB; - GLfloat (*dst)[2] = (GLfloat (*)[2]) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - dst[i][0] = (rowA[j][0] + rowA[k][0] + - rowB[j][0] + rowB[k][0]) * 0.25F; - dst[i][1] = (rowA[j][1] + rowA[k][1] + - rowB[j][1] + rowB[k][1]) * 0.25F; - } + } + + else if (datatype == GL_UNSIGNED_SHORT_5_6_5 && comps == 3) { + GLuint i, j, k; + const GLushort *rowA = (const GLushort *) srcRowA; + const GLushort *rowB = (const GLushort *) srcRowB; + GLushort *dst = (GLushort *) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + const GLint rowAr0 = rowA[j] & 0x1f; + const GLint rowAr1 = rowA[k] & 0x1f; + const GLint rowBr0 = rowB[j] & 0x1f; + const GLint rowBr1 = rowB[k] & 0x1f; + const GLint rowAg0 = (rowA[j] >> 5) & 0x3f; + const GLint rowAg1 = (rowA[k] >> 5) & 0x3f; + const GLint rowBg0 = (rowB[j] >> 5) & 0x3f; + const GLint rowBg1 = (rowB[k] >> 5) & 0x3f; + const GLint rowAb0 = (rowA[j] >> 11) & 0x1f; + const GLint rowAb1 = (rowA[k] >> 11) & 0x1f; + const GLint rowBb0 = (rowB[j] >> 11) & 0x1f; + const GLint rowBb1 = (rowB[k] >> 11) & 0x1f; + const GLint red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2; + const GLint green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2; + const GLint blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2; + dst[i] = (blue << 11) | (green << 5) | red; } - return; - case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16: - { - GLuint i, j, k, comp; - const GLhalfARB (*rowA)[2] = (const GLhalfARB (*)[2]) srcRowA; - const GLhalfARB (*rowB)[2] = (const GLhalfARB (*)[2]) srcRowB; - GLhalfARB (*dst)[2] = (GLhalfARB (*)[2]) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - for (comp = 0; comp < 2; comp++) { - GLfloat aj, ak, bj, bk; - aj = _mesa_half_to_float(rowA[j][comp]); - ak = _mesa_half_to_float(rowA[k][comp]); - bj = _mesa_half_to_float(rowB[j][comp]); - bk = _mesa_half_to_float(rowB[k][comp]); - dst[i][comp] = _mesa_float_to_half((aj + ak + bj + bk) * 0.25F); - } - } + } + else if (datatype == GL_UNSIGNED_SHORT_4_4_4_4 && comps == 4) { + GLuint i, j, k; + const GLushort *rowA = (const GLushort *) srcRowA; + const GLushort *rowB = (const GLushort *) srcRowB; + GLushort *dst = (GLushort *) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + const GLint rowAr0 = rowA[j] & 0xf; + const GLint rowAr1 = rowA[k] & 0xf; + const GLint rowBr0 = rowB[j] & 0xf; + const GLint rowBr1 = rowB[k] & 0xf; + const GLint rowAg0 = (rowA[j] >> 4) & 0xf; + const GLint rowAg1 = (rowA[k] >> 4) & 0xf; + const GLint rowBg0 = (rowB[j] >> 4) & 0xf; + const GLint rowBg1 = (rowB[k] >> 4) & 0xf; + const GLint rowAb0 = (rowA[j] >> 8) & 0xf; + const GLint rowAb1 = (rowA[k] >> 8) & 0xf; + const GLint rowBb0 = (rowB[j] >> 8) & 0xf; + const GLint rowBb1 = (rowB[k] >> 8) & 0xf; + const GLint rowAa0 = (rowA[j] >> 12) & 0xf; + const GLint rowAa1 = (rowA[k] >> 12) & 0xf; + const GLint rowBa0 = (rowB[j] >> 12) & 0xf; + const GLint rowBa1 = (rowB[k] >> 12) & 0xf; + const GLint red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2; + const GLint green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2; + const GLint blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2; + const GLint alpha = (rowAa0 + rowAa1 + rowBa0 + rowBa1) >> 2; + dst[i] = (alpha << 12) | (blue << 8) | (green << 4) | red; } - return; - case MESA_FORMAT_ALPHA_FLOAT32: - case MESA_FORMAT_LUMINANCE_FLOAT32: - case MESA_FORMAT_INTENSITY_FLOAT32: - { - GLuint i, j, k; - const GLfloat *rowA = (const GLfloat *) srcRowA; - const GLfloat *rowB = (const GLfloat *) srcRowB; - GLfloat *dst = (GLfloat *) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) * 0.25F; - } + } + else if (datatype == GL_UNSIGNED_SHORT_1_5_5_5_REV && comps == 4) { + GLuint i, j, k; + const GLushort *rowA = (const GLushort *) srcRowA; + const GLushort *rowB = (const GLushort *) srcRowB; + GLushort *dst = (GLushort *) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + const GLint rowAr0 = rowA[j] & 0x1f; + const GLint rowAr1 = rowA[k] & 0x1f; + const GLint rowBr0 = rowB[j] & 0x1f; + const GLint rowBr1 = rowB[k] & 0xf; + const GLint rowAg0 = (rowA[j] >> 5) & 0x1f; + const GLint rowAg1 = (rowA[k] >> 5) & 0x1f; + const GLint rowBg0 = (rowB[j] >> 5) & 0x1f; + const GLint rowBg1 = (rowB[k] >> 5) & 0x1f; + const GLint rowAb0 = (rowA[j] >> 10) & 0x1f; + const GLint rowAb1 = (rowA[k] >> 10) & 0x1f; + const GLint rowBb0 = (rowB[j] >> 10) & 0x1f; + const GLint rowBb1 = (rowB[k] >> 10) & 0x1f; + const GLint rowAa0 = (rowA[j] >> 15) & 0x1; + const GLint rowAa1 = (rowA[k] >> 15) & 0x1; + const GLint rowBa0 = (rowB[j] >> 15) & 0x1; + const GLint rowBa1 = (rowB[k] >> 15) & 0x1; + const GLint red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2; + const GLint green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2; + const GLint blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2; + const GLint alpha = (rowAa0 + rowAa1 + rowBa0 + rowBa1) >> 2; + dst[i] = (alpha << 15) | (blue << 10) | (green << 5) | red; } - return; - case MESA_FORMAT_ALPHA_FLOAT16: - case MESA_FORMAT_LUMINANCE_FLOAT16: - case MESA_FORMAT_INTENSITY_FLOAT16: - { - GLuint i, j, k; - const GLhalfARB *rowA = (const GLhalfARB *) srcRowA; - const GLhalfARB *rowB = (const GLhalfARB *) srcRowB; - GLhalfARB *dst = (GLhalfARB *) dstRow; - for (i = j = 0, k = k0; i < (GLuint) dstWidth; - i++, j += colStride, k += colStride) { - GLfloat aj, ak, bj, bk; - aj = _mesa_half_to_float(rowA[j]); - ak = _mesa_half_to_float(rowA[k]); - bj = _mesa_half_to_float(rowB[j]); - bk = _mesa_half_to_float(rowB[k]); - dst[i] = _mesa_float_to_half((aj + ak + bj + bk) * 0.25F); - } + } + else if (datatype == GL_UNSIGNED_BYTE_3_3_2 && comps == 3) { + GLuint i, j, k; + const GLubyte *rowA = (const GLubyte *) srcRowA; + const GLubyte *rowB = (const GLubyte *) srcRowB; + GLubyte *dst = (GLubyte *) dstRow; + for (i = j = 0, k = k0; i < (GLuint) dstWidth; + i++, j += colStride, k += colStride) { + const GLint rowAr0 = rowA[j] & 0x3; + const GLint rowAr1 = rowA[k] & 0x3; + const GLint rowBr0 = rowB[j] & 0x3; + const GLint rowBr1 = rowB[k] & 0x3; + const GLint rowAg0 = (rowA[j] >> 2) & 0x7; + const GLint rowAg1 = (rowA[k] >> 2) & 0x7; + const GLint rowBg0 = (rowB[j] >> 2) & 0x7; + const GLint rowBg1 = (rowB[k] >> 2) & 0x7; + const GLint rowAb0 = (rowA[j] >> 5) & 0x7; + const GLint rowAb1 = (rowA[k] >> 5) & 0x7; + const GLint rowBb0 = (rowB[j] >> 5) & 0x7; + const GLint rowBb1 = (rowB[k] >> 5) & 0x7; + const GLint red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2; + const GLint green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2; + const GLint blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2; + dst[i] = (blue << 5) | (green << 2) | red; } - return; - - default: + } + else { _mesa_problem(NULL, "bad format in do_row()"); } } @@ -504,11 +419,11 @@ do_row(const struct gl_texture_format *format, GLint srcWidth, */ static void -make_1d_mipmap(const struct gl_texture_format *format, GLint border, +make_1d_mipmap(GLenum datatype, GLuint comps, GLint border, GLint srcWidth, const GLubyte *srcPtr, GLint dstWidth, GLubyte *dstPtr) { - const GLint bpt = format->TexelBytes; + const GLint bpt = bytes_per_pixel(datatype, comps); const GLubyte *src; GLubyte *dst; @@ -517,7 +432,7 @@ make_1d_mipmap(const struct gl_texture_format *format, GLint border, dst = dstPtr + border * bpt; /* we just duplicate the input row, kind of hack, saves code */ - do_row(format, srcWidth - 2 * border, src, src, + do_row(datatype, comps, srcWidth - 2 * border, src, src, dstWidth - 2 * border, dst); if (border) { @@ -532,23 +447,29 @@ make_1d_mipmap(const struct gl_texture_format *format, GLint border, /** - * XXX need to use the tex image's row stride! + * Strides are in bytes. If zero, it'll be computed as width * bpp. */ static void -make_2d_mipmap(const struct gl_texture_format *format, GLint border, - GLint srcWidth, GLint srcHeight, const GLubyte *srcPtr, - GLint dstWidth, GLint dstHeight, GLubyte *dstPtr) +make_2d_mipmap(GLenum datatype, GLuint comps, GLint border, + GLint srcWidth, GLint srcHeight, + GLint srcRowStride, const GLubyte *srcPtr, + GLint dstWidth, GLint dstHeight, + GLint dstRowStride, GLubyte *dstPtr) { - const GLint bpt = format->TexelBytes; + const GLint bpt = bytes_per_pixel(datatype, comps); const GLint srcWidthNB = srcWidth - 2 * border; /* sizes w/out border */ const GLint dstWidthNB = dstWidth - 2 * border; const GLint dstHeightNB = dstHeight - 2 * border; - const GLint srcRowStride = bpt * srcWidth; - const GLint dstRowStride = bpt * dstWidth; const GLubyte *srcA, *srcB; GLubyte *dst; GLint row; + if (!srcRowStride) + srcRowStride = bpt * srcWidth; + + if (!dstRowStride) + dstRowStride = bpt * dstWidth; + /* Compute src and dst pointers, skipping any border */ srcA = srcPtr + border * ((srcWidth + 1) * bpt); if (srcHeight > 1) @@ -558,7 +479,7 @@ make_2d_mipmap(const struct gl_texture_format *format, GLint border, dst = dstPtr + border * ((dstWidth + 1) * bpt); for (row = 0; row < dstHeightNB; row++) { - do_row(format, srcWidthNB, srcA, srcB, + do_row(datatype, comps, srcWidthNB, srcA, srcB, dstWidthNB, dst); srcA += 2 * srcRowStride; srcB += 2 * srcRowStride; @@ -580,12 +501,12 @@ make_2d_mipmap(const struct gl_texture_format *format, GLint border, MEMCPY(dstPtr + (dstWidth * dstHeight - 1) * bpt, srcPtr + (srcWidth * srcHeight - 1) * bpt, bpt); /* lower border */ - do_row(format, srcWidthNB, + do_row(datatype, comps, srcWidthNB, srcPtr + bpt, srcPtr + bpt, dstWidthNB, dstPtr + bpt); /* upper border */ - do_row(format, srcWidthNB, + do_row(datatype, comps, srcWidthNB, srcPtr + (srcWidth * (srcHeight - 1) + 1) * bpt, srcPtr + (srcWidth * (srcHeight - 1) + 1) * bpt, dstWidthNB, @@ -603,11 +524,11 @@ make_2d_mipmap(const struct gl_texture_format *format, GLint border, else { /* average two src pixels each dest pixel */ for (row = 0; row < dstHeightNB; row += 2) { - do_row(format, 1, + do_row(datatype, comps, 1, srcPtr + (srcWidth * (row * 2 + 1)) * bpt, srcPtr + (srcWidth * (row * 2 + 2)) * bpt, 1, dstPtr + (dstWidth * row + 1) * bpt); - do_row(format, 1, + do_row(datatype, comps, 1, srcPtr + (srcWidth * (row * 2 + 1) + srcWidth - 1) * bpt, srcPtr + (srcWidth * (row * 2 + 2) + srcWidth - 1) * bpt, 1, dstPtr + (dstWidth * row + 1 + dstWidth - 1) * bpt); @@ -618,13 +539,15 @@ make_2d_mipmap(const struct gl_texture_format *format, GLint border, static void -make_3d_mipmap(const struct gl_texture_format *format, GLint border, +make_3d_mipmap(GLenum datatype, GLuint comps, GLint border, GLint srcWidth, GLint srcHeight, GLint srcDepth, + GLint srcRowStride, const GLubyte *srcPtr, GLint dstWidth, GLint dstHeight, GLint dstDepth, + GLint dstRowStride, GLubyte *dstPtr) { - const GLint bpt = format->TexelBytes; + const GLint bpt = bytes_per_pixel(datatype, comps); const GLint srcWidthNB = srcWidth - 2 * border; /* sizes w/out border */ const GLint srcDepthNB = srcDepth - 2 * border; const GLint dstWidthNB = dstWidth - 2 * border; @@ -633,7 +556,6 @@ make_3d_mipmap(const struct gl_texture_format *format, GLint border, GLvoid *tmpRowA, *tmpRowB; GLint img, row; GLint bytesPerSrcImage, bytesPerDstImage; - GLint bytesPerSrcRow, bytesPerDstRow; GLint srcImageOffset, srcRowOffset; (void) srcDepthNB; /* silence warnings */ @@ -651,8 +573,10 @@ make_3d_mipmap(const struct gl_texture_format *format, GLint border, bytesPerSrcImage = srcWidth * srcHeight * bpt; bytesPerDstImage = dstWidth * dstHeight * bpt; - bytesPerSrcRow = srcWidth * bpt; - bytesPerDstRow = dstWidth * bpt; + if (!srcRowStride) + srcRowStride = srcWidth * bpt; + if (!dstRowStride) + dstRowStride = dstWidth * bpt; /* Offset between adjacent src images to be averaged together */ srcImageOffset = (srcDepth == dstDepth) ? 0 : bytesPerSrcImage; @@ -676,13 +600,13 @@ make_3d_mipmap(const struct gl_texture_format *format, GLint border, for (img = 0; img < dstDepthNB; img++) { /* first source image pointer, skipping border */ const GLubyte *imgSrcA = srcPtr - + (bytesPerSrcImage + bytesPerSrcRow + border) * bpt * border + + (bytesPerSrcImage + srcRowStride + border) * bpt * border + img * (bytesPerSrcImage + srcImageOffset); /* second source image pointer, skipping border */ const GLubyte *imgSrcB = imgSrcA + srcImageOffset; /* address of the dest image, skipping border */ GLubyte *imgDst = dstPtr - + (bytesPerDstImage + bytesPerDstRow + border) * bpt * border + + (bytesPerDstImage + dstRowStride + border) * bpt * border + img * bytesPerDstImage; /* setup the four source row pointers and the dest row pointer */ @@ -694,20 +618,20 @@ make_3d_mipmap(const struct gl_texture_format *format, GLint border, for (row = 0; row < dstHeightNB; row++) { /* Average together two rows from first src image */ - do_row(format, srcWidthNB, srcImgARowA, srcImgARowB, + do_row(datatype, comps, srcWidthNB, srcImgARowA, srcImgARowB, srcWidthNB, tmpRowA); /* Average together two rows from second src image */ - do_row(format, srcWidthNB, srcImgBRowA, srcImgBRowB, + do_row(datatype, comps, srcWidthNB, srcImgBRowA, srcImgBRowB, srcWidthNB, tmpRowB); /* Average together the temp rows to make the final row */ - do_row(format, srcWidthNB, tmpRowA, tmpRowB, + do_row(datatype, comps, srcWidthNB, tmpRowA, tmpRowB, dstWidthNB, dstImgRow); /* advance to next rows */ - srcImgARowA += bytesPerSrcRow + srcRowOffset; - srcImgARowB += bytesPerSrcRow + srcRowOffset; - srcImgBRowA += bytesPerSrcRow + srcRowOffset; - srcImgBRowB += bytesPerSrcRow + srcRowOffset; - dstImgRow += bytesPerDstRow; + srcImgARowA += srcRowStride + srcRowOffset; + srcImgARowB += srcRowStride + srcRowOffset; + srcImgBRowA += srcRowStride + srcRowOffset; + srcImgBRowB += srcRowStride + srcRowOffset; + dstImgRow += dstRowStride; } } @@ -717,12 +641,14 @@ make_3d_mipmap(const struct gl_texture_format *format, GLint border, /* Luckily we can leverage the make_2d_mipmap() function here! */ if (border > 0) { /* do front border image */ - make_2d_mipmap(format, 1, srcWidth, srcHeight, srcPtr, - dstWidth, dstHeight, dstPtr); + make_2d_mipmap(datatype, comps, 1, srcWidth, srcHeight, 0, srcPtr, + dstWidth, dstHeight, 0, dstPtr); /* do back border image */ - make_2d_mipmap(format, 1, srcWidth, srcHeight, + make_2d_mipmap(datatype, comps, 1, srcWidth, srcHeight, + 0, srcPtr + bytesPerSrcImage * (srcDepth - 1), dstWidth, dstHeight, + 0, dstPtr + bytesPerDstImage * (dstDepth - 1)); /* do four remaining border edges that span the image slices */ if (srcDepth == dstDepth) { @@ -738,9 +664,9 @@ make_3d_mipmap(const struct gl_texture_format *format, GLint border, /* do border along [img][row=dstHeight-1][col=0] */ src = srcPtr + (img * 2 + 1) * bytesPerSrcImage - + (srcHeight - 1) * bytesPerSrcRow; + + (srcHeight - 1) * srcRowStride; dst = dstPtr + (img + 1) * bytesPerDstImage - + (dstHeight - 1) * bytesPerDstRow; + + (dstHeight - 1) * dstRowStride; MEMCPY(dst, src, bpt); /* do border along [img][row=0][col=dstWidth-1] */ @@ -768,28 +694,28 @@ make_3d_mipmap(const struct gl_texture_format *format, GLint border, /* do border along [img][row=0][col=0] */ src = srcPtr + (img * 2 + 1) * bytesPerSrcImage; dst = dstPtr + (img + 1) * bytesPerDstImage; - do_row(format, 1, src, src + srcImageOffset, 1, dst); + do_row(datatype, comps, 1, src, src + srcImageOffset, 1, dst); /* do border along [img][row=dstHeight-1][col=0] */ src = srcPtr + (img * 2 + 1) * bytesPerSrcImage - + (srcHeight - 1) * bytesPerSrcRow; + + (srcHeight - 1) * srcRowStride; dst = dstPtr + (img + 1) * bytesPerDstImage - + (dstHeight - 1) * bytesPerDstRow; - do_row(format, 1, src, src + srcImageOffset, 1, dst); + + (dstHeight - 1) * dstRowStride; + do_row(datatype, comps, 1, src, src + srcImageOffset, 1, dst); /* do border along [img][row=0][col=dstWidth-1] */ src = srcPtr + (img * 2 + 1) * bytesPerSrcImage + (srcWidth - 1) * bpt; dst = dstPtr + (img + 1) * bytesPerDstImage + (dstWidth - 1) * bpt; - do_row(format, 1, src, src + srcImageOffset, 1, dst); + do_row(datatype, comps, 1, src, src + srcImageOffset, 1, dst); /* do border along [img][row=dstHeight-1][col=dstWidth-1] */ src = srcPtr + (img * 2 + 1) * bytesPerSrcImage + (bytesPerSrcImage - bpt); dst = dstPtr + (img + 1) * bytesPerDstImage + (bytesPerDstImage - bpt); - do_row(format, 1, src, src + srcImageOffset, 1, dst); + do_row(datatype, comps, 1, src, src + srcImageOffset, 1, dst); } } } @@ -797,11 +723,11 @@ make_3d_mipmap(const struct gl_texture_format *format, GLint border, static void -make_1d_stack_mipmap(const struct gl_texture_format *format, GLint border, +make_1d_stack_mipmap(GLenum datatype, GLuint comps, GLint border, GLint srcWidth, const GLubyte *srcPtr, GLint dstWidth, GLint dstHeight, GLubyte *dstPtr) { - const GLint bpt = format->TexelBytes; + const GLint bpt = bytes_per_pixel(datatype, comps); const GLint srcWidthNB = srcWidth - 2 * border; /* sizes w/out border */ const GLint dstWidthNB = dstWidth - 2 * border; const GLint dstHeightNB = dstHeight - 2 * border; @@ -816,7 +742,7 @@ make_1d_stack_mipmap(const struct gl_texture_format *format, GLint border, dst = dstPtr + border * ((dstWidth + 1) * bpt); for (row = 0; row < dstHeightNB; row++) { - do_row(format, srcWidthNB, src, src, + do_row(datatype, comps, srcWidthNB, src, src, dstWidthNB, dst); src += srcRowStride; dst += dstRowStride; @@ -839,23 +765,30 @@ make_1d_stack_mipmap(const struct gl_texture_format *format, GLint border, * and \c make_2d_mipmap. */ static void -make_2d_stack_mipmap(const struct gl_texture_format *format, GLint border, - GLint srcWidth, GLint srcHeight, const GLubyte *srcPtr, +make_2d_stack_mipmap(GLenum datatype, GLuint comps, GLint border, + GLint srcWidth, GLint srcHeight, + GLint srcRowStride, + const GLubyte *srcPtr, GLint dstWidth, GLint dstHeight, GLint dstDepth, + GLint dstRowStride, GLubyte *dstPtr) { - const GLint bpt = format->TexelBytes; + const GLint bpt = bytes_per_pixel(datatype, comps); const GLint srcWidthNB = srcWidth - 2 * border; /* sizes w/out border */ const GLint dstWidthNB = dstWidth - 2 * border; const GLint dstHeightNB = dstHeight - 2 * border; const GLint dstDepthNB = dstDepth - 2 * border; - const GLint srcRowStride = bpt * srcWidth; - const GLint dstRowStride = bpt * dstWidth; const GLubyte *srcA, *srcB; GLubyte *dst; GLint layer; GLint row; + if (!srcRowStride) + srcRowStride = bpt * srcWidth; + + if (!dstRowStride) + dstRowStride = bpt * dstWidth; + /* Compute src and dst pointers, skipping any border */ srcA = srcPtr + border * ((srcWidth + 1) * bpt); if (srcHeight > 1) @@ -866,7 +799,7 @@ make_2d_stack_mipmap(const struct gl_texture_format *format, GLint border, for (layer = 0; layer < dstDepthNB; layer++) { for (row = 0; row < dstHeightNB; row++) { - do_row(format, srcWidthNB, srcA, srcB, + do_row(datatype, comps, srcWidthNB, srcA, srcB, dstWidthNB, dst); srcA += 2 * srcRowStride; srcB += 2 * srcRowStride; @@ -888,12 +821,12 @@ make_2d_stack_mipmap(const struct gl_texture_format *format, GLint border, MEMCPY(dstPtr + (dstWidth * dstHeight - 1) * bpt, srcPtr + (srcWidth * srcHeight - 1) * bpt, bpt); /* lower border */ - do_row(format, srcWidthNB, + do_row(datatype, comps, srcWidthNB, srcPtr + bpt, srcPtr + bpt, dstWidthNB, dstPtr + bpt); /* upper border */ - do_row(format, srcWidthNB, + do_row(datatype, comps, srcWidthNB, srcPtr + (srcWidth * (srcHeight - 1) + 1) * bpt, srcPtr + (srcWidth * (srcHeight - 1) + 1) * bpt, dstWidthNB, @@ -911,11 +844,11 @@ make_2d_stack_mipmap(const struct gl_texture_format *format, GLint border, else { /* average two src pixels each dest pixel */ for (row = 0; row < dstHeightNB; row += 2) { - do_row(format, 1, + do_row(datatype, comps, 1, srcPtr + (srcWidth * (row * 2 + 1)) * bpt, srcPtr + (srcWidth * (row * 2 + 2)) * bpt, 1, dstPtr + (dstWidth * row + 1) * bpt); - do_row(format, 1, + do_row(datatype, comps, 1, srcPtr + (srcWidth * (row * 2 + 1) + srcWidth - 1) * bpt, srcPtr + (srcWidth * (row * 2 + 2) + srcWidth - 1) * bpt, 1, dstPtr + (dstWidth * row + 1 + dstWidth - 1) * bpt); @@ -927,6 +860,106 @@ make_2d_stack_mipmap(const struct gl_texture_format *format, GLint border, /** + * Down-sample a texture image to produce the next lower mipmap level. + */ +void +_mesa_generate_mipmap_level(GLenum target, + GLenum datatype, GLuint comps, + GLint border, + GLint srcWidth, GLint srcHeight, GLint srcDepth, + GLint srcRowStride, + const GLubyte *srcData, + GLint dstWidth, GLint dstHeight, GLint dstDepth, + GLint dstRowStride, + GLubyte *dstData) +{ + switch (target) { + case GL_TEXTURE_1D: + make_1d_mipmap(datatype, comps, border, + srcWidth, srcData, + dstWidth, dstData); + break; + case GL_TEXTURE_2D: + case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: + make_2d_mipmap(datatype, comps, border, + srcWidth, srcHeight, srcRowStride, srcData, + dstWidth, dstHeight, dstRowStride, dstData); + break; + case GL_TEXTURE_3D: + make_3d_mipmap(datatype, comps, border, + srcWidth, srcHeight, srcDepth, srcRowStride, srcData, + dstWidth, dstHeight, dstDepth, dstRowStride, dstData); + break; + case GL_TEXTURE_1D_ARRAY_EXT: + make_1d_stack_mipmap(datatype, comps, border, + srcWidth, srcData, + dstWidth, dstHeight, dstData); + break; + case GL_TEXTURE_2D_ARRAY_EXT: + make_2d_stack_mipmap(datatype, comps, border, + srcWidth, srcHeight, srcRowStride, srcData, + dstWidth, dstHeight, dstDepth, dstRowStride, dstData); + break; + case GL_TEXTURE_RECTANGLE_NV: + /* no mipmaps, do nothing */ + break; + default: + _mesa_problem(NULL, "bad target in _mesa_generate_mipmap_level"); + } +} + + +/** + * compute next (level+1) image size + * \return GL_FALSE if no smaller size can be generated (eg. src is 1x1x1 size) + */ +static GLboolean +next_mipmap_level_size(GLenum target, GLint border, + GLint srcWidth, GLint srcHeight, GLint srcDepth, + GLint *dstWidth, GLint *dstHeight, GLint *dstDepth) +{ + if (srcWidth - 2 * border > 1) { + *dstWidth = (srcWidth - 2 * border) / 2 + 2 * border; + } + else { + *dstWidth = srcWidth; /* can't go smaller */ + } + + if ((srcHeight - 2 * border > 1) && + (target != GL_TEXTURE_1D_ARRAY_EXT)) { + *dstHeight = (srcHeight - 2 * border) / 2 + 2 * border; + } + else { + *dstHeight = srcHeight; /* can't go smaller */ + } + + if ((srcDepth - 2 * border > 1) && + (target != GL_TEXTURE_2D_ARRAY_EXT)) { + *dstDepth = (srcDepth - 2 * border) / 2 + 2 * border; + } + else { + *dstDepth = srcDepth; /* can't go smaller */ + } + + if (*dstWidth == srcWidth && + *dstHeight == srcHeight && + *dstDepth == srcDepth) { + return GL_FALSE; + } + else { + return GL_TRUE; + } +} + + + + +/** * For GL_SGIX_generate_mipmap: * Generate a complete set of mipmaps from texObj's base-level image. * Stop at texObj's MaxLevel or when we get to the 1x1 texture. @@ -940,6 +973,8 @@ _mesa_generate_mipmap(GLcontext *ctx, GLenum target, const GLubyte *srcData = NULL; GLubyte *dstData = NULL; GLint level, maxLevels; + GLenum datatype; + GLuint comps; ASSERT(texObj); /* XXX choose cube map face here??? */ @@ -1002,6 +1037,8 @@ _mesa_generate_mipmap(GLcontext *ctx, GLenum target, convertFormat = srcImage->TexFormat; } + _mesa_format_to_type_and_comps(convertFormat, &datatype, &comps); + for (level = texObj->BaseLevel; level < texObj->MaxLevel && level < maxLevels - 1; level++) { /* generate image[level+1] from image[level] */ @@ -1010,6 +1047,7 @@ _mesa_generate_mipmap(GLcontext *ctx, GLenum target, GLint srcWidth, srcHeight, srcDepth; GLint dstWidth, dstHeight, dstDepth; GLint border, bytesPerTexel; + GLboolean nextLevel; /* get src image parameters */ srcImage = _mesa_select_tex_image(ctx, texObj, target, level); @@ -1019,31 +1057,10 @@ _mesa_generate_mipmap(GLcontext *ctx, GLenum target, srcDepth = srcImage->Depth; border = srcImage->Border; - /* compute next (level+1) image size */ - if (srcWidth - 2 * border > 1) { - dstWidth = (srcWidth - 2 * border) / 2 + 2 * border; - } - else { - dstWidth = srcWidth; /* can't go smaller */ - } - if ((srcHeight - 2 * border > 1) && - (texObj->Target != GL_TEXTURE_1D_ARRAY_EXT)) { - dstHeight = (srcHeight - 2 * border) / 2 + 2 * border; - } - else { - dstHeight = srcHeight; /* can't go smaller */ - } - if ((srcDepth - 2 * border > 1) && - (texObj->Target != GL_TEXTURE_2D_ARRAY_EXT)) { - dstDepth = (srcDepth - 2 * border) / 2 + 2 * border; - } - else { - dstDepth = srcDepth; /* can't go smaller */ - } - - if (dstWidth == srcWidth && - dstHeight == srcHeight && - dstDepth == srcDepth) { + nextLevel = next_mipmap_level_size(target, border, + srcWidth, srcHeight, srcDepth, + &dstWidth, &dstHeight, &dstDepth); + if (!nextLevel) { /* all done */ if (srcImage->IsCompressed) { _mesa_free((void *) srcData); @@ -1113,48 +1130,10 @@ _mesa_generate_mipmap(GLcontext *ctx, GLenum target, dstData = (GLubyte *) dstImage->Data; } - /* - * We use simple 2x2 averaging to compute the next mipmap level. - */ - switch (target) { - case GL_TEXTURE_1D: - make_1d_mipmap(convertFormat, border, - srcWidth, srcData, - dstWidth, dstData); - break; - case GL_TEXTURE_2D: - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: - make_2d_mipmap(convertFormat, border, - srcWidth, srcHeight, srcData, - dstWidth, dstHeight, dstData); - break; - case GL_TEXTURE_3D: - make_3d_mipmap(convertFormat, border, - srcWidth, srcHeight, srcDepth, srcData, - dstWidth, dstHeight, dstDepth, dstData); - break; - case GL_TEXTURE_1D_ARRAY_EXT: - make_1d_stack_mipmap(convertFormat, border, - srcWidth, srcData, - dstWidth, dstHeight, dstData); - break; - case GL_TEXTURE_2D_ARRAY_EXT: - make_2d_stack_mipmap(convertFormat, border, - srcWidth, srcHeight, srcData, - dstWidth, dstHeight, dstDepth, dstData); - break; - case GL_TEXTURE_RECTANGLE_NV: - /* no mipmaps, do nothing */ - break; - default: - _mesa_problem(ctx, "bad dimensions in _mesa_generate_mipmaps"); - return; - } + /* Note, 0 indicates default row strides */ + _mesa_generate_mipmap_level(target, datatype, comps, border, + srcWidth, srcHeight, srcDepth, 0, srcData, + dstWidth, dstHeight, dstDepth, 0, dstData); if (dstImage->IsCompressed) { GLubyte *temp; diff --git a/src/mesa/main/mipmap.h b/src/mesa/main/mipmap.h index 46e16902c86..44ecdddb277 100644 --- a/src/mesa/main/mipmap.h +++ b/src/mesa/main/mipmap.h @@ -28,6 +28,19 @@ #include "mtypes.h" + +extern void +_mesa_generate_mipmap_level(GLenum target, + GLenum datatype, GLuint comps, + GLint border, + GLint srcWidth, GLint srcHeight, GLint srcDepth, + GLint srcRowStride, + const GLubyte *srcData, + GLint dstWidth, GLint dstHeight, GLint dstDepth, + GLint dstRowStride, + GLubyte *dstData); + + extern void _mesa_generate_mipmap(GLcontext *ctx, GLenum target, struct gl_texture_object *texObj); diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c index acc268e622d..88fbd8f07c2 100644 --- a/src/mesa/main/texformat.c +++ b/src/mesa/main/texformat.c @@ -1569,3 +1569,174 @@ _mesa_choose_tex_format( GLcontext *ctx, GLint internalFormat, _mesa_problem(ctx, "unexpected format in _mesa_choose_tex_format()"); return NULL; } + + + +/** + * Return datatype and number of components per texel for the + * given gl_texture_format. + */ +void +_mesa_format_to_type_and_comps(const struct gl_texture_format *format, + GLenum *datatype, GLuint *comps) +{ + switch (format->MesaFormat) { + case MESA_FORMAT_RGBA8888: + case MESA_FORMAT_RGBA8888_REV: + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_ARGB8888_REV: + *datatype = CHAN_TYPE; + *comps = 4; + return; + case MESA_FORMAT_RGB888: + case MESA_FORMAT_BGR888: + *datatype = GL_UNSIGNED_BYTE; + *comps = 3; + return; + case MESA_FORMAT_RGB565: + case MESA_FORMAT_RGB565_REV: + *datatype = GL_UNSIGNED_SHORT_5_6_5; + *comps = 3; + return; + + case MESA_FORMAT_ARGB4444: + case MESA_FORMAT_ARGB4444_REV: + *datatype = GL_UNSIGNED_SHORT_4_4_4_4; + *comps = 4; + return; + + case MESA_FORMAT_ARGB1555: + case MESA_FORMAT_ARGB1555_REV: + *datatype = GL_UNSIGNED_SHORT_1_5_5_5_REV; + *comps = 3; + return; + + case MESA_FORMAT_AL88: + case MESA_FORMAT_AL88_REV: + *datatype = GL_UNSIGNED_BYTE; + *comps = 2; + return; + case MESA_FORMAT_RGB332: + *datatype = GL_UNSIGNED_BYTE_3_3_2; + *comps = 3; + return; + + case MESA_FORMAT_A8: + case MESA_FORMAT_L8: + case MESA_FORMAT_I8: + case MESA_FORMAT_CI8: + *datatype = GL_UNSIGNED_BYTE; + *comps = 1; + return; + + case MESA_FORMAT_YCBCR: + case MESA_FORMAT_YCBCR_REV: + *datatype = GL_UNSIGNED_SHORT; + *comps = 2; + return; + + case MESA_FORMAT_Z24_S8: + *datatype = GL_UNSIGNED_INT; + *comps = 1; /* XXX OK? */ + return; + + case MESA_FORMAT_Z16: + *datatype = GL_UNSIGNED_SHORT; + *comps = 1; + return; + + case MESA_FORMAT_Z32: + *datatype = GL_UNSIGNED_INT; + *comps = 1; + return; + + case MESA_FORMAT_SRGB8: + *datatype = GL_UNSIGNED_BYTE; + *comps = 3; + return; + case MESA_FORMAT_SRGBA8: + *datatype = GL_UNSIGNED_BYTE; + *comps = 4; + return; + case MESA_FORMAT_SL8: + *datatype = GL_UNSIGNED_BYTE; + *comps = 1; + return; + case MESA_FORMAT_SLA8: + *datatype = GL_UNSIGNED_BYTE; + *comps = 2; + return; + + case MESA_FORMAT_RGB_FXT1: + case MESA_FORMAT_RGBA_FXT1: + case MESA_FORMAT_RGB_DXT1: + case MESA_FORMAT_RGBA_DXT1: + case MESA_FORMAT_RGBA_DXT3: + case MESA_FORMAT_RGBA_DXT5: + /* XXX generate error instead? */ + *datatype = GL_UNSIGNED_BYTE; + *comps = 0; + return; + + case MESA_FORMAT_RGBA: + *datatype = CHAN_TYPE; + *comps = 4; + return; + case MESA_FORMAT_RGB: + *datatype = CHAN_TYPE; + *comps = 3; + return; + case MESA_FORMAT_LUMINANCE_ALPHA: + *datatype = CHAN_TYPE; + *comps = 2; + return; + case MESA_FORMAT_ALPHA: + case MESA_FORMAT_LUMINANCE: + case MESA_FORMAT_INTENSITY: + *datatype = CHAN_TYPE; + *comps = 1; + return; + + case MESA_FORMAT_RGBA_FLOAT32: + *datatype = GL_FLOAT; + *comps = 4; + return; + case MESA_FORMAT_RGBA_FLOAT16: + *datatype = GL_HALF_FLOAT_ARB; + *comps = 4; + return; + case MESA_FORMAT_RGB_FLOAT32: + *datatype = GL_FLOAT; + *comps = 3; + return; + case MESA_FORMAT_RGB_FLOAT16: + *datatype = GL_HALF_FLOAT_ARB; + *comps = 3; + return; + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32: + *datatype = GL_FLOAT; + *comps = 2; + return; + case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16: + *datatype = GL_HALF_FLOAT_ARB; + *comps = 2; + return; + case MESA_FORMAT_ALPHA_FLOAT32: + case MESA_FORMAT_LUMINANCE_FLOAT32: + case MESA_FORMAT_INTENSITY_FLOAT32: + *datatype = GL_FLOAT; + *comps = 1; + return; + case MESA_FORMAT_ALPHA_FLOAT16: + case MESA_FORMAT_LUMINANCE_FLOAT16: + case MESA_FORMAT_INTENSITY_FLOAT16: + *datatype = GL_HALF_FLOAT_ARB; + *comps = 1; + return; + + default: + _mesa_problem(NULL, "bad format in _mesa_format_to_type_and_comps"); + *datatype = 0; + *comps = 1; + } +} diff --git a/src/mesa/main/texformat.h b/src/mesa/main/texformat.h index 55851db7016..48f0fe99f27 100644 --- a/src/mesa/main/texformat.h +++ b/src/mesa/main/texformat.h @@ -239,4 +239,10 @@ extern const struct gl_texture_format * _mesa_choose_tex_format( GLcontext *ctx, GLint internalFormat, GLenum format, GLenum type ); + +extern void +_mesa_format_to_type_and_comps(const struct gl_texture_format *format, + GLenum *datatype, GLuint *comps); + + #endif diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index 09ec0d45533..5c96be92165 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -595,8 +595,12 @@ is_compressed_format(GLcontext *ctx, GLenum internalFormat) } -static GLuint -texture_face(GLenum target) +/** + * For cube map faces, return a face index in [0,5]. + * For other targets return 0; + */ +GLuint +_mesa_tex_target_to_face(GLenum target) { if (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB && target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB) @@ -625,6 +629,7 @@ _mesa_set_tex_image(struct gl_texture_object *tObj, { ASSERT(tObj); ASSERT(texImage); + /* XXX simplify this with _mesa_tex_target_to_face() */ switch (target) { case GL_TEXTURE_1D: case GL_TEXTURE_2D: @@ -828,6 +833,7 @@ _mesa_select_tex_image(GLcontext *ctx, const struct gl_texture_object *texObj, if (level < 0 || level >= MAX_TEXTURE_LEVELS) return NULL; + /* XXX simplify this with _mesa_tex_target_to_face() */ switch (target) { case GL_TEXTURE_1D: case GL_PROXY_TEXTURE_1D: @@ -2424,7 +2430,7 @@ _mesa_TexImage1D( GLenum target, GLint level, GLint internalFormat, struct gl_texture_unit *texUnit; struct gl_texture_object *texObj; struct gl_texture_image *texImage; - const GLuint face = texture_face(target); + const GLuint face = _mesa_tex_target_to_face(target); if (texture_error_check(ctx, target, level, internalFormat, format, type, 1, postConvWidth, 1, 1, border)) { @@ -2527,7 +2533,7 @@ _mesa_TexImage2D( GLenum target, GLint level, GLint internalFormat, struct gl_texture_unit *texUnit; struct gl_texture_object *texObj; struct gl_texture_image *texImage; - const GLuint face = texture_face(target); + const GLuint face = _mesa_tex_target_to_face(target); if (texture_error_check(ctx, target, level, internalFormat, format, type, 2, postConvWidth, postConvHeight, @@ -2629,7 +2635,7 @@ _mesa_TexImage3D( GLenum target, GLint level, GLint internalFormat, struct gl_texture_unit *texUnit; struct gl_texture_object *texObj; struct gl_texture_image *texImage; - const GLuint face = texture_face(target); + const GLuint face = _mesa_tex_target_to_face(target); if (texture_error_check(ctx, target, level, (GLint) internalFormat, format, type, 3, width, height, depth, border)) { @@ -2897,7 +2903,7 @@ _mesa_CopyTexImage1D( GLenum target, GLint level, struct gl_texture_object *texObj; struct gl_texture_image *texImage; GLsizei postConvWidth = width; - const GLuint face = texture_face(target); + const GLuint face = _mesa_tex_target_to_face(target); GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); @@ -2960,7 +2966,7 @@ _mesa_CopyTexImage2D( GLenum target, GLint level, GLenum internalFormat, struct gl_texture_object *texObj; struct gl_texture_image *texImage; GLsizei postConvWidth = width, postConvHeight = height; - const GLuint face = texture_face(target); + const GLuint face = _mesa_tex_target_to_face(target); GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END_AND_FLUSH(ctx); diff --git a/src/mesa/main/teximage.h b/src/mesa/main/teximage.h index f2cad7eb2da..b718c0046d8 100644 --- a/src/mesa/main/teximage.h +++ b/src/mesa/main/teximage.h @@ -107,6 +107,11 @@ _mesa_test_proxy_teximage(GLcontext *ctx, GLenum target, GLint level, GLint width, GLint height, GLint depth, GLint border); +extern GLuint +_mesa_tex_target_to_face(GLenum target); + + + /** * Lock a texture for updating. See also _mesa_lock_context_textures(). */ diff --git a/src/mesa/pipe/cell/ppu/cell_state_fs.c b/src/mesa/pipe/cell/ppu/cell_state_fs.c index 96a52273b01..3f46a87d189 100644 --- a/src/mesa/pipe/cell/ppu/cell_state_fs.c +++ b/src/mesa/pipe/cell/ppu/cell_state_fs.c @@ -67,7 +67,7 @@ cell_create_fs_state(struct pipe_context *pipe, #endif #ifdef MESA_LLVM - state->llvm_prog = gallivm_from_tgsi(state->shader.tokens, GALLIVM_FS); + state->llvm_prog = 0; if (!gallivm_global_cpu_engine()) { gallivm_cpu_engine_create(state->llvm_prog); } diff --git a/src/mesa/pipe/draw/draw_vertex_shader.c b/src/mesa/pipe/draw/draw_vertex_shader.c index 5ca93aa6152..9c31df1e3e1 100644 --- a/src/mesa/pipe/draw/draw_vertex_shader.c +++ b/src/mesa/pipe/draw/draw_vertex_shader.c @@ -113,7 +113,15 @@ run_vertex_program(struct draw_context *draw, draw->vertex_fetch.fetch_func( draw, machine, elts, count ); /* run shader */ -#if defined(__i386__) || defined(__386__) +#ifdef MESA_LLVM + if (1) { + struct gallivm_prog *prog = draw->vertex_shader->llvm_prog; + gallivm_cpu_vs_exec(prog, + machine->Inputs, + machine->Outputs, + machine->Consts); + } else +#elif defined(__i386__) || defined(__386__) if (draw->use_sse) { /* SSE */ /* cast away const */ @@ -212,13 +220,7 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) */ draw_update_vertex_fetch( draw ); -// debug_printf( " q(%d) ", draw->vs.queue_nr ); -#ifdef MESA_LLVM - if (draw->vertex_shader->llvm_prog) { - draw_vertex_shader_queue_flush_llvm(draw); - return; - } -#endif +// fprintf(stderr, " q(%d) ", draw->vs.queue_nr ); /* run vertex shader on vertex cache entries, four per invokation */ for (i = 0; i < draw->vs.queue_nr; i += 4) { @@ -260,7 +262,13 @@ draw_create_vertex_shader(struct draw_context *draw, vs->state = shader; #ifdef MESA_LLVM - vs->llvm_prog = gallivm_from_tgsi(shader->tokens, GALLIVM_VS); + struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS); + gallivm_ir_set_layout(ir, GALLIVM_SOA); + gallivm_ir_set_components(ir, 4); + gallivm_ir_fill_from_tgsi(ir, shader->tokens); + vs->llvm_prog = gallivm_ir_compile(ir); + gallivm_ir_delete(ir); + draw->engine = gallivm_global_cpu_engine(); if (!draw->engine) { draw->engine = gallivm_cpu_engine_create(vs->llvm_prog); @@ -296,7 +304,7 @@ draw_bind_vertex_shader(struct draw_context *draw, draw->vertex_shader = dvs; draw->num_vs_outputs = dvs->state->num_outputs; - /* specify the fragment program to interpret/execute */ + /* specify the vertex program to interpret/execute */ tgsi_exec_machine_init(&draw->machine, draw->vertex_shader->state->tokens, PIPE_MAX_SAMPLERS, diff --git a/src/mesa/pipe/draw/draw_vertex_shader_llvm.c b/src/mesa/pipe/draw/draw_vertex_shader_llvm.c deleted file mode 100644 index 63551c993e7..00000000000 --- a/src/mesa/pipe/draw/draw_vertex_shader_llvm.c +++ /dev/null @@ -1,194 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin [email protected] - */ - -#include "pipe/p_util.h" -#include "draw_private.h" -#include "draw_context.h" - -#ifdef MESA_LLVM - -#include "pipe/llvm/gallivm.h" -#include "pipe/p_shader_tokens.h" - -#define DBG 0 - -static INLINE void -fetch_attrib4(const void *ptr, enum pipe_format format, float attrib[4]) -{ - /* defaults */ - attrib[1] = 0.0; - attrib[2] = 0.0; - attrib[3] = 1.0; - switch (format) { - case PIPE_FORMAT_R32G32B32A32_FLOAT: - attrib[3] = ((float *) ptr)[3]; - /* fall-through */ - case PIPE_FORMAT_R32G32B32_FLOAT: - attrib[2] = ((float *) ptr)[2]; - /* fall-through */ - case PIPE_FORMAT_R32G32_FLOAT: - attrib[1] = ((float *) ptr)[1]; - /* fall-through */ - case PIPE_FORMAT_R32_FLOAT: - attrib[0] = ((float *) ptr)[0]; - break; - default: - assert(0); - } -} - - -/** - * Fetch vertex attributes for 'count' vertices. - */ -static INLINE -void vertex_fetch(struct draw_context *draw, - const unsigned elt, - float (*inputs)[4]) -{ - uint attr; - - /* loop over vertex attributes (vertex shader inputs) */ - for (attr = 0; attr < draw->vertex_shader->state->num_inputs; attr++) { - - unsigned buf = draw->vertex_element[attr].vertex_buffer_index; - const void *src - = (const void *) ((const ubyte *) draw->user.vbuffer[buf] - + draw->vertex_buffer[buf].buffer_offset - + draw->vertex_element[attr].src_offset - + elt * draw->vertex_buffer[buf].pitch); - fetch_attrib4(src, draw->vertex_element[attr].src_format, inputs[attr]); - } -} - -static INLINE unsigned -compute_clipmask(const float *clip, const float (*plane)[4], unsigned nr) -{ - unsigned mask = 0; - unsigned i; - - for (i = 0; i < nr; i++) { - if (dot4(clip, plane[i]) < 0) - mask |= (1<<i); - } - - return mask; -} - - -/** - * Called by the draw module when the vertx cache needs to be flushed. - * This involves running the vertex shader. - */ -void draw_vertex_shader_queue_flush_llvm(struct draw_context *draw) -{ - unsigned i; - - struct vertex_header *dests[VS_QUEUE_LENGTH]; - float inputs[VS_QUEUE_LENGTH][PIPE_MAX_SHADER_INPUTS][4] ALIGN16_ATTRIB; - float outputs[VS_QUEUE_LENGTH][PIPE_MAX_SHADER_INPUTS][4] ALIGN16_ATTRIB; - float (*consts)[4] = (float (*)[4]) draw->user.constants; - struct gallivm_prog *prog = draw->vertex_shader->llvm_prog; - const float *scale = draw->viewport.scale; - const float *trans = draw->viewport.translate; - /* fetch the inputs */ - for (i = 0; i < draw->vs.queue_nr; ++i) { - unsigned elt = draw->vs.queue[i].elt; - dests[i] = draw->vs.queue[i].dest; - vertex_fetch(draw, elt, inputs[i]); - } - - /* batch execute the shaders on all the vertices */ - gallivm_prog_exec(prog, inputs, outputs, consts, - draw->vs.queue_nr, - draw->vertex_shader->state->num_inputs, - draw->vertex_shader->state->num_outputs); - - - /* store machine results */ - for (int i = 0; i < draw->vs.queue_nr; ++i) { - unsigned slot; - float x, y, z, w; - struct vertex_header *vOut = draw->vs.queue[i].dest; - float (*dests)[4] = outputs[i]; - - /* Handle attr[0] (position) specially: - * - * XXX: Computing the clipmask should be done in the vertex - * program as a set of DP4 instructions appended to the - * user-provided code. - */ - x = vOut->clip[0] = dests[0][0]; - y = vOut->clip[1] = dests[0][1]; - z = vOut->clip[2] = dests[0][2]; - w = vOut->clip[3] = dests[0][3]; -#if DBG - debug_printf("output %d: %f %f %f %f\n", 0, x, y, z, w); -#endif - - vOut->clipmask = compute_clipmask(vOut->clip, draw->plane, draw->nr_planes); - vOut->edgeflag = 1; - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - - /* Viewport mapping */ - vOut->data[0][0] = x * scale[0] + trans[0]; - vOut->data[0][1] = y * scale[1] + trans[1]; - vOut->data[0][2] = z * scale[2] + trans[2]; - vOut->data[0][3] = w; - - /* Remaining attributes are packed into sequential post-transform - * vertex attrib slots. - */ - for (slot = 1; slot < draw->num_vs_outputs; slot++) { - vOut->data[slot][0] = dests[slot][0]; - vOut->data[slot][1] = dests[slot][1]; - vOut->data[slot][2] = dests[slot][2]; - vOut->data[slot][3] = dests[slot][3]; - -#if DBG - debug_printf("output %d: %f %f %f %f\n", slot, - vOut->data[slot][0], - vOut->data[slot][1], - vOut->data[slot][2], - vOut->data[slot][3]); -#endif - } - } /* loop over vertices */ - - draw->vs.queue_nr = 0; -} - -#endif /* MESA_LLVM */ diff --git a/src/mesa/pipe/draw/draw_wide_prims.c b/src/mesa/pipe/draw/draw_wide_prims.c index 9759e7e2e82..4c7e279b20f 100644 --- a/src/mesa/pipe/draw/draw_wide_prims.c +++ b/src/mesa/pipe/draw/draw_wide_prims.c @@ -76,7 +76,6 @@ static void passthrough_tri( struct draw_stage *stage, /** * Draw a wide line by drawing a quad (two triangles). - * XXX still need line stipple. * XXX need to disable polygon stipple. */ static void wide_line( struct draw_stage *stage, @@ -103,12 +102,9 @@ static void wide_line( struct draw_stage *stage, /* * Draw wide line as a quad (two tris) by "stretching" the line along * X or Y. - * XXX For AA lines, the quad corners have to be computed in a - * more sophisticated way. + * We need to tweak coords in several ways to be conformant here. */ - /* need to tweak coords in several ways to be conformant here */ - if (dx > dy) { /* x-major line */ pos0[1] = pos0[1] - half_width - 0.25f; @@ -166,6 +162,70 @@ static void wide_line( struct draw_stage *stage, /** + * Draw a wide line by drawing a quad, using geometry which will + * fullfill GL's antialiased line requirements. + */ +static void wide_line_aa(struct draw_stage *stage, + struct prim_header *header) +{ + const struct wide_stage *wide = wide_stage(stage); + const float half_width = wide->half_line_width; + struct prim_header tri; + struct vertex_header *v[4]; + float *pos; + float dx = header->v[1]->data[0][0] - header->v[0]->data[0][0]; + float dy = header->v[1]->data[0][1] - header->v[0]->data[0][1]; + const float len = sqrt(dx * dx + dy * dy); + uint i; + + dx = dx * half_width / len; + dy = dy * half_width / len; + + /* allocate/dup new verts */ + for (i = 0; i < 4; i++) { + v[i] = dup_vert(stage, header->v[i/2], i); + } + + /* + * Quad for line from v0 to v1: + * + * 1 3 + * +-------------------------+ + * | | + * *v0 v1* + * | | + * +-------------------------+ + * 0 2 + */ + + pos = v[0]->data[0]; + pos[0] += dy; + pos[1] -= dx; + + pos = v[1]->data[0]; + pos[0] -= dy; + pos[1] += dx; + + pos = v[2]->data[0]; + pos[0] += dy; + pos[1] -= dx; + + pos = v[3]->data[0]; + pos[0] -= dy; + pos[1] += dx; + + tri.det = header->det; /* only the sign matters */ + + tri.v[0] = v[2]; tri.v[1] = v[1]; tri.v[2] = v[0]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[3]; tri.v[1] = v[1]; tri.v[2] = v[2]; + stage->next->tri( stage->next, &tri ); + +} + + +/** * Set the vertex texcoords for sprite mode. * Coords may be left untouched or set to a right-side-up or upside-down * orientation. @@ -319,7 +379,10 @@ static void wide_first_line( struct draw_stage *stage, wide->half_line_width = 0.5f * draw->rasterizer->line_width; if (draw->rasterizer->line_width != 1.0) { - wide->stage.line = wide_line; + if (draw->rasterizer->line_smooth) + wide->stage.line = wide_line_aa; + else + wide->stage.line = wide_line; } else { wide->stage.line = passthrough_line; diff --git a/src/mesa/pipe/i915simple/i915_state.c b/src/mesa/pipe/i915simple/i915_state.c index 950ea52d601..abd5571b885 100644 --- a/src/mesa/pipe/i915simple/i915_state.c +++ b/src/mesa/pipe/i915simple/i915_state.c @@ -250,6 +250,13 @@ i915_create_sampler_state(struct pipe_context *pipe, if (sampler->normalized_coords) cso->state[1] |= SS3_NORMALIZED_COORDS; + if (0) /* XXX not tested yet */ + { + int minlod = (int) (16.0 * sampler->min_lod); + minlod = CLAMP(minlod, 0, 16 * 11); + cso->state[1] |= (minlod << SS3_MIN_LOD_SHIFT); + } + { ubyte r = float_to_ubyte(sampler->border_color[0]); ubyte g = float_to_ubyte(sampler->border_color[1]); diff --git a/src/mesa/pipe/i915simple/i915_state_sampler.c b/src/mesa/pipe/i915simple/i915_state_sampler.c index 0dbbc5241d5..9c1a5bbbd65 100644 --- a/src/mesa/pipe/i915simple/i915_state_sampler.c +++ b/src/mesa/pipe/i915simple/i915_state_sampler.c @@ -185,7 +185,7 @@ i915_update_texture(struct i915_context *i915, uint unit, const struct pipe_texture *pt = &tex->base; uint format, pitch; const uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; - const uint num_levels = pt->last_level - pt->first_level; + const uint num_levels = pt->last_level; assert(tex); assert(width); diff --git a/src/mesa/pipe/i915simple/i915_texture.c b/src/mesa/pipe/i915simple/i915_texture.c index 6faeab134ab..6d37ae3d747 100644 --- a/src/mesa/pipe/i915simple/i915_texture.c +++ b/src/mesa/pipe/i915simple/i915_texture.c @@ -118,11 +118,11 @@ i945_miptree_layout_2d( struct i915_texture *tex ) tex->pitch = pt->width[0]; /* May need to adjust pitch to accomodate the placement of - * the 2nd mipmap. This occurs when the alignment + * the 2nd mipmap level. This occurs when the alignment * constraints of mipmap placement push the right edge of the - * 2nd mipmap out past the width of its parent. + * 2nd mipmap level out past the width of its parent. */ - if (pt->first_level != pt->last_level) { + if (pt->last_level > 0) { unsigned mip1_width = align_int(minify(pt->width[0]), align_w) + minify(minify(pt->width[0])); @@ -136,7 +136,7 @@ i945_miptree_layout_2d( struct i915_texture *tex ) tex->pitch = align_int(tex->pitch * pt->cpp, 4) / pt->cpp; tex->total_height = 0; - for ( level = pt->first_level ; level <= pt->last_level ; level++ ) { + for (level = 0; level <= pt->last_level; level++) { unsigned img_height; i915_miptree_set_level_info(tex, level, 1, x, y, width, height, 1); @@ -152,9 +152,9 @@ i945_miptree_layout_2d( struct i915_texture *tex ) */ tex->total_height = MAX2(tex->total_height, y + img_height); - /* Layout_below: step right after second mipmap. + /* Layout_below: step right after second mipmap level. */ - if (level == pt->first_level + 1) { + if (level == 1) { x += align_int(width, align_w); } else { @@ -204,7 +204,7 @@ i915_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) tex->pitch = ((dim * pt->cpp * 2 + 3) & ~3) / pt->cpp; tex->total_height = dim * 4; - for (level = pt->first_level; level <= pt->last_level; level++) { + for (level = 0; level <= pt->last_level; level++) { i915_miptree_set_level_info(tex, level, 6, 0, 0, /*OLD: tex->pitch, tex->total_height,*/ @@ -219,7 +219,7 @@ i915_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) unsigned y = initial_offsets[face][1] * dim; unsigned d = dim; - for (level = pt->first_level; level <= pt->last_level; level++) { + for (level = 0; level <= pt->last_level; level++) { i915_miptree_set_image_offset(tex, level, face, x, y); d >>= 1; x += step_offsets[face][0] * d; @@ -240,7 +240,7 @@ i915_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) /* XXX: hardware expects/requires 9 levels at minimum. */ - for (level = pt->first_level; level <= MAX2(8, pt->last_level); + for (level = 0; level <= MAX2(8, pt->last_level); level++) { i915_miptree_set_level_info(tex, level, depth, 0, tex->total_height, width, height, depth); @@ -256,7 +256,7 @@ i915_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) /* Fixup depth image_offsets: */ depth = pt->depth[0]; - for (level = pt->first_level; level <= pt->last_level; level++) { + for (level = 0; level <= pt->last_level; level++) { unsigned i; for (i = 0; i < depth; i++) i915_miptree_set_image_offset(tex, level, i, @@ -282,7 +282,7 @@ i915_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) tex->pitch = ((pt->width[0] * pt->cpp + 3) & ~3) / pt->cpp; tex->total_height = 0; - for (level = pt->first_level; level <= pt->last_level; level++) { + for (level = 0; level <= pt->last_level; level++) { i915_miptree_set_level_info(tex, level, 1, 0, tex->total_height, width, height, 1); @@ -337,7 +337,7 @@ i945_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) /* Set all the levels to effectively occupy the whole rectangular region. */ - for (level = pt->first_level; level <= pt->last_level; level++) { + for (level = 0; level <= pt->last_level; level++) { i915_miptree_set_level_info(tex, level, 6, 0, 0, lvlWidth, lvlHeight, 1); @@ -355,12 +355,12 @@ i945_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) y = tex->total_height - 4; x = (face - 4) * 8; } - else if (dim < 4 && (face > 0 || pt->first_level > 0)) { + else if (dim < 4 && (face > 0)) { y = tex->total_height - 4; x = face * 8; } - for (level = pt->first_level; level <= pt->last_level; level++) { + for (level = 0; level <= pt->last_level; level++) { i915_miptree_set_image_offset(tex, level, face, x, y); d >>= 1; @@ -418,7 +418,7 @@ i945_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) pack_x_pitch = tex->pitch; pack_x_nr = 1; - for (level = pt->first_level; level <= pt->last_level; level++) { + for (level = 0; level <= pt->last_level; level++) { unsigned nr_images = pt->target == PIPE_TEXTURE_3D ? depth : 6; int x = 0; int y = 0; diff --git a/src/mesa/pipe/i965simple/brw_tex_layout.c b/src/mesa/pipe/i965simple/brw_tex_layout.c index 405fd1f7949..90561f1307d 100644 --- a/src/mesa/pipe/i965simple/brw_tex_layout.c +++ b/src/mesa/pipe/i965simple/brw_tex_layout.c @@ -146,7 +146,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex) * constraints of mipmap placement push the right edge of the * 2nd mipmap out past the width of its parent. */ - if (pt->first_level != pt->last_level) { + if (pt->last_level > 0) { unsigned mip1_width; if (pt->compressed) { @@ -168,7 +168,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex) tex->pitch = align(tex->pitch * pt->cpp, 4) / pt->cpp; tex->total_height = 0; - for ( level = pt->first_level ; level <= pt->last_level ; level++ ) { + for (level = 0; level <= pt->last_level; level++) { unsigned img_height; intel_miptree_set_level_info(tex, level, 1, x, y, width, @@ -187,7 +187,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex) /* Layout_below: step right after second mipmap. */ - if (level == pt->first_level + 1) { + if (level == 1) { x += align(width, align_w); } else { @@ -234,7 +234,7 @@ static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture pack_x_pitch = tex->pitch; pack_x_nr = 1; - for ( level = pt->first_level ; level <= pt->last_level ; level++ ) { + for (level = 0; level <= pt->last_level; level++) { unsigned nr_images = pt->target == PIPE_TEXTURE_3D ? depth : 6; int x = 0; int y = 0; diff --git a/src/mesa/pipe/i965simple/brw_wm_surface_state.c b/src/mesa/pipe/i965simple/brw_wm_surface_state.c index cbb4f2efd3b..d16d919bce9 100644 --- a/src/mesa/pipe/i965simple/brw_wm_surface_state.c +++ b/src/mesa/pipe/i965simple/brw_wm_surface_state.c @@ -154,7 +154,7 @@ void brw_update_texture_surface( struct brw_context *brw, /* Updated in emit_reloc */ surf.ss1.base_addr = brw_buffer_offset( brw, tObj->buffer ); - surf.ss2.mip_count = tObj->base.last_level - tObj->base.first_level; + surf.ss2.mip_count = tObj->base.last_level; surf.ss2.width = tObj->base.width[0] - 1; surf.ss2.height = tObj->base.height[0] - 1; diff --git a/src/mesa/pipe/llvm/Makefile b/src/mesa/pipe/llvm/Makefile index f655fb8340a..a0494ba966e 100644 --- a/src/mesa/pipe/llvm/Makefile +++ b/src/mesa/pipe/llvm/Makefile @@ -7,8 +7,13 @@ LIBNAME = gallivm GALLIVM_SOURCES = \ gallivm.cpp \ + gallivm_cpu.cpp \ instructions.cpp \ - storage.cpp + loweringpass.cpp \ + tgsitollvm.cpp \ + storage.cpp \ + storagesoa.cpp \ + instructionssoa.cpp INC_SOURCES = gallivm_builtins.cpp llvm_base_shader.cpp diff --git a/src/mesa/pipe/llvm/gallivm.cpp b/src/mesa/pipe/llvm/gallivm.cpp index afa14468902..b99dc6db5b8 100644 --- a/src/mesa/pipe/llvm/gallivm.cpp +++ b/src/mesa/pipe/llvm/gallivm.cpp @@ -32,16 +32,17 @@ #ifdef MESA_LLVM #include "gallivm.h" +#include "gallivm_p.h" #include "instructions.h" +#include "loweringpass.h" #include "storage.h" +#include "tgsitollvm.h" #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "pipe/tgsi/util/tgsi_parse.h" + #include "pipe/tgsi/exec/tgsi_exec.h" -#include "pipe/tgsi/util/tgsi_util.h" -#include "pipe/tgsi/util/tgsi_build.h" #include "pipe/tgsi/util/tgsi_dump.h" #include <llvm/Module.h> @@ -63,38 +64,20 @@ #include <llvm/Analysis/LoopPass.h> #include <llvm/Target/TargetData.h> #include <llvm/Bitcode/ReaderWriter.h> +#include <llvm/Transforms/Utils/Cloning.h> #include <sstream> #include <fstream> #include <iostream> -struct gallivm_interpolate { - int attrib; - int chan; - int type; -}; - -struct gallivm_prog { - llvm::Module *module; - void *function; - int num_consts; - int id; - enum gallivm_shader_type type; - - struct gallivm_interpolate interpolators[32*4]; //FIXME: this might not be enough for some shaders - int num_interp; -}; - -struct gallivm_cpu_engine { - llvm::ExecutionEngine *engine; -}; +static int GLOBAL_ID = 0; using namespace llvm; -#include "llvm_base_shader.cpp" - -static int GLOBAL_ID = 0; -static inline void AddStandardCompilePasses(PassManager &PM) { +static inline +void AddStandardCompilePasses(PassManager &PM) +{ + PM.add(new LoweringPass()); PM.add(createVerifierPass()); // Verify that input is correct PM.add(createLowerSetJmpPass()); // Lower llvm.setjmp/.longjmp @@ -150,721 +133,14 @@ static inline void AddStandardCompilePasses(PassManager &PM) { PM.add(createConstantMergePass()); // Merge dup global constants } -static inline void -add_interpolator(struct gallivm_prog *prog, - struct gallivm_interpolate *interp) -{ - prog->interpolators[prog->num_interp] = *interp; - ++prog->num_interp; -} - -static void -translate_declaration(struct gallivm_prog *prog, - llvm::Module *module, - Storage *storage, - struct tgsi_full_declaration *decl, - struct tgsi_full_declaration *fd) -{ - if (decl->Declaration.File == TGSI_FILE_INPUT) { - unsigned first, last, mask; - uint interp_method; - - assert(decl->Declaration.Declare == TGSI_DECLARE_RANGE); - - first = decl->u.DeclarationRange.First; - last = decl->u.DeclarationRange.Last; - mask = decl->Declaration.UsageMask; - - /* Do not touch WPOS.xy */ - if (first == 0) { - mask &= ~TGSI_WRITEMASK_XY; - if (mask == TGSI_WRITEMASK_NONE) { - first++; - if (first > last) { - return; - } - } - } - - interp_method = decl->Interpolation.Interpolate; - - if (mask == TGSI_WRITEMASK_XYZW) { - unsigned i, j; - - for (i = first; i <= last; i++) { - for (j = 0; j < NUM_CHANNELS; j++) { - //interp( mach, i, j ); - struct gallivm_interpolate interp; - interp.type = interp_method; - interp.attrib = i; - interp.chan = j; - add_interpolator(prog, &interp); - } - } - } else { - unsigned i, j; - for( j = 0; j < NUM_CHANNELS; j++ ) { - if( mask & (1 << j) ) { - for( i = first; i <= last; i++ ) { - struct gallivm_interpolate interp; - interp.type = interp_method; - interp.attrib = i; - interp.chan = j; - add_interpolator(prog, &interp); - } - } - } - } - } -} - - -static void -translate_immediate(Storage *storage, - struct tgsi_full_immediate *imm) -{ - float vec[4]; - int i; - for (i = 0; i < imm->Immediate.Size - 1; ++i) { - switch( imm->Immediate.DataType ) { - case TGSI_IMM_FLOAT32: - vec[i] = imm->u.ImmediateFloat32[i].Float; - break; - default: - assert( 0 ); - } - } - storage->addImmediate(vec); -} - -static inline llvm::Value * -swizzleVector(llvm::Value *val, struct tgsi_full_src_register *src, - Storage *storage) -{ - int swizzle = 0; - int start = 1000; - const int NO_SWIZZLE = TGSI_SWIZZLE_X * 1000 + TGSI_SWIZZLE_Y * 100 + - TGSI_SWIZZLE_Z * 10 + TGSI_SWIZZLE_W; - for (int k = 0; k < 4; ++k) { - swizzle += tgsi_util_get_full_src_register_extswizzle(src, k) * start; - start /= 10; - } - if (swizzle != NO_SWIZZLE) { - /*fprintf(stderr, "XXXXXXXX swizzle = %d\n", swizzle);*/ - val = storage->shuffleVector(val, swizzle); - } - return val; -} - -static void -translate_instruction(llvm::Module *module, - Storage *storage, - Instructions *instr, - struct tgsi_full_instruction *inst, - struct tgsi_full_instruction *fi, - unsigned instno) -{ - llvm::Value *inputs[4]; - inputs[0] = 0; - inputs[1] = 0; - inputs[2] = 0; - inputs[3] = 0; - - for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { - struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; - llvm::Value *val = 0; - llvm::Value *indIdx = 0; - - if (src->SrcRegister.Indirect) { - indIdx = storage->addrElement(src->SrcRegisterInd.Index); - indIdx = storage->extractIndex(indIdx); - } - if (src->SrcRegister.File == TGSI_FILE_CONSTANT) { - val = storage->constElement(src->SrcRegister.Index, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_INPUT) { - val = storage->inputElement(src->SrcRegister.Index, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) { - val = storage->tempElement(src->SrcRegister.Index); - } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) { - val = storage->outputElement(src->SrcRegister.Index, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { - val = storage->immediateElement(src->SrcRegister.Index); - } else { - fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File); - return; - } - - inputs[i] = swizzleVector(val, src, storage); - } - - /*if (inputs[0]) - instr->printVector(inputs[0]); - if (inputs[1]) - instr->printVector(inputs[1]);*/ - llvm::Value *out = 0; - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ARL: { - out = instr->arl(inputs[0]); - } - break; - case TGSI_OPCODE_MOV: { - out = inputs[0]; - } - break; - case TGSI_OPCODE_LIT: { - out = instr->lit(inputs[0]); - } - break; - case TGSI_OPCODE_RCP: { - out = instr->rcp(inputs[0]); - } - break; - case TGSI_OPCODE_RSQ: { - out = instr->rsq(inputs[0]); - } - break; - case TGSI_OPCODE_EXP: - break; - case TGSI_OPCODE_LOG: - break; - case TGSI_OPCODE_MUL: { - out = instr->mul(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_ADD: { - out = instr->add(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_DP3: { - out = instr->dp3(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_DP4: { - out = instr->dp4(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_DST: { - out = instr->dst(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_MIN: { - out = instr->min(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_MAX: { - out = instr->max(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SLT: { - out = instr->slt(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SGE: { - out = instr->sge(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_MAD: { - out = instr->madd(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_SUB: { - out = instr->sub(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_LERP: { - out = instr->lerp(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_CND: - break; - case TGSI_OPCODE_CND0: - break; - case TGSI_OPCODE_DOT2ADD: - break; - case TGSI_OPCODE_INDEX: - break; - case TGSI_OPCODE_NEGATE: - break; - case TGSI_OPCODE_FRAC: { - out = instr->frc(inputs[0]); - } - break; - case TGSI_OPCODE_CLAMP: - break; - case TGSI_OPCODE_FLOOR: { - out = instr->floor(inputs[0]); - } - break; - case TGSI_OPCODE_ROUND: - break; - case TGSI_OPCODE_EXPBASE2: { - out = instr->ex2(inputs[0]); - } - break; - case TGSI_OPCODE_LOGBASE2: { - out = instr->lg2(inputs[0]); - } - break; - case TGSI_OPCODE_POWER: { - out = instr->pow(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_CROSSPRODUCT: { - out = instr->cross(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_MULTIPLYMATRIX: - break; - case TGSI_OPCODE_ABS: { - out = instr->abs(inputs[0]); - } - break; - case TGSI_OPCODE_RCC: - break; - case TGSI_OPCODE_DPH: { - out = instr->dph(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_COS: { - out = instr->cos(inputs[0]); - } - break; - case TGSI_OPCODE_DDX: - break; - case TGSI_OPCODE_DDY: - break; - case TGSI_OPCODE_KILP: { - out = instr->kilp(inputs[0]); - storage->setKilElement(out); - return; - } - break; - case TGSI_OPCODE_PK2H: - break; - case TGSI_OPCODE_PK2US: - break; - case TGSI_OPCODE_PK4B: - break; - case TGSI_OPCODE_PK4UB: - break; - case TGSI_OPCODE_RFL: - break; - case TGSI_OPCODE_SEQ: - break; - case TGSI_OPCODE_SFL: - break; - case TGSI_OPCODE_SGT: { - out = instr->sgt(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SIN: { - out = instr->sin(inputs[0]); - } - break; - case TGSI_OPCODE_SLE: - break; - case TGSI_OPCODE_SNE: - break; - case TGSI_OPCODE_STR: - break; - case TGSI_OPCODE_TEX: - break; - case TGSI_OPCODE_TXD: - break; - case TGSI_OPCODE_UP2H: - break; - case TGSI_OPCODE_UP2US: - break; - case TGSI_OPCODE_UP4B: - break; - case TGSI_OPCODE_UP4UB: - break; - case TGSI_OPCODE_X2D: - break; - case TGSI_OPCODE_ARA: - break; - case TGSI_OPCODE_ARR: - break; - case TGSI_OPCODE_BRA: - break; - case TGSI_OPCODE_CAL: { - instr->cal(inst->InstructionExtLabel.Label, storage->inputPtr()); - return; - } - break; - case TGSI_OPCODE_RET: { - instr->end(); - return; - } - break; - case TGSI_OPCODE_SSG: - break; - case TGSI_OPCODE_CMP: { - out = instr->cmp(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_SCS: { - out = instr->scs(inputs[0]); - } - break; - case TGSI_OPCODE_TXB: - break; - case TGSI_OPCODE_NRM: - break; - case TGSI_OPCODE_DIV: - break; - case TGSI_OPCODE_DP2: - break; - case TGSI_OPCODE_TXL: - break; - case TGSI_OPCODE_BRK: { - instr->brk(); - return; - } - break; - case TGSI_OPCODE_IF: { - instr->ifop(inputs[0]); - storage->setCurrentBlock(instr->currentBlock()); - return; //just update the state - } - break; - case TGSI_OPCODE_LOOP: - break; - case TGSI_OPCODE_REP: - break; - case TGSI_OPCODE_ELSE: { - instr->elseop(); - storage->setCurrentBlock(instr->currentBlock()); - return; //only state update - } - break; - case TGSI_OPCODE_ENDIF: { - instr->endif(); - storage->setCurrentBlock(instr->currentBlock()); - return; //just update the state - } - break; - case TGSI_OPCODE_ENDLOOP: - break; - case TGSI_OPCODE_ENDREP: - break; - case TGSI_OPCODE_PUSHA: - break; - case TGSI_OPCODE_POPA: - break; - case TGSI_OPCODE_CEIL: - break; - case TGSI_OPCODE_I2F: - break; - case TGSI_OPCODE_NOT: - break; - case TGSI_OPCODE_TRUNC: { - out = instr->trunc(inputs[0]); - } - break; - case TGSI_OPCODE_SHL: - break; - case TGSI_OPCODE_SHR: - break; - case TGSI_OPCODE_AND: - break; - case TGSI_OPCODE_OR: - break; - case TGSI_OPCODE_MOD: - break; - case TGSI_OPCODE_XOR: - break; - case TGSI_OPCODE_SAD: - break; - case TGSI_OPCODE_TXF: - break; - case TGSI_OPCODE_TXQ: - break; - case TGSI_OPCODE_CONT: - break; - case TGSI_OPCODE_EMIT: - break; - case TGSI_OPCODE_ENDPRIM: - break; - case TGSI_OPCODE_BGNLOOP2: { - instr->beginLoop(); - storage->setCurrentBlock(instr->currentBlock()); - return; - } - break; - case TGSI_OPCODE_BGNSUB: { - instr->bgnSub(instno); - storage->setCurrentBlock(instr->currentBlock()); - storage->pushTemps(); - return; - } - break; - case TGSI_OPCODE_ENDLOOP2: { - instr->endLoop(); - storage->setCurrentBlock(instr->currentBlock()); - return; - } - break; - case TGSI_OPCODE_ENDSUB: { - instr->endSub(); - storage->setCurrentBlock(instr->currentBlock()); - storage->popArguments(); - storage->popTemps(); - return; - } - break; - case TGSI_OPCODE_NOISE1: - break; - case TGSI_OPCODE_NOISE2: - break; - case TGSI_OPCODE_NOISE3: - break; - case TGSI_OPCODE_NOISE4: - break; - case TGSI_OPCODE_NOP: - break; - case TGSI_OPCODE_TEXBEM: - break; - case TGSI_OPCODE_TEXBEML: - break; - case TGSI_OPCODE_TEXREG2AR: - break; - case TGSI_OPCODE_TEXM3X2PAD: - break; - case TGSI_OPCODE_TEXM3X2TEX: - break; - case TGSI_OPCODE_TEXM3X3PAD: - break; - case TGSI_OPCODE_TEXM3X3TEX: - break; - case TGSI_OPCODE_TEXM3X3SPEC: - break; - case TGSI_OPCODE_TEXM3X3VSPEC: - break; - case TGSI_OPCODE_TEXREG2GB: - break; - case TGSI_OPCODE_TEXREG2RGB: - break; - case TGSI_OPCODE_TEXDP3TEX: - break; - case TGSI_OPCODE_TEXDP3: - break; - case TGSI_OPCODE_TEXM3X3: - break; - case TGSI_OPCODE_TEXM3X2DEPTH: - break; - case TGSI_OPCODE_TEXDEPTH: - break; - case TGSI_OPCODE_BEM: - break; - case TGSI_OPCODE_M4X3: - break; - case TGSI_OPCODE_M3X4: - break; - case TGSI_OPCODE_M3X3: - break; - case TGSI_OPCODE_M3X2: - break; - case TGSI_OPCODE_NRM4: - break; - case TGSI_OPCODE_CALLNZ: - break; - case TGSI_OPCODE_IFC: - break; - case TGSI_OPCODE_BREAKC: - break; - case TGSI_OPCODE_KIL: - break; - case TGSI_OPCODE_END: - instr->end(); - return; - break; - default: - fprintf(stderr, "ERROR: Unknown opcode %d\n", - inst->Instruction.Opcode); - assert(0); - break; - } - - if (!out) { - fprintf(stderr, "ERROR: unsupported opcode %d\n", - inst->Instruction.Opcode); - assert(!"Unsupported opcode"); - } - - /* # not sure if we need this */ - switch( inst->Instruction.Saturate ) { - case TGSI_SAT_NONE: - break; - case TGSI_SAT_ZERO_ONE: - /*TXT( "_SAT" );*/ - break; - case TGSI_SAT_MINUS_PLUS_ONE: - /*TXT( "_SAT[-1,1]" );*/ - break; - default: - assert( 0 ); - } - - /* store results */ - for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - - if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { - storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) { - storage->setTempElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) { - storage->setAddrElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else { - fprintf(stderr, "ERROR: unsupported LLVM destination!"); - assert(!"wrong destination"); - } - } -} - -static llvm::Module * -tgsi_to_llvm(struct gallivm_prog *prog, const struct tgsi_token *tokens) -{ - llvm::Module *mod = createBaseShader(); - struct tgsi_parse_context parse; - struct tgsi_full_instruction fi; - struct tgsi_full_declaration fd; - unsigned instno = 0; - Function* shader = mod->getFunction("execute_shader"); - std::ostringstream stream; - if (prog->type == GALLIVM_VS) { - stream << "vs_shader"; - } else { - stream << "fs_shader"; - } - stream << prog->id; - std::string func_name = stream.str(); - shader->setName(func_name.c_str()); - - Function::arg_iterator args = shader->arg_begin(); - Value *ptr_INPUT = args++; - ptr_INPUT->setName("input"); - - BasicBlock *label_entry = new BasicBlock("entry", shader, 0); - - tgsi_parse_init(&parse, tokens); - - fi = tgsi_default_full_instruction(); - fd = tgsi_default_full_declaration(); - Storage storage(label_entry, ptr_INPUT); - Instructions instr(mod, shader, label_entry, &storage); - while(!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - translate_declaration(prog, mod, &storage, - &parse.FullToken.FullDeclaration, - &fd); - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - translate_immediate(&storage, - &parse.FullToken.FullImmediate); - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - translate_instruction(mod, &storage, &instr, - &parse.FullToken.FullInstruction, - &fi, instno); - ++instno; - break; - - default: - assert(0); - } - } - - tgsi_parse_free(&parse); - - prog->num_consts = storage.numConsts(); - return mod; -} - -/*! - Translates the TGSI tokens into LLVM format. Translated representation - is stored in the gallivm_prog and returned. - After calling this function the gallivm_prog can either be used with a custom - code generator to generate machine code for the GPU which the code generator - addresses or it can be jit compiled with gallivm_cpu_jit_compile and executed - with gallivm_prog_exec to run the module on the CPU. - */ -struct gallivm_prog * -gallivm_from_tgsi(const struct tgsi_token *tokens, enum gallivm_shader_type type) -{ - std::cout << "Creating llvm from: " <<std::endl; - ++GLOBAL_ID; - struct gallivm_prog *gallivm = - (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog)); - gallivm->id = GLOBAL_ID; - gallivm->type = type; - tgsi_dump(tokens, 0); - - llvm::Module *mod = tgsi_to_llvm(gallivm, tokens); - gallivm->module = mod; - gallivm_prog_dump(gallivm, 0); - - /* Run optimization passes over it */ - PassManager passes; - passes.add(new TargetData(mod)); - AddStandardCompilePasses(passes); - passes.run(*mod); - - gallivm->module = mod; - - gallivm_prog_dump(gallivm, 0); - - return gallivm; -} - - void gallivm_prog_delete(struct gallivm_prog *prog) { - llvm::Module *mod = static_cast<llvm::Module*>(prog->module); - delete mod; + delete prog->module; prog->module = 0; prog->function = 0; free(prog); } -typedef void (*vertex_shader_runner)(float (*ainputs)[PIPE_MAX_SHADER_INPUTS][4], - float (*dests)[PIPE_MAX_SHADER_INPUTS][4], - float (*aconsts)[4], - int num_vertices, - int num_inputs, - int num_attribs, - int num_consts); - - -/*! - This function is used to execute the gallivm_prog in software. Before calling - this function the gallivm_prog has to be JIT compiled with the gallivm_cpu_jit_compile - function. - */ -int gallivm_prog_exec(struct gallivm_prog *prog, - float (*inputs)[PIPE_MAX_SHADER_INPUTS][4], - float (*dests)[PIPE_MAX_SHADER_INPUTS][4], - float (*consts)[4], - int num_vertices, - int num_inputs, - int num_attribs) -{ - vertex_shader_runner runner = reinterpret_cast<vertex_shader_runner>(prog->function); - assert(runner); - runner(inputs, dests, consts, num_vertices, num_inputs, - num_attribs, prog->num_consts); - - return 0; -} - - - static inline void constant_interpolation(float (*inputs)[16][4], const struct tgsi_interp_coef *coefs, @@ -919,40 +195,15 @@ perspective_interpolation(float (*inputs)[16][4], } } -typedef int (*fragment_shader_runner)(float x, float y, - float (*dests)[16][4], - float (*inputs)[16][4], - int num_attribs, - float (*consts)[4], int num_consts, - struct tgsi_sampler *samplers); - -int gallivm_fragment_shader_exec(struct gallivm_prog *prog, - float fx, float fy, - float (*dests)[16][4], - float (*inputs)[16][4], - float (*consts)[4], - struct tgsi_sampler *samplers) -{ - fragment_shader_runner runner = reinterpret_cast<fragment_shader_runner>(prog->function); - assert(runner); - - return runner(fx, fy, dests, inputs, prog->num_interp, - consts, prog->num_consts, - samplers); -} - -void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix) +void gallivm_ir_dump(struct gallivm_ir *ir, const char *file_prefix) { - llvm::Module *mod; - if (!prog || !prog->module) + if (!ir || !ir->module) return; - mod = static_cast<llvm::Module*>(prog->module); - if (file_prefix) { std::ostringstream stream; stream << file_prefix; - stream << prog->id; + stream << ir->id; stream << ".ll"; std::string name = stream.str(); std::ofstream out(name.c_str()); @@ -960,12 +211,12 @@ void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix) std::cerr<<"Can't open file : "<<stream.str()<<std::endl;; return; } - out << (*mod); + out << (*ir->module); out.close(); } else { - const llvm::Module::FunctionListType &funcs = mod->getFunctionList(); + const llvm::Module::FunctionListType &funcs = ir->module->getFunctionList(); llvm::Module::FunctionListType::const_iterator itr; - std::cout<<"; ---------- Start shader "<<prog->id<<std::endl; + std::cout<<"; ---------- Start shader "<<ir->id<<std::endl; for (itr = funcs.begin(); itr != funcs.end(); ++itr) { const llvm::Function &func = (*itr); std::string name = func.getName(); @@ -978,86 +229,10 @@ void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix) std::cout<<*found<<std::endl; } } - std::cout<<"; ---------- End shader "<<prog->id<<std::endl; - } -} - - -static struct gallivm_cpu_engine *CPU = 0; - -static inline llvm::Function *func_for_shader(struct gallivm_prog *prog) -{ - llvm::Module *mod = prog->module; - llvm::Function *func = 0; - - switch (prog->type) { - case GALLIVM_VS: - func = mod->getFunction("run_vertex_shader"); - break; - case GALLIVM_FS: - func = mod->getFunction("run_fragment_shader"); - break; - default: - assert(!"Unknown shader type!"); - break; + std::cout<<"; ---------- End shader "<<ir->id<<std::endl; } - return func; } -/*! - This function creates a CPU based execution engine for the given gallivm_prog. - gallivm_cpu_engine should be used as a singleton throughout the library. Before - executing gallivm_prog_exec one needs to call gallivm_cpu_jit_compile. - The gallivm_prog instance which is being passed to the constructor is being - automatically JIT compiled so one shouldn't call gallivm_cpu_jit_compile - with it again. - */ -struct gallivm_cpu_engine * gallivm_cpu_engine_create(struct gallivm_prog *prog) -{ - struct gallivm_cpu_engine *cpu = (struct gallivm_cpu_engine *) - calloc(1, sizeof(struct gallivm_cpu_engine)); - llvm::Module *mod = static_cast<llvm::Module*>(prog->module); - llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod); - llvm::ExecutionEngine *ee = llvm::ExecutionEngine::create(mp, false); - ee->DisableLazyCompilation(); - cpu->engine = ee; - - llvm::Function *func = func_for_shader(prog); - - prog->function = ee->getPointerToFunction(func); - CPU = cpu; - return cpu; -} - - -/*! - This function JIT compiles the given gallivm_prog with the given cpu based execution engine. - The reference to the generated machine code entry point will be stored - in the gallivm_prog program. After executing this function one can call gallivm_prog_exec - in order to execute the gallivm_prog on the CPU. - */ -void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog *prog) -{ - llvm::Module *mod = static_cast<llvm::Module*>(prog->module); - llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod); - llvm::ExecutionEngine *ee = cpu->engine; - assert(ee); - ee->DisableLazyCompilation(); - ee->addModuleProvider(mp); - - llvm::Function *func = func_for_shader(prog); - prog->function = ee->getPointerToFunction(func); -} - -void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *cpu) -{ - free(cpu); -} - -struct gallivm_cpu_engine * gallivm_global_cpu_engine() -{ - return CPU; -} void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog, float (*inputs)[16][4], @@ -1084,9 +259,66 @@ void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog, } } -#endif /* MESA_LLVM */ +struct gallivm_ir * gallivm_ir_new(enum gallivm_shader_type type) +{ + struct gallivm_ir *ir = + (struct gallivm_ir *)calloc(1, sizeof(struct gallivm_ir)); + ++GLOBAL_ID; + ir->id = GLOBAL_ID; + ir->type = type; + + return ir; +} + +void gallivm_ir_set_layout(struct gallivm_ir *ir, + enum gallivm_vector_layout layout) +{ + ir->layout = layout; +} + +void gallivm_ir_set_components(struct gallivm_ir *ir, int num) +{ + ir->num_components = num; +} +void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir, + const struct tgsi_token *tokens) +{ + std::cout << "Creating llvm from: " <<std::endl; + tgsi_dump(tokens, 0); + llvm::Module *irmod = tgsi_to_llvmir(ir, tokens); + llvm::Module *mod = tgsi_to_llvm(ir, tokens); + ir->module = mod; + gallivm_ir_dump(ir, 0); +} + +void gallivm_ir_delete(struct gallivm_ir *ir) +{ + delete ir->module; + free(ir); +} + +struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir) +{ + struct gallivm_prog *prog = + (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog)); + llvm::Module *mod = llvm::CloneModule(ir->module); + prog->num_consts = ir->num_consts; + memcpy(prog->interpolators, ir->interpolators, sizeof(prog->interpolators)); + prog->num_interp = ir->num_interp; + + /* Run optimization passes over it */ + PassManager passes; + passes.add(new TargetData(mod)); + AddStandardCompilePasses(passes); + passes.run(*mod); + prog->module = mod; + + return prog; +} + +#endif /* MESA_LLVM */ diff --git a/src/mesa/pipe/llvm/gallivm.h b/src/mesa/pipe/llvm/gallivm.h index 4695de3127a..b104520cb7f 100644 --- a/src/mesa/pipe/llvm/gallivm.h +++ b/src/mesa/pipe/llvm/gallivm.h @@ -43,31 +43,34 @@ extern "C" { struct tgsi_token; +struct gallivm_ir; struct gallivm_prog; struct gallivm_cpu_engine; struct tgsi_interp_coef; struct tgsi_sampler; +struct tgsi_exec_vector; enum gallivm_shader_type { GALLIVM_VS, GALLIVM_FS }; -struct gallivm_prog *gallivm_from_tgsi(const struct tgsi_token *tokens, enum gallivm_shader_type type); -void gallivm_prog_delete(struct gallivm_prog *prog); -int gallivm_prog_exec(struct gallivm_prog *prog, - float (*inputs)[PIPE_MAX_SHADER_INPUTS][4], - float (*dests)[PIPE_MAX_SHADER_INPUTS][4], - float (*consts)[4], - int num_vertices, - int num_inputs, - int num_attribs); -int gallivm_fragment_shader_exec(struct gallivm_prog *prog, - float x, float y, - float (*dests)[PIPE_MAX_SHADER_INPUTS][4], - float (*inputs)[PIPE_MAX_SHADER_INPUTS][4], - float (*consts)[4], - struct tgsi_sampler *samplers); +enum gallivm_vector_layout { + GALLIVM_AOS, + GALLIVM_SOA +}; + +struct gallivm_ir *gallivm_ir_new(enum gallivm_shader_type type); +void gallivm_ir_set_layout(struct gallivm_ir *ir, + enum gallivm_vector_layout layout); +void gallivm_ir_set_components(struct gallivm_ir *ir, int num); +void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir, + const struct tgsi_token *tokens); +void gallivm_ir_delete(struct gallivm_ir *ir); + + +struct gallivm_prog *gallivm_ir_compile(struct gallivm_ir *ir); + void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog, float (*inputs)[PIPE_MAX_SHADER_INPUTS][4], const struct tgsi_interp_coef *coefs); @@ -76,9 +79,20 @@ void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix); struct gallivm_cpu_engine *gallivm_cpu_engine_create(struct gallivm_prog *prog); struct gallivm_cpu_engine *gallivm_global_cpu_engine(); +int gallivm_cpu_vs_exec(struct gallivm_prog *prog, + struct tgsi_exec_vector *inputs, + struct tgsi_exec_vector *dests, + float (*consts)[4]); +int gallivm_cpu_fs_exec(struct gallivm_prog *prog, + float x, float y, + float (*dests)[PIPE_MAX_SHADER_INPUTS][4], + float (*inputs)[PIPE_MAX_SHADER_INPUTS][4], + float (*consts)[4], + struct tgsi_sampler *samplers); void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *ee, struct gallivm_prog *prog); void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *ee); + #endif /* MESA_LLVM */ #if defined __cplusplus diff --git a/src/mesa/pipe/llvm/gallivm_cpu.cpp b/src/mesa/pipe/llvm/gallivm_cpu.cpp new file mode 100644 index 00000000000..5f1268bf4f8 --- /dev/null +++ b/src/mesa/pipe/llvm/gallivm_cpu.cpp @@ -0,0 +1,204 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin [email protected] + */ +#ifdef MESA_LLVM + +#include "gallivm.h" +#include "gallivm_p.h" + +#include "instructions.h" +#include "loweringpass.h" +#include "storage.h" +#include "tgsitollvm.h" + +#include "pipe/p_context.h" +#include "pipe/p_shader_tokens.h" + +#include "pipe/tgsi/exec/tgsi_exec.h" +#include "pipe/tgsi/util/tgsi_dump.h" + +#include <llvm/Module.h> +#include <llvm/CallingConv.h> +#include <llvm/Constants.h> +#include <llvm/DerivedTypes.h> +#include <llvm/Instructions.h> +#include <llvm/ModuleProvider.h> +#include <llvm/Pass.h> +#include <llvm/PassManager.h> +#include <llvm/ParameterAttributes.h> +#include <llvm/Support/PatternMatch.h> +#include <llvm/ExecutionEngine/JIT.h> +#include <llvm/ExecutionEngine/Interpreter.h> +#include <llvm/ExecutionEngine/GenericValue.h> +#include <llvm/Support/MemoryBuffer.h> +#include <llvm/LinkAllPasses.h> +#include <llvm/Analysis/Verifier.h> +#include <llvm/Analysis/LoopPass.h> +#include <llvm/Target/TargetData.h> +#include <llvm/Bitcode/ReaderWriter.h> +#include <llvm/Transforms/Utils/Cloning.h> + +#include <sstream> +#include <fstream> +#include <iostream> + +struct gallivm_cpu_engine { + llvm::ExecutionEngine *engine; +}; + +static struct gallivm_cpu_engine *CPU = 0; + +typedef int (*fragment_shader_runner)(float x, float y, + float (*dests)[16][4], + float (*inputs)[16][4], + int num_attribs, + float (*consts)[4], int num_consts, + struct tgsi_sampler *samplers); + +int gallivm_cpu_fs_exec(struct gallivm_prog *prog, + float fx, float fy, + float (*dests)[16][4], + float (*inputs)[16][4], + float (*consts)[4], + struct tgsi_sampler *samplers) +{ + fragment_shader_runner runner = reinterpret_cast<fragment_shader_runner>(prog->function); + assert(runner); + + return runner(fx, fy, dests, inputs, prog->num_interp, + consts, prog->num_consts, + samplers); +} + +static inline llvm::Function *func_for_shader(struct gallivm_prog *prog) +{ + llvm::Module *mod = prog->module; + llvm::Function *func = 0; + + switch (prog->type) { + case GALLIVM_VS: + func = mod->getFunction("run_vertex_shader"); + break; + case GALLIVM_FS: + func = mod->getFunction("run_fragment_shader"); + break; + default: + assert(!"Unknown shader type!"); + break; + } + return func; +} + +/*! + This function creates a CPU based execution engine for the given gallivm_prog. + gallivm_cpu_engine should be used as a singleton throughout the library. Before + executing gallivm_prog_exec one needs to call gallivm_cpu_jit_compile. + The gallivm_prog instance which is being passed to the constructor is being + automatically JIT compiled so one shouldn't call gallivm_cpu_jit_compile + with it again. + */ +struct gallivm_cpu_engine * gallivm_cpu_engine_create(struct gallivm_prog *prog) +{ + struct gallivm_cpu_engine *cpu = (struct gallivm_cpu_engine *) + calloc(1, sizeof(struct gallivm_cpu_engine)); + llvm::Module *mod = static_cast<llvm::Module*>(prog->module); + llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod); + llvm::ExecutionEngine *ee = llvm::ExecutionEngine::create(mp, false); + ee->DisableLazyCompilation(); + cpu->engine = ee; + + llvm::Function *func = func_for_shader(prog); + + prog->function = ee->getPointerToFunction(func); + CPU = cpu; + return cpu; +} + + +/*! + This function JIT compiles the given gallivm_prog with the given cpu based execution engine. + The reference to the generated machine code entry point will be stored + in the gallivm_prog program. After executing this function one can call gallivm_prog_exec + in order to execute the gallivm_prog on the CPU. + */ +void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog *prog) +{ + llvm::Module *mod = static_cast<llvm::Module*>(prog->module); + llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod); + llvm::ExecutionEngine *ee = cpu->engine; + assert(ee); + /*FIXME : remove */ + ee->DisableLazyCompilation(); + ee->addModuleProvider(mp); + + llvm::Function *func = func_for_shader(prog); + prog->function = ee->getPointerToFunction(func); +} + +void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *cpu) +{ + free(cpu); +} + +struct gallivm_cpu_engine * gallivm_global_cpu_engine() +{ + return CPU; +} + + +typedef void (*vertex_shader_runner)(void *ainputs, + void *dests, + float (*aconsts)[4], + int num_vertices, + int num_inputs, + int num_attribs, + int num_consts); + + +/*! + This function is used to execute the gallivm_prog in software. Before calling + this function the gallivm_prog has to be JIT compiled with the gallivm_cpu_jit_compile + function. + */ +int gallivm_cpu_vs_exec(struct gallivm_prog *prog, + struct tgsi_exec_vector *inputs, + struct tgsi_exec_vector *dests, + float (*consts)[4]) +{ + vertex_shader_runner runner = reinterpret_cast<vertex_shader_runner>(prog->function); + assert(runner); + /*FIXME*/ + runner(inputs, dests, consts, 4, 4, 4, prog->num_consts); + + return 0; +} + +#endif diff --git a/src/mesa/pipe/llvm/gallivm_p.h b/src/mesa/pipe/llvm/gallivm_p.h new file mode 100644 index 00000000000..2c6e5e8f5f2 --- /dev/null +++ b/src/mesa/pipe/llvm/gallivm_p.h @@ -0,0 +1,56 @@ +#ifndef GALLIVM_P_H +#define GALLIVM_P_H + +#ifdef MESA_LLVM + +namespace llvm { + class Module; +} + +#if defined __cplusplus +extern "C" { +#endif + +enum gallivm_shader_type; +enum gallivm_vector_layout; + +struct gallivm_interpolate { + int attrib; + int chan; + int type; +}; + +struct gallivm_ir { + llvm::Module *module; + int id; + enum gallivm_shader_type type; + enum gallivm_vector_layout layout; + int num_components; + int num_consts; + + //FIXME: this might not be enough for some shaders + struct gallivm_interpolate interpolators[32*4]; + int num_interp; +}; + +struct gallivm_prog { + llvm::Module *module; + void *function; + + int id; + enum gallivm_shader_type type; + + int num_consts; + + //FIXME: this might not be enough for some shaders + struct gallivm_interpolate interpolators[32*4]; + int num_interp; +}; + +#endif /* MESA_LLVM */ + +#if defined __cplusplus +} // extern "C" +#endif + +#endif diff --git a/src/mesa/pipe/llvm/instructionssoa.cpp b/src/mesa/pipe/llvm/instructionssoa.cpp new file mode 100644 index 00000000000..9ac4d8fbc73 --- /dev/null +++ b/src/mesa/pipe/llvm/instructionssoa.cpp @@ -0,0 +1,26 @@ +#include "instructionssoa.h" + +InstructionsSoa::InstructionsSoa(llvm::Module *mod, llvm::Function *func, + llvm::BasicBlock *block, StorageSoa *storage) +{ +} + +std::vector<llvm::Value*> InstructionsSoa::add(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + std::vector<llvm::Value*> res(4); + + return res; +} + +std::vector<llvm::Value*> InstructionsSoa::mul(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + std::vector<llvm::Value*> res(4); + + return res; +} + +void InstructionsSoa::end() +{ +} diff --git a/src/mesa/pipe/llvm/instructionssoa.h b/src/mesa/pipe/llvm/instructionssoa.h new file mode 100644 index 00000000000..0b6b41cf056 --- /dev/null +++ b/src/mesa/pipe/llvm/instructionssoa.h @@ -0,0 +1,55 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INSTRUCTIONSSOA_H +#define INSTRUCTIONSSOA_H + +#include <vector> + +namespace llvm { + class Module; + class Function; + class BasicBlock; + class Value; +} +class StorageSoa; + +class InstructionsSoa +{ +public: + InstructionsSoa(llvm::Module *mod, llvm::Function *func, + llvm::BasicBlock *block, StorageSoa *storage); + + std::vector<llvm::Value*> add(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> mul(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + void end(); +}; + + +#endif diff --git a/src/mesa/pipe/llvm/llvm_base_shader.cpp b/src/mesa/pipe/llvm/llvm_base_shader.cpp index b574b550aec..90a25a440ab 100644 --- a/src/mesa/pipe/llvm/llvm_base_shader.cpp +++ b/src/mesa/pipe/llvm/llvm_base_shader.cpp @@ -104,8 +104,8 @@ Module* createBaseShader() { /*isVarArg=*/false); std::vector<const Type*>FuncTy_18_args; - FuncTy_18_args.push_back(PointerTy_12); - FuncTy_18_args.push_back(PointerTy_12); + FuncTy_18_args.push_back(PointerTy_9); + FuncTy_18_args.push_back(PointerTy_9); FuncTy_18_args.push_back(PointerTy_7); FuncTy_18_args.push_back(IntegerType::get(32)); FuncTy_18_args.push_back(IntegerType::get(32)); @@ -526,14 +526,23 @@ Module* createBaseShader() { BasicBlock* label_forbody_preheader_i = new BasicBlock("forbody.preheader.i",func_run_vertex_shader,0); BasicBlock* label_forbody_i = new BasicBlock("forbody.i",func_run_vertex_shader,0); BasicBlock* label_from_consts_exit = new BasicBlock("from_consts.exit",func_run_vertex_shader,0); - BasicBlock* label_forbody_preheader_91 = new BasicBlock("forbody.preheader",func_run_vertex_shader,0); - BasicBlock* label_forbody_92 = new BasicBlock("forbody",func_run_vertex_shader,0); - BasicBlock* label_afterfor_93 = new BasicBlock("afterfor",func_run_vertex_shader,0); // Block entry (label_entry_90) AllocaInst* ptr_consts = new AllocaInst(ArrayTy_20, "consts", label_entry_90); AllocaInst* ptr_temps = new AllocaInst(ArrayTy_22, "temps", label_entry_90); AllocaInst* ptr_args = new AllocaInst(StructTy_struct_ShaderInput, "args", label_entry_90); + std::vector<Value*> ptr_tmp_indices; + ptr_tmp_indices.push_back(const_int32_29); + ptr_tmp_indices.push_back(const_int32_29); + Instruction* ptr_tmp = new GetElementPtrInst(ptr_args, ptr_tmp_indices.begin(), ptr_tmp_indices.end(), "tmp", label_entry_90); + CastInst* ptr_conv = new BitCastInst(ptr_results, PointerTy_0, "conv", label_entry_90); + StoreInst* void_91 = new StoreInst(ptr_conv, ptr_tmp, false, label_entry_90); + std::vector<Value*> ptr_tmp2_indices; + ptr_tmp2_indices.push_back(const_int32_29); + ptr_tmp2_indices.push_back(const_int32_31); + Instruction* ptr_tmp2 = new GetElementPtrInst(ptr_args, ptr_tmp2_indices.begin(), ptr_tmp2_indices.end(), "tmp2", label_entry_90); + CastInst* ptr_conv4 = new BitCastInst(ptr_inputs, PointerTy_0, "conv4", label_entry_90); + StoreInst* void_92 = new StoreInst(ptr_conv4, ptr_tmp2, false, label_entry_90); ICmpInst* int1_cmp_i = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_consts, const_int32_29, "cmp.i", label_entry_90); new BranchInst(label_forbody_preheader_i, label_from_consts_exit, int1_cmp_i, label_entry_90); @@ -544,17 +553,17 @@ Module* createBaseShader() { new BranchInst(label_forbody_i, label_forbody_preheader_i); // Block forbody.i (label_forbody_i) - Argument* fwdref_96 = new Argument(IntegerType::get(32)); + Argument* fwdref_95 = new Argument(IntegerType::get(32)); PHINode* int32_i_0_reg2mem_0_i = new PHINode(IntegerType::get(32), "i.0.reg2mem.0.i", label_forbody_i); int32_i_0_reg2mem_0_i->reserveOperandSpace(2); int32_i_0_reg2mem_0_i->addIncoming(const_int32_29, label_forbody_preheader_i); - int32_i_0_reg2mem_0_i->addIncoming(fwdref_96, label_forbody_i); + int32_i_0_reg2mem_0_i->addIncoming(fwdref_95, label_forbody_i); - Argument* fwdref_97 = new Argument(VectorTy_1); + Argument* fwdref_96 = new Argument(VectorTy_1); PHINode* packed_vec_0_reg2mem_0_i = new PHINode(VectorTy_1, "vec.0.reg2mem.0.i", label_forbody_i); packed_vec_0_reg2mem_0_i->reserveOperandSpace(2); packed_vec_0_reg2mem_0_i->addIncoming(const_packed_32, label_forbody_preheader_i); - packed_vec_0_reg2mem_0_i->addIncoming(fwdref_97, label_forbody_i); + packed_vec_0_reg2mem_0_i->addIncoming(fwdref_96, label_forbody_i); std::vector<Value*> ptr_arraydecay_i_indices; ptr_arraydecay_i_indices.push_back(int32_i_0_reg2mem_0_i); @@ -584,80 +593,40 @@ Module* createBaseShader() { ptr_arrayidx34_i_indices.push_back(const_int32_29); ptr_arrayidx34_i_indices.push_back(int32_i_0_reg2mem_0_i); Instruction* ptr_arrayidx34_i = new GetElementPtrInst(ptr_consts, ptr_arrayidx34_i_indices.begin(), ptr_arrayidx34_i_indices.end(), "arrayidx34.i", label_forbody_i); - StoreInst* void_98 = new StoreInst(packed_tmp31_i, ptr_arrayidx34_i, false, label_forbody_i); - BinaryOperator* int32_indvar_next8 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_i, const_int32_31, "indvar.next8", label_forbody_i); - ICmpInst* int1_exitcond9 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next8, int32_tmp10_i, "exitcond9", label_forbody_i); - new BranchInst(label_from_consts_exit, label_forbody_i, int1_exitcond9, label_forbody_i); + StoreInst* void_97 = new StoreInst(packed_tmp31_i, ptr_arrayidx34_i, false, label_forbody_i); + BinaryOperator* int32_indvar_next_98 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_i, const_int32_31, "indvar.next", label_forbody_i); + ICmpInst* int1_exitcond_99 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next_98, int32_tmp10_i, "exitcond", label_forbody_i); + new BranchInst(label_from_consts_exit, label_forbody_i, int1_exitcond_99, label_forbody_i); // Block from_consts.exit (label_from_consts_exit) - std::vector<Value*> ptr_tmp2_indices; - ptr_tmp2_indices.push_back(const_int32_29); - ptr_tmp2_indices.push_back(const_int32_34); - Instruction* ptr_tmp2 = new GetElementPtrInst(ptr_args, ptr_tmp2_indices.begin(), ptr_tmp2_indices.end(), "tmp2", label_from_consts_exit); - std::vector<Value*> ptr_arraydecay3_indices; - ptr_arraydecay3_indices.push_back(const_int32_29); - ptr_arraydecay3_indices.push_back(const_int32_29); - Instruction* ptr_arraydecay3 = new GetElementPtrInst(ptr_consts, ptr_arraydecay3_indices.begin(), ptr_arraydecay3_indices.end(), "arraydecay3", label_from_consts_exit); - StoreInst* void_100 = new StoreInst(ptr_arraydecay3, ptr_tmp2, false, label_from_consts_exit); - std::vector<Value*> ptr_tmp4_indices; - ptr_tmp4_indices.push_back(const_int32_29); - ptr_tmp4_indices.push_back(const_int32_33); - Instruction* ptr_tmp4 = new GetElementPtrInst(ptr_args, ptr_tmp4_indices.begin(), ptr_tmp4_indices.end(), "tmp4", label_from_consts_exit); - std::vector<Value*> ptr_arraydecay5_indices; - ptr_arraydecay5_indices.push_back(const_int32_29); - ptr_arraydecay5_indices.push_back(const_int32_29); - Instruction* ptr_arraydecay5 = new GetElementPtrInst(ptr_temps, ptr_arraydecay5_indices.begin(), ptr_arraydecay5_indices.end(), "arraydecay5", label_from_consts_exit); - StoreInst* void_101 = new StoreInst(ptr_arraydecay5, ptr_tmp4, false, label_from_consts_exit); - ICmpInst* int1_cmp_102 = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_vertices, const_int32_29, "cmp", label_from_consts_exit); - new BranchInst(label_forbody_preheader_91, label_afterfor_93, int1_cmp_102, label_from_consts_exit); - - // Block forbody.preheader (label_forbody_preheader_91) - std::vector<Value*> ptr_tmp8_indices; - ptr_tmp8_indices.push_back(const_int32_29); - ptr_tmp8_indices.push_back(const_int32_29); - Instruction* ptr_tmp8 = new GetElementPtrInst(ptr_args, ptr_tmp8_indices.begin(), ptr_tmp8_indices.end(), "tmp8", label_forbody_preheader_91); - std::vector<Value*> ptr_tmp12_indices; - ptr_tmp12_indices.push_back(const_int32_29); - ptr_tmp12_indices.push_back(const_int32_31); - Instruction* ptr_tmp12 = new GetElementPtrInst(ptr_args, ptr_tmp12_indices.begin(), ptr_tmp12_indices.end(), "tmp12", label_forbody_preheader_91); - BinaryOperator* int32_tmp_104 = BinaryOperator::create(Instruction::Add, int32_num_vertices, const_int32_30, "tmp", label_forbody_preheader_91); - ICmpInst* int1_tmp6 = new ICmpInst(ICmpInst::ICMP_SLT, int32_tmp_104, const_int32_29, "tmp6", label_forbody_preheader_91); - SelectInst* int32_tmp7 = new SelectInst(int1_tmp6, const_int32_31, int32_num_vertices, "tmp7", label_forbody_preheader_91); - new BranchInst(label_forbody_92, label_forbody_preheader_91); - - // Block forbody (label_forbody_92) - Argument* fwdref_107 = new Argument(IntegerType::get(32)); - PHINode* int32_i_0_reg2mem_0_106 = new PHINode(IntegerType::get(32), "i.0.reg2mem.0", label_forbody_92); - int32_i_0_reg2mem_0_106->reserveOperandSpace(2); - int32_i_0_reg2mem_0_106->addIncoming(const_int32_29, label_forbody_preheader_91); - int32_i_0_reg2mem_0_106->addIncoming(fwdref_107, label_forbody_92); - - std::vector<Value*> ptr_arraydecay11_108_indices; - ptr_arraydecay11_108_indices.push_back(int32_i_0_reg2mem_0_106); - ptr_arraydecay11_108_indices.push_back(const_int32_29); - Instruction* ptr_arraydecay11_108 = new GetElementPtrInst(ptr_results, ptr_arraydecay11_108_indices.begin(), ptr_arraydecay11_108_indices.end(), "arraydecay11", label_forbody_92); - StoreInst* void_109 = new StoreInst(ptr_arraydecay11_108, ptr_tmp8, false, label_forbody_92); - std::vector<Value*> ptr_arraydecay16_indices; - ptr_arraydecay16_indices.push_back(int32_i_0_reg2mem_0_106); - ptr_arraydecay16_indices.push_back(const_int32_29); - Instruction* ptr_arraydecay16 = new GetElementPtrInst(ptr_inputs, ptr_arraydecay16_indices.begin(), ptr_arraydecay16_indices.end(), "arraydecay16", label_forbody_92); - StoreInst* void_110 = new StoreInst(ptr_arraydecay16, ptr_tmp12, false, label_forbody_92); - CallInst* void_111 = new CallInst(func_execute_shader, ptr_args, "", label_forbody_92); - void_111->setCallingConv(CallingConv::C); - void_111->setTailCall(false);const ParamAttrsList *void_111_PAL = 0; - void_111->setParamAttrs(void_111_PAL); - - BinaryOperator* int32_indvar_next_112 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_106, const_int32_31, "indvar.next", label_forbody_92); - ICmpInst* int1_exitcond_113 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next_112, int32_tmp7, "exitcond", label_forbody_92); - new BranchInst(label_afterfor_93, label_forbody_92, int1_exitcond_113, label_forbody_92); - - // Block afterfor (label_afterfor_93) - new ReturnInst(label_afterfor_93); + std::vector<Value*> ptr_tmp7_indices; + ptr_tmp7_indices.push_back(const_int32_29); + ptr_tmp7_indices.push_back(const_int32_34); + Instruction* ptr_tmp7 = new GetElementPtrInst(ptr_args, ptr_tmp7_indices.begin(), ptr_tmp7_indices.end(), "tmp7", label_from_consts_exit); + std::vector<Value*> ptr_arraydecay8_indices; + ptr_arraydecay8_indices.push_back(const_int32_29); + ptr_arraydecay8_indices.push_back(const_int32_29); + Instruction* ptr_arraydecay8 = new GetElementPtrInst(ptr_consts, ptr_arraydecay8_indices.begin(), ptr_arraydecay8_indices.end(), "arraydecay8", label_from_consts_exit); + StoreInst* void_101 = new StoreInst(ptr_arraydecay8, ptr_tmp7, false, label_from_consts_exit); + std::vector<Value*> ptr_tmp9_indices; + ptr_tmp9_indices.push_back(const_int32_29); + ptr_tmp9_indices.push_back(const_int32_33); + Instruction* ptr_tmp9 = new GetElementPtrInst(ptr_args, ptr_tmp9_indices.begin(), ptr_tmp9_indices.end(), "tmp9", label_from_consts_exit); + std::vector<Value*> ptr_arraydecay10_indices; + ptr_arraydecay10_indices.push_back(const_int32_29); + ptr_arraydecay10_indices.push_back(const_int32_29); + Instruction* ptr_arraydecay10 = new GetElementPtrInst(ptr_temps, ptr_arraydecay10_indices.begin(), ptr_arraydecay10_indices.end(), "arraydecay10", label_from_consts_exit); + StoreInst* void_102 = new StoreInst(ptr_arraydecay10, ptr_tmp9, false, label_from_consts_exit); + CallInst* void_103 = new CallInst(func_execute_shader, ptr_args, "", label_from_consts_exit); + void_103->setCallingConv(CallingConv::C); + void_103->setTailCall(false);const ParamAttrsList *void_103_PAL = 0; + void_103->setParamAttrs(void_103_PAL); + + new ReturnInst(label_from_consts_exit); // Resolve Forward References - fwdref_107->replaceAllUsesWith(int32_indvar_next_112); delete fwdref_107; - fwdref_97->replaceAllUsesWith(packed_tmp31_i); delete fwdref_97; - fwdref_96->replaceAllUsesWith(int32_indvar_next8); delete fwdref_96; + fwdref_96->replaceAllUsesWith(packed_tmp31_i); delete fwdref_96; + fwdref_95->replaceAllUsesWith(int32_indvar_next_98); delete fwdref_95; } @@ -668,195 +637,195 @@ Module* createBaseShader() { float_x->setName("x"); Value* float_y = args++; float_y->setName("y"); - Value* ptr_results_116 = args++; - ptr_results_116->setName("results"); - Value* ptr_inputs_117 = args++; - ptr_inputs_117->setName("inputs"); - Value* int32_num_inputs_118 = args++; - int32_num_inputs_118->setName("num_inputs"); - Value* ptr_aconsts_119 = args++; - ptr_aconsts_119->setName("aconsts"); - Value* int32_num_consts_120 = args++; - int32_num_consts_120->setName("num_consts"); + Value* ptr_results_105 = args++; + ptr_results_105->setName("results"); + Value* ptr_inputs_106 = args++; + ptr_inputs_106->setName("inputs"); + Value* int32_num_inputs_107 = args++; + int32_num_inputs_107->setName("num_inputs"); + Value* ptr_aconsts_108 = args++; + ptr_aconsts_108->setName("aconsts"); + Value* int32_num_consts_109 = args++; + int32_num_consts_109->setName("num_consts"); Value* ptr_samplers = args++; ptr_samplers->setName("samplers"); - BasicBlock* label_entry_121 = new BasicBlock("entry",func_run_fragment_shader,0); - BasicBlock* label_forbody_preheader_i_122 = new BasicBlock("forbody.preheader.i",func_run_fragment_shader,0); - BasicBlock* label_forbody_i_123 = new BasicBlock("forbody.i",func_run_fragment_shader,0); - BasicBlock* label_from_consts_exit_124 = new BasicBlock("from_consts.exit",func_run_fragment_shader,0); - - // Block entry (label_entry_121) - AllocaInst* ptr_consts_125 = new AllocaInst(ArrayTy_20, "consts", label_entry_121); - AllocaInst* ptr_temps_126 = new AllocaInst(ArrayTy_22, "temps", label_entry_121); - AllocaInst* ptr_args_127 = new AllocaInst(StructTy_struct_ShaderInput, "args", label_entry_121); - std::vector<Value*> ptr_tmp_indices; - ptr_tmp_indices.push_back(const_int32_29); - ptr_tmp_indices.push_back(const_int32_35); - Instruction* ptr_tmp = new GetElementPtrInst(ptr_args_127, ptr_tmp_indices.begin(), ptr_tmp_indices.end(), "tmp", label_entry_121); - StoreInst* void_128 = new StoreInst(const_int32_29, ptr_tmp, false, label_entry_121); - ICmpInst* int1_cmp_i_129 = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_consts_120, const_int32_29, "cmp.i", label_entry_121); - new BranchInst(label_forbody_preheader_i_122, label_from_consts_exit_124, int1_cmp_i_129, label_entry_121); - - // Block forbody.preheader.i (label_forbody_preheader_i_122) - BinaryOperator* int32_tmp_i_131 = BinaryOperator::create(Instruction::Add, int32_num_consts_120, const_int32_30, "tmp.i", label_forbody_preheader_i_122); - ICmpInst* int1_tmp9_i_132 = new ICmpInst(ICmpInst::ICMP_SLT, int32_tmp_i_131, const_int32_29, "tmp9.i", label_forbody_preheader_i_122); - SelectInst* int32_tmp10_i_133 = new SelectInst(int1_tmp9_i_132, const_int32_31, int32_num_consts_120, "tmp10.i", label_forbody_preheader_i_122); - new BranchInst(label_forbody_i_123, label_forbody_preheader_i_122); - - // Block forbody.i (label_forbody_i_123) - Argument* fwdref_136 = new Argument(IntegerType::get(32)); - PHINode* int32_i_0_reg2mem_0_i_135 = new PHINode(IntegerType::get(32), "i.0.reg2mem.0.i", label_forbody_i_123); - int32_i_0_reg2mem_0_i_135->reserveOperandSpace(2); - int32_i_0_reg2mem_0_i_135->addIncoming(const_int32_29, label_forbody_preheader_i_122); - int32_i_0_reg2mem_0_i_135->addIncoming(fwdref_136, label_forbody_i_123); - - Argument* fwdref_138 = new Argument(VectorTy_1); - PHINode* packed_vec_0_reg2mem_0_i_137 = new PHINode(VectorTy_1, "vec.0.reg2mem.0.i", label_forbody_i_123); - packed_vec_0_reg2mem_0_i_137->reserveOperandSpace(2); - packed_vec_0_reg2mem_0_i_137->addIncoming(const_packed_32, label_forbody_preheader_i_122); - packed_vec_0_reg2mem_0_i_137->addIncoming(fwdref_138, label_forbody_i_123); - - std::vector<Value*> ptr_arraydecay_i_139_indices; - ptr_arraydecay_i_139_indices.push_back(int32_i_0_reg2mem_0_i_135); - ptr_arraydecay_i_139_indices.push_back(const_int32_29); - Instruction* ptr_arraydecay_i_139 = new GetElementPtrInst(ptr_aconsts_119, ptr_arraydecay_i_139_indices.begin(), ptr_arraydecay_i_139_indices.end(), "arraydecay.i", label_forbody_i_123); - LoadInst* float_tmp5_i_140 = new LoadInst(ptr_arraydecay_i_139, "tmp5.i", false, label_forbody_i_123); - InsertElementInst* packed_tmp7_i_141 = new InsertElementInst(packed_vec_0_reg2mem_0_i_137, float_tmp5_i_140, const_int32_29, "tmp7.i", label_forbody_i_123); - std::vector<Value*> ptr_arrayidx12_i_142_indices; - ptr_arrayidx12_i_142_indices.push_back(int32_i_0_reg2mem_0_i_135); - ptr_arrayidx12_i_142_indices.push_back(const_int32_31); - Instruction* ptr_arrayidx12_i_142 = new GetElementPtrInst(ptr_aconsts_119, ptr_arrayidx12_i_142_indices.begin(), ptr_arrayidx12_i_142_indices.end(), "arrayidx12.i", label_forbody_i_123); - LoadInst* float_tmp13_i_143 = new LoadInst(ptr_arrayidx12_i_142, "tmp13.i", false, label_forbody_i_123); - InsertElementInst* packed_tmp15_i_144 = new InsertElementInst(packed_tmp7_i_141, float_tmp13_i_143, const_int32_31, "tmp15.i", label_forbody_i_123); - std::vector<Value*> ptr_arrayidx20_i_145_indices; - ptr_arrayidx20_i_145_indices.push_back(int32_i_0_reg2mem_0_i_135); - ptr_arrayidx20_i_145_indices.push_back(const_int32_33); - Instruction* ptr_arrayidx20_i_145 = new GetElementPtrInst(ptr_aconsts_119, ptr_arrayidx20_i_145_indices.begin(), ptr_arrayidx20_i_145_indices.end(), "arrayidx20.i", label_forbody_i_123); - LoadInst* float_tmp21_i_146 = new LoadInst(ptr_arrayidx20_i_145, "tmp21.i", false, label_forbody_i_123); - InsertElementInst* packed_tmp23_i_147 = new InsertElementInst(packed_tmp15_i_144, float_tmp21_i_146, const_int32_33, "tmp23.i", label_forbody_i_123); - std::vector<Value*> ptr_arrayidx28_i_148_indices; - ptr_arrayidx28_i_148_indices.push_back(int32_i_0_reg2mem_0_i_135); - ptr_arrayidx28_i_148_indices.push_back(const_int32_34); - Instruction* ptr_arrayidx28_i_148 = new GetElementPtrInst(ptr_aconsts_119, ptr_arrayidx28_i_148_indices.begin(), ptr_arrayidx28_i_148_indices.end(), "arrayidx28.i", label_forbody_i_123); - LoadInst* float_tmp29_i_149 = new LoadInst(ptr_arrayidx28_i_148, "tmp29.i", false, label_forbody_i_123); - InsertElementInst* packed_tmp31_i_150 = new InsertElementInst(packed_tmp23_i_147, float_tmp29_i_149, const_int32_34, "tmp31.i", label_forbody_i_123); - std::vector<Value*> ptr_arrayidx34_i_151_indices; - ptr_arrayidx34_i_151_indices.push_back(const_int32_29); - ptr_arrayidx34_i_151_indices.push_back(int32_i_0_reg2mem_0_i_135); - Instruction* ptr_arrayidx34_i_151 = new GetElementPtrInst(ptr_consts_125, ptr_arrayidx34_i_151_indices.begin(), ptr_arrayidx34_i_151_indices.end(), "arrayidx34.i", label_forbody_i_123); - StoreInst* void_152 = new StoreInst(packed_tmp31_i_150, ptr_arrayidx34_i_151, false, label_forbody_i_123); - BinaryOperator* int32_indvar_next7 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_i_135, const_int32_31, "indvar.next7", label_forbody_i_123); - ICmpInst* int1_exitcond8 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next7, int32_tmp10_i_133, "exitcond8", label_forbody_i_123); - new BranchInst(label_from_consts_exit_124, label_forbody_i_123, int1_exitcond8, label_forbody_i_123); - - // Block from_consts.exit (label_from_consts_exit_124) + BasicBlock* label_entry_110 = new BasicBlock("entry",func_run_fragment_shader,0); + BasicBlock* label_forbody_preheader_i_111 = new BasicBlock("forbody.preheader.i",func_run_fragment_shader,0); + BasicBlock* label_forbody_i_112 = new BasicBlock("forbody.i",func_run_fragment_shader,0); + BasicBlock* label_from_consts_exit_113 = new BasicBlock("from_consts.exit",func_run_fragment_shader,0); + + // Block entry (label_entry_110) + AllocaInst* ptr_consts_114 = new AllocaInst(ArrayTy_20, "consts", label_entry_110); + AllocaInst* ptr_temps_115 = new AllocaInst(ArrayTy_22, "temps", label_entry_110); + AllocaInst* ptr_args_116 = new AllocaInst(StructTy_struct_ShaderInput, "args", label_entry_110); + std::vector<Value*> ptr_tmp_117_indices; + ptr_tmp_117_indices.push_back(const_int32_29); + ptr_tmp_117_indices.push_back(const_int32_35); + Instruction* ptr_tmp_117 = new GetElementPtrInst(ptr_args_116, ptr_tmp_117_indices.begin(), ptr_tmp_117_indices.end(), "tmp", label_entry_110); + StoreInst* void_118 = new StoreInst(const_int32_29, ptr_tmp_117, false, label_entry_110); + ICmpInst* int1_cmp_i_119 = new ICmpInst(ICmpInst::ICMP_SGT, int32_num_consts_109, const_int32_29, "cmp.i", label_entry_110); + new BranchInst(label_forbody_preheader_i_111, label_from_consts_exit_113, int1_cmp_i_119, label_entry_110); + + // Block forbody.preheader.i (label_forbody_preheader_i_111) + BinaryOperator* int32_tmp_i_121 = BinaryOperator::create(Instruction::Add, int32_num_consts_109, const_int32_30, "tmp.i", label_forbody_preheader_i_111); + ICmpInst* int1_tmp9_i_122 = new ICmpInst(ICmpInst::ICMP_SLT, int32_tmp_i_121, const_int32_29, "tmp9.i", label_forbody_preheader_i_111); + SelectInst* int32_tmp10_i_123 = new SelectInst(int1_tmp9_i_122, const_int32_31, int32_num_consts_109, "tmp10.i", label_forbody_preheader_i_111); + new BranchInst(label_forbody_i_112, label_forbody_preheader_i_111); + + // Block forbody.i (label_forbody_i_112) + Argument* fwdref_126 = new Argument(IntegerType::get(32)); + PHINode* int32_i_0_reg2mem_0_i_125 = new PHINode(IntegerType::get(32), "i.0.reg2mem.0.i", label_forbody_i_112); + int32_i_0_reg2mem_0_i_125->reserveOperandSpace(2); + int32_i_0_reg2mem_0_i_125->addIncoming(const_int32_29, label_forbody_preheader_i_111); + int32_i_0_reg2mem_0_i_125->addIncoming(fwdref_126, label_forbody_i_112); + + Argument* fwdref_128 = new Argument(VectorTy_1); + PHINode* packed_vec_0_reg2mem_0_i_127 = new PHINode(VectorTy_1, "vec.0.reg2mem.0.i", label_forbody_i_112); + packed_vec_0_reg2mem_0_i_127->reserveOperandSpace(2); + packed_vec_0_reg2mem_0_i_127->addIncoming(const_packed_32, label_forbody_preheader_i_111); + packed_vec_0_reg2mem_0_i_127->addIncoming(fwdref_128, label_forbody_i_112); + + std::vector<Value*> ptr_arraydecay_i_129_indices; + ptr_arraydecay_i_129_indices.push_back(int32_i_0_reg2mem_0_i_125); + ptr_arraydecay_i_129_indices.push_back(const_int32_29); + Instruction* ptr_arraydecay_i_129 = new GetElementPtrInst(ptr_aconsts_108, ptr_arraydecay_i_129_indices.begin(), ptr_arraydecay_i_129_indices.end(), "arraydecay.i", label_forbody_i_112); + LoadInst* float_tmp5_i_130 = new LoadInst(ptr_arraydecay_i_129, "tmp5.i", false, label_forbody_i_112); + InsertElementInst* packed_tmp7_i_131 = new InsertElementInst(packed_vec_0_reg2mem_0_i_127, float_tmp5_i_130, const_int32_29, "tmp7.i", label_forbody_i_112); + std::vector<Value*> ptr_arrayidx12_i_132_indices; + ptr_arrayidx12_i_132_indices.push_back(int32_i_0_reg2mem_0_i_125); + ptr_arrayidx12_i_132_indices.push_back(const_int32_31); + Instruction* ptr_arrayidx12_i_132 = new GetElementPtrInst(ptr_aconsts_108, ptr_arrayidx12_i_132_indices.begin(), ptr_arrayidx12_i_132_indices.end(), "arrayidx12.i", label_forbody_i_112); + LoadInst* float_tmp13_i_133 = new LoadInst(ptr_arrayidx12_i_132, "tmp13.i", false, label_forbody_i_112); + InsertElementInst* packed_tmp15_i_134 = new InsertElementInst(packed_tmp7_i_131, float_tmp13_i_133, const_int32_31, "tmp15.i", label_forbody_i_112); + std::vector<Value*> ptr_arrayidx20_i_135_indices; + ptr_arrayidx20_i_135_indices.push_back(int32_i_0_reg2mem_0_i_125); + ptr_arrayidx20_i_135_indices.push_back(const_int32_33); + Instruction* ptr_arrayidx20_i_135 = new GetElementPtrInst(ptr_aconsts_108, ptr_arrayidx20_i_135_indices.begin(), ptr_arrayidx20_i_135_indices.end(), "arrayidx20.i", label_forbody_i_112); + LoadInst* float_tmp21_i_136 = new LoadInst(ptr_arrayidx20_i_135, "tmp21.i", false, label_forbody_i_112); + InsertElementInst* packed_tmp23_i_137 = new InsertElementInst(packed_tmp15_i_134, float_tmp21_i_136, const_int32_33, "tmp23.i", label_forbody_i_112); + std::vector<Value*> ptr_arrayidx28_i_138_indices; + ptr_arrayidx28_i_138_indices.push_back(int32_i_0_reg2mem_0_i_125); + ptr_arrayidx28_i_138_indices.push_back(const_int32_34); + Instruction* ptr_arrayidx28_i_138 = new GetElementPtrInst(ptr_aconsts_108, ptr_arrayidx28_i_138_indices.begin(), ptr_arrayidx28_i_138_indices.end(), "arrayidx28.i", label_forbody_i_112); + LoadInst* float_tmp29_i_139 = new LoadInst(ptr_arrayidx28_i_138, "tmp29.i", false, label_forbody_i_112); + InsertElementInst* packed_tmp31_i_140 = new InsertElementInst(packed_tmp23_i_137, float_tmp29_i_139, const_int32_34, "tmp31.i", label_forbody_i_112); + std::vector<Value*> ptr_arrayidx34_i_141_indices; + ptr_arrayidx34_i_141_indices.push_back(const_int32_29); + ptr_arrayidx34_i_141_indices.push_back(int32_i_0_reg2mem_0_i_125); + Instruction* ptr_arrayidx34_i_141 = new GetElementPtrInst(ptr_consts_114, ptr_arrayidx34_i_141_indices.begin(), ptr_arrayidx34_i_141_indices.end(), "arrayidx34.i", label_forbody_i_112); + StoreInst* void_142 = new StoreInst(packed_tmp31_i_140, ptr_arrayidx34_i_141, false, label_forbody_i_112); + BinaryOperator* int32_indvar_next7 = BinaryOperator::create(Instruction::Add, int32_i_0_reg2mem_0_i_125, const_int32_31, "indvar.next7", label_forbody_i_112); + ICmpInst* int1_exitcond8 = new ICmpInst(ICmpInst::ICMP_EQ, int32_indvar_next7, int32_tmp10_i_123, "exitcond8", label_forbody_i_112); + new BranchInst(label_from_consts_exit_113, label_forbody_i_112, int1_exitcond8, label_forbody_i_112); + + // Block from_consts.exit (label_from_consts_exit_113) std::vector<Value*> ptr_tmp3_indices; ptr_tmp3_indices.push_back(const_int32_29); ptr_tmp3_indices.push_back(const_int32_34); - Instruction* ptr_tmp3 = new GetElementPtrInst(ptr_args_127, ptr_tmp3_indices.begin(), ptr_tmp3_indices.end(), "tmp3", label_from_consts_exit_124); + Instruction* ptr_tmp3 = new GetElementPtrInst(ptr_args_116, ptr_tmp3_indices.begin(), ptr_tmp3_indices.end(), "tmp3", label_from_consts_exit_113); std::vector<Value*> ptr_arraydecay4_indices; ptr_arraydecay4_indices.push_back(const_int32_29); ptr_arraydecay4_indices.push_back(const_int32_29); - Instruction* ptr_arraydecay4 = new GetElementPtrInst(ptr_consts_125, ptr_arraydecay4_indices.begin(), ptr_arraydecay4_indices.end(), "arraydecay4", label_from_consts_exit_124); - StoreInst* void_154 = new StoreInst(ptr_arraydecay4, ptr_tmp3, false, label_from_consts_exit_124); + Instruction* ptr_arraydecay4 = new GetElementPtrInst(ptr_consts_114, ptr_arraydecay4_indices.begin(), ptr_arraydecay4_indices.end(), "arraydecay4", label_from_consts_exit_113); + StoreInst* void_144 = new StoreInst(ptr_arraydecay4, ptr_tmp3, false, label_from_consts_exit_113); std::vector<Value*> ptr_tmp5_indices; ptr_tmp5_indices.push_back(const_int32_29); ptr_tmp5_indices.push_back(const_int32_33); - Instruction* ptr_tmp5 = new GetElementPtrInst(ptr_args_127, ptr_tmp5_indices.begin(), ptr_tmp5_indices.end(), "tmp5", label_from_consts_exit_124); + Instruction* ptr_tmp5 = new GetElementPtrInst(ptr_args_116, ptr_tmp5_indices.begin(), ptr_tmp5_indices.end(), "tmp5", label_from_consts_exit_113); std::vector<Value*> ptr_arraydecay6_indices; ptr_arraydecay6_indices.push_back(const_int32_29); ptr_arraydecay6_indices.push_back(const_int32_29); - Instruction* ptr_arraydecay6 = new GetElementPtrInst(ptr_temps_126, ptr_arraydecay6_indices.begin(), ptr_arraydecay6_indices.end(), "arraydecay6", label_from_consts_exit_124); - StoreInst* void_155 = new StoreInst(ptr_arraydecay6, ptr_tmp5, false, label_from_consts_exit_124); - std::vector<Value*> ptr_tmp8_156_indices; - ptr_tmp8_156_indices.push_back(const_int32_29); - ptr_tmp8_156_indices.push_back(const_int32_31); - Instruction* ptr_tmp8_156 = new GetElementPtrInst(ptr_args_127, ptr_tmp8_156_indices.begin(), ptr_tmp8_156_indices.end(), "tmp8", label_from_consts_exit_124); - std::vector<Value*> ptr_tmp12_157_indices; - ptr_tmp12_157_indices.push_back(const_int32_29); - ptr_tmp12_157_indices.push_back(const_int32_29); - Instruction* ptr_tmp12_157 = new GetElementPtrInst(ptr_args_127, ptr_tmp12_157_indices.begin(), ptr_tmp12_157_indices.end(), "tmp12", label_from_consts_exit_124); - std::vector<Value*> ptr_arraydecay11_158_indices; - ptr_arraydecay11_158_indices.push_back(const_int32_29); - ptr_arraydecay11_158_indices.push_back(const_int32_29); - Instruction* ptr_arraydecay11_158 = new GetElementPtrInst(ptr_inputs_117, ptr_arraydecay11_158_indices.begin(), ptr_arraydecay11_158_indices.end(), "arraydecay11", label_from_consts_exit_124); - StoreInst* void_159 = new StoreInst(ptr_arraydecay11_158, ptr_tmp8_156, false, label_from_consts_exit_124); - std::vector<Value*> ptr_arraydecay16_160_indices; - ptr_arraydecay16_160_indices.push_back(const_int32_29); - ptr_arraydecay16_160_indices.push_back(const_int32_29); - Instruction* ptr_arraydecay16_160 = new GetElementPtrInst(ptr_results_116, ptr_arraydecay16_160_indices.begin(), ptr_arraydecay16_160_indices.end(), "arraydecay16", label_from_consts_exit_124); - StoreInst* void_161 = new StoreInst(ptr_arraydecay16_160, ptr_tmp12_157, false, label_from_consts_exit_124); - StoreInst* void_162 = new StoreInst(const_int32_29, ptr_tmp, false, label_from_consts_exit_124); - CallInst* void_163 = new CallInst(func_execute_shader, ptr_args_127, "", label_from_consts_exit_124); - void_163->setCallingConv(CallingConv::C); - void_163->setTailCall(false);const ParamAttrsList *void_163_PAL = 0; - void_163->setParamAttrs(void_163_PAL); - - LoadInst* int32_tmp23 = new LoadInst(ptr_tmp, "tmp23", false, label_from_consts_exit_124); + Instruction* ptr_arraydecay6 = new GetElementPtrInst(ptr_temps_115, ptr_arraydecay6_indices.begin(), ptr_arraydecay6_indices.end(), "arraydecay6", label_from_consts_exit_113); + StoreInst* void_145 = new StoreInst(ptr_arraydecay6, ptr_tmp5, false, label_from_consts_exit_113); + std::vector<Value*> ptr_tmp8_indices; + ptr_tmp8_indices.push_back(const_int32_29); + ptr_tmp8_indices.push_back(const_int32_31); + Instruction* ptr_tmp8 = new GetElementPtrInst(ptr_args_116, ptr_tmp8_indices.begin(), ptr_tmp8_indices.end(), "tmp8", label_from_consts_exit_113); + std::vector<Value*> ptr_tmp12_indices; + ptr_tmp12_indices.push_back(const_int32_29); + ptr_tmp12_indices.push_back(const_int32_29); + Instruction* ptr_tmp12 = new GetElementPtrInst(ptr_args_116, ptr_tmp12_indices.begin(), ptr_tmp12_indices.end(), "tmp12", label_from_consts_exit_113); + std::vector<Value*> ptr_arraydecay11_146_indices; + ptr_arraydecay11_146_indices.push_back(const_int32_29); + ptr_arraydecay11_146_indices.push_back(const_int32_29); + Instruction* ptr_arraydecay11_146 = new GetElementPtrInst(ptr_inputs_106, ptr_arraydecay11_146_indices.begin(), ptr_arraydecay11_146_indices.end(), "arraydecay11", label_from_consts_exit_113); + StoreInst* void_147 = new StoreInst(ptr_arraydecay11_146, ptr_tmp8, false, label_from_consts_exit_113); + std::vector<Value*> ptr_arraydecay16_indices; + ptr_arraydecay16_indices.push_back(const_int32_29); + ptr_arraydecay16_indices.push_back(const_int32_29); + Instruction* ptr_arraydecay16 = new GetElementPtrInst(ptr_results_105, ptr_arraydecay16_indices.begin(), ptr_arraydecay16_indices.end(), "arraydecay16", label_from_consts_exit_113); + StoreInst* void_148 = new StoreInst(ptr_arraydecay16, ptr_tmp12, false, label_from_consts_exit_113); + StoreInst* void_149 = new StoreInst(const_int32_29, ptr_tmp_117, false, label_from_consts_exit_113); + CallInst* void_150 = new CallInst(func_execute_shader, ptr_args_116, "", label_from_consts_exit_113); + void_150->setCallingConv(CallingConv::C); + void_150->setTailCall(false);const ParamAttrsList *void_150_PAL = 0; + void_150->setParamAttrs(void_150_PAL); + + LoadInst* int32_tmp23 = new LoadInst(ptr_tmp_117, "tmp23", false, label_from_consts_exit_113); std::vector<Value*> ptr_arraydecay11_1_indices; ptr_arraydecay11_1_indices.push_back(const_int32_31); ptr_arraydecay11_1_indices.push_back(const_int32_29); - Instruction* ptr_arraydecay11_1 = new GetElementPtrInst(ptr_inputs_117, ptr_arraydecay11_1_indices.begin(), ptr_arraydecay11_1_indices.end(), "arraydecay11.1", label_from_consts_exit_124); - StoreInst* void_164 = new StoreInst(ptr_arraydecay11_1, ptr_tmp8_156, false, label_from_consts_exit_124); + Instruction* ptr_arraydecay11_1 = new GetElementPtrInst(ptr_inputs_106, ptr_arraydecay11_1_indices.begin(), ptr_arraydecay11_1_indices.end(), "arraydecay11.1", label_from_consts_exit_113); + StoreInst* void_151 = new StoreInst(ptr_arraydecay11_1, ptr_tmp8, false, label_from_consts_exit_113); std::vector<Value*> ptr_arraydecay16_1_indices; ptr_arraydecay16_1_indices.push_back(const_int32_31); ptr_arraydecay16_1_indices.push_back(const_int32_29); - Instruction* ptr_arraydecay16_1 = new GetElementPtrInst(ptr_results_116, ptr_arraydecay16_1_indices.begin(), ptr_arraydecay16_1_indices.end(), "arraydecay16.1", label_from_consts_exit_124); - StoreInst* void_165 = new StoreInst(ptr_arraydecay16_1, ptr_tmp12_157, false, label_from_consts_exit_124); - StoreInst* void_166 = new StoreInst(const_int32_29, ptr_tmp, false, label_from_consts_exit_124); - CallInst* void_167 = new CallInst(func_execute_shader, ptr_args_127, "", label_from_consts_exit_124); - void_167->setCallingConv(CallingConv::C); - void_167->setTailCall(false);const ParamAttrsList *void_167_PAL = 0; - void_167->setParamAttrs(void_167_PAL); - - LoadInst* int32_tmp23_1 = new LoadInst(ptr_tmp, "tmp23.1", false, label_from_consts_exit_124); - BinaryOperator* int32_shl_1 = BinaryOperator::create(Instruction::Shl, int32_tmp23_1, const_int32_31, "shl.1", label_from_consts_exit_124); - BinaryOperator* int32_or_1 = BinaryOperator::create(Instruction::Or, int32_shl_1, int32_tmp23, "or.1", label_from_consts_exit_124); + Instruction* ptr_arraydecay16_1 = new GetElementPtrInst(ptr_results_105, ptr_arraydecay16_1_indices.begin(), ptr_arraydecay16_1_indices.end(), "arraydecay16.1", label_from_consts_exit_113); + StoreInst* void_152 = new StoreInst(ptr_arraydecay16_1, ptr_tmp12, false, label_from_consts_exit_113); + StoreInst* void_153 = new StoreInst(const_int32_29, ptr_tmp_117, false, label_from_consts_exit_113); + CallInst* void_154 = new CallInst(func_execute_shader, ptr_args_116, "", label_from_consts_exit_113); + void_154->setCallingConv(CallingConv::C); + void_154->setTailCall(false);const ParamAttrsList *void_154_PAL = 0; + void_154->setParamAttrs(void_154_PAL); + + LoadInst* int32_tmp23_1 = new LoadInst(ptr_tmp_117, "tmp23.1", false, label_from_consts_exit_113); + BinaryOperator* int32_shl_1 = BinaryOperator::create(Instruction::Shl, int32_tmp23_1, const_int32_31, "shl.1", label_from_consts_exit_113); + BinaryOperator* int32_or_1 = BinaryOperator::create(Instruction::Or, int32_shl_1, int32_tmp23, "or.1", label_from_consts_exit_113); std::vector<Value*> ptr_arraydecay11_2_indices; ptr_arraydecay11_2_indices.push_back(const_int32_33); ptr_arraydecay11_2_indices.push_back(const_int32_29); - Instruction* ptr_arraydecay11_2 = new GetElementPtrInst(ptr_inputs_117, ptr_arraydecay11_2_indices.begin(), ptr_arraydecay11_2_indices.end(), "arraydecay11.2", label_from_consts_exit_124); - StoreInst* void_168 = new StoreInst(ptr_arraydecay11_2, ptr_tmp8_156, false, label_from_consts_exit_124); + Instruction* ptr_arraydecay11_2 = new GetElementPtrInst(ptr_inputs_106, ptr_arraydecay11_2_indices.begin(), ptr_arraydecay11_2_indices.end(), "arraydecay11.2", label_from_consts_exit_113); + StoreInst* void_155 = new StoreInst(ptr_arraydecay11_2, ptr_tmp8, false, label_from_consts_exit_113); std::vector<Value*> ptr_arraydecay16_2_indices; ptr_arraydecay16_2_indices.push_back(const_int32_33); ptr_arraydecay16_2_indices.push_back(const_int32_29); - Instruction* ptr_arraydecay16_2 = new GetElementPtrInst(ptr_results_116, ptr_arraydecay16_2_indices.begin(), ptr_arraydecay16_2_indices.end(), "arraydecay16.2", label_from_consts_exit_124); - StoreInst* void_169 = new StoreInst(ptr_arraydecay16_2, ptr_tmp12_157, false, label_from_consts_exit_124); - StoreInst* void_170 = new StoreInst(const_int32_29, ptr_tmp, false, label_from_consts_exit_124); - CallInst* void_171 = new CallInst(func_execute_shader, ptr_args_127, "", label_from_consts_exit_124); - void_171->setCallingConv(CallingConv::C); - void_171->setTailCall(false);const ParamAttrsList *void_171_PAL = 0; - void_171->setParamAttrs(void_171_PAL); - - LoadInst* int32_tmp23_2 = new LoadInst(ptr_tmp, "tmp23.2", false, label_from_consts_exit_124); - BinaryOperator* int32_shl_2 = BinaryOperator::create(Instruction::Shl, int32_tmp23_2, const_int32_33, "shl.2", label_from_consts_exit_124); - BinaryOperator* int32_or_2 = BinaryOperator::create(Instruction::Or, int32_shl_2, int32_or_1, "or.2", label_from_consts_exit_124); + Instruction* ptr_arraydecay16_2 = new GetElementPtrInst(ptr_results_105, ptr_arraydecay16_2_indices.begin(), ptr_arraydecay16_2_indices.end(), "arraydecay16.2", label_from_consts_exit_113); + StoreInst* void_156 = new StoreInst(ptr_arraydecay16_2, ptr_tmp12, false, label_from_consts_exit_113); + StoreInst* void_157 = new StoreInst(const_int32_29, ptr_tmp_117, false, label_from_consts_exit_113); + CallInst* void_158 = new CallInst(func_execute_shader, ptr_args_116, "", label_from_consts_exit_113); + void_158->setCallingConv(CallingConv::C); + void_158->setTailCall(false);const ParamAttrsList *void_158_PAL = 0; + void_158->setParamAttrs(void_158_PAL); + + LoadInst* int32_tmp23_2 = new LoadInst(ptr_tmp_117, "tmp23.2", false, label_from_consts_exit_113); + BinaryOperator* int32_shl_2 = BinaryOperator::create(Instruction::Shl, int32_tmp23_2, const_int32_33, "shl.2", label_from_consts_exit_113); + BinaryOperator* int32_or_2 = BinaryOperator::create(Instruction::Or, int32_shl_2, int32_or_1, "or.2", label_from_consts_exit_113); std::vector<Value*> ptr_arraydecay11_3_indices; ptr_arraydecay11_3_indices.push_back(const_int32_34); ptr_arraydecay11_3_indices.push_back(const_int32_29); - Instruction* ptr_arraydecay11_3 = new GetElementPtrInst(ptr_inputs_117, ptr_arraydecay11_3_indices.begin(), ptr_arraydecay11_3_indices.end(), "arraydecay11.3", label_from_consts_exit_124); - StoreInst* void_172 = new StoreInst(ptr_arraydecay11_3, ptr_tmp8_156, false, label_from_consts_exit_124); + Instruction* ptr_arraydecay11_3 = new GetElementPtrInst(ptr_inputs_106, ptr_arraydecay11_3_indices.begin(), ptr_arraydecay11_3_indices.end(), "arraydecay11.3", label_from_consts_exit_113); + StoreInst* void_159 = new StoreInst(ptr_arraydecay11_3, ptr_tmp8, false, label_from_consts_exit_113); std::vector<Value*> ptr_arraydecay16_3_indices; ptr_arraydecay16_3_indices.push_back(const_int32_34); ptr_arraydecay16_3_indices.push_back(const_int32_29); - Instruction* ptr_arraydecay16_3 = new GetElementPtrInst(ptr_results_116, ptr_arraydecay16_3_indices.begin(), ptr_arraydecay16_3_indices.end(), "arraydecay16.3", label_from_consts_exit_124); - StoreInst* void_173 = new StoreInst(ptr_arraydecay16_3, ptr_tmp12_157, false, label_from_consts_exit_124); - StoreInst* void_174 = new StoreInst(const_int32_29, ptr_tmp, false, label_from_consts_exit_124); - CallInst* void_175 = new CallInst(func_execute_shader, ptr_args_127, "", label_from_consts_exit_124); - void_175->setCallingConv(CallingConv::C); - void_175->setTailCall(false);const ParamAttrsList *void_175_PAL = 0; - void_175->setParamAttrs(void_175_PAL); - - LoadInst* int32_tmp23_3 = new LoadInst(ptr_tmp, "tmp23.3", false, label_from_consts_exit_124); - BinaryOperator* int32_shl_3 = BinaryOperator::create(Instruction::Shl, int32_tmp23_3, const_int32_34, "shl.3", label_from_consts_exit_124); - BinaryOperator* int32_or_3 = BinaryOperator::create(Instruction::Or, int32_shl_3, int32_or_2, "or.3", label_from_consts_exit_124); - BinaryOperator* int32_neg = BinaryOperator::create(Instruction::Xor, int32_or_3, const_int32_30, "neg", label_from_consts_exit_124); - new ReturnInst(int32_neg, label_from_consts_exit_124); + Instruction* ptr_arraydecay16_3 = new GetElementPtrInst(ptr_results_105, ptr_arraydecay16_3_indices.begin(), ptr_arraydecay16_3_indices.end(), "arraydecay16.3", label_from_consts_exit_113); + StoreInst* void_160 = new StoreInst(ptr_arraydecay16_3, ptr_tmp12, false, label_from_consts_exit_113); + StoreInst* void_161 = new StoreInst(const_int32_29, ptr_tmp_117, false, label_from_consts_exit_113); + CallInst* void_162 = new CallInst(func_execute_shader, ptr_args_116, "", label_from_consts_exit_113); + void_162->setCallingConv(CallingConv::C); + void_162->setTailCall(false);const ParamAttrsList *void_162_PAL = 0; + void_162->setParamAttrs(void_162_PAL); + + LoadInst* int32_tmp23_3 = new LoadInst(ptr_tmp_117, "tmp23.3", false, label_from_consts_exit_113); + BinaryOperator* int32_shl_3 = BinaryOperator::create(Instruction::Shl, int32_tmp23_3, const_int32_34, "shl.3", label_from_consts_exit_113); + BinaryOperator* int32_or_3 = BinaryOperator::create(Instruction::Or, int32_shl_3, int32_or_2, "or.3", label_from_consts_exit_113); + BinaryOperator* int32_neg = BinaryOperator::create(Instruction::Xor, int32_or_3, const_int32_30, "neg", label_from_consts_exit_113); + new ReturnInst(int32_neg, label_from_consts_exit_113); // Resolve Forward References - fwdref_138->replaceAllUsesWith(packed_tmp31_i_150); delete fwdref_138; - fwdref_136->replaceAllUsesWith(int32_indvar_next7); delete fwdref_136; + fwdref_128->replaceAllUsesWith(packed_tmp31_i_140); delete fwdref_128; + fwdref_126->replaceAllUsesWith(int32_indvar_next7); delete fwdref_126; } diff --git a/src/mesa/pipe/llvm/llvm_entry.c b/src/mesa/pipe/llvm/llvm_entry.c index c3b34584e16..fa50b60e663 100644 --- a/src/mesa/pipe/llvm/llvm_entry.c +++ b/src/mesa/pipe/llvm/llvm_entry.c @@ -86,8 +86,8 @@ struct ShaderInput extern void execute_shader(struct ShaderInput *input); -void run_vertex_shader(float4 (*inputs)[16], - float4 (*results)[16], +void run_vertex_shader(void *inputs, + void *results, float (*aconsts)[4], int num_vertices, int num_inputs, @@ -98,16 +98,16 @@ void run_vertex_shader(float4 (*inputs)[16], float4 temps[128];//MAX_PROGRAM_TEMPS struct ShaderInput args; + args.dests = results; + args.inputs = inputs; + /*printf("XXX LLVM run_vertex_shader vertices = %d, inputs = %d, attribs = %d, consts = %d\n", num_vertices, num_inputs, num_attribs, num_consts);*/ from_consts(consts, aconsts, num_consts); args.consts = consts; args.temps = temps; - for (int i = 0; i < num_vertices; ++i) { - args.dests = results[i]; - args.inputs = inputs[i]; - execute_shader(&args); - } + + execute_shader(&args); } diff --git a/src/mesa/pipe/llvm/loweringpass.cpp b/src/mesa/pipe/llvm/loweringpass.cpp new file mode 100644 index 00000000000..556dbec3661 --- /dev/null +++ b/src/mesa/pipe/llvm/loweringpass.cpp @@ -0,0 +1,17 @@ +#include "loweringpass.h" + +using namespace llvm; + +char LoweringPass::ID = 0; +RegisterPass<LoweringPass> X("lowering", "Lowering Pass"); + +LoweringPass::LoweringPass() + : ModulePass((intptr_t)&ID) +{ +} + +bool LoweringPass::runOnModule(Module &m) +{ + llvm::cerr << "Hello: " << m.getModuleIdentifier() << "\n"; + return false; +} diff --git a/src/mesa/pipe/llvm/loweringpass.h b/src/mesa/pipe/llvm/loweringpass.h new file mode 100644 index 00000000000..f62dcf6ba73 --- /dev/null +++ b/src/mesa/pipe/llvm/loweringpass.h @@ -0,0 +1,15 @@ +#ifndef LOWERINGPASS_H +#define LOWERINGPASS_H + +#include "llvm/Pass.h" +#include "llvm/Module.h" + +struct LoweringPass : public llvm::ModulePass +{ + static char ID; + LoweringPass(); + + virtual bool runOnModule(llvm::Module &m); +}; + +#endif diff --git a/src/mesa/pipe/llvm/storagesoa.cpp b/src/mesa/pipe/llvm/storagesoa.cpp new file mode 100644 index 00000000000..b2aca3557a9 --- /dev/null +++ b/src/mesa/pipe/llvm/storagesoa.cpp @@ -0,0 +1,117 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "storagesoa.h" + + +#include "pipe/p_shader_tokens.h" +#include <llvm/BasicBlock.h> +#include <llvm/Module.h> +#include <llvm/Value.h> + +#include <llvm/CallingConv.h> +#include <llvm/Constants.h> +#include <llvm/DerivedTypes.h> +#include <llvm/InstrTypes.h> +#include <llvm/Instructions.h> + +using namespace llvm; + +StorageSoa::StorageSoa(llvm::BasicBlock *block, + llvm::Value *input, + llvm::Value *output, + llvm::Value *consts) +{ +} + +void StorageSoa::addImmediate(float *vec) +{ +} + +llvm::Value *StorageSoa::addrElement(int idx) const +{ + return 0; +} + +std::vector<llvm::Value*> StorageSoa::inputElement(int idx, int swizzle, + llvm::Value *indIdx) +{ + std::vector<llvm::Value*> res(4); + + return res; +} + +std::vector<llvm::Value*> StorageSoa::constElement(int idx, int swizzle, + llvm::Value *indIdx) +{ + std::vector<llvm::Value*> res(4); + + return res; +} + +std::vector<llvm::Value*> StorageSoa::outputElement(int idx, int swizzle, + llvm::Value *indIdx) +{ + std::vector<llvm::Value*> res(4); + + return res; +} + +std::vector<llvm::Value*> StorageSoa::tempElement(int idx, int swizzle, + llvm::Value *indIdx) +{ + std::vector<llvm::Value*> res(4); + + return res; +} + +std::vector<llvm::Value*> StorageSoa::immediateElement(int idx, int swizzle) +{ + std::vector<llvm::Value*> res(4); + + return res; +} + +llvm::Value * StorageSoa::extractIndex(llvm::Value *vec) +{ + return 0; +} + +void StorageSoa::storeOutput(int dstIdx, const std::vector<llvm::Value*> &val, + int mask) +{ +} + +void StorageSoa::storeTemp(int idx, const std::vector<llvm::Value*> &val, + int mask) +{ +} + +void StorageSoa::storeAddress(int idx, const std::vector<llvm::Value*> &val, + int mask) +{ +} diff --git a/src/mesa/pipe/llvm/storagesoa.h b/src/mesa/pipe/llvm/storagesoa.h new file mode 100644 index 00000000000..551b0b97342 --- /dev/null +++ b/src/mesa/pipe/llvm/storagesoa.h @@ -0,0 +1,71 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef STORAGESOA_H +#define STORAGESOA_H + +#include <vector> + +namespace llvm { + class BasicBlock; + class Constant; + class ConstantInt; + class LoadInst; + class Value; + class VectorType; +} + +class StorageSoa +{ +public: + StorageSoa(llvm::BasicBlock *block, + llvm::Value *input, + llvm::Value *output, + llvm::Value *consts); + + void addImmediate(float *vec); + + llvm::Value * addrElement(int idx) const; + + std::vector<llvm::Value*> inputElement(int idx, int swizzle, llvm::Value *indIdx =0); + std::vector<llvm::Value*> constElement(int idx, int swizzle, llvm::Value *indIdx =0); + std::vector<llvm::Value*> outputElement(int idx, int swizzle, llvm::Value *indIdx =0); + std::vector<llvm::Value*> tempElement(int idx, int swizzle, llvm::Value *indIdx =0); + std::vector<llvm::Value*> immediateElement(int idx, int swizzle); + + llvm::Value *extractIndex(llvm::Value *vec); + + void storeOutput(int dstIdx, const std::vector<llvm::Value*> &val, + int mask); + void storeTemp(int idx, const std::vector<llvm::Value*> &val, + int mask); + void storeAddress(int idx, const std::vector<llvm::Value*> &val, + int mask); + +}; + +#endif diff --git a/src/mesa/pipe/llvm/tgsitollvm.cpp b/src/mesa/pipe/llvm/tgsitollvm.cpp new file mode 100644 index 00000000000..bc4df610717 --- /dev/null +++ b/src/mesa/pipe/llvm/tgsitollvm.cpp @@ -0,0 +1,1174 @@ +#include "tgsitollvm.h" + +#include "gallivm.h" +#include "gallivm_p.h" + +#include "storage.h" +#include "instructions.h" +#include "storagesoa.h" +#include "instructionssoa.h" + +#include "pipe/p_shader_tokens.h" + +#include "pipe/tgsi/util/tgsi_parse.h" +#include "pipe/tgsi/exec/tgsi_exec.h" +#include "pipe/tgsi/util/tgsi_util.h" +#include "pipe/tgsi/util/tgsi_build.h" +#include "pipe/tgsi/util/tgsi_dump.h" + + +#include <llvm/Module.h> +#include <llvm/CallingConv.h> +#include <llvm/Constants.h> +#include <llvm/DerivedTypes.h> +#include <llvm/Instructions.h> +#include <llvm/ModuleProvider.h> +#include <llvm/Pass.h> +#include <llvm/PassManager.h> +#include <llvm/ParameterAttributes.h> +#include <llvm/Support/PatternMatch.h> +#include <llvm/ExecutionEngine/JIT.h> +#include <llvm/ExecutionEngine/Interpreter.h> +#include <llvm/ExecutionEngine/GenericValue.h> +#include <llvm/Support/MemoryBuffer.h> +#include <llvm/LinkAllPasses.h> +#include <llvm/Analysis/Verifier.h> +#include <llvm/Analysis/LoopPass.h> +#include <llvm/Target/TargetData.h> +#include <llvm/Bitcode/ReaderWriter.h> +#include <llvm/Transforms/Utils/Cloning.h> + + +#include <sstream> +#include <fstream> +#include <iostream> + +using namespace llvm; +#include "llvm_base_shader.cpp" + +static inline void +add_interpolator(struct gallivm_ir *ir, + struct gallivm_interpolate *interp) +{ + ir->interpolators[ir->num_interp] = *interp; + ++ir->num_interp; +} + +static void +translate_declaration(struct gallivm_ir *prog, + llvm::Module *module, + Storage *storage, + struct tgsi_full_declaration *decl, + struct tgsi_full_declaration *fd) +{ + if (decl->Declaration.File == TGSI_FILE_INPUT) { + unsigned first, last, mask; + uint interp_method; + + assert(decl->Declaration.Declare == TGSI_DECLARE_RANGE); + + first = decl->u.DeclarationRange.First; + last = decl->u.DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + /* Do not touch WPOS.xy */ + if (first == 0) { + mask &= ~TGSI_WRITEMASK_XY; + if (mask == TGSI_WRITEMASK_NONE) { + first++; + if (first > last) { + return; + } + } + } + + interp_method = decl->Interpolation.Interpolate; + + if (mask == TGSI_WRITEMASK_XYZW) { + unsigned i, j; + + for (i = first; i <= last; i++) { + for (j = 0; j < NUM_CHANNELS; j++) { + //interp( mach, i, j ); + struct gallivm_interpolate interp; + interp.type = interp_method; + interp.attrib = i; + interp.chan = j; + add_interpolator(prog, &interp); + } + } + } else { + unsigned i, j; + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + for( i = first; i <= last; i++ ) { + struct gallivm_interpolate interp; + interp.type = interp_method; + interp.attrib = i; + interp.chan = j; + add_interpolator(prog, &interp); + } + } + } + } + } +} + +static void +translate_declarationir(struct gallivm_ir *, + llvm::Module *, + StorageSoa *, + struct tgsi_full_declaration *, + struct tgsi_full_declaration *) +{ +} + +static void +translate_immediate(Storage *storage, + struct tgsi_full_immediate *imm) +{ + float vec[4]; + int i; + for (i = 0; i < imm->Immediate.Size - 1; ++i) { + switch (imm->Immediate.DataType) { + case TGSI_IMM_FLOAT32: + vec[i] = imm->u.ImmediateFloat32[i].Float; + break; + default: + assert(0); + } + } + storage->addImmediate(vec); +} + + +static void +translate_immediateir(StorageSoa *storage, + struct tgsi_full_immediate *imm) +{ + float vec[4]; + int i; + for (i = 0; i < imm->Immediate.Size - 1; ++i) { + switch (imm->Immediate.DataType) { + case TGSI_IMM_FLOAT32: + vec[i] = imm->u.ImmediateFloat32[i].Float; + break; + default: + assert(0); + } + } + storage->addImmediate(vec); +} + +static inline int +swizzleInt(struct tgsi_full_src_register *src) +{ + int swizzle = 0; + int start = 1000; + + for (int k = 0; k < 4; ++k) { + swizzle += tgsi_util_get_full_src_register_extswizzle(src, k) * start; + start /= 10; + } + return swizzle; +} + +static inline llvm::Value * +swizzleVector(llvm::Value *val, struct tgsi_full_src_register *src, + Storage *storage) +{ + int swizzle = swizzleInt(src); + const int NO_SWIZZLE = TGSI_SWIZZLE_X * 1000 + TGSI_SWIZZLE_Y * 100 + + TGSI_SWIZZLE_Z * 10 + TGSI_SWIZZLE_W; + if (swizzle != NO_SWIZZLE) { + /*fprintf(stderr, "XXXXXXXX swizzle = %d\n", swizzle);*/ + val = storage->shuffleVector(val, swizzle); + } + return val; +} + +static void +translate_instruction(llvm::Module *module, + Storage *storage, + Instructions *instr, + struct tgsi_full_instruction *inst, + struct tgsi_full_instruction *fi, + unsigned instno) +{ + llvm::Value *inputs[4]; + inputs[0] = 0; + inputs[1] = 0; + inputs[2] = 0; + inputs[3] = 0; + + for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { + struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + llvm::Value *val = 0; + llvm::Value *indIdx = 0; + + if (src->SrcRegister.Indirect) { + indIdx = storage->addrElement(src->SrcRegisterInd.Index); + indIdx = storage->extractIndex(indIdx); + } + if (src->SrcRegister.File == TGSI_FILE_CONSTANT) { + val = storage->constElement(src->SrcRegister.Index, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_INPUT) { + val = storage->inputElement(src->SrcRegister.Index, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) { + val = storage->tempElement(src->SrcRegister.Index); + } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) { + val = storage->outputElement(src->SrcRegister.Index, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { + val = storage->immediateElement(src->SrcRegister.Index); + } else { + fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File); + return; + } + + inputs[i] = swizzleVector(val, src, storage); + } + + /*if (inputs[0]) + instr->printVector(inputs[0]); + if (inputs[1]) + instr->printVector(inputs[1]);*/ + llvm::Value *out = 0; + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: { + out = instr->arl(inputs[0]); + } + break; + case TGSI_OPCODE_MOV: { + out = inputs[0]; + } + break; + case TGSI_OPCODE_LIT: { + out = instr->lit(inputs[0]); + } + break; + case TGSI_OPCODE_RCP: { + out = instr->rcp(inputs[0]); + } + break; + case TGSI_OPCODE_RSQ: { + out = instr->rsq(inputs[0]); + } + break; + case TGSI_OPCODE_EXP: + break; + case TGSI_OPCODE_LOG: + break; + case TGSI_OPCODE_MUL: { + out = instr->mul(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_ADD: { + out = instr->add(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DP3: { + out = instr->dp3(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DP4: { + out = instr->dp4(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DST: { + out = instr->dst(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_MIN: { + out = instr->min(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_MAX: { + out = instr->max(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_SLT: { + out = instr->slt(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_SGE: { + out = instr->sge(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_MAD: { + out = instr->madd(inputs[0], inputs[1], inputs[2]); + } + break; + case TGSI_OPCODE_SUB: { + out = instr->sub(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_LERP: { + out = instr->lerp(inputs[0], inputs[1], inputs[2]); + } + break; + case TGSI_OPCODE_CND: + break; + case TGSI_OPCODE_CND0: + break; + case TGSI_OPCODE_DOT2ADD: + break; + case TGSI_OPCODE_INDEX: + break; + case TGSI_OPCODE_NEGATE: + break; + case TGSI_OPCODE_FRAC: { + out = instr->frc(inputs[0]); + } + break; + case TGSI_OPCODE_CLAMP: + break; + case TGSI_OPCODE_FLOOR: { + out = instr->floor(inputs[0]); + } + break; + case TGSI_OPCODE_ROUND: + break; + case TGSI_OPCODE_EXPBASE2: { + out = instr->ex2(inputs[0]); + } + break; + case TGSI_OPCODE_LOGBASE2: { + out = instr->lg2(inputs[0]); + } + break; + case TGSI_OPCODE_POWER: { + out = instr->pow(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_CROSSPRODUCT: { + out = instr->cross(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_MULTIPLYMATRIX: + break; + case TGSI_OPCODE_ABS: { + out = instr->abs(inputs[0]); + } + break; + case TGSI_OPCODE_RCC: + break; + case TGSI_OPCODE_DPH: { + out = instr->dph(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_COS: { + out = instr->cos(inputs[0]); + } + break; + case TGSI_OPCODE_DDX: + break; + case TGSI_OPCODE_DDY: + break; + case TGSI_OPCODE_KILP: { + out = instr->kilp(inputs[0]); + storage->setKilElement(out); + return; + } + break; + case TGSI_OPCODE_PK2H: + break; + case TGSI_OPCODE_PK2US: + break; + case TGSI_OPCODE_PK4B: + break; + case TGSI_OPCODE_PK4UB: + break; + case TGSI_OPCODE_RFL: + break; + case TGSI_OPCODE_SEQ: + break; + case TGSI_OPCODE_SFL: + break; + case TGSI_OPCODE_SGT: { + out = instr->sgt(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_SIN: { + out = instr->sin(inputs[0]); + } + break; + case TGSI_OPCODE_SLE: + break; + case TGSI_OPCODE_SNE: + break; + case TGSI_OPCODE_STR: + break; + case TGSI_OPCODE_TEX: + break; + case TGSI_OPCODE_TXD: + break; + case TGSI_OPCODE_UP2H: + break; + case TGSI_OPCODE_UP2US: + break; + case TGSI_OPCODE_UP4B: + break; + case TGSI_OPCODE_UP4UB: + break; + case TGSI_OPCODE_X2D: + break; + case TGSI_OPCODE_ARA: + break; + case TGSI_OPCODE_ARR: + break; + case TGSI_OPCODE_BRA: + break; + case TGSI_OPCODE_CAL: { + instr->cal(inst->InstructionExtLabel.Label, storage->inputPtr()); + return; + } + break; + case TGSI_OPCODE_RET: { + instr->end(); + return; + } + break; + case TGSI_OPCODE_SSG: + break; + case TGSI_OPCODE_CMP: { + out = instr->cmp(inputs[0], inputs[1], inputs[2]); + } + break; + case TGSI_OPCODE_SCS: { + out = instr->scs(inputs[0]); + } + break; + case TGSI_OPCODE_TXB: + break; + case TGSI_OPCODE_NRM: + break; + case TGSI_OPCODE_DIV: + break; + case TGSI_OPCODE_DP2: + break; + case TGSI_OPCODE_TXL: + break; + case TGSI_OPCODE_BRK: { + instr->brk(); + return; + } + break; + case TGSI_OPCODE_IF: { + instr->ifop(inputs[0]); + storage->setCurrentBlock(instr->currentBlock()); + return; //just update the state + } + break; + case TGSI_OPCODE_LOOP: + break; + case TGSI_OPCODE_REP: + break; + case TGSI_OPCODE_ELSE: { + instr->elseop(); + storage->setCurrentBlock(instr->currentBlock()); + return; //only state update + } + break; + case TGSI_OPCODE_ENDIF: { + instr->endif(); + storage->setCurrentBlock(instr->currentBlock()); + return; //just update the state + } + break; + case TGSI_OPCODE_ENDLOOP: + break; + case TGSI_OPCODE_ENDREP: + break; + case TGSI_OPCODE_PUSHA: + break; + case TGSI_OPCODE_POPA: + break; + case TGSI_OPCODE_CEIL: + break; + case TGSI_OPCODE_I2F: + break; + case TGSI_OPCODE_NOT: + break; + case TGSI_OPCODE_TRUNC: { + out = instr->trunc(inputs[0]); + } + break; + case TGSI_OPCODE_SHL: + break; + case TGSI_OPCODE_SHR: + break; + case TGSI_OPCODE_AND: + break; + case TGSI_OPCODE_OR: + break; + case TGSI_OPCODE_MOD: + break; + case TGSI_OPCODE_XOR: + break; + case TGSI_OPCODE_SAD: + break; + case TGSI_OPCODE_TXF: + break; + case TGSI_OPCODE_TXQ: + break; + case TGSI_OPCODE_CONT: + break; + case TGSI_OPCODE_EMIT: + break; + case TGSI_OPCODE_ENDPRIM: + break; + case TGSI_OPCODE_BGNLOOP2: { + instr->beginLoop(); + storage->setCurrentBlock(instr->currentBlock()); + return; + } + break; + case TGSI_OPCODE_BGNSUB: { + instr->bgnSub(instno); + storage->setCurrentBlock(instr->currentBlock()); + storage->pushTemps(); + return; + } + break; + case TGSI_OPCODE_ENDLOOP2: { + instr->endLoop(); + storage->setCurrentBlock(instr->currentBlock()); + return; + } + break; + case TGSI_OPCODE_ENDSUB: { + instr->endSub(); + storage->setCurrentBlock(instr->currentBlock()); + storage->popArguments(); + storage->popTemps(); + return; + } + break; + case TGSI_OPCODE_NOISE1: + break; + case TGSI_OPCODE_NOISE2: + break; + case TGSI_OPCODE_NOISE3: + break; + case TGSI_OPCODE_NOISE4: + break; + case TGSI_OPCODE_NOP: + break; + case TGSI_OPCODE_TEXBEM: + break; + case TGSI_OPCODE_TEXBEML: + break; + case TGSI_OPCODE_TEXREG2AR: + break; + case TGSI_OPCODE_TEXM3X2PAD: + break; + case TGSI_OPCODE_TEXM3X2TEX: + break; + case TGSI_OPCODE_TEXM3X3PAD: + break; + case TGSI_OPCODE_TEXM3X3TEX: + break; + case TGSI_OPCODE_TEXM3X3SPEC: + break; + case TGSI_OPCODE_TEXM3X3VSPEC: + break; + case TGSI_OPCODE_TEXREG2GB: + break; + case TGSI_OPCODE_TEXREG2RGB: + break; + case TGSI_OPCODE_TEXDP3TEX: + break; + case TGSI_OPCODE_TEXDP3: + break; + case TGSI_OPCODE_TEXM3X3: + break; + case TGSI_OPCODE_TEXM3X2DEPTH: + break; + case TGSI_OPCODE_TEXDEPTH: + break; + case TGSI_OPCODE_BEM: + break; + case TGSI_OPCODE_M4X3: + break; + case TGSI_OPCODE_M3X4: + break; + case TGSI_OPCODE_M3X3: + break; + case TGSI_OPCODE_M3X2: + break; + case TGSI_OPCODE_NRM4: + break; + case TGSI_OPCODE_CALLNZ: + break; + case TGSI_OPCODE_IFC: + break; + case TGSI_OPCODE_BREAKC: + break; + case TGSI_OPCODE_KIL: + break; + case TGSI_OPCODE_END: + instr->end(); + return; + break; + default: + fprintf(stderr, "ERROR: Unknown opcode %d\n", + inst->Instruction.Opcode); + assert(0); + break; + } + + if (!out) { + fprintf(stderr, "ERROR: unsupported opcode %d\n", + inst->Instruction.Opcode); + assert(!"Unsupported opcode"); + } + + /* # not sure if we need this */ + switch( inst->Instruction.Saturate ) { + case TGSI_SAT_NONE: + break; + case TGSI_SAT_ZERO_ONE: + /*TXT( "_SAT" );*/ + break; + case TGSI_SAT_MINUS_PLUS_ONE: + /*TXT( "_SAT[-1,1]" );*/ + break; + default: + assert( 0 ); + } + + /* store results */ + for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + + if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { + storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) { + storage->setTempElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) { + storage->setAddrElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + } else { + fprintf(stderr, "ERROR: unsupported LLVM destination!"); + assert(!"wrong destination"); + } + } +} + + +static void +translate_instructionir(llvm::Module *module, + StorageSoa *storage, + InstructionsSoa *instr, + struct tgsi_full_instruction *inst, + struct tgsi_full_instruction *fi, + unsigned instno) +{ + std::vector< std::vector<llvm::Value*> > inputs(inst->Instruction.NumSrcRegs); + + for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { + struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + std::vector<llvm::Value*> val; + llvm::Value *indIdx = 0; + int swizzle = swizzleInt(src); + + if (src->SrcRegister.Indirect) { + indIdx = storage->addrElement(src->SrcRegisterInd.Index); + indIdx = storage->extractIndex(indIdx); + } + if (src->SrcRegister.File == TGSI_FILE_CONSTANT) { + val = storage->constElement(src->SrcRegister.Index, swizzle, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_INPUT) { + val = storage->inputElement(src->SrcRegister.Index, swizzle, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) { + val = storage->tempElement(src->SrcRegister.Index, swizzle); + } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) { + val = storage->outputElement(src->SrcRegister.Index, swizzle, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { + val = storage->immediateElement(src->SrcRegister.Index, swizzle); + } else { + fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File); + return; + } + + inputs[i] = val; + } + + std::vector<llvm::Value*> out(4); + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: { + } + break; + case TGSI_OPCODE_MOV: { + out = inputs[0]; + } + break; + case TGSI_OPCODE_LIT: { + } + break; + case TGSI_OPCODE_RCP: { + } + break; + case TGSI_OPCODE_RSQ: { + } + break; + case TGSI_OPCODE_EXP: + break; + case TGSI_OPCODE_LOG: + break; + case TGSI_OPCODE_MUL: { + out = instr->mul(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_ADD: { + out = instr->add(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DP3: { + } + break; + case TGSI_OPCODE_DP4: { + } + break; + case TGSI_OPCODE_DST: { + } + break; + case TGSI_OPCODE_MIN: { + } + break; + case TGSI_OPCODE_MAX: { + } + break; + case TGSI_OPCODE_SLT: { + } + break; + case TGSI_OPCODE_SGE: { + } + break; + case TGSI_OPCODE_MAD: { + } + break; + case TGSI_OPCODE_SUB: { + } + break; + case TGSI_OPCODE_LERP: { + } + break; + case TGSI_OPCODE_CND: + break; + case TGSI_OPCODE_CND0: + break; + case TGSI_OPCODE_DOT2ADD: + break; + case TGSI_OPCODE_INDEX: + break; + case TGSI_OPCODE_NEGATE: + break; + case TGSI_OPCODE_FRAC: { + } + break; + case TGSI_OPCODE_CLAMP: + break; + case TGSI_OPCODE_FLOOR: { + } + break; + case TGSI_OPCODE_ROUND: + break; + case TGSI_OPCODE_EXPBASE2: { + } + break; + case TGSI_OPCODE_LOGBASE2: { + } + break; + case TGSI_OPCODE_POWER: { + } + break; + case TGSI_OPCODE_CROSSPRODUCT: { + } + break; + case TGSI_OPCODE_MULTIPLYMATRIX: + break; + case TGSI_OPCODE_ABS: { + } + break; + case TGSI_OPCODE_RCC: + break; + case TGSI_OPCODE_DPH: { + } + break; + case TGSI_OPCODE_COS: { + } + break; + case TGSI_OPCODE_DDX: + break; + case TGSI_OPCODE_DDY: + break; + case TGSI_OPCODE_KILP: { + } + break; + case TGSI_OPCODE_PK2H: + break; + case TGSI_OPCODE_PK2US: + break; + case TGSI_OPCODE_PK4B: + break; + case TGSI_OPCODE_PK4UB: + break; + case TGSI_OPCODE_RFL: + break; + case TGSI_OPCODE_SEQ: + break; + case TGSI_OPCODE_SFL: + break; + case TGSI_OPCODE_SGT: { + } + break; + case TGSI_OPCODE_SIN: { + } + break; + case TGSI_OPCODE_SLE: + break; + case TGSI_OPCODE_SNE: + break; + case TGSI_OPCODE_STR: + break; + case TGSI_OPCODE_TEX: + break; + case TGSI_OPCODE_TXD: + break; + case TGSI_OPCODE_UP2H: + break; + case TGSI_OPCODE_UP2US: + break; + case TGSI_OPCODE_UP4B: + break; + case TGSI_OPCODE_UP4UB: + break; + case TGSI_OPCODE_X2D: + break; + case TGSI_OPCODE_ARA: + break; + case TGSI_OPCODE_ARR: + break; + case TGSI_OPCODE_BRA: + break; + case TGSI_OPCODE_CAL: { + } + break; + case TGSI_OPCODE_RET: { + } + break; + case TGSI_OPCODE_SSG: + break; + case TGSI_OPCODE_CMP: { + } + break; + case TGSI_OPCODE_SCS: { + } + break; + case TGSI_OPCODE_TXB: + break; + case TGSI_OPCODE_NRM: + break; + case TGSI_OPCODE_DIV: + break; + case TGSI_OPCODE_DP2: + break; + case TGSI_OPCODE_TXL: + break; + case TGSI_OPCODE_BRK: { + } + break; + case TGSI_OPCODE_IF: { + } + break; + case TGSI_OPCODE_LOOP: + break; + case TGSI_OPCODE_REP: + break; + case TGSI_OPCODE_ELSE: { + } + break; + case TGSI_OPCODE_ENDIF: { + } + break; + case TGSI_OPCODE_ENDLOOP: + break; + case TGSI_OPCODE_ENDREP: + break; + case TGSI_OPCODE_PUSHA: + break; + case TGSI_OPCODE_POPA: + break; + case TGSI_OPCODE_CEIL: + break; + case TGSI_OPCODE_I2F: + break; + case TGSI_OPCODE_NOT: + break; + case TGSI_OPCODE_TRUNC: { + } + break; + case TGSI_OPCODE_SHL: + break; + case TGSI_OPCODE_SHR: + break; + case TGSI_OPCODE_AND: + break; + case TGSI_OPCODE_OR: + break; + case TGSI_OPCODE_MOD: + break; + case TGSI_OPCODE_XOR: + break; + case TGSI_OPCODE_SAD: + break; + case TGSI_OPCODE_TXF: + break; + case TGSI_OPCODE_TXQ: + break; + case TGSI_OPCODE_CONT: + break; + case TGSI_OPCODE_EMIT: + break; + case TGSI_OPCODE_ENDPRIM: + break; + case TGSI_OPCODE_BGNLOOP2: { + } + break; + case TGSI_OPCODE_BGNSUB: { + } + break; + case TGSI_OPCODE_ENDLOOP2: { + } + break; + case TGSI_OPCODE_ENDSUB: { + } + break; + case TGSI_OPCODE_NOISE1: + break; + case TGSI_OPCODE_NOISE2: + break; + case TGSI_OPCODE_NOISE3: + break; + case TGSI_OPCODE_NOISE4: + break; + case TGSI_OPCODE_NOP: + break; + case TGSI_OPCODE_TEXBEM: + break; + case TGSI_OPCODE_TEXBEML: + break; + case TGSI_OPCODE_TEXREG2AR: + break; + case TGSI_OPCODE_TEXM3X2PAD: + break; + case TGSI_OPCODE_TEXM3X2TEX: + break; + case TGSI_OPCODE_TEXM3X3PAD: + break; + case TGSI_OPCODE_TEXM3X3TEX: + break; + case TGSI_OPCODE_TEXM3X3SPEC: + break; + case TGSI_OPCODE_TEXM3X3VSPEC: + break; + case TGSI_OPCODE_TEXREG2GB: + break; + case TGSI_OPCODE_TEXREG2RGB: + break; + case TGSI_OPCODE_TEXDP3TEX: + break; + case TGSI_OPCODE_TEXDP3: + break; + case TGSI_OPCODE_TEXM3X3: + break; + case TGSI_OPCODE_TEXM3X2DEPTH: + break; + case TGSI_OPCODE_TEXDEPTH: + break; + case TGSI_OPCODE_BEM: + break; + case TGSI_OPCODE_M4X3: + break; + case TGSI_OPCODE_M3X4: + break; + case TGSI_OPCODE_M3X3: + break; + case TGSI_OPCODE_M3X2: + break; + case TGSI_OPCODE_NRM4: + break; + case TGSI_OPCODE_CALLNZ: + break; + case TGSI_OPCODE_IFC: + break; + case TGSI_OPCODE_BREAKC: + break; + case TGSI_OPCODE_KIL: + break; + case TGSI_OPCODE_END: + instr->end(); + return; + break; + default: + fprintf(stderr, "ERROR: Unknown opcode %d\n", + inst->Instruction.Opcode); + assert(0); + break; + } + + if (!out[0]) { + fprintf(stderr, "ERROR: unsupported opcode %d\n", + inst->Instruction.Opcode); + assert(!"Unsupported opcode"); + } + + /* store results */ + for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + + if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { + storage->storeOutput(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) { + storage->storeTemp(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) { + storage->storeAddress(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + } else { + fprintf(stderr, "ERROR: unsupported LLVM destination!"); + assert(!"wrong destination"); + } + } +} + +llvm::Module * +tgsi_to_llvm(struct gallivm_ir *ir, const struct tgsi_token *tokens) +{ + llvm::Module *mod = createBaseShader(); + struct tgsi_parse_context parse; + struct tgsi_full_instruction fi; + struct tgsi_full_declaration fd; + unsigned instno = 0; + Function* shader = mod->getFunction("execute_shader"); + std::ostringstream stream; + if (ir->type == GALLIVM_VS) { + stream << "vs_shader"; + } else { + stream << "fs_shader"; + } + stream << ir->id; + std::string func_name = stream.str(); + shader->setName(func_name.c_str()); + + Function::arg_iterator args = shader->arg_begin(); + Value *ptr_INPUT = args++; + ptr_INPUT->setName("input"); + + BasicBlock *label_entry = new BasicBlock("entry", shader, 0); + + tgsi_parse_init(&parse, tokens); + + fi = tgsi_default_full_instruction(); + fd = tgsi_default_full_declaration(); + Storage storage(label_entry, ptr_INPUT); + Instructions instr(mod, shader, label_entry, &storage); + while(!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + translate_declaration(ir, mod, &storage, + &parse.FullToken.FullDeclaration, + &fd); + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + translate_immediate(&storage, + &parse.FullToken.FullImmediate); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + translate_instruction(mod, &storage, &instr, + &parse.FullToken.FullInstruction, + &fi, instno); + ++instno; + break; + + default: + assert(0); + } + } + + tgsi_parse_free(&parse); + + ir->num_consts = storage.numConsts(); + return mod; +} + +llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, + const struct tgsi_token *tokens) +{ + llvm::Module *mod = createBaseShader(); + struct tgsi_parse_context parse; + struct tgsi_full_instruction fi; + struct tgsi_full_declaration fd; + unsigned instno = 0; + Function* shader = mod->getFunction("execute_shader"); + std::ostringstream stream; + if (ir->type == GALLIVM_VS) { + stream << "vs_shader"; + } else { + stream << "fs_shader"; + } + stream << ir->id; + std::string func_name = stream.str(); + shader->setName(func_name.c_str()); + + Function::arg_iterator args = shader->arg_begin(); + Value *input = args++; + input->setName("input"); + Value *output = args++; + output->setName("output"); + Value *consts = args++; + consts->setName("consts"); + + BasicBlock *label_entry = new BasicBlock("entry", shader, 0); + + tgsi_parse_init(&parse, tokens); + + fi = tgsi_default_full_instruction(); + fd = tgsi_default_full_declaration(); + + StorageSoa storage(label_entry, input, output, consts); + InstructionsSoa instr(mod, shader, label_entry, &storage); + + while(!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + translate_declarationir(ir, mod, &storage, + &parse.FullToken.FullDeclaration, + &fd); + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + translate_immediateir(&storage, + &parse.FullToken.FullImmediate); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + translate_instructionir(mod, &storage, &instr, + &parse.FullToken.FullInstruction, + &fi, instno); + ++instno; + break; + + default: + assert(0); + } + } + + tgsi_parse_free(&parse); + + return mod; +} diff --git a/src/mesa/pipe/llvm/tgsitollvm.h b/src/mesa/pipe/llvm/tgsitollvm.h new file mode 100644 index 00000000000..7ada04d6299 --- /dev/null +++ b/src/mesa/pipe/llvm/tgsitollvm.h @@ -0,0 +1,20 @@ +#ifndef TGSITOLLVM_H +#define TGSITOLLVM_H + + +namespace llvm { + class Module; +} + +struct gallivm_ir; +struct tgsi_token; + + +llvm::Module * tgsi_to_llvm(struct gallivm_ir *ir, + const struct tgsi_token *tokens); + + +llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, + const struct tgsi_token *tokens); + +#endif diff --git a/src/mesa/pipe/p_state.h b/src/mesa/pipe/p_state.h index 83ca43f6785..4d3a6b2f413 100644 --- a/src/mesa/pipe/p_state.h +++ b/src/mesa/pipe/p_state.h @@ -234,14 +234,9 @@ struct pipe_sampler_state unsigned compare_mode:1; /**< PIPE_TEX_COMPARE_x */ unsigned compare_func:3; /**< PIPE_FUNC_x */ unsigned normalized_coords:1; /**< Are coords normalized to [0,1]? */ - float shadow_ambient; /**< shadow test fail color/intensity */ - float min_lod; - float max_lod; - float lod_bias; -#if 0 /* need these? */ - int BaseLevel; /**< min mipmap level, OpenGL 1.2 */ - int MaxLevel; /**< max mipmap level, OpenGL 1.2 */ -#endif + float shadow_ambient; /**< shadow test fail color/intensity */ + float lod_bias; /**< LOD/lambda bias */ + float min_lod, max_lod; /**< LOD clamp range, after bias */ float border_color[4]; float max_anisotropy; }; @@ -277,8 +272,7 @@ struct pipe_texture enum pipe_texture_target target; /**< PIPE_TEXTURE_x */ enum pipe_format format; /**< PIPE_FORMAT_x */ - unsigned first_level; - unsigned last_level; + unsigned last_level; /**< Index of last mipmap level present/defined */ unsigned width[PIPE_MAX_TEXTURE_LEVELS]; unsigned height[PIPE_MAX_TEXTURE_LEVELS]; diff --git a/src/mesa/pipe/p_winsys.h b/src/mesa/pipe/p_winsys.h index 95e3684008d..1e81eebd782 100644 --- a/src/mesa/pipe/p_winsys.h +++ b/src/mesa/pipe/p_winsys.h @@ -112,7 +112,7 @@ struct pipe_winsys /** * Map the entire data store of a buffer object into the client's address. - * flags is bitmask of PIPE_BUFFER_FLAG_READ/WRITE. + * flags is bitmask of PIPE_BUFFER_USAGE_CPU_READ/WRITE flags. */ void *(*buffer_map)( struct pipe_winsys *sws, struct pipe_buffer *buf, diff --git a/src/mesa/pipe/softpipe/sp_state_fs.c b/src/mesa/pipe/softpipe/sp_state_fs.c index 1430be7869a..0b814fc2847 100644 --- a/src/mesa/pipe/softpipe/sp_state_fs.c +++ b/src/mesa/pipe/softpipe/sp_state_fs.c @@ -61,12 +61,16 @@ softpipe_create_fs_state(struct pipe_context *pipe, } #ifdef MESA_LLVM - state->llvm_prog = gallivm_from_tgsi(state->shader.tokens, GALLIVM_FS); + state->llvm_prog = 0; + +#if 0 if (!gallivm_global_cpu_engine()) { gallivm_cpu_engine_create(state->llvm_prog); } else gallivm_cpu_jit_compile(gallivm_global_cpu_engine(), state->llvm_prog); +#endif + #elif defined(__i386__) || defined(__386__) if (softpipe->use_sse) { x86_init_func( &state->sse2_program ); diff --git a/src/mesa/pipe/softpipe/sp_state_sampler.c b/src/mesa/pipe/softpipe/sp_state_sampler.c index 291bbc40ad0..6a5a643c89e 100644 --- a/src/mesa/pipe/softpipe/sp_state_sampler.c +++ b/src/mesa/pipe/softpipe/sp_state_sampler.c @@ -34,6 +34,8 @@ #include "sp_state.h" #include "sp_texture.h" #include "sp_tile_cache.h" +#include "pipe/draw/draw_context.h" + void * @@ -73,6 +75,8 @@ softpipe_set_sampler_texture(struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); + draw_flush(softpipe->draw); + assert(unit < PIPE_MAX_SAMPLERS); softpipe->texture[unit] = softpipe_texture(texture); /* ptr, not struct */ diff --git a/src/mesa/pipe/softpipe/sp_tex_sample.c b/src/mesa/pipe/softpipe/sp_tex_sample.c index 5e215c433ad..325bdb86da5 100644 --- a/src/mesa/pipe/softpipe/sp_tex_sample.c +++ b/src/mesa/pipe/softpipe/sp_tex_sample.c @@ -449,7 +449,6 @@ compute_lambda(struct tgsi_sampler *sampler, } lambda = LOG2(rho); - lambda += lodbias + sampler->state->lod_bias; lambda = CLAMP(lambda, sampler->state->min_lod, sampler->state->max_lod); @@ -457,7 +456,6 @@ compute_lambda(struct tgsi_sampler *sampler, } - /** * Do several things here: * 1. Compute lambda from the texcoords, if needed @@ -477,7 +475,7 @@ choose_mipmap_levels(struct tgsi_sampler *sampler, if (sampler->state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { /* no mipmap selection needed */ *imgFilter = sampler->state->mag_img_filter; - *level0 = *level1 = sampler->texture->first_level; + *level0 = *level1 = (int) sampler->state->min_lod; } else { float lambda; @@ -492,7 +490,7 @@ choose_mipmap_levels(struct tgsi_sampler *sampler, if (lambda < 0.0) { /* XXX threshold depends on the filter */ /* magnifying */ *imgFilter = sampler->state->mag_img_filter; - *level0 = *level1 = sampler->texture->first_level; + *level0 = *level1 = 0; } else { /* minifying */ @@ -503,19 +501,13 @@ choose_mipmap_levels(struct tgsi_sampler *sampler, /* Nearest mipmap level */ const int lvl = (int) (lambda + 0.5); *level0 = - *level1 = CLAMP(lvl, - (int) sampler->texture->first_level, - (int) sampler->texture->last_level); + *level1 = CLAMP(lvl, 0, (int) sampler->texture->last_level); } else { /* Linear interpolation between mipmap levels */ const int lvl = (int) lambda; - *level0 = CLAMP(lvl, - (int) sampler->texture->first_level, - (int) sampler->texture->last_level); - *level1 = CLAMP(lvl + 1, - (int) sampler->texture->first_level, - (int) sampler->texture->last_level); + *level0 = CLAMP(lvl, 0, (int) sampler->texture->last_level); + *level1 = CLAMP(lvl + 1, 0, (int) sampler->texture->last_level); *levelBlend = FRAC(lambda); /* blending weight between levels */ } } diff --git a/src/mesa/pipe/softpipe/sp_texture.c b/src/mesa/pipe/softpipe/sp_texture.c index fd2cc3dbbb4..6de7a9b543b 100644 --- a/src/mesa/pipe/softpipe/sp_texture.c +++ b/src/mesa/pipe/softpipe/sp_texture.c @@ -61,7 +61,7 @@ softpipe_texture_layout(struct softpipe_texture * spt) spt->buffer_size = 0; - for ( level = pt->first_level ; level <= pt->last_level ; level++ ) { + for (level = 0; level <= pt->last_level; level++) { pt->width[level] = width; pt->height[level] = height; pt->depth[level] = depth; @@ -139,6 +139,8 @@ softpipe_get_tex_surface(struct pipe_context *pipe, struct softpipe_texture *spt = softpipe_texture(pt); struct pipe_surface *ps; + assert(level <= pt->last_level); + ps = pipe->winsys->surface_alloc(pipe->winsys); if (ps) { assert(ps->refcount); diff --git a/src/mesa/sources b/src/mesa/sources index 84492c91ac8..96ae3dbca00 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -174,7 +174,6 @@ DRAW_SOURCES = \ pipe/draw/draw_vertex_cache.c \ pipe/draw/draw_vertex_fetch.c \ pipe/draw/draw_vertex_shader.c \ - pipe/draw/draw_vertex_shader_llvm.c \ pipe/draw/draw_vf.c \ pipe/draw/draw_vf_generic.c \ pipe/draw/draw_vf_sse.c \ diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c index 6241e70b550..92263cb6881 100644 --- a/src/mesa/state_tracker/st_atom_sampler.c +++ b/src/mesa/state_tracker/st_atom_sampler.c @@ -147,7 +147,7 @@ update_samplers(struct st_context *st) sampler.lod_bias = st->ctx->Texture.Unit[su].LodBias; #if 1 - sampler.min_lod = texobj->MinLod; + sampler.min_lod = (texobj->MinLod) < 0.0 ? 0.0 : texobj->MinLod; sampler.max_lod = texobj->MaxLod; #else /* min/max lod should really be as follows (untested). diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index 91969185093..2c6ec8421b0 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -158,7 +158,7 @@ find_translated_vp(struct st_context *st, /* * Translate fragment program if needed. */ - if (!stfp->fs) { + if (!stfp->cso) { GLuint inAttr, numIn = 0; for (inAttr = 0; inAttr < FRAG_ATTRIB_MAX; inAttr++) { @@ -179,7 +179,7 @@ find_translated_vp(struct st_context *st, stfp->input_to_slot, stfp->tokens, ST_MAX_SHADER_TOKENS); - assert(stfp->fs); + assert(stfp->cso); } @@ -227,7 +227,7 @@ find_translated_vp(struct st_context *st, if (fpInAttrib >= 0) { GLuint fpInSlot = stfp->input_to_slot[fpInAttrib]; if (fpInSlot != ~0) { - GLuint vpOutSlot = stfp->fs->state.input_map[fpInSlot]; + GLuint vpOutSlot = stfp->cso->state.input_map[fpInSlot]; xvp->output_to_slot[outAttr] = vpOutSlot; numVpOuts++; } @@ -300,7 +300,7 @@ update_linkage( struct st_context *st ) st->pipe->bind_vs_state(st->pipe, st->state.vs->cso->data); st->fp = stfp; - st->state.fs = stfp->fs; + st->state.fs = stfp->cso; st->pipe->bind_fs_state(st->pipe, st->state.fs->data); st->vertex_result_to_slot = xvp->output_to_slot; diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index ab98b54bab4..410062e1e80 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -341,7 +341,7 @@ clear_with_quad(GLcontext *ctx, if (!stfp) { stfp = make_frag_shader(st); } - pipe->bind_fs_state(pipe, stfp->fs->data); + pipe->bind_fs_state(pipe, stfp->cso->data); } /* vertex shader state: color/position pass-through */ diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 07886e79829..3245a7488bc 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -471,7 +471,7 @@ make_texture(struct st_context *st, assert(pipeFormat); cpp = st_sizeof_format(pipeFormat); - pt = st_texture_create(st, PIPE_TEXTURE_2D, pipeFormat, 0, 0, width, height, + pt = st_texture_create(st, PIPE_TEXTURE_2D, pipeFormat, 0, width, height, 1, 0); if (!pt) return NULL; @@ -665,7 +665,7 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, } /* fragment shader state: TEX lookup program */ - pipe->bind_fs_state(pipe, stfp->fs->data); + pipe->bind_fs_state(pipe, stfp->cso->data); /* vertex shader state: position + texcoord pass-through */ pipe->bind_vs_state(pipe, stvp->cso->data); @@ -1017,7 +1017,7 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height, /** * Create a texture. */ - pt = st_texture_create(ctx->st, PIPE_TEXTURE_2D, format, 0, 0, width, height, + pt = st_texture_create(ctx->st, PIPE_TEXTURE_2D, format, 0, width, height, 1, 0); if (!pt) return NULL; @@ -1241,7 +1241,7 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, psRead = rbRead->surface; format = psRead->format; - pt = st_texture_create(ctx->st, PIPE_TEXTURE_2D, format, 0, 0, width, height, + pt = st_texture_create(ctx->st, PIPE_TEXTURE_2D, format, 0, width, height, 1, 0); if (!pt) return; diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index f1f33fb0dd8..af3ee655048 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -168,10 +168,10 @@ static void st_program_string_notify( GLcontext *ctx, stfp->serialNo++; - if (stfp->fs) { + if (stfp->cso) { /* free the TGSI code */ // cso_delete(stfp->vs); - stfp->fs = NULL; + stfp->cso = NULL; } stfp->param_state = stfp->Base.Base.Parameters->StateFlags; diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index a1bbb3a8312..868c5f3c5f9 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -180,20 +180,13 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, if (!strb) return; + if (format == GL_RGBA && type == GL_FLOAT) { /* write tile(row) directly into user's buffer */ df = (GLfloat *) _mesa_image_address2d(&clippedPacking, dest, width, height, format, type, 0, 0); dfStride = width * 4; } -#if 0 - else if (format == GL_DEPTH_COMPONENT && type == GL_FLOAT) { - /* write tile(row) directly into user's buffer */ - df = (GLfloat *) _mesa_image_address2d(&clippedPacking, dest, width, - height, format, type, 0, 0); - dfStride = width; - } -#endif else { /* write tile(row) into temp row buffer */ df = (GLfloat *) temp; @@ -209,22 +202,86 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, yStep = 1; } - /* Do a row at a time to flip image data vertically */ - for (i = 0; i < height; i++) { - pipe_get_tile_rgba(pipe, strb->surface, x, y, width, 1, df); - y += yStep; - df += dfStride; - if (!dfStride) { - /* convert GLfloat to user's format/type */ - GLvoid *dst = _mesa_image_address2d(&clippedPacking, dest, width, - height, format, type, i, 0); + /* + * Copy pixels from pipe_surface to user memory + */ + { + /* dest of first pixel in client memory */ + GLubyte *dst = _mesa_image_address2d(&clippedPacking, dest, width, + height, format, type, 0, 0); + /* dest row stride */ + const GLint dstStride = _mesa_image_row_stride(&clippedPacking, width, + format, type); + + if (strb->surface->format == PIPE_FORMAT_S8Z24_UNORM) { if (format == GL_DEPTH_COMPONENT) { - _mesa_pack_depth_span(ctx, width, dst, type, - (GLfloat *) temp, &clippedPacking); + for (i = 0; i < height; i++) { + GLuint ztemp[MAX_WIDTH], j; + GLfloat zfloat[MAX_WIDTH]; + const double scale = 1.0 / ((1 << 24) - 1); + pipe_get_tile_raw(pipe, strb->surface, x, y, + width, 1, ztemp, 0); + y += yStep; + for (j = 0; j < width; j++) { + zfloat[j] = (float) (scale * (ztemp[j] & 0xffffff)); + } + _mesa_pack_depth_span(ctx, width, dst, type, + zfloat, &clippedPacking); + dst += dstStride; + } } else { - _mesa_pack_rgba_span_float(ctx, width, temp, format, type, dst, - &clippedPacking, transferOps); + /* untested, but simple: */ + assert(format == GL_DEPTH_STENCIL_EXT); + for (i = 0; i < height; i++) { + pipe_get_tile_raw(pipe, strb->surface, x, y, width, 1, dst, 0); + y += yStep; + dst += dstStride; + } + } + } + else if (strb->surface->format == PIPE_FORMAT_Z16_UNORM) { + for (i = 0; i < height; i++) { + GLshort ztemp[MAX_WIDTH], j; + GLfloat zfloat[MAX_WIDTH]; + const double scale = 1.0 / 0xffff; + pipe_get_tile_raw(pipe, strb->surface, x, y, width, 1, ztemp, 0); + y += yStep; + for (j = 0; j < width; j++) { + zfloat[j] = (float) (scale * ztemp[j]); + } + _mesa_pack_depth_span(ctx, width, dst, type, + zfloat, &clippedPacking); + dst += dstStride; + } + } + else if (strb->surface->format == PIPE_FORMAT_Z32_UNORM) { + for (i = 0; i < height; i++) { + GLuint ztemp[MAX_WIDTH], j; + GLfloat zfloat[MAX_WIDTH]; + const double scale = 1.0 / 0xffffffff; + pipe_get_tile_raw(pipe, strb->surface, x, y, width, 1, ztemp, 0); + y += yStep; + for (j = 0; j < width; j++) { + zfloat[j] = (float) (scale * ztemp[j]); + } + _mesa_pack_depth_span(ctx, width, dst, type, + zfloat, &clippedPacking); + dst += dstStride; + } + } + else { + /* RGBA format */ + /* Do a row at a time to flip image data vertically */ + for (i = 0; i < height; i++) { + pipe_get_tile_rgba(pipe, strb->surface, x, y, width, 1, df); + y += yStep; + df += dfStride; + if (!dfStride) { + _mesa_pack_rgba_span_float(ctx, width, temp, format, type, dst, + &clippedPacking, transferOps); + dst += dstStride; + } } } } diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 3350254654b..0ea367549b8 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -57,15 +57,10 @@ struct st_texture_object { struct gl_texture_object base; /* The "parent" object */ - /* The texture must include at least these levels once validated: + /* The texture must include at levels [0..lastLevel] once validated: */ - GLuint firstLevel; GLuint lastLevel; - /* Offset for firstLevel image: - */ - GLuint textureOffset; - /* On validation any active images held in main memory or in other * textures will be copied to this texture and the old storage freed. */ @@ -368,7 +363,6 @@ guess_and_alloc_texture(struct st_context *st, stObj->pt = st_texture_create(st, gl_target_to_pipe(stObj->base.Target), st_mesa_format_to_pipe_format(stImage->base.TexFormat->MesaFormat), - firstLevel, lastLevel, width, height, @@ -379,26 +373,6 @@ guess_and_alloc_texture(struct st_context *st, } - - -static GLuint -target_to_face(GLenum target) -{ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: - return ((GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X); - default: - return 0; - } -} - - - /* There are actually quite a few combinations this will work for, * more than what I've listed here. */ @@ -487,6 +461,43 @@ try_pbo_upload(GLcontext *ctx, } +/** + * Adjust pixel unpack params and image dimensions to strip off the + * texture border. + * Gallium doesn't support texture borders. They've seldem been used + * and seldom been implemented correctly anyway. + * \param unpackNew returns the new pixel unpack parameters + */ +static void +strip_texture_border(GLint border, + GLint *width, GLint *height, GLint *depth, + const struct gl_pixelstore_attrib *unpack, + struct gl_pixelstore_attrib *unpackNew) +{ + assert(border > 0); /* sanity check */ + + *unpackNew = *unpack; + + if (unpackNew->RowLength == 0) + unpackNew->RowLength = *width; + + if (depth && unpackNew->ImageHeight == 0) + unpackNew->ImageHeight = *height; + + unpackNew->SkipPixels += border; + if (height) + unpackNew->SkipRows += border; + if (depth) + unpackNew->SkipImages += border; + + assert(*width >= 3); + *width = *width - 2 * border; + if (height && *height >= 3) + *height = *height - 2 * border; + if (depth && *depth >= 3) + *depth = *depth - 2 * border; +} + static void st_TexImage(GLcontext * ctx, @@ -503,16 +514,26 @@ st_TexImage(GLcontext * ctx, { struct st_texture_object *stObj = st_texture_object(texObj); struct st_texture_image *stImage = st_texture_image(texImage); - GLint postConvWidth = width; - GLint postConvHeight = height; + GLint postConvWidth, postConvHeight; GLint texelBytes, sizeInBytes; GLuint dstRowStride; - + struct gl_pixelstore_attrib unpackNB; DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), level, width, height, depth, border); - stImage->face = target_to_face(target); + /* gallium does not support texture borders, strip it off */ + if (border) { + strip_texture_border(border, &width, &height, &depth, + unpack, &unpackNB); + unpack = &unpackNB; + border = 0; + } + + postConvWidth = width; + postConvHeight = height; + + stImage->face = _mesa_tex_target_to_face(target); stImage->level = level; if (ctx->_ImageTransferState & IMAGE_CONVOLUTION_BIT) { @@ -558,12 +579,12 @@ st_TexImage(GLcontext * ctx, _mesa_align_free(texImage->Data); } - /* If this is the only texture image in the texture, could call + /* If this is the only mipmap level in the texture, could call * bmBufferData with NULL data to free the old block and avoid * waiting on any outstanding fences. */ if (stObj->pt && - stObj->pt->first_level == level && + /*stObj->pt->first_level == level &&*/ stObj->pt->last_level == level && stObj->pt->target != PIPE_TEXTURE_CUBE && !st_texture_match_image(stObj->pt, &stImage->base, @@ -1336,13 +1357,8 @@ calculate_first_last_level(struct st_texture_object *stObj) firstLevel = lastLevel = tObj->BaseLevel; } else { - firstLevel = tObj->BaseLevel + (GLint) (tObj->MinLod + 0.5); - firstLevel = MAX2(firstLevel, tObj->BaseLevel); - lastLevel = tObj->BaseLevel + (GLint) (tObj->MaxLod + 0.5); - lastLevel = MAX2(lastLevel, tObj->BaseLevel); - lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2); - lastLevel = MIN2(lastLevel, tObj->MaxLevel); - lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */ + firstLevel = 0; + lastLevel = MIN2(tObj->MaxLevel - tObj->BaseLevel, baseImage->MaxLog2); } break; case GL_TEXTURE_RECTANGLE_NV: @@ -1353,8 +1369,6 @@ calculate_first_last_level(struct st_texture_object *stObj) return; } - /* save these values */ - stObj->firstLevel = firstLevel; stObj->lastLevel = lastLevel; } @@ -1362,15 +1376,16 @@ calculate_first_last_level(struct st_texture_object *stObj) static void copy_image_data_to_texture(struct st_context *st, struct st_texture_object *stObj, + GLuint dstLevel, struct st_texture_image *stImage) { if (stImage->pt) { /* Copy potentially with the blitter: */ st_texture_image_copy(st->pipe, - stObj->pt, /* dest texture */ - stImage->face, stImage->level, - stImage->pt /* src texture */ + stObj->pt, dstLevel, /* dest texture, level */ + stImage->pt, /* src texture */ + stImage->face ); st->pipe->texture_release(st->pipe, &stImage->pt); @@ -1411,7 +1426,7 @@ st_finalize_texture(GLcontext *ctx, const GLuint nr_faces = (stObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; int comp_byte = 0; int cpp; - GLuint face, i; + GLuint face; struct st_texture_image *firstImage; *needFlush = GL_FALSE; @@ -1423,7 +1438,7 @@ st_finalize_texture(GLcontext *ctx, /* What levels must the texture include at a minimum? */ calculate_first_last_level(stObj); - firstImage = st_texture_image(stObj->base.Image[0][stObj->firstLevel]); + firstImage = st_texture_image(stObj->base.Image[0][stObj->base.BaseLevel]); /* Fallback case: */ @@ -1442,7 +1457,6 @@ st_finalize_texture(GLcontext *ctx, */ if (firstImage->pt && firstImage->pt != stObj->pt && - firstImage->pt->first_level <= stObj->firstLevel && firstImage->pt->last_level >= stObj->lastLevel) { if (stObj->pt) @@ -1461,18 +1475,11 @@ st_finalize_texture(GLcontext *ctx, /* Check texture can hold all active levels. Check texture matches * target, imageFormat, etc. - * - * XXX: For some layouts (eg i945?), the test might have to be - * first_level == firstLevel, as the texture isn't valid except at the - * original start level. Hope to get around this by - * programming minLod, maxLod, baseLevel into the hardware and - * leaving the texture alone. */ if (stObj->pt && (stObj->pt->target != gl_target_to_pipe(stObj->base.Target) || stObj->pt->format != st_mesa_format_to_pipe_format(firstImage->base.TexFormat->MesaFormat) || - stObj->pt->first_level != stObj->firstLevel || stObj->pt->last_level != stObj->lastLevel || stObj->pt->width[0] != firstImage->base.Width || stObj->pt->height[0] != firstImage->base.Height || @@ -1489,7 +1496,6 @@ st_finalize_texture(GLcontext *ctx, stObj->pt = st_texture_create(ctx->st, gl_target_to_pipe(stObj->base.Target), st_mesa_format_to_pipe_format(firstImage->base.TexFormat->MesaFormat), - stObj->firstLevel, stObj->lastLevel, firstImage->base.Width, firstImage->base.Height, @@ -1500,14 +1506,15 @@ st_finalize_texture(GLcontext *ctx, /* Pull in any images not in the object's texture: */ for (face = 0; face < nr_faces; face++) { - for (i = stObj->firstLevel; i <= stObj->lastLevel; i++) { + GLuint level; + for (level = 0; level <= stObj->lastLevel; level++) { struct st_texture_image *stImage = - st_texture_image(stObj->base.Image[face][i]); + st_texture_image(stObj->base.Image[face][stObj->base.BaseLevel + level]); /* Need to import images in main memory or held in other textures. */ - if (stObj->pt != stImage->pt) { - copy_image_data_to_texture(ctx->st, stObj, stImage); + if (stImage && stObj->pt != stImage->pt) { + copy_image_data_to_texture(ctx->st, stObj, level, stImage); *needFlush = GL_TRUE; } } diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index a756055898d..59d1590f053 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -166,6 +166,15 @@ struct st_context struct st_fragment_program *combined_prog; } bitmap; + /** For gen/render mipmap feature */ + struct { + void *blend_cso; + void *depthstencil_cso; + void *rasterizer_cso; + struct st_fragment_program *stfp; + struct st_vertex_program *stvp; + } gen_mipmap; + struct cso_cache *cache; }; diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index a6ac9a55fb3..459941cca87 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -29,11 +29,14 @@ #include "main/imports.h" #include "main/mipmap.h" #include "main/teximage.h" +#include "main/texformat.h" #include "shader/prog_instruction.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" #include "pipe/cso_cache/cso_cache.h" #include "st_context.h" @@ -45,16 +48,6 @@ -static void *blend_cso = NULL; -static void *depthstencil_cso = NULL; -static void *rasterizer_cso = NULL; -static void *sampler_cso = NULL; - -static struct st_fragment_program *stfp = NULL; -static struct st_vertex_program *stvp = NULL; - - - static struct st_fragment_program * make_tex_fragment_program(GLcontext *ctx) { @@ -115,33 +108,20 @@ st_init_generate_mipmap(struct st_context *st) struct pipe_context *pipe = st->pipe; struct pipe_blend_state blend; struct pipe_rasterizer_state rasterizer; - struct pipe_sampler_state sampler; struct pipe_depth_stencil_alpha_state depthstencil; - assert(!blend_cso); - memset(&blend, 0, sizeof(blend)); blend.colormask = PIPE_MASK_RGBA; - blend_cso = pipe->create_blend_state(pipe, &blend); + st->gen_mipmap.blend_cso = pipe->create_blend_state(pipe, &blend); memset(&depthstencil, 0, sizeof(depthstencil)); - depthstencil_cso = pipe->create_depth_stencil_alpha_state(pipe, &depthstencil); + st->gen_mipmap.depthstencil_cso = pipe->create_depth_stencil_alpha_state(pipe, &depthstencil); memset(&rasterizer, 0, sizeof(rasterizer)); - rasterizer_cso = pipe->create_rasterizer_state(pipe, &rasterizer); - - memset(&sampler, 0, sizeof(sampler)); - sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; - sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; - sampler.normalized_coords = 1; - sampler_cso = pipe->create_sampler_state(pipe, &sampler); + st->gen_mipmap.rasterizer_cso = pipe->create_rasterizer_state(pipe, &rasterizer); - stfp = make_tex_fragment_program(st->ctx); - stvp = st_make_passthrough_vertex_shader(st, GL_FALSE); + st->gen_mipmap.stfp = make_tex_fragment_program(st->ctx); + st->gen_mipmap.stvp = st_make_passthrough_vertex_shader(st, GL_FALSE); } @@ -150,17 +130,11 @@ st_destroy_generate_mipmpap(struct st_context *st) { struct pipe_context *pipe = st->pipe; - pipe->delete_blend_state(pipe, blend_cso); - pipe->delete_depth_stencil_alpha_state(pipe, depthstencil_cso); - pipe->delete_rasterizer_state(pipe, rasterizer_cso); - pipe->delete_sampler_state(pipe, sampler_cso); + pipe->delete_blend_state(pipe, st->gen_mipmap.blend_cso); + pipe->delete_depth_stencil_alpha_state(pipe, st->gen_mipmap.depthstencil_cso); + pipe->delete_rasterizer_state(pipe, st->gen_mipmap.rasterizer_cso); /* XXX free stfp, stvp */ - - blend_cso = NULL; - depthstencil_cso = NULL; - rasterizer_cso = NULL; - sampler_cso = NULL; } @@ -239,15 +213,20 @@ draw_quad(GLcontext *ctx) */ static boolean st_render_mipmap(struct st_context *st, + GLenum target, struct pipe_texture *pt, uint baseLevel, uint lastLevel) { struct pipe_context *pipe = st->pipe; struct pipe_framebuffer_state fb; - const uint face = 0, zslice = 0; - const uint first_level_save = pt->first_level; + struct pipe_sampler_state sampler; + void *sampler_cso; + const uint face = _mesa_tex_target_to_face(target), zslice = 0; + /*const uint first_level_save = pt->first_level;*/ uint dstLevel; + assert(target != GL_TEXTURE_3D); /* not done yet */ + /* check if we can render in the texture's format */ if (!pipe->is_format_supported(pipe, pt->format, PIPE_SURFACE)) { return FALSE; @@ -257,16 +236,30 @@ st_render_mipmap(struct st_context *st, memset(&fb, 0, sizeof(fb)); fb.num_cbufs = 1; + /* sampler state */ + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.normalized_coords = 1; + + /* bind CSOs */ - pipe->bind_blend_state(pipe, blend_cso); - pipe->bind_depth_stencil_alpha_state(pipe, depthstencil_cso); - pipe->bind_rasterizer_state(pipe, rasterizer_cso); - pipe->bind_sampler_state(pipe, 0, sampler_cso); + pipe->bind_blend_state(pipe, st->gen_mipmap.blend_cso); + pipe->bind_depth_stencil_alpha_state(pipe, st->gen_mipmap.depthstencil_cso); + pipe->bind_rasterizer_state(pipe, st->gen_mipmap.rasterizer_cso); /* bind shaders */ - pipe->bind_fs_state(pipe, stfp->fs->data); - pipe->bind_vs_state(pipe, stvp->cso->data); + pipe->bind_fs_state(pipe, st->gen_mipmap.stfp->cso->data); + pipe->bind_vs_state(pipe, st->gen_mipmap.stvp->cso->data); + /* + * XXX for small mipmap levels, it may be faster to use the software + * fallback path... + */ for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { const uint srcLevel = dstLevel - 1; @@ -276,20 +269,29 @@ st_render_mipmap(struct st_context *st, fb.cbufs[0] = pipe->get_tex_surface(pipe, pt, face, dstLevel, zslice); pipe->set_framebuffer_state(pipe, &fb); + /* + * Setup sampler state + */ + sampler.min_lod = sampler.max_lod = srcLevel; + sampler_cso = pipe->create_sampler_state(pipe, &sampler); + pipe->bind_sampler_state(pipe, 0, sampler_cso); + simple_viewport(pipe, pt->width[dstLevel], pt->height[dstLevel]); /* * Setup src texture, override pt->first_level so we sample from * the right mipmap level. */ - pt->first_level = srcLevel; + /*pt->first_level = srcLevel;*/ pipe->set_sampler_texture(pipe, 0, pt); draw_quad(st->ctx); + + pipe->delete_sampler_state(pipe, sampler_cso); } /* restore first_level */ - pt->first_level = first_level_save; + /*pt->first_level = first_level_save;*/ /* restore pipe state */ if (st->state.rasterizer) @@ -307,6 +309,58 @@ st_render_mipmap(struct st_context *st, } +static void +fallback_generate_mipmap(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj) +{ + struct pipe_context *pipe = ctx->st->pipe; + struct pipe_winsys *ws = pipe->winsys; + struct pipe_texture *pt = st_get_texobj_texture(texObj); + const uint baseLevel = texObj->BaseLevel; + const uint lastLevel = pt->last_level; + const uint face = _mesa_tex_target_to_face(target), zslice = 0; + uint dstLevel; + GLenum datatype; + GLuint comps; + + assert(target != GL_TEXTURE_3D); /* not done yet */ + + _mesa_format_to_type_and_comps(texObj->Image[face][baseLevel]->TexFormat, + &datatype, &comps); + + for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { + const uint srcLevel = dstLevel - 1; + struct pipe_surface *srcSurf, *dstSurf; + const ubyte *srcData; + ubyte *dstData; + + srcSurf = pipe->get_tex_surface(pipe, pt, face, srcLevel, zslice); + dstSurf = pipe->get_tex_surface(pipe, pt, face, dstLevel, zslice); + + srcData = (ubyte *) ws->buffer_map(ws, srcSurf->buffer, + PIPE_BUFFER_USAGE_CPU_READ) + + srcSurf->offset; + dstData = (ubyte *) ws->buffer_map(ws, dstSurf->buffer, + PIPE_BUFFER_USAGE_CPU_WRITE) + + dstSurf->offset; + + _mesa_generate_mipmap_level(target, datatype, comps, + 0 /*border*/, + pt->width[srcLevel], pt->height[srcLevel], pt->depth[srcLevel], + srcSurf->pitch * srcSurf->cpp, /* stride in bytes */ + srcData, + pt->width[dstLevel], pt->height[dstLevel], pt->depth[dstLevel], + dstSurf->pitch * dstSurf->cpp, /* stride in bytes */ + dstData); + + ws->buffer_unmap(ws, srcSurf->buffer); + ws->buffer_unmap(ws, dstSurf->buffer); + + pipe_surface_reference(&srcSurf, NULL); + pipe_surface_reference(&dstSurf, NULL); + } +} + void st_generate_mipmap(GLcontext *ctx, GLenum target, @@ -318,13 +372,11 @@ st_generate_mipmap(GLcontext *ctx, GLenum target, const uint lastLevel = pt->last_level; uint dstLevel; - if (!st_render_mipmap(st, pt, baseLevel, lastLevel)) { - abort(); - /* XXX the following won't really work at this time */ - _mesa_generate_mipmap(ctx, target, texObj); - return; + if (!st_render_mipmap(st, target, pt, baseLevel, lastLevel)) { + fallback_generate_mipmap(ctx, target, texObj); } + /* Fill in the Mesa gl_texture_image fields */ for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { const uint srcLevel = dstLevel - 1; const struct gl_texture_image *srcImage @@ -336,7 +388,6 @@ st_generate_mipmap(GLcontext *ctx, GLenum target, uint dstDepth = pt->depth[dstLevel]; uint border = srcImage->Border; - dstImage = _mesa_get_tex_image(ctx, texObj, target, dstLevel); if (!dstImage) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps"); @@ -359,5 +410,4 @@ st_generate_mipmap(GLcontext *ctx, GLenum target, stImage = (struct st_texture_image *) dstImage; stImage->pt = pt; } - } diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 84a90940018..c8297badedd 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -420,7 +420,7 @@ st_translate_fragment_program(struct st_context *st, fs.tokens = tokensOut; cso = st_cached_fs_state(st, &fs); - stfp->fs = cso; + stfp->cso = cso; if (0) _mesa_print_program(&stfp->Base.Base); diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index de02c3185fd..ea1dde4a7a3 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -62,7 +62,7 @@ struct st_fragment_program struct tgsi_token tokens[ST_MAX_SHADER_TOKENS]; /** Pointer to the corresponding cached shader */ - const struct cso_fragment_shader *fs; + const struct cso_fragment_shader *cso; GLuint param_state; diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index 844a9f80d85..2622d009530 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -62,12 +62,14 @@ target_to_target(GLenum target) /** * Allocate a new pipe_texture object + * width0, height0, depth0 are the dimensions of the level 0 image + * (the highest resolution). last_level indicates how many mipmap levels + * to allocate storage for. For non-mipmapped textures, this will be zero. */ struct pipe_texture * st_texture_create(struct st_context *st, enum pipe_texture_target target, enum pipe_format format, - GLuint first_level, GLuint last_level, GLuint width0, GLuint height0, @@ -78,15 +80,15 @@ st_texture_create(struct st_context *st, assert(target <= PIPE_TEXTURE_CUBE); - DBG("%s target %s format %s level %d..%d\n", __FUNCTION__, + DBG("%s target %s format %s last_level %d\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), - _mesa_lookup_enum_by_nr(format), first_level, last_level); + _mesa_lookup_enum_by_nr(format), last_level); assert(format); + memset(&pt, 0, sizeof(pt)); pt.target = target; pt.format = format; - pt.first_level = first_level; pt.last_level = last_level; pt.width[0] = width0; pt.height[0] = height0; @@ -266,23 +268,36 @@ st_texture_image_data(struct pipe_context *pipe, */ void st_texture_image_copy(struct pipe_context *pipe, - struct pipe_texture *dst, - GLuint face, GLuint level, - struct pipe_texture *src) + struct pipe_texture *dst, GLuint dstLevel, + struct pipe_texture *src, + GLuint face) { - GLuint width = src->width[level]; - GLuint height = src->height[level]; - GLuint depth = src->depth[level]; + GLuint width = dst->width[dstLevel]; + GLuint height = dst->height[dstLevel]; + GLuint depth = dst->depth[dstLevel]; struct pipe_surface *src_surface; struct pipe_surface *dst_surface; GLuint i; + /* XXX this is a hack */ if (dst->compressed) height /= 4; for (i = 0; i < depth; i++) { - dst_surface = pipe->get_tex_surface(pipe, dst, face, level, i); - src_surface = pipe->get_tex_surface(pipe, src, face, level, i); + GLuint srcLevel; + + /* find src texture level of needed size */ + for (srcLevel = 0; srcLevel <= src->last_level; srcLevel++) { + if (src->width[srcLevel] == width && + src->height[srcLevel] == height) { + break; + } + } + assert(src->width[srcLevel] == width); + assert(src->height[srcLevel] == height); + + dst_surface = pipe->get_tex_surface(pipe, dst, face, dstLevel, i); + src_surface = pipe->get_tex_surface(pipe, src, face, srcLevel, i); pipe->surface_copy(pipe, dst_surface, diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h index 0b87a494c38..72324cd9ab6 100644 --- a/src/mesa/state_tracker/st_texture.h +++ b/src/mesa/state_tracker/st_texture.h @@ -39,7 +39,6 @@ extern struct pipe_texture * st_texture_create(struct st_context *st, enum pipe_texture_target target, enum pipe_format format, - GLuint first_level, GLuint last_level, GLuint width0, GLuint height0, @@ -98,9 +97,9 @@ st_texture_image_data(struct pipe_context *pipe, */ extern void st_texture_image_copy(struct pipe_context *pipe, - struct pipe_texture *dst, - GLuint face, GLuint level, - struct pipe_texture *src); + struct pipe_texture *dst, GLuint dstLevel, + struct pipe_texture *src, + GLuint face); #endif |