diff options
author | Eric Anholt <[email protected]> | 2010-11-02 20:15:45 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2010-11-03 06:08:27 -0700 |
commit | 6ad0283f482d96f060555f8973557a4d10f9addf (patch) | |
tree | 509998cc83be8d84ef6226347b730f29e28d4956 | |
parent | bb1540835056cdea5db6f55b19c0c87358f14cd1 (diff) |
intel: Remove the magic unaligned memcpy code.
In testing on Ironlake, the histogram of clocks/pixel results for the
system memcpy and magic unaligned memcpy show no noticeable difference
(and no statistically significant difference with the 5510 samples
taken, though the stddev is large due to what looks like the cache
effects from the different texture sizes used).
-rw-r--r-- | src/mesa/drivers/dri/intel/intel_tex.c | 89 |
1 files changed, 0 insertions, 89 deletions
diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c index 743a0432e43..2c21ea0576e 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.c +++ b/src/mesa/drivers/dri/intel/intel_tex.c @@ -61,88 +61,6 @@ intelFreeTextureImageData(struct gl_context * ctx, struct gl_texture_image *texI } } - -/* The system memcpy (at least on ubuntu 5.10) has problems copying - * to agp (writecombined) memory from a source which isn't 64-byte - * aligned - there is a 4x performance falloff. - * - * The x86 __memcpy is immune to this but is slightly slower - * (10%-ish) than the system memcpy. - * - * The sse_memcpy seems to have a slight cliff at 64/32 bytes, but - * isn't much faster than x86_memcpy for agp copies. - * - * TODO: switch dynamically. - */ -static void * -do_memcpy(void *dest, const void *src, size_t n) -{ - if ((((unsigned long) src) & 63) || (((unsigned long) dest) & 63)) { - return __memcpy(dest, src, n); - } - else - return memcpy(dest, src, n); -} - - -#if DO_DEBUG && !defined(__ia64__) - -#ifndef __x86_64__ -static unsigned -fastrdtsc(void) -{ - unsigned eax; - __asm__ volatile ("\t" - "pushl %%ebx\n\t" - "cpuid\n\t" ".byte 0x0f, 0x31\n\t" - "popl %%ebx\n":"=a" (eax) - :"0"(0) - :"ecx", "edx", "cc"); - - return eax; -} -#else -static unsigned -fastrdtsc(void) -{ - unsigned eax; - __asm__ volatile ("\t" "cpuid\n\t" ".byte 0x0f, 0x31\n\t":"=a" (eax) - :"0"(0) - :"ecx", "edx", "ebx", "cc"); - - return eax; -} -#endif - -static unsigned -time_diff(unsigned t, unsigned t2) -{ - return ((t < t2) ? t2 - t : 0xFFFFFFFFU - (t - t2 - 1)); -} - - -static void * -timed_memcpy(void *dest, const void *src, size_t n) -{ - void *ret; - unsigned t1, t2; - double rate; - - if ((((unsigned) src) & 63) || (((unsigned) dest) & 63)) - printf("Warning - non-aligned texture copy!\n"); - - t1 = fastrdtsc(); - ret = do_memcpy(dest, src, n); - t2 = fastrdtsc(); - - rate = time_diff(t1, t2); - rate /= (double) n; - printf("timed_memcpy: %u %u --> %f clocks/byte\n", t1, t2, rate); - return ret; -} -#endif /* DO_DEBUG */ - - /** * Called via ctx->Driver.GenerateMipmap() * This is basically a wrapper for _mesa_meta_GenerateMipmap() which checks @@ -202,11 +120,4 @@ intelInitTextureFuncs(struct dd_function_table *functions) functions->NewTextureImage = intelNewTextureImage; functions->DeleteTexture = intelDeleteTextureObject; functions->FreeTexImageData = intelFreeTextureImageData; - -#if DO_DEBUG && !defined(__ia64__) - if (INTEL_DEBUG & DEBUG_BUFMGR) - functions->TextureMemCpy = timed_memcpy; - else -#endif - functions->TextureMemCpy = do_memcpy; } |